Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#include <errno.h>                // errno
18
#ifdef WITH_MIMALLOC
19
// Forward declarations of functions used in our mimalloc modifications
20
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
21
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
22
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
23
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
24
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
25
#  include "pycore_mimalloc.h"
26
#  include "mimalloc/static.c"
27
#  include "mimalloc/internal.h"  // for stats
28
#endif
29
30
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
31
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
32
#endif
33
34
#undef  uint
35
1.99G
#define uint pymem_uint
36
37
38
/* Defined in tracemalloc.c */
39
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
40
41
static void _PyObject_DebugDumpAddress(const void *p);
42
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
43
44
45
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
46
static void set_up_debug_hooks_unlocked(void);
47
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
49
50
51
/***************************************/
52
/* low-level allocator implementations */
53
/***************************************/
54
55
/* the default raw allocator (wraps malloc) */
56
57
void *
58
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
59
250M
{
60
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
61
       for malloc(0), which would be treated as an error. Some platforms would
62
       return a pointer with no memory behind it, which would break pymalloc.
63
       To solve these problems, allocate an extra byte. */
64
250M
    if (size == 0)
65
42.0M
        size = 1;
66
250M
    return malloc(size);
67
250M
}
68
69
void *
70
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
71
152k
{
72
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
73
       for calloc(0, 0), which would be treated as an error. Some platforms
74
       would return a pointer with no memory behind it, which would break
75
       pymalloc.  To solve these problems, allocate an extra byte. */
76
152k
    if (nelem == 0 || elsize == 0) {
77
671
        nelem = 1;
78
671
        elsize = 1;
79
671
    }
80
152k
    return calloc(nelem, elsize);
81
152k
}
82
83
void *
84
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
85
9.00M
{
86
9.00M
    if (size == 0)
87
0
        size = 1;
88
9.00M
    return realloc(ptr, size);
89
9.00M
}
90
91
void
92
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
93
251M
{
94
251M
    free(ptr);
95
251M
}
96
97
#ifdef WITH_MIMALLOC
98
99
static void
100
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
101
0
{
102
#ifdef Py_GIL_DISABLED
103
    // Clear the QSBR goal and remove the page from the QSBR linked list.
104
    page->qsbr_goal = 0;
105
    if (page->qsbr_node.next != NULL) {
106
        llist_remove(&page->qsbr_node);
107
    }
108
#endif
109
0
}
110
111
// Check if an empty, newly reclaimed page is safe to free now.
112
static bool
113
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
114
0
{
115
0
    assert(mi_page_all_free(page));
116
#ifdef Py_GIL_DISABLED
117
    assert(page->qsbr_node.next == NULL);
118
    if (page->use_qsbr && page->qsbr_goal != 0) {
119
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
120
        if (tstate == NULL) {
121
            return false;
122
        }
123
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
124
    }
125
#endif
126
0
    return true;
127
128
0
}
129
130
#ifdef Py_GIL_DISABLED
131
132
// If we are deferring collection of more than this amount of memory for
133
// mimalloc pages, advance the write sequence.  Advancing allows these
134
// pages to be re-used in a different thread or for a different size class.
135
#define QSBR_PAGE_MEM_LIMIT 4096*20
136
137
// Return true if the global write sequence should be advanced for a mimalloc
138
// page that is deferred from collection.
139
static bool
140
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
141
{
142
    size_t bsize = mi_page_block_size(page);
143
    size_t page_size = page->capacity*bsize;
144
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
145
        qsbr->deferred_page_memory = 0;
146
        return true;
147
    }
148
    qsbr->deferred_page_memory += page_size;
149
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
150
        qsbr->deferred_page_memory = 0;
151
        return true;
152
    }
153
    return false;
154
}
155
156
static _PyThreadStateImpl *
157
tstate_from_heap(mi_heap_t *heap)
158
{
159
    return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
160
}
161
#endif
162
163
static bool
164
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
165
0
{
166
#ifdef Py_GIL_DISABLED
167
    assert(mi_page_all_free(page));
168
    if (page->use_qsbr) {
169
        struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
170
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
171
            _PyMem_mi_page_clear_qsbr(page);
172
            _mi_page_free(page, pq, force);
173
            return true;
174
        }
175
176
        // gh-145615: since we are not freeing this page yet, we want to
177
        // make it available for allocations. Note that the QSBR goal and
178
        // linked list node remain set even if the page is later used for
179
        // an allocation. We only detect and clear the QSBR goal when the
180
        // page becomes empty again (used == 0).
181
        if (mi_page_is_in_full(page)) {
182
            _mi_page_unfull(page);
183
        }
184
185
        _PyMem_mi_page_clear_qsbr(page);
186
        page->retire_expire = 0;
187
188
        if (should_advance_qsbr_for_page(qsbr, page)) {
189
            page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
190
        }
191
        else {
192
            page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
193
        }
194
195
        // We may be freeing a page belonging to a different thread during a
196
        // stop-the-world event. Find the _PyThreadStateImpl for the page.
197
        _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
198
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
199
        return false;
200
    }
201
#endif
202
0
    _mi_page_free(page, pq, force);
203
0
    return true;
204
0
}
205
206
static void
207
_PyMem_mi_page_reclaimed(mi_page_t *page)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    assert(page->qsbr_node.next == NULL);
211
    if (page->qsbr_goal != 0) {
212
        if (mi_page_all_free(page)) {
213
            assert(page->qsbr_node.next == NULL);
214
            _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
215
            assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
216
            page->retire_expire = 0;
217
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
218
        }
219
        else {
220
            page->qsbr_goal = 0;
221
        }
222
    }
223
#endif
224
0
}
225
226
static void
227
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
228
0
{
229
#ifdef Py_GIL_DISABLED
230
    if (!heap->page_use_qsbr) {
231
        return;
232
    }
233
234
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
235
    struct llist_node *head = &tstate->mimalloc.page_list;
236
    if (llist_empty(head)) {
237
        return;
238
    }
239
240
    struct llist_node *node;
241
    llist_for_each_safe(node, head) {
242
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
243
        if (!mi_page_all_free(page)) {
244
            // We allocated from this page some point after the delayed free
245
            _PyMem_mi_page_clear_qsbr(page);
246
            continue;
247
        }
248
249
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
250
            return;
251
        }
252
253
        _PyMem_mi_page_clear_qsbr(page);
254
        _mi_page_free(page, mi_page_queue_of(page), false);
255
    }
256
#endif
257
0
}
258
259
void *
260
_PyMem_MiMalloc(void *ctx, size_t size)
261
0
{
262
#ifdef Py_GIL_DISABLED
263
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
264
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
265
    return mi_heap_malloc(heap, size);
266
#else
267
0
    return mi_malloc(size);
268
0
#endif
269
0
}
270
271
void *
272
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
273
0
{
274
#ifdef Py_GIL_DISABLED
275
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
276
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
277
    return mi_heap_calloc(heap, nelem, elsize);
278
#else
279
0
    return mi_calloc(nelem, elsize);
280
0
#endif
281
0
}
282
283
void *
284
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
285
0
{
286
#ifdef Py_GIL_DISABLED
287
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
288
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
289
    return mi_heap_realloc(heap, ptr, size);
290
#else
291
0
    return mi_realloc(ptr, size);
292
0
#endif
293
0
}
294
295
void
296
_PyMem_MiFree(void *ctx, void *ptr)
297
0
{
298
0
    mi_free(ptr);
299
0
}
300
301
void *
302
_PyObject_MiMalloc(void *ctx, size_t nbytes)
303
0
{
304
#ifdef Py_GIL_DISABLED
305
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
306
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
307
    return mi_heap_malloc(heap, nbytes);
308
#else
309
0
    return mi_malloc(nbytes);
310
0
#endif
311
0
}
312
313
void *
314
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
315
0
{
316
#ifdef Py_GIL_DISABLED
317
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    return mi_heap_calloc(heap, nelem, elsize);
320
#else
321
0
    return mi_calloc(nelem, elsize);
322
0
#endif
323
0
}
324
325
326
void *
327
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
328
0
{
329
#ifdef Py_GIL_DISABLED
330
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
331
    // Implement our own realloc logic so that we can copy PyObject header
332
    // in a thread-safe way.
333
    size_t size = mi_usable_size(ptr);
334
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
335
        return ptr;
336
    }
337
338
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
339
    void* newp = mi_heap_malloc(heap, nbytes);
340
    if (newp == NULL) {
341
        return NULL;
342
    }
343
344
    // Free threaded Python allows access from other threads to the PyObject reference count
345
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
346
    // These fields are typically initialized by PyObject_Init() using relaxed
347
    // atomic stores. We need to copy these fields in a thread-safe way here.
348
    // We use the "debug_offset" to determine how many bytes to copy -- it
349
    // includes the PyObject header and plus any extra pre-headers.
350
    size_t offset = heap->debug_offset;
351
    assert(offset % sizeof(void*) == 0);
352
353
    size_t copy_size = (size < nbytes ? size : nbytes);
354
    if (copy_size >= offset) {
355
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
356
            // Use memcpy to avoid strict-aliasing issues. However, we probably
357
            // still have unavoidable strict-aliasing issues with
358
            // _Py_atomic_store_ptr_relaxed here.
359
            void *word;
360
            memcpy(&word, (char*)ptr + i, sizeof(void*));
361
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
362
        }
363
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
364
    }
365
    else {
366
        _mi_memcpy(newp, ptr, copy_size);
367
    }
368
    mi_free(ptr);
369
    return newp;
370
#else
371
0
    return mi_realloc(ptr, nbytes);
372
0
#endif
373
0
}
374
375
void
376
_PyObject_MiFree(void *ctx, void *ptr)
377
0
{
378
0
    mi_free(ptr);
379
0
}
380
381
void *
382
_PyMem_MiRawMalloc(void *ctx, size_t size)
383
0
{
384
0
    return mi_malloc(size);
385
0
}
386
387
void *
388
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
389
0
{
390
0
    return mi_calloc(nelem, elsize);
391
0
}
392
393
void *
394
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
395
0
{
396
0
    return mi_realloc(ptr, size);
397
0
}
398
399
void
400
_PyMem_MiRawFree(void *ctx, void *ptr)
401
0
{
402
0
    mi_free(ptr);
403
0
}
404
#endif // WITH_MIMALLOC
405
406
407
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
408
409
410
#ifdef WITH_MIMALLOC
411
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
412
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
413
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
414
#endif
415
416
/* the pymalloc allocator */
417
418
// The actual implementation is further down.
419
420
#if defined(WITH_PYMALLOC)
421
void* _PyObject_Malloc(void *ctx, size_t size);
422
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
423
void _PyObject_Free(void *ctx, void *p);
424
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
425
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
426
#endif  // WITH_PYMALLOC
427
428
#if defined(Py_GIL_DISABLED)
429
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
430
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
431
#  define PYMEM_ALLOC MIMALLOC_ALLOC
432
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
433
#elif defined(WITH_PYMALLOC)
434
0
#  define PYRAW_ALLOC MALLOC_ALLOC
435
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
436
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
437
#else
438
#  define PYRAW_ALLOC MALLOC_ALLOC
439
#  define PYMEM_ALLOC MALLOC_ALLOC
440
#  define PYOBJ_ALLOC MALLOC_ALLOC
441
#endif
442
443
444
/* the default debug allocators */
445
446
// The actual implementation is further down.
447
448
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
449
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
450
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
451
void _PyMem_DebugRawFree(void *ctx, void *ptr);
452
453
void* _PyMem_DebugMalloc(void *ctx, size_t size);
454
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
455
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
456
void _PyMem_DebugFree(void *ctx, void *p);
457
458
#define PYDBGRAW_ALLOC \
459
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
460
#define PYDBGMEM_ALLOC \
461
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
462
#define PYDBGOBJ_ALLOC \
463
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
464
465
/* default raw allocator (not swappable) */
466
467
void *
468
_PyMem_DefaultRawMalloc(size_t size)
469
288
{
470
#ifdef Py_DEBUG
471
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
472
#else
473
288
    return _PyMem_RawMalloc(NULL, size);
474
288
#endif
475
288
}
476
477
void *
478
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
479
0
{
480
#ifdef Py_DEBUG
481
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
482
#else
483
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
484
0
#endif
485
0
}
486
487
void *
488
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
489
0
{
490
#ifdef Py_DEBUG
491
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
492
#else
493
0
    return _PyMem_RawRealloc(NULL, ptr, size);
494
0
#endif
495
0
}
496
497
void
498
_PyMem_DefaultRawFree(void *ptr)
499
324
{
500
#ifdef Py_DEBUG
501
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
502
#else
503
324
    _PyMem_RawFree(NULL, ptr);
504
324
#endif
505
324
}
506
507
wchar_t*
508
_PyMem_DefaultRawWcsdup(const wchar_t *str)
509
216
{
510
216
    assert(str != NULL);
511
512
216
    size_t len = wcslen(str);
513
216
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
514
0
        return NULL;
515
0
    }
516
517
216
    size_t size = (len + 1) * sizeof(wchar_t);
518
216
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
519
216
    if (str2 == NULL) {
520
0
        return NULL;
521
0
    }
522
523
216
    memcpy(str2, str, size);
524
216
    return str2;
525
216
}
526
527
/* the low-level virtual memory allocator */
528
529
#ifdef WITH_PYMALLOC
530
#  ifdef MS_WINDOWS
531
#    include <windows.h>
532
#  elif defined(HAVE_MMAP)
533
#    include <sys/mman.h>
534
#    ifdef MAP_ANONYMOUS
535
#      define ARENAS_USE_MMAP
536
#    endif
537
#  endif
538
#endif
539
540
/* Return the system's default huge page size in bytes, or 0 if it
541
 * cannot be determined.  The result is cached after the first call.
542
 *
543
 * This is Linux-only (/proc/meminfo).  On other systems that define
544
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
545
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
546
static size_t
547
_pymalloc_system_hugepage_size(void)
548
{
549
    static size_t hp_size = 0;
550
    static int initialized = 0;
551
552
    if (initialized) {
553
        return hp_size;
554
    }
555
556
#ifdef __linux__
557
    FILE *f = fopen("/proc/meminfo", "r");
558
    if (f != NULL) {
559
        char line[256];
560
        while (fgets(line, sizeof(line), f)) {
561
            unsigned long size_kb;
562
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
563
                hp_size = (size_t)size_kb * 1024;
564
                break;
565
            }
566
        }
567
        fclose(f);
568
    }
569
#endif
570
571
    initialized = 1;
572
    return hp_size;
573
}
574
#endif
575
576
#if (defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)) || \
577
    (defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB))
578
static size_t
579
_pymalloc_round_up_to_multiple(size_t size, size_t multiple)
580
{
581
    if (multiple == 0 || size == 0) {
582
        return size;
583
    }
584
585
    size_t remainder = size % multiple;
586
    if (remainder == 0) {
587
        return size;
588
    }
589
590
    size_t padding = multiple - remainder;
591
    if (size > SIZE_MAX - padding) {
592
        return 0;
593
    }
594
    return size + padding;
595
}
596
#endif
597
598
static size_t
599
_pymalloc_virtual_alloc_size(size_t size)
600
82.8k
{
601
#if defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)
602
    if (_PyRuntime.allocators.use_hugepages) {
603
        SIZE_T large_page_size = GetLargePageMinimum();
604
        if (large_page_size > 0) {
605
            return _pymalloc_round_up_to_multiple(size, (size_t)large_page_size);
606
        }
607
    }
608
#elif defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
609
    if (_PyRuntime.allocators.use_hugepages) {
610
        size_t hp_size = _pymalloc_system_hugepage_size();
611
        if (hp_size > 0) {
612
            return _pymalloc_round_up_to_multiple(size, hp_size);
613
        }
614
    }
615
#endif
616
82.8k
    return size;
617
82.8k
}
618
619
void *
620
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
621
47.6k
{
622
#ifdef MS_WINDOWS
623
#  ifdef PYMALLOC_USE_HUGEPAGES
624
    if (_PyRuntime.allocators.use_hugepages) {
625
        SIZE_T lp_size = GetLargePageMinimum();
626
        if (lp_size > 0 && size % lp_size == 0) {
627
            void *ptr = VirtualAlloc(NULL, size,
628
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
629
                            PAGE_READWRITE);
630
            if (ptr != NULL)
631
                return ptr;
632
        }
633
    }
634
    /* Fall back to regular pages */
635
#  endif
636
    return VirtualAlloc(NULL, size,
637
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
638
#elif defined(ARENAS_USE_MMAP)
639
    void *ptr;
640
#  ifdef PYMALLOC_USE_HUGEPAGES
641
#    ifdef MAP_HUGETLB
642
    if (_PyRuntime.allocators.use_hugepages) {
643
        size_t hp_size = _pymalloc_system_hugepage_size();
644
        /* Only use huge pages if the arena size is a multiple of the
645
         * system's default huge page size.  When the arena is smaller
646
         * than the huge page, mmap still succeeds but silently
647
         * allocates an entire huge page; the subsequent munmap with
648
         * the smaller arena size then fails with EINVAL, leaking
649
         * all of that memory. */
650
        if (hp_size > 0 && size % hp_size == 0) {
651
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
652
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
653
            if (ptr != MAP_FAILED) {
654
                assert(ptr != NULL);
655
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
656
                return ptr;
657
            }
658
        }
659
    }
660
    /* Fall back to regular pages */
661
#    endif
662
#  endif
663
47.6k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
664
47.6k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
665
47.6k
    if (ptr == MAP_FAILED)
666
0
        return NULL;
667
47.6k
    assert(ptr != NULL);
668
47.6k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
669
47.6k
    return ptr;
670
#else
671
    return malloc(size);
672
#endif
673
47.6k
}
674
675
void
676
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
677
#if defined(ARENAS_USE_MMAP)
678
    size_t size
679
#else
680
    size_t Py_UNUSED(size)
681
#endif
682
)
683
47.0k
{
684
#ifdef MS_WINDOWS
685
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
686
    if (ptr == NULL) {
687
        return;
688
    }
689
    VirtualFree(ptr, 0, MEM_RELEASE);
690
#elif defined(ARENAS_USE_MMAP)
691
    /* Unlike free(), munmap() does not special-case NULL to noop. */
692
47.0k
    if (ptr == NULL) {
693
0
        return;
694
0
    }
695
47.0k
    if (munmap(ptr, size) < 0) {
696
0
        _Py_FatalErrorFormat(__func__,
697
0
                             "munmap(%p, %zu) failed with errno %d",
698
0
                             ptr, size, errno);
699
0
    }
700
#else
701
    free(ptr);
702
#endif
703
47.0k
}
704
705
/*******************************************/
706
/* end low-level allocator implementations */
707
/*******************************************/
708
709
710
72
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
711
1.02G
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
712
2.29G
#define _PyMem (_PyRuntime.allocators.standard.mem)
713
5.84G
#define _PyObject (_PyRuntime.allocators.standard.obj)
714
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
715
189k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
716
717
718
/***************************/
719
/* managing the allocators */
720
/***************************/
721
722
static int
723
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
724
                               PyMemAllocatorEx *old_alloc)
725
0
{
726
0
    if (old_alloc != NULL) {
727
0
        get_allocator_unlocked(domain, old_alloc);
728
0
    }
729
730
731
0
    PyMemAllocatorEx new_alloc;
732
0
    switch(domain)
733
0
    {
734
0
    case PYMEM_DOMAIN_RAW:
735
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
736
0
        break;
737
0
    case PYMEM_DOMAIN_MEM:
738
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
739
0
        break;
740
0
    case PYMEM_DOMAIN_OBJ:
741
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
742
0
        break;
743
0
    default:
744
        /* unknown domain */
745
0
        return -1;
746
0
    }
747
0
    set_allocator_unlocked(domain, &new_alloc);
748
0
    if (debug) {
749
0
        set_up_debug_hooks_domain_unlocked(domain);
750
0
    }
751
0
    return 0;
752
0
}
753
754
755
#ifdef Py_DEBUG
756
static const int pydebug = 1;
757
#else
758
static const int pydebug = 0;
759
#endif
760
761
int
762
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
763
0
{
764
0
    if (name == NULL || *name == '\0') {
765
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
766
           options): use default memory allocators */
767
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
768
0
    }
769
0
    else if (strcmp(name, "default") == 0) {
770
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
771
0
    }
772
0
    else if (strcmp(name, "debug") == 0) {
773
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
774
0
    }
775
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
776
0
    else if (strcmp(name, "pymalloc") == 0) {
777
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
778
0
    }
779
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
780
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
781
0
    }
782
0
#endif
783
0
#ifdef WITH_MIMALLOC
784
0
    else if (strcmp(name, "mimalloc") == 0) {
785
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
786
0
    }
787
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
788
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
789
0
    }
790
0
#endif
791
0
#ifndef Py_GIL_DISABLED
792
0
    else if (strcmp(name, "malloc") == 0) {
793
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
794
0
    }
795
0
    else if (strcmp(name, "malloc_debug") == 0) {
796
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
797
0
    }
798
0
#endif
799
0
    else {
800
        /* unknown allocator */
801
0
        return -1;
802
0
    }
803
0
    return 0;
804
0
}
805
806
807
static int
808
set_up_allocators_unlocked(PyMemAllocatorName allocator)
809
0
{
810
0
    switch (allocator) {
811
0
    case PYMEM_ALLOCATOR_NOT_SET:
812
        /* do nothing */
813
0
        break;
814
815
0
    case PYMEM_ALLOCATOR_DEFAULT:
816
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
817
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
818
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
819
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
820
0
        break;
821
822
0
    case PYMEM_ALLOCATOR_DEBUG:
823
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
824
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
825
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
826
0
        _PyRuntime.allocators.is_debug_enabled = 1;
827
0
        break;
828
829
0
#ifdef WITH_PYMALLOC
830
0
    case PYMEM_ALLOCATOR_PYMALLOC:
831
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
832
0
    {
833
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
834
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
835
836
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
837
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
838
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
839
840
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
841
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
842
0
        if (is_debug) {
843
0
            set_up_debug_hooks_unlocked();
844
0
        }
845
0
        break;
846
0
    }
847
0
#endif
848
0
#ifdef WITH_MIMALLOC
849
0
    case PYMEM_ALLOCATOR_MIMALLOC:
850
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
851
0
    {
852
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
853
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
854
855
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
856
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
857
858
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
859
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
860
861
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
862
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
863
0
        if (is_debug) {
864
0
            set_up_debug_hooks_unlocked();
865
0
        }
866
867
0
        break;
868
0
    }
869
0
#endif
870
871
0
    case PYMEM_ALLOCATOR_MALLOC:
872
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
873
0
    {
874
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
875
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
876
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
877
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
878
879
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
880
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
881
0
        if (is_debug) {
882
0
            set_up_debug_hooks_unlocked();
883
0
        }
884
0
        break;
885
0
    }
886
887
0
    default:
888
        /* unknown allocator */
889
0
        return -1;
890
0
    }
891
892
0
    return 0;
893
0
}
894
895
int
896
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
897
0
{
898
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
899
0
    int res = set_up_allocators_unlocked(allocator);
900
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
901
0
    return res;
902
0
}
903
904
905
static int
906
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
907
0
{
908
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
909
0
}
910
911
912
/* Identify the currently-installed allocator configuration by name.
 *
 * Compares the three live allocator structs (_PyMem_Raw, _PyMem, _PyObject)
 * against the known built-in configurations and returns a static name string
 * ("malloc", "pymalloc", "mimalloc", or the same with a "_debug" suffix when
 * the debug hooks wrap them).  Returns NULL if no known configuration
 * matches (e.g. a user-installed custom allocator).
 *
 * Caller must hold ALLOCATORS_MUTEX. */
static const char*
get_current_allocator_name_unlocked(void)
{
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
#ifdef WITH_PYMALLOC
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
#endif
#ifdef WITH_MIMALLOC
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
#endif

    /* Un-hooked configurations: compare the live structs directly. */
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
        pymemallocator_eq(&_PyObject, &malloc_alloc))
    {
        return "malloc";
    }
#ifdef WITH_PYMALLOC
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
        pymemallocator_eq(&_PyMem, &pymalloc) &&
        pymemallocator_eq(&_PyObject, &pymalloc))
    {
        return "pymalloc";
    }
#endif
#ifdef WITH_MIMALLOC
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
        pymemallocator_eq(&_PyMem, &mimalloc) &&
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
    {
        return "mimalloc";
    }
#endif

    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;

    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
        pymemallocator_eq(&_PyObject, &dbg_obj))
    {
        /* Debug hooks installed: the real allocator is the one the hooks
         * delegate to, stored in _PyMem_Debug.<domain>.alloc. */
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
        {
            return "malloc_debug";
        }
#ifdef WITH_PYMALLOC
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
        {
            return "pymalloc_debug";
        }
#endif
#ifdef WITH_MIMALLOC
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
        {
            return "mimalloc_debug";
        }
#endif
    }
    /* No known configuration matched (custom allocator installed). */
    return NULL;
}
982
983
const char*
984
_PyMem_GetCurrentAllocatorName(void)
985
0
{
986
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
987
0
    const char *name = get_current_allocator_name_unlocked();
988
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
989
0
    return name;
990
0
}
991
992
993
int
994
_PyMem_DebugEnabled(void)
995
0
{
996
0
    return _PyRuntime.allocators.is_debug_enabled;
997
0
}
998
999
#ifdef WITH_PYMALLOC
1000
static int
1001
_PyMem_PymallocEnabled(void)
1002
0
{
1003
0
    if (_PyMem_DebugEnabled()) {
1004
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
1005
0
    }
1006
0
    else {
1007
0
        return (_PyObject.malloc == _PyObject_Malloc);
1008
0
    }
1009
0
}
1010
1011
#ifdef WITH_MIMALLOC
1012
static int
1013
_PyMem_MimallocEnabled(void)
1014
0
{
1015
#ifdef Py_GIL_DISABLED
1016
    return 1;
1017
#else
1018
0
    if (_PyMem_DebugEnabled()) {
1019
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
1020
0
    }
1021
0
    else {
1022
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
1023
0
    }
1024
0
#endif
1025
0
}
1026
#endif  // WITH_MIMALLOC
1027
1028
#endif  // WITH_PYMALLOC
1029
1030
1031
static void
1032
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
1033
0
{
1034
0
    PyMemAllocatorEx alloc;
1035
1036
0
    if (domain == PYMEM_DOMAIN_RAW) {
1037
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
1038
0
            return;
1039
0
        }
1040
1041
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
1042
0
        alloc.ctx = &_PyMem_Debug.raw;
1043
0
        alloc.malloc = _PyMem_DebugRawMalloc;
1044
0
        alloc.calloc = _PyMem_DebugRawCalloc;
1045
0
        alloc.realloc = _PyMem_DebugRawRealloc;
1046
0
        alloc.free = _PyMem_DebugRawFree;
1047
0
        set_allocator_unlocked(domain, &alloc);
1048
0
    }
1049
0
    else if (domain == PYMEM_DOMAIN_MEM) {
1050
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
1051
0
            return;
1052
0
        }
1053
1054
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
1055
0
        alloc.ctx = &_PyMem_Debug.mem;
1056
0
        alloc.malloc = _PyMem_DebugMalloc;
1057
0
        alloc.calloc = _PyMem_DebugCalloc;
1058
0
        alloc.realloc = _PyMem_DebugRealloc;
1059
0
        alloc.free = _PyMem_DebugFree;
1060
0
        set_allocator_unlocked(domain, &alloc);
1061
0
    }
1062
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
1063
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
1064
0
            return;
1065
0
        }
1066
1067
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1068
0
        alloc.ctx = &_PyMem_Debug.obj;
1069
0
        alloc.malloc = _PyMem_DebugMalloc;
1070
0
        alloc.calloc = _PyMem_DebugCalloc;
1071
0
        alloc.realloc = _PyMem_DebugRealloc;
1072
0
        alloc.free = _PyMem_DebugFree;
1073
0
        set_allocator_unlocked(domain, &alloc);
1074
0
    }
1075
0
}
1076
1077
1078
static void
1079
set_up_debug_hooks_unlocked(void)
1080
0
{
1081
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1082
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1083
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1084
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1085
0
}
1086
1087
void
1088
PyMem_SetupDebugHooks(void)
1089
0
{
1090
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1091
0
    set_up_debug_hooks_unlocked();
1092
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1093
0
}
1094
1095
static void
1096
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1097
36
{
1098
36
    switch(domain)
1099
36
    {
1100
36
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1101
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1102
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1103
0
    default:
1104
        /* unknown domain: set all attributes to NULL */
1105
0
        allocator->ctx = NULL;
1106
0
        allocator->malloc = NULL;
1107
0
        allocator->calloc = NULL;
1108
0
        allocator->realloc = NULL;
1109
0
        allocator->free = NULL;
1110
36
    }
1111
36
}
1112
1113
static void
1114
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1115
0
{
1116
0
    switch(domain)
1117
0
    {
1118
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1119
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1120
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1121
    /* ignore unknown domain */
1122
0
    }
1123
0
}
1124
1125
void
1126
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1127
36
{
1128
36
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1129
36
    get_allocator_unlocked(domain, allocator);
1130
36
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1131
36
}
1132
1133
void
1134
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1135
0
{
1136
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1137
0
    set_allocator_unlocked(domain, allocator);
1138
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1139
0
}
1140
1141
void
1142
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1143
0
{
1144
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1145
0
    *allocator = _PyObject_Arena;
1146
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1147
0
}
1148
1149
void
1150
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1151
0
{
1152
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1153
0
    _PyObject_Arena = *allocator;
1154
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1155
0
}
1156
1157
1158
/* Note that there is a possible, but very unlikely, race in any place
1159
 * below where we call one of the allocator functions.  We access two
1160
 * fields in each case:  "malloc", etc. and "ctx".
1161
 *
1162
 * It is unlikely that the allocator will be changed while one of those
1163
 * calls is happening, much less in that very narrow window.
1164
 * Furthermore, the likelihood of a race is drastically reduced by the
1165
 * fact that the allocator may not be changed after runtime init
1166
 * (except with a wrapper).
1167
 *
1168
 * With the above in mind, we currently don't worry about locking
1169
 * around these uses of the runtime-global allocators state. */
1170
1171
1172
/*************************/
1173
/* the "arena" allocator */
1174
/*************************/
1175
1176
void *
1177
_PyObject_VirtualAlloc(size_t size)
1178
41.4k
{
1179
41.4k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1180
41.4k
    if (alloc_size == 0 && size != 0) {
1181
0
        return NULL;
1182
0
    }
1183
41.4k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, alloc_size);
1184
41.4k
}
1185
1186
void
1187
_PyObject_VirtualFree(void *obj, size_t size)
1188
41.3k
{
1189
41.3k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1190
41.3k
    assert(alloc_size != 0 || size == 0);
1191
41.3k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, alloc_size);
1192
41.3k
}
1193
1194
1195
/***********************/
1196
/* the "raw" allocator */
1197
/***********************/
1198
1199
void *
1200
PyMem_RawMalloc(size_t size)
1201
250M
{
1202
    /*
1203
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1204
     * Most python internals blindly use a signed Py_ssize_t to track
1205
     * things without checking for overflows or negatives.
1206
     * As size_t is unsigned, checking for size < 0 is not required.
1207
     */
1208
250M
    if (size > (size_t)PY_SSIZE_T_MAX)
1209
0
        return NULL;
1210
250M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1211
250M
}
1212
1213
void *
1214
PyMem_RawCalloc(size_t nelem, size_t elsize)
1215
152k
{
1216
    /* see PyMem_RawMalloc() */
1217
152k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1218
0
        return NULL;
1219
152k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1220
152k
}
1221
1222
void*
1223
PyMem_RawRealloc(void *ptr, size_t new_size)
1224
9.00M
{
1225
    /* see PyMem_RawMalloc() */
1226
9.00M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1227
0
        return NULL;
1228
9.00M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1229
9.00M
}
1230
1231
void PyMem_RawFree(void *ptr)
1232
251M
{
1233
251M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1234
251M
}
1235
1236
1237
/***********************/
1238
/* the "mem" allocator */
1239
/***********************/
1240
1241
void *
1242
PyMem_Malloc(size_t size)
1243
273M
{
1244
    /* see PyMem_RawMalloc() */
1245
273M
    if (size > (size_t)PY_SSIZE_T_MAX)
1246
0
        return NULL;
1247
273M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1248
273M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1249
273M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1250
273M
    OBJECT_STAT_INC(allocations);
1251
273M
    return _PyMem.malloc(_PyMem.ctx, size);
1252
273M
}
1253
1254
void *
1255
PyMem_Calloc(size_t nelem, size_t elsize)
1256
46.2M
{
1257
    /* see PyMem_RawMalloc() */
1258
46.2M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1259
0
        return NULL;
1260
46.2M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1261
46.2M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1262
46.2M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1263
46.2M
    OBJECT_STAT_INC(allocations);
1264
46.2M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1265
46.2M
}
1266
1267
void *
1268
PyMem_Realloc(void *ptr, size_t new_size)
1269
269M
{
1270
    /* see PyMem_RawMalloc() */
1271
269M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1272
0
        return NULL;
1273
269M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1274
269M
}
1275
1276
void
1277
PyMem_Free(void *ptr)
1278
557M
{
1279
557M
    OBJECT_STAT_INC(frees);
1280
557M
    _PyMem.free(_PyMem.ctx, ptr);
1281
557M
}
1282
1283
1284
/***************************/
1285
/* pymem utility functions */
1286
/***************************/
1287
1288
wchar_t*
1289
_PyMem_RawWcsdup(const wchar_t *str)
1290
1.44k
{
1291
1.44k
    assert(str != NULL);
1292
1293
1.44k
    size_t len = wcslen(str);
1294
1.44k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1295
0
        return NULL;
1296
0
    }
1297
1298
1.44k
    size_t size = (len + 1) * sizeof(wchar_t);
1299
1.44k
    wchar_t *str2 = PyMem_RawMalloc(size);
1300
1.44k
    if (str2 == NULL) {
1301
0
        return NULL;
1302
0
    }
1303
1304
1.44k
    memcpy(str2, str, size);
1305
1.44k
    return str2;
1306
1.44k
}
1307
1308
/* Duplicate a C string using the raw allocator.
 * Returns NULL on allocation failure; caller frees with PyMem_RawFree(). */
char *
_PyMem_RawStrdup(const char *str)
{
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;  /* include the terminating NUL */
    char *dup = PyMem_RawMalloc(nbytes);
    if (dup != NULL) {
        memcpy(dup, str, nbytes);
    }
    return dup;
}
1320
1321
/* Duplicate a C string using the mem-domain allocator (GIL required).
 * Returns NULL on allocation failure; caller frees with PyMem_Free(). */
char *
_PyMem_Strdup(const char *str)
{
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;  /* include the terminating NUL */
    char *dup = PyMem_Malloc(nbytes);
    if (dup != NULL) {
        memcpy(dup, str, nbytes);
    }
    return dup;
}
1333
1334
/***********************************************/
1335
/* Delayed freeing support for Py_GIL_DISABLED */
1336
/***********************************************/
1337
1338
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
#define WORK_ITEMS_PER_CHUNK 254

// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
struct _mem_work_item {
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
    uint64_t qsbr_goal;  // QSBR sequence number that must be reached before freeing
};

// A fixed-size buffer of pointers to be freed.  Chunks are chained into a
// per-thread (or per-interpreter) FIFO queue and drained oldest-first.
struct _mem_work_chunk {
    // Linked list node of chunks in queue
    struct llist_node node;

    Py_ssize_t rd_idx;  // index of next item to read
    Py_ssize_t wr_idx;  // index of next item to write
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
};
1356
1357
/* A work item whose low bit is set carries a PyObject that must be
 * decref'ed; an untagged item is raw memory to hand to PyMem_Free. */
static int
work_item_should_decref(uintptr_t ptr)
{
    return (ptr & (uintptr_t)0x01) != 0;
}
1362
1363
/* Dispose of one delayed-free work item.
 *
 * Tagged items (low bit set) hold a PyObject whose reference count must be
 * dropped; untagged items are plain memory released with PyMem_Free().
 * 'cb'/'state' are only used in free-threaded builds while the world is
 * stopped: the object's refcount is merged and, if it hits zero, the
 * callback is invoked instead of deallocating directly. */
static void
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
{
    if (work_item_should_decref(ptr)) {
        /* strip the tag bit to recover the object pointer */
        PyObject *obj = (PyObject *)(ptr - 1);
#ifdef Py_GIL_DISABLED
        if (cb == NULL) {
            /* normal path: world running, plain decref */
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
            Py_DECREF(obj);
            return;
        }
        /* stop-the-world path: merge the refcount and let the callback
         * decide what to do with objects that reach zero */
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
        if (refcount == 0) {
            cb(obj, state);
        }
#else
        Py_DECREF(obj);
#endif
    }
    else {
        PyMem_Free((void *)ptr);
    }
}
1387
1388
1389
#ifdef Py_GIL_DISABLED
1390
1391
// For deferred advance on free: the number of deferred items before advancing
1392
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1393
// want to process a chunk before it overflows.
1394
#define QSBR_DEFERRED_LIMIT 127
1395
1396
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1397
// helps limit memory usage due to QSBR delaying frees too long.
1398
#define QSBR_FREE_MEM_LIMIT 1024*1024
1399
1400
// Return true if the global write sequence should be advanced for a deferred
// memory free.  Tracks per-thread counters of how many frees (and how much
// memory) have been deferred; once either passes its limit the counters are
// reset and the thread is flagged to process its queue.
static bool
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
{
    if (size > QSBR_FREE_MEM_LIMIT) {
        // A single huge free: advance immediately without counting it.
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    qsbr->deferred_count++;
    qsbr->deferred_memory += size;
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
        // Accumulated enough deferred work: reset and advance.
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    return false;
}
1422
#endif
1423
1424
/* Queue 'ptr' (possibly tag-bit-encoded, see work_item_should_decref) to be
 * freed once all threads pass a QSBR checkpoint.  'size' is a hint used to
 * decide when to advance the global QSBR write sequence.
 *
 * On default (GIL) builds there is nothing to defer: free immediately. */
static void
free_delayed(uintptr_t ptr, size_t size)
{
#ifndef Py_GIL_DISABLED
    free_work_item(ptr, NULL, NULL);
#else
    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
        interp->stoptheworld.world_stopped)
    {
        // Free immediately during interpreter shutdown or if the world is
        // stopped.
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
        free_work_item(ptr, NULL, NULL);
        return;
    }

    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    struct llist_node *head = &tstate->mem_free_queue;

    struct _mem_work_chunk *buf = NULL;
    if (!llist_empty(head)) {
        // Try to re-use the last buffer
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
            // already full
            buf = NULL;
        }
    }

    if (buf == NULL) {
        buf = PyMem_Calloc(1, sizeof(*buf));
        if (buf != NULL) {
            llist_insert_tail(head, &buf->node);
        }
    }

    if (buf == NULL) {
        // failed to allocate a buffer, free immediately.  Stopping the
        // world makes it safe to act on the object without waiting for
        // other threads' QSBR checkpoints.
        PyObject *to_dealloc = NULL;
        _PyEval_StopTheWorld(tstate->base.interp);
        if (work_item_should_decref(ptr)) {
            PyObject *obj = (PyObject *)(ptr - 1);
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
            if (refcount == 0) {
                to_dealloc = obj;
            }
        }
        else {
            PyMem_Free((void *)ptr);
        }
        _PyEval_StartTheWorld(tstate->base.interp);
        // Deallocate only after restarting the world: _Py_Dealloc may run
        // arbitrary code.
        if (to_dealloc != NULL) {
            _Py_Dealloc(to_dealloc);
        }
        return;
    }

    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
    uint64_t seq;
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
    }
    else {
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
    }
    buf->array[buf->wr_idx].ptr = ptr;
    buf->array[buf->wr_idx].qsbr_goal = seq;
    buf->wr_idx++;

    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
        // Normally the processing of delayed items is done from the eval
        // breaker.  Processing here is a safety measure to ensure too much
        // work does not accumulate.
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
    }
#endif
}
1502
1503
void
1504
_PyMem_FreeDelayed(void *ptr, size_t size)
1505
0
{
1506
0
    assert(!((uintptr_t)ptr & 0x01));
1507
0
    if (ptr != NULL) {
1508
0
        free_delayed((uintptr_t)ptr, size);
1509
0
    }
1510
0
}
1511
1512
#ifdef Py_GIL_DISABLED
1513
void
1514
_PyObject_XDecRefDelayed(PyObject *ptr)
1515
{
1516
    assert(!((uintptr_t)ptr & 0x01));
1517
    if (ptr != NULL) {
1518
        // We use 0 as the size since we don't have an easy way to know the
1519
        // actual size.  If we are freeing many objects, the write sequence
1520
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1521
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1522
    }
1523
}
1524
#endif
1525
1526
#ifdef Py_GIL_DISABLED
1527
void
1528
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1529
{
1530
    PyObject *old = *ptr;
1531
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1532
    if (old == NULL) {
1533
        return;
1534
    }
1535
    if (!_Py_IsImmortal(old)) {
1536
         _PyObject_XDecRefDelayed(old);
1537
    }
1538
}
1539
#endif
1540
1541
static struct _mem_work_chunk *
1542
work_queue_first(struct llist_node *head)
1543
0
{
1544
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1545
0
}
1546
1547
/* Drain as many delayed-free items from 'head' as QSBR allows.
 *
 * Items are processed oldest-first; processing stops at the first item
 * whose qsbr_goal has not yet been reached by all threads (later items
 * have goals at least as large).  Fully-drained chunks are released,
 * except that when 'keep_empty' is true the final chunk is kept (reset
 * to empty) to avoid churn of allocating a new chunk.  'cb'/'state' are
 * forwarded to free_work_item(). */
static void
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
              bool keep_empty, delayed_dealloc_cb cb, void *state)
{
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
                /* not yet safe to free this (or any later) item */
                return;
            }

            /* Advance rd_idx BEFORE freeing: the free may re-enter this
             * queue and must not see the item as pending. */
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, cb, state);
            continue;
        }

        assert(buf->rd_idx == buf->wr_idx);
        if (keep_empty && buf->node.next == head) {
            // Keep the last buffer in the queue to reduce re-allocations
            buf->rd_idx = buf->wr_idx = 0;
            return;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1577
1578
static void
1579
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1580
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1581
                     void *state)
1582
0
{
1583
0
    assert(PyMutex_IsLocked(&queue->mutex));
1584
0
    process_queue(&queue->head, tstate, false, cb, state);
1585
1586
0
    int more_work = !llist_empty(&queue->head);
1587
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1588
0
}
1589
1590
static void
1591
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1592
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1593
                           void *state)
1594
0
{
1595
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1596
0
        return;
1597
0
    }
1598
1599
    // Try to acquire the lock, but don't block if it's already held.
1600
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1601
0
        process_interp_queue(queue, tstate, cb, state);
1602
0
        PyMutex_Unlock(&queue->mutex);
1603
0
    }
1604
0
}
1605
1606
void
1607
_PyMem_ProcessDelayed(PyThreadState *tstate)
1608
0
{
1609
0
    PyInterpreterState *interp = tstate->interp;
1610
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1611
1612
0
    tstate_impl->qsbr->should_process = false;
1613
1614
    // Process thread-local work
1615
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1616
1617
    // Process shared interpreter work
1618
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1619
0
}
1620
1621
void
1622
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1623
0
{
1624
0
    PyInterpreterState *interp = tstate->interp;
1625
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1626
1627
    // Process thread-local work
1628
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1629
1630
    // Process shared interpreter work
1631
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1632
0
}
1633
1634
/* Hand a detaching thread's pending delayed frees over to its interpreter.
 *
 * Called when 'tstate' is about to stop running: its private queue cannot
 * be drained by the thread anymore, so the chunks are merged into the
 * interpreter-wide queue and processed immediately where possible. */
void
_PyMem_AbandonDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;

    if (llist_empty(queue)) {
        return;
    }

    // Check if the queue contains one empty buffer
    struct _mem_work_chunk *buf = work_queue_first(queue);
    if (buf->rd_idx == buf->wr_idx) {
        // Nothing pending: just release the kept-around empty chunk.
        llist_remove(&buf->node);
        PyMem_Free(buf);
        assert(llist_empty(queue));
        return;
    }

    PyMutex_Lock(&interp->mem_free_queue.mutex);

    // Merge the thread's work queue into the interpreter's work queue.
    llist_concat(&interp->mem_free_queue.head, queue);

    // Process the merged queue now (see gh-130794).
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);

    PyMutex_Unlock(&interp->mem_free_queue.mutex);

    assert(llist_empty(queue));  // the thread's queue is now empty
}
1666
1667
/* Release the interpreter's delayed-free queue during finalization.
 *
 * Unlike process_queue(), no QSBR poll is done: by this point no other
 * thread can still be reading the queued memory, so every remaining item
 * is freed unconditionally. */
void
_PyMem_FiniDelayed(PyInterpreterState *interp)
{
    struct llist_node *head = &interp->mem_free_queue.head;
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            // Free the remaining items immediately. There should be no other
            // threads accessing the memory at this point during shutdown.
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, NULL, NULL);
            continue;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1688
1689
/**************************/
1690
/* the "object" allocator */
1691
/**************************/
1692
1693
void *
1694
PyObject_Malloc(size_t size)
1695
1.42G
{
1696
    /* see PyMem_RawMalloc() */
1697
1.42G
    if (size > (size_t)PY_SSIZE_T_MAX)
1698
0
        return NULL;
1699
1.42G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1700
1.42G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1701
1.42G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1702
1.42G
    OBJECT_STAT_INC(allocations);
1703
1.42G
    return _PyObject.malloc(_PyObject.ctx, size);
1704
1.42G
}
1705
1706
void *
1707
PyObject_Calloc(size_t nelem, size_t elsize)
1708
0
{
1709
    /* see PyMem_RawMalloc() */
1710
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1711
0
        return NULL;
1712
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1713
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1714
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1715
0
    OBJECT_STAT_INC(allocations);
1716
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1717
0
}
1718
1719
void *
1720
PyObject_Realloc(void *ptr, size_t new_size)
1721
68.3M
{
1722
    /* see PyMem_RawMalloc() */
1723
68.3M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1724
0
        return NULL;
1725
68.3M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1726
68.3M
}
1727
1728
void
1729
PyObject_Free(void *ptr)
1730
1.42G
{
1731
1.42G
    OBJECT_STAT_INC(frees);
1732
1.42G
    _PyObject.free(_PyObject.ctx, ptr);
1733
1.42G
}
1734
1735
1736
/* Use __builtin_expect() where available to reduce overhead of
1737
   the valgrind checks */
1738
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1739
12.1G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1740
5.44G
#  define LIKELY(value) __builtin_expect((value), 1)
1741
#else
1742
#  define UNLIKELY(value) (value)
1743
#  define LIKELY(value) (value)
1744
#endif
1745
1746
#ifdef WITH_PYMALLOC
1747
1748
#ifdef WITH_VALGRIND
1749
#include <valgrind/valgrind.h>
1750
1751
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1752
static int running_on_valgrind = -1;
1753
#endif
1754
1755
typedef struct _obmalloc_state OMState;
1756
1757
/* obmalloc state for main interpreter and shared by all interpreters without
1758
 * their own obmalloc state.  By not explicitly initializing this structure, it
1759
 * will be allocated in the BSS which is a small performance win.  The radix
1760
 * tree arrays are fairly large but are sparsely used.  */
1761
static struct _obmalloc_state obmalloc_state_main;
1762
static bool obmalloc_state_initialized;
1763
1764
static inline int
1765
has_own_state(PyInterpreterState *interp)
1766
0
{
1767
0
    return (_Py_IsMainInterpreter(interp) ||
1768
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1769
0
            _Py_IsMainInterpreterFinalizing(interp));
1770
0
}
1771
1772
static inline OMState *
1773
get_state(void)
1774
4.18G
{
1775
4.18G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1776
4.18G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1777
4.18G
    return interp->obmalloc;
1778
4.18G
}
1779
1780
// These macros all rely on a local "state" variable.
1781
1.99G
#define usedpools (state->pools.used)
1782
2.95M
#define allarenas (state->mgmt.arenas)
1783
378
#define maxarenas (state->mgmt.maxarenas)
1784
30.0k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1785
25.0M
#define usable_arenas (state->mgmt.usable_arenas)
1786
17.5M
#define nfp2lasta (state->mgmt.nfp2lasta)
1787
19.2k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1788
6.23k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1789
7.35k
#define narenas_highwater (state->mgmt.narenas_highwater)
1790
500M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1791
1792
#ifdef WITH_MIMALLOC
1793
static bool count_blocks(
1794
    const mi_heap_t* heap, const mi_heap_area_t* area,
1795
    void* block, size_t block_size, void* allocated_blocks)
1796
0
{
1797
0
    *(size_t *)allocated_blocks += area->used;
1798
0
    return 1;
1799
0
}
1800
1801
/* Count the live mimalloc blocks belonging to 'interp'.
 *
 * Free-threaded builds walk every thread state's heaps plus the abandoned
 * pool; default builds only visit the current thread's default heap. */
static Py_ssize_t
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
{
    size_t allocated_blocks = 0;
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
        }
    }

    // Blocks whose owning thread has exited live in the abandoned pool.
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
                                        &allocated_blocks);
    }
#else
    // TODO(sgross): this only counts the current thread's blocks.
    mi_heap_t *heap = mi_heap_get_default();
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
#endif
    return allocated_blocks;
}
1826
#endif
1827
1828
/* Return the number of currently allocated obmalloc (or mimalloc) blocks
 * owned by 'interp'; used for leak reporting at shutdown.
 *
 * For obmalloc this sums the raw (oversized) allocations plus the live
 * block counts of every pool in every allocated arena.  The interpreter
 * must own its allocator state (see has_own_state). */
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        return get_mimalloc_allocated_blocks(interp);
    }
#endif

#ifdef Py_DEBUG
    assert(has_own_state(interp));
#else
    if (!has_own_state(interp)) {
        _Py_FatalErrorFunc(__func__,
                           "the interpreter doesn't have its own allocator");
    }
#endif
    // NB: the usedpools/allarenas/... macros below read this local 'state'.
    OMState *state = interp->obmalloc;

    if (state == NULL) {
        return 0;
    }

    Py_ssize_t n = raw_allocated_blocks;
    /* add up allocated blocks for used pools */
    for (uint i = 0; i < maxarenas; ++i) {
        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == 0) {
            continue;
        }

        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            n += p->ref.count;
        }
    }
    return n;
}
1870
1871
/* Defined later in this file; frees arenas and radix-tree nodes. */
static void free_obmalloc_arenas(PyInterpreterState *interp);

/* Account for blocks still allocated when `interp` is finalized.
 *
 * Any remaining blocks are recorded as leaks on the runtime.  If nothing
 * leaked and the obmalloc state was heap-allocated, the interpreter's
 * arenas are also freed. */
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        return;
    }
#endif
    if (has_own_state(interp) && interp->obmalloc != NULL) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        assert(has_own_state(interp) || leaked == 0);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
            // then some of the memory allocated by obmalloc has not been
            // freed.  It might be safe to free the arenas in that case but
            // it's possible that extension modules are still using that
            // memory.  So, it is safer to not free and to leak.  Perhaps there
            // should be warning when this happens.  It should be possible to
            // use a tool like "-fsanitize=address" to track down these leaks.
            free_obmalloc_arenas(interp);
        }
    }
}
1899
1900
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);

/* We preserve the number of blocks leaked during runtime finalization,
   so they can be reported if the runtime is initialized again. */
// XXX We don't lose any information by dropping this,
// so we should consider doing so.
static Py_ssize_t last_final_leaks = 0;

/* Snapshot the global leak count at runtime finalization, then reset the
 * per-runtime tally.  The snapshot survives in last_final_leaks so a
 * re-initialized runtime can still report it. */
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
{
    /* Must read the total before clearing interpreter_leaks: the helper
       folds runtime->obmalloc.interpreter_leaks into its result. */
    Py_ssize_t leaked = get_num_global_allocated_blocks(runtime);
    last_final_leaks = leaked;
    runtime->obmalloc.interpreter_leaks = 0;
}
1914
1915
/* Sum the allocated-block counts of every interpreter, plus leaks already
 * recorded on the runtime and leaks preserved from a previous runtime
 * finalization.
 *
 * During runtime finalization only the (probable) last interpreter is
 * consulted; otherwise all interpreters are walked with the world stopped
 * and the head lock held. */
static Py_ssize_t
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
{
    Py_ssize_t total = 0;
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
        PyInterpreterState *interp = _PyInterpreterState_Main();
        if (interp == NULL) {
            /* We are at the very end of runtime finalization.
               We can't rely on finalizing->interp since that thread
               state is probably already freed, so we don't worry
               about it. */
            assert(PyInterpreterState_Head() == NULL);
        }
        else {
            assert(interp != NULL);
            /* It is probably the last interpreter but not necessarily. */
            assert(PyInterpreterState_Next(interp) == NULL);
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
        }
    }
    else {
        /* Not finalizing: walk every interpreter.  Stop the world and take
         * the head lock so the interpreter list cannot change under us. */
        _PyEval_StopTheWorldAll(&_PyRuntime);
        HEAD_LOCK(runtime);
        PyInterpreterState *interp = PyInterpreterState_Head();
        assert(interp != NULL);
#ifdef Py_DEBUG
        int got_main = 0;
#endif
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
#ifdef Py_DEBUG
            /* Sanity check: exactly one main interpreter, owning its state. */
            if (_Py_IsMainInterpreter(interp)) {
                assert(!got_main);
                got_main = 1;
                assert(has_own_state(interp));
            }
#endif
            /* Interpreters sharing another's allocator would double-count. */
            if (has_own_state(interp)) {
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
            }
        }
        HEAD_UNLOCK(runtime);
        _PyEval_StartTheWorldAll(&_PyRuntime);
#ifdef Py_DEBUG
        assert(got_main);
#endif
    }
    total += runtime->obmalloc.interpreter_leaks;
    total += last_final_leaks;
    return total;
}
1965
1966
/* Public entry point: global allocated-block count for the (sole) runtime. */
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    return get_num_global_allocated_blocks(&_PyRuntime);
}
1971
1972
#if WITH_PYMALLOC_RADIX_TREE
/*==========================================================================*/
/* radix tree for tracking arena usage. */

/* Convenience aliases into the per-interpreter obmalloc state; every
 * function using them takes an `OMState *state` parameter. */
#define arena_map_root (state->usage.arena_map_root)
#ifdef USE_INTERIOR_NODES
#define arena_map_mid_count (state->usage.arena_map_mid_count)
#define arena_map_bot_count (state->usage.arena_map_bot_count)
#endif

/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
 * it cannot be created.
 *
 * `p` selects the path through the (up to three-level) tree; with
 * `create` true, missing interior nodes are allocated on demand with
 * PyMem_RawCalloc (zero-filled).  NULL is returned either because a node
 * is absent and `create` is false, or because allocation failed. */
static inline Py_ALWAYS_INLINE arena_map_bot_t *
arena_map_get(OMState *state, pymem_block *p, int create)
{
#ifdef USE_INTERIOR_NODES
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
    int i1 = MAP_TOP_INDEX(p);
    if (arena_map_root.ptrs[i1] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1] = n;
        arena_map_mid_count++;
    }
    int i2 = MAP_MID_INDEX(p);
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
        arena_map_bot_count++;
    }
    return arena_map_root.ptrs[i1]->ptrs[i2];
#else
    /* Small address space: the root *is* the single bottom node. */
    return &arena_map_root;
#endif
}
2019
2020
2021
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
2022
 * away 24 bits of the address.  That reduces the space requirement of
2023
 * the tree compared to similar radix tree page-map schemes.  In
2024
 * exchange for slashing the space requirement, it needs more
2025
 * computation to check an address.
2026
 *
2027
 * Tracking coverage is done by "ideal" arena address.  It is easier to
2028
 * explain in decimal so let's say that the arena size is 100 bytes.
2029
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
2030
 * pointer address is inside an actual arena, we have to check two ideal
2031
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
2032
 * 300.  In the rare case that an arena is aligned in the ideal way
2033
 * (e.g. base address of arena is 200) then we only have to check one
2034
 * ideal address.
2035
 *
2036
 * The tree nodes for 200 and 300 both store the address of arena.
2037
 * There are two cases: the arena starts at a lower ideal arena and
2038
 * extends to this one, or the arena starts in this arena and extends to
2039
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
2040
 * these two cases.
2041
 */
2042
2043
2044
/* mark or unmark addresses covered by arena.
 *
 * Records (is_used true) or clears (is_used false) the address range
 * [arena_base, arena_base + ARENA_SIZE) in the radix tree, using the
 * tail_hi/tail_lo scheme described in the comment above.  Returns 1 on
 * success, 0 if a needed tree node could not be allocated (only possible
 * when marking, since unmarking never creates nodes). */
static int
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
{
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
    arena_map_bot_t *n_hi = arena_map_get(
            state, (pymem_block *)arena_base, is_used);
    if (n_hi == NULL) {
        assert(is_used); /* otherwise node should already exist */
        return 0; /* failed to allocate space for node */
    }
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
    if (tail == 0) {
        /* is ideal arena address */
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
    }
    else {
        /* arena_base address is not ideal (aligned to arena size) and
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
         * for the next arena.  Note that it might be in different MAP_TOP
         * and MAP_MID nodes as well so we need to call arena_map_get()
         * again (do the full tree traversal).
         */
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
        /* If arena_base is a legit arena address, so is arena_base_next - 1
         * (last address in arena).  If arena_base_next overflows then it
         * must overflow to 0.  However, that would mean arena_base was
         * "ideal" and we should not be in this case. */
        assert(arena_base < arena_base_next);
        arena_map_bot_t *n_lo = arena_map_get(
                state, (pymem_block *)arena_base_next, is_used);
        if (n_lo == NULL) {
            assert(is_used); /* otherwise should already exist */
            /* Roll back the tail_hi we just set so the tree stays
             * consistent before reporting failure. */
            n_hi->arenas[i3].tail_hi = 0;
            return 0; /* failed to allocate space for node */
        }
        int i3_next = MAP_BOT_INDEX(arena_base_next);
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
    }
    return 1;
}
2088
2089
/* Return true if 'p' is a pointer inside an obmalloc arena.
 * _PyObject_Free() calls this so it needs to be very fast.
 *
 * Looks up the bottom tree node for p and compares p's offset within its
 * "ideal" arena slot against the recorded tail_lo/tail_hi boundaries
 * (see the coverage-tracking comment above for the two cases). */
static int
arena_map_is_used(OMState *state, pymem_block *p)
{
    /* create=0: never allocate nodes on the lookup path. */
    arena_map_bot_t *n = arena_map_get(state, p, 0);
    if (n == NULL) {
        return 0;
    }
    int i3 = MAP_BOT_INDEX(p);
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
    int32_t hi = n->arenas[i3].tail_hi;
    int32_t lo = n->arenas[i3].tail_lo;
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
    /* Below tail_lo: inside an arena spilling in from the previous slot.
     * At or above tail_hi (hi == -1 means "whole slot"): inside an arena
     * starting in this slot.  hi == 0 means no arena starts here. */
    return (tail < lo) || (tail >= hi && hi != 0);
}
2105
2106
/* end of radix tree logic */
2107
/*==========================================================================*/
2108
#endif /* WITH_PYMALLOC_RADIX_TREE */
2109
2110
2111
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(OMState *state)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;

    /* Lazily resolve PYTHONMALLOCSTATS on first call (-1 == not checked). */
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
    if (debug_stats == -1) {
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
    }
    if (debug_stats) {
        _PyObject_DebugMallocStats(stderr);
    }

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*allarenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        allarenas = arenaobj;

        /* We might need to fix pointers that were copied.  However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full.  Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers.  Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            allarenas[i].address = 0;              /* mark as unassociated */
            allarenas[i].nextarena = i < numarenas - 1 ?
                                        &allarenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &allarenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
#if WITH_PYMALLOC_RADIX_TREE
    if (address != NULL) {
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
            /* marking arena in radix tree failed, abort */
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
            address = NULL;
        }
    }
#endif
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        /* Arena start is not pool-aligned: the first partial pool is
         * unusable, so advance to the next pool boundary. */
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}
2218
2219
2220
2221
#if WITH_PYMALLOC_RADIX_TREE
2222
/* Return true if and only if P is an address that was allocated by
   pymalloc.  When the radix tree is used, 'poolp' is unused.
 */
static bool
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
{
    /* Radix-tree build: delegate to the arena-coverage lookup. */
    return arena_map_is_used(state, p);
}
2230
#else
2231
/*
2232
address_in_range(P, POOL)
2233
2234
Return true if and only if P is an address that was allocated by pymalloc.
2235
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2236
(the caller is asked to compute this because the macro expands POOL more than
2237
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2238
variable and pass the latter to the macro; because address_in_range is
2239
called on every alloc/realloc/free, micro-efficiency is important here).
2240
2241
Tricky:  Let B be the arena base address associated with the pool, B =
2242
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2243
2244
    B <= P < B + ARENA_SIZE
2245
2246
Subtracting B throughout, this is true iff
2247
2248
    0 <= P-B < ARENA_SIZE
2249
2250
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2251
2252
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2253
before the first arena has been allocated.  `arenas` is still NULL in that
2254
case.  We're relying on that maxarenas is also 0 in that case, so that
2255
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2256
into a NULL arenas.
2257
2258
Details:  given P and POOL, the arena_object corresponding to P is AO =
2259
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2260
stores, etc), POOL is the correct address of P's pool, AO.address is the
2261
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2262
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2263
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2264
controls P.
2265
2266
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2267
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2268
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2269
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2270
control P.
2271
2272
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2273
allocated arena, obmalloc controls all the memory in slice AO.address :
2274
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2275
so P doesn't lie in that slice, so the macro correctly reports that P is not
2276
controlled by obmalloc.
2277
2278
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2279
arena_object (one not currently associated with an allocated arena),
2280
AO.address is 0, and the second test in the macro reduces to:
2281
2282
    P < ARENA_SIZE
2283
2284
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2285
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2286
of the test still passes, and the third clause (AO.address != 0) is necessary
2287
to get the correct result:  AO.address is 0 in this case, so the macro
2288
correctly reports that P is not controlled by obmalloc (despite that P lies in
2289
slice AO.address : AO.address + ARENA_SIZE).
2290
2291
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2292
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2293
corresponded to a currently-allocated arena, so the "P is not controlled by
2294
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2295
was impossible.
2296
2297
Note that the logic is excruciating, and reading up possibly uninitialized
2298
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2299
creates problems for some memory debuggers.  The overwhelming advantage is
2300
that this test determines whether an arbitrary address is controlled by
2301
obmalloc in a small constant time, independent of the number of arenas
2302
obmalloc controls.  Since this test is needed at every entry point, it's
2303
extremely desirable that it be this fast.
2304
*/
2305
2306
/* Non-radix-tree build: decide arena membership from the pool header's
 * arenaindex (full rationale in the long comment above).  The sanitizer
 * annotations are needed because this may deliberately read memory that
 * obmalloc does not own. */
static bool _Py_NO_SANITIZE_ADDRESS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(OMState *state, void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    return arenaindex < maxarenas &&
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
        allarenas[arenaindex].address != 0;
}
2321
2322
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2323
2324
/*==========================================================================*/
2325
2326
// Called when freelist is exhausted.  Extend the freelist if there is
// space for a block.  Otherwise, remove this pool from usedpools.
//
// `size` is the size-class index (not bytes).  The pool's free list is a
// singly-linked list of blocks, NULL-terminated, threaded through the
// first word of each free block.
static void
pymalloc_pool_extend(poolp pool, uint size)
{
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
        /* There is room for another block. */
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
        pool->nextoffset += INDEX2SIZE(size);
        /* New block becomes the sole (terminating) free-list entry. */
        *(pymem_block **)(pool->freeblock) = NULL;
        return;
    }

    /* Pool is full, unlink from used pools. */
    poolp next;
    next = pool->nextpool;
    pool = pool->prevpool;
    next->prevpool = pool;
    pool->nextpool = next;
}
2346
2347
/* called when pymalloc_alloc can not allocate a block from usedpool.
 * This function takes new pool and allocate a block from it.
 *
 * `size` is the size-class index.  Returns a block pointer, or NULL if a
 * new arena was needed and could not be allocated.  Maintains the
 * usable_arenas list and the nfp2lasta[] "last arena with N free pools"
 * index as pools are consumed. */
static void*
allocate_from_new_pool(OMState *state, uint size)
{
    /* There isn't a pool of the right size class immediately
     * available:  use a free pool.
     */
    if (UNLIKELY(usable_arenas == NULL)) {
        /* No arena has a free pool:  allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
        if (narenas_currently_allocated >= MAX_ARENAS) {
            return NULL;
        }
#endif
        usable_arenas = new_arena(state);
        if (usable_arenas == NULL) {
            return NULL;
        }
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
    }
    assert(usable_arenas->address != 0);

    /* This arena already had the smallest nfreepools value, so decreasing
     * nfreepools doesn't change that, and we don't need to rearrange the
     * usable_arenas list.  However, if the arena becomes wholly allocated,
     * we need to remove its arena_object from usable_arenas.
     */
    assert(usable_arenas->nfreepools > 0);
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
        /* It's the last of this size, so there won't be any. */
        nfp2lasta[usable_arenas->nfreepools] = NULL;
    }
    /* If any free pools will remain, it will be the new smallest. */
    if (usable_arenas->nfreepools > 1) {
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
    }

    /* Try to get a cached free pool. */
    poolp pool = usable_arenas->freepools;
    if (LIKELY(pool != NULL)) {
        /* Unlink from cached pools. */
        usable_arenas->freepools = pool->nextpool;
        usable_arenas->nfreepools--;
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
            /* Wholly allocated:  remove. */
            assert(usable_arenas->freepools == NULL);
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
        else {
            /* nfreepools > 0:  it must be that freepools
             * isn't NULL, or that we haven't yet carved
             * off all the arena's pools for the first
             * time.
             */
            assert(usable_arenas->freepools != NULL ||
                   usable_arenas->pool_address <=
                   (pymem_block*)usable_arenas->address +
                       ARENA_SIZE - POOL_SIZE);
        }
    }
    else {
        /* Carve off a new pool. */
        assert(usable_arenas->nfreepools > 0);
        assert(usable_arenas->freepools == NULL);
        pool = (poolp)usable_arenas->pool_address;
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
                                 ARENA_SIZE - POOL_SIZE);
        pool->arenaindex = (uint)(usable_arenas - allarenas);
        assert(&allarenas[pool->arenaindex] == usable_arenas);
        /* DUMMY_SIZE_IDX forces header (re)initialization below. */
        pool->szidx = DUMMY_SIZE_IDX;
        usable_arenas->pool_address += POOL_SIZE;
        --usable_arenas->nfreepools;

        if (usable_arenas->nfreepools == 0) {
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            /* Unlink the arena:  it is completely allocated. */
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
    }

    /* Frontlink to used pools. */
    pymem_block *bp;
    poolp next = usedpools[size + size]; /* == prev */
    pool->nextpool = next;
    pool->prevpool = next;
    next->nextpool = pool;
    next->prevpool = pool;
    pool->ref.count = 1;
    if (pool->szidx == size) {
        /* Luckily, this pool last contained blocks
         * of the same size class, so its header
         * and free list are already initialized.
         */
        bp = pool->freeblock;
        assert(bp != NULL);
        pool->freeblock = *(pymem_block **)bp;
        return bp;
    }
    /*
     * Initialize the pool header, set up the free list to
     * contain just the second block, and return the first
     * block.
     */
    pool->szidx = size;
    size = INDEX2SIZE(size);
    bp = (pymem_block *)pool + POOL_OVERHEAD;
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
    pool->maxnextoffset = POOL_SIZE - size;
    pool->freeblock = bp + size;
    *(pymem_block **)(pool->freeblock) = NULL;
    return bp;
}
2477
2478
/* pymalloc allocator

   Return a pointer to newly allocated memory if pymalloc allocated memory.

   Return NULL if pymalloc failed to allocate the memory block: on bigger
   requests, on error in the code below (as a last chance to serve the request)
   or when the max memory limit has been reached.
*/
static inline void*
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
{
#ifdef WITH_VALGRIND
    /* Under valgrind, bypass pymalloc entirely so valgrind can track
     * every allocation (running_on_valgrind == -1 means "not yet probed"). */
    if (UNLIKELY(running_on_valgrind == -1)) {
        running_on_valgrind = RUNNING_ON_VALGRIND;
    }
    if (UNLIKELY(running_on_valgrind)) {
        return NULL;
    }
#endif

    if (UNLIKELY(nbytes == 0)) {
        return NULL;
    }
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
        return NULL;
    }

    /* Map the byte count to its size-class index. */
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
    poolp pool = usedpools[size + size];
    pymem_block *bp;

    if (LIKELY(pool != pool->nextpool)) {
        /*
         * There is a used pool for this size class.
         * Pick up the head block of its free list.
         */
        ++pool->ref.count;
        bp = pool->freeblock;
        assert(bp != NULL);

        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
            // Reached the end of the free list, try to extend it.
            pymalloc_pool_extend(pool, size);
        }
    }
    else {
        /* There isn't a pool of the right size class immediately
         * available:  use a free pool.
         */
        bp = allocate_from_new_pool(state, size);
    }

    return (void *)bp;
}
2532
2533
2534
void *
2535
_PyObject_Malloc(void *ctx, size_t nbytes)
2536
2.00G
{
2537
2.00G
    OMState *state = get_state();
2538
2.00G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2539
2.00G
    if (LIKELY(ptr != NULL)) {
2540
1.74G
        return ptr;
2541
1.74G
    }
2542
2543
250M
    ptr = PyMem_RawMalloc(nbytes);
2544
250M
    if (ptr != NULL) {
2545
250M
        raw_allocated_blocks++;
2546
250M
    }
2547
250M
    return ptr;
2548
2.00G
}
2549
2550
2551
void *
2552
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2553
46.2M
{
2554
46.2M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2555
46.2M
    size_t nbytes = nelem * elsize;
2556
2557
46.2M
    OMState *state = get_state();
2558
46.2M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2559
46.2M
    if (LIKELY(ptr != NULL)) {
2560
46.1M
        memset(ptr, 0, nbytes);
2561
46.1M
        return ptr;
2562
46.1M
    }
2563
2564
152k
    ptr = PyMem_RawCalloc(nelem, elsize);
2565
152k
    if (ptr != NULL) {
2566
152k
        raw_allocated_blocks++;
2567
152k
    }
2568
152k
    return ptr;
2569
46.2M
}
2570
2571
2572
static void
2573
insert_to_usedpool(OMState *state, poolp pool)
2574
194M
{
2575
194M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2576
2577
194M
    uint size = pool->szidx;
2578
194M
    poolp next = usedpools[size + size];
2579
194M
    poolp prev = next->prevpool;
2580
2581
    /* insert pool before next:   prev <-> pool <-> next */
2582
194M
    pool->nextpool = next;
2583
194M
    pool->prevpool = prev;
2584
194M
    next->prevpool = pool;
2585
194M
    prev->nextpool = pool;
2586
194M
}
2587
2588
/* Move a now-empty 'pool' from its usedpools[] list onto its arena's
 * freepools list, then rebalance arena bookkeeping: usable_arenas is
 * kept sorted by ascending nfreepools, and nfp2lasta[n] caches the
 * rightmost usable arena with exactly n free pools.  Wholly-free
 * arenas (except the last one) are returned to the system. */
static void
insert_to_freepool(OMState *state, poolp pool)
{
    /* Unlink pool from its (doubly-linked) usedpools[] list. */
    poolp next = pool->nextpool;
    poolp prev = pool->prevpool;
    next->prevpool = prev;
    prev->nextpool = next;

    /* Link the pool to freepools.  This is a singly-linked
     * list, and pool->prevpool isn't used there.
     */
    struct arena_object *ao = &allarenas[pool->arenaindex];
    pool->nextpool = ao->freepools;
    ao->freepools = pool;
    uint nf = ao->nfreepools;
    /* If this is the rightmost arena with this number of free pools,
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
     * are no arenas in usable_arenas with that value.
     */
    struct arena_object* lastnf = nfp2lasta[nf];
    assert((nf == 0 && lastnf == NULL) ||
           (nf > 0 &&
            lastnf != NULL &&
            lastnf->nfreepools == nf &&
            (lastnf->nextarena == NULL ||
             nf < lastnf->nextarena->nfreepools)));
    if (lastnf == ao) {  /* it is the rightmost */
        /* The new rightmost arena with nf free pools is ao's left
         * neighbor, if that neighbor also has exactly nf free pools. */
        struct arena_object* p = ao->prevarena;
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
    }
    ao->nfreepools = ++nf;

    /* All the rest is arena management.  We just freed
     * a pool, and there are 4 cases for arena mgmt:
     * 1. If all the pools are free, return the arena to
     *    the system free().  Except if this is the last
     *    arena in the list, keep it to avoid thrashing:
     *    keeping one wholly free arena in the list avoids
     *    pathological cases where a simple loop would
     *    otherwise provoke needing to allocate and free an
     *    arena on every iteration.  See bpo-37257.
     * 2. If this is the only free pool in the arena,
     *    add the arena back to the `usable_arenas` list.
     * 3. If the "next" arena has a smaller count of free
     *    pools, we have to "slide this arena right" to
     *    restore that usable_arenas is sorted in order of
     *    nfreepools.
     * 4. Else there's nothing more to do.
     */
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
        /* Case 1.  First unlink ao from usable_arenas.
         */
        assert(ao->prevarena == NULL ||
               ao->prevarena->address != 0);
        assert(ao ->nextarena == NULL ||
               ao->nextarena->address != 0);

        /* Fix the pointer in the prevarena, or the
         * usable_arenas pointer.
         */
        if (ao->prevarena == NULL) {
            usable_arenas = ao->nextarena;
            assert(usable_arenas == NULL ||
                   usable_arenas->address != 0);
        }
        else {
            assert(ao->prevarena->nextarena == ao);
            ao->prevarena->nextarena =
                ao->nextarena;
        }
        /* Fix the pointer in the nextarena. */
        if (ao->nextarena != NULL) {
            assert(ao->nextarena->prevarena == ao);
            ao->nextarena->prevarena =
                ao->prevarena;
        }
        /* Record that this arena_object slot is
         * available to be reused.
         */
        ao->nextarena = unused_arena_objects;
        unused_arena_objects = ao;

#if WITH_PYMALLOC_RADIX_TREE
        /* mark arena region as not under control of obmalloc */
        arena_map_mark_used(state, ao->address, 0);
#endif

        /* Free the entire arena. */
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
        ao->address = 0;                        /* mark unassociated */
        --narenas_currently_allocated;

        return;
    }

    if (nf == 1) {
        /* Case 2.  Put ao at the head of
         * usable_arenas.  Note that because
         * ao->nfreepools was 0 before, ao isn't
         * currently on the usable_arenas list.
         */
        ao->nextarena = usable_arenas;
        ao->prevarena = NULL;
        if (usable_arenas)
            usable_arenas->prevarena = ao;
        usable_arenas = ao;
        assert(usable_arenas->address != 0);
        if (nfp2lasta[1] == NULL) {
            nfp2lasta[1] = ao;
        }

        return;
    }

    /* If this arena is now out of order, we need to keep
     * the list sorted.  The list is kept sorted so that
     * the "most full" arenas are used first, which allows
     * the nearly empty arenas to be completely freed.  In
     * a few un-scientific tests, it seems like this
     * approach allowed a lot more memory to be freed.
     */
    /* If this is the only arena with nf, record that. */
    if (nfp2lasta[nf] == NULL) {
        nfp2lasta[nf] = ao;
    } /* else the rightmost with nf doesn't change */
    /* If this was the rightmost of the old size, it remains in place. */
    if (ao == lastnf) {
        /* Case 4.  Nothing to do. */
        return;
    }
    /* If ao were the only arena in the list, the last block would have
     * gotten us out.
     */
    assert(ao->nextarena != NULL);

    /* Case 3:  We have to move the arena towards the end of the list,
     * because it has more free pools than the arena to its right.  It needs
     * to move to follow lastnf.
     * First unlink ao from usable_arenas.
     */
    if (ao->prevarena != NULL) {
        /* ao isn't at the head of the list */
        assert(ao->prevarena->nextarena == ao);
        ao->prevarena->nextarena = ao->nextarena;
    }
    else {
        /* ao is at the head of the list */
        assert(usable_arenas == ao);
        usable_arenas = ao->nextarena;
    }
    ao->nextarena->prevarena = ao->prevarena;
    /* And insert after lastnf. */
    ao->prevarena = lastnf;
    ao->nextarena = lastnf->nextarena;
    if (ao->nextarena != NULL) {
        ao->nextarena->prevarena = ao;
    }
    lastnf->nextarena = ao;
    /* Verify that the swaps worked. */
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
    assert((usable_arenas == ao && ao->prevarena == NULL)
           || ao->prevarena->nextarena == ao);
}
2754
2755
/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
{
    assert(p != NULL);

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    poolp pool = POOL_ADDR(p);
    if (UNLIKELY(!address_in_range(state, p, pool))) {
        /* Not a pymalloc address: tell the caller to use the raw free. */
        return 0;
    }
    /* We allocated this address. */

    /* Link p to the start of the pool's freeblock list.  Since
     * the pool had at least the p block outstanding, the pool
     * wasn't empty (so it's already in a usedpools[] list, or
     * was full and is in no list -- it's not in the freeblocks
     * list in any case).
     */
    assert(pool->ref.count > 0);            /* else it was empty */
    pymem_block *lastfree = pool->freeblock;
    /* The first word of the freed block is reused as the free-list link. */
    *(pymem_block **)p = lastfree;
    pool->freeblock = (pymem_block *)p;
    pool->ref.count--;

    if (UNLIKELY(lastfree == NULL)) {
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        insert_to_usedpool(state, pool);
        return 1;
    }

    /* freeblock wasn't NULL, so the pool wasn't full,
     * and the pool is in a usedpools[] list.
     */
    if (LIKELY(pool->ref.count != 0)) {
        /* pool isn't empty:  leave it in usedpools */
        return 1;
    }

    /* Pool is now empty:  unlink from usedpools, and
     * link to the front of freepools.  This ensures that
     * previously freed pools will be allocated later
     * (being not referenced, they are perhaps paged out).
     */
    insert_to_freepool(state, pool);
    return 1;
}
2814
2815
2816
void
2817
_PyObject_Free(void *ctx, void *p)
2818
2.04G
{
2819
    /* PyObject_Free(NULL) has no effect */
2820
2.04G
    if (p == NULL) {
2821
2.25M
        return;
2822
2.25M
    }
2823
2824
2.04G
    OMState *state = get_state();
2825
2.04G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2826
        /* pymalloc didn't allocate this address */
2827
250M
        PyMem_RawFree(p);
2828
250M
        raw_allocated_blocks--;
2829
250M
    }
2830
2.04G
}
2831
2832
2833
/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(OMState *state, void *ctx,
                 void **newptr_p, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(state, p, pool)) {
        /* pymalloc is not managing this block.

           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
           over this block.  However, if we do, we need to copy the valid data
           from the C-managed block to one of our blocks, and there's no
           portable way to know how much of the memory space starting at p is
           valid.

           As bug 1185883 pointed out the hard way, it's possible that the
           C-managed block is "at the end" of allocated VM space, so that a
           memory fault can occur if we try to copy nbytes bytes starting at p.
           Instead we punt: let C continue to manage this block. */
        return 0;
    }

    /* pymalloc is in charge of this block */
    size = INDEX2SIZE(pool->szidx);
    if (nbytes <= size) {
        /* The block is staying the same or shrinking.

           If it's shrinking, there's a tradeoff: it costs cycles to copy the
           block to a smaller size class, but it wastes memory not to copy it.

           The compromise here is to copy on shrink only if at least 25% of
           size can be shaved off. */
        if (4 * nbytes > 3 * size) {
            /* It's the same, or shrinking and new/old > 3/4. */
            *newptr_p = p;
            return 1;
        }
        /* Shrinking enough to be worth a move: only copy nbytes below. */
        size = nbytes;
    }

    bp = _PyObject_Malloc(ctx, nbytes);
    if (bp != NULL) {
        memcpy(bp, p, size);
        _PyObject_Free(ctx, p);
    }
    /* On allocation failure bp is NULL; report it via *newptr_p while the
       original block p remains valid and untouched. */
    *newptr_p = bp;
    return 1;
}
2902
2903
2904
void *
2905
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2906
338M
{
2907
338M
    void *ptr2;
2908
2909
338M
    if (ptr == NULL) {
2910
237M
        return _PyObject_Malloc(ctx, nbytes);
2911
237M
    }
2912
2913
101M
    OMState *state = get_state();
2914
101M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2915
92.0M
        return ptr2;
2916
92.0M
    }
2917
2918
9.00M
    return PyMem_RawRealloc(ptr, nbytes);
2919
101M
}
2920
2921
#else   /* ! WITH_PYMALLOC */
2922
2923
/*==========================================================================*/
2924
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2925
 * only be used by extensions that are compiled with pymalloc enabled. */
2926
2927
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc disabled: no per-interpreter block accounting is done. */
    return 0;
}
2932
2933
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    /* pymalloc disabled: no global block accounting is done. */
    return 0;
}
2938
2939
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc disabled: nothing to finalize. */
    return;
}
2944
2945
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
{
    /* pymalloc disabled: nothing to finalize. */
    return;
}
2950
2951
#endif /* WITH_PYMALLOC */
2952
2953
2954
/*==========================================================================*/
2955
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2956
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2957
 */
2958
2959
/* Uncomment this define to add the "serialno" field */
2960
/* #define PYMEM_DEBUG_SERIALNO */
2961
2962
#ifdef PYMEM_DEBUG_SERIALNO
2963
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2964
2965
/* serialno is always incremented via calling this routine.  The point is
2966
 * to supply a single place to set a breakpoint.
2967
 */
2968
static void
bumpserialno(void)
{
    /* The single increment site: set a debugger breakpoint here to
     * catch the allocation with a given serial number. */
    ++serialno;
}
2973
#endif
2974
2975
0
#define SST SIZEOF_SIZE_T
2976
2977
#ifdef PYMEM_DEBUG_SERIALNO
2978
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2979
#else
2980
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2981
#endif
2982
2983
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2984
static size_t
2985
read_size_t(const void *p)
2986
0
{
2987
0
    const uint8_t *q = (const uint8_t *)p;
2988
0
    size_t result = *q++;
2989
0
    int i;
2990
2991
0
    for (i = SST; --i > 0; ++q)
2992
0
        result = (result << 8) | *q;
2993
0
    return result;
2994
0
}
2995
2996
/* Write n as a big-endian size_t, MSB at address p, LSB at
2997
 * p + sizeof(size_t) - 1.
2998
 */
2999
static void
3000
write_size_t(void *p, size_t n)
3001
0
{
3002
0
    uint8_t *q = (uint8_t *)p + SST - 1;
3003
0
    int i;
3004
3005
0
    for (i = SST; --i >= 0; --q) {
3006
0
        *q = (uint8_t)(n & 0xff);
3007
0
        n >>= 8;
3008
0
    }
3009
0
}
3010
3011
/* Fill 'nbytes' of the user data area at 'data' with byte 'c'
 * (PYMEM_CLEANBYTE on allocation, PYMEM_DEADBYTE on free).  In the
 * free-threaded build, object allocations skip the heap's debug-offset
 * prefix so object headers are not clobbered. */
static void
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
               bool is_alloc)
{
#ifdef Py_GIL_DISABLED
    if (api->api_id == 'o') {
        // Don't overwrite the first few bytes of a PyObject allocation in the
        // free-threaded build
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
        size_t debug_offset;
        if (is_alloc) {
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
        }
        else {
            char *alloc = (char *)data - 2*SST;  // start of the allocation
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
        }
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
        if (debug_offset < nbytes) {
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
        }
        return;
    }
#endif
    memset(data, c, nbytes);
}
3037
3038
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
3039
   fills them with useful stuff, here calling the underlying malloc's result p:
3040
3041
p[0: S]
3042
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
3043
    to read in a memory dump).
3044
p[S]
3045
    API ID.  See PEP 445.  This is a character, but seems undocumented.
3046
p[S+1: 2*S]
3047
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
3048
p[2*S: 2*S+n]
3049
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
3050
    Used to catch reference to uninitialized memory.
3051
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
3052
    handled the request itself.
3053
p[2*S+n: 2*S+n+S]
3054
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
3055
p[2*S+n+S: 2*S+n+2*S]
3056
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
3057
    and _PyMem_DebugRealloc.
3058
    This is a big-endian size_t.
3059
    If "bad memory" is detected later, the serial number gives an
3060
    excellent way to set a breakpoint on the next run, to capture the
3061
    instant at which this block was passed out.
3062
3063
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
3064
for 3 * S extra bytes, and omits the last serialno field.
3065
*/
3066
3067
/* Shared implementation of the debug malloc/calloc: allocate
 * nbytes + PYMEM_DEBUG_EXTRA_BYTES, lay down the header/trailer
 * decorations described above, and return a pointer to the user data
 * area.  Returns NULL on overflow or allocation failure. */
static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *p;           /* base address of malloc'ed pad block */
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */

    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                ^--- p    ^--- data   ^--- tail
       S: nbytes stored as size_t
       I: API identifier (1 byte)
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
       C: Clean bytes used later to store actual data
       N: Serial number stored as size_t

       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
       is omitted. */

    if (use_calloc) {
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
    }
    else {
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
    }
    if (p == NULL) {
        return NULL;
    }
    data = p + 2*SST;

#ifdef PYMEM_DEBUG_SERIALNO
    bumpserialno();
#endif

    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
    write_size_t(p, nbytes);
    p[SST] = (uint8_t)api->api_id;
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    /* calloc already zeroed the data area, so only fill for malloc. */
    if (nbytes > 0 && !use_calloc) {
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
    }

    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, serialno);
#endif

    return data;
}
3126
3127
void *
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{
    /* use_calloc=0: the data area is filled with PYMEM_CLEANBYTE,
     * not zeroed. */
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
3132
3133
void *
3134
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3135
0
{
3136
0
    size_t nbytes;
3137
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3138
0
    nbytes = nelem * elsize;
3139
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3140
0
}
3141
3142
3143
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3144
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3145
   Then fills the original bytes with PYMEM_DEADBYTE.
3146
   Then calls the underlying free.
3147
*/
3148
void
_PyMem_DebugRawFree(void *ctx, void *p)
{
    /* PyMem_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    /* Validate the forbidden bytes and API id before touching anything. */
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
    nbytes = read_size_t(q);
    /* Dead-fill covers the user data plus the trailing pad (and serialno
     * when enabled); the 2*SST header is cleared separately below. */
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
    memset(q, PYMEM_DEADBYTE, 2*SST);
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
    api->alloc.free(api->alloc.ctx, q);
}
3167
3168
3169
/* Debug realloc: validate the existing decorations, erase them (saving a
 * copy of up to 2*ERASED_SIZE user bytes so they can be restored if the
 * underlying realloc fails), resize, and rewrite header/trailer around
 * the (possibly moved) data area.  Returns the new data pointer, or
 * NULL on overflow/failure (the original block stays valid). */
void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
    if (p == NULL) {
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *head;        /* base address of malloc'ed pad block */
    uint8_t *data;        /* pointer to data bytes */
    uint8_t *r;
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
    size_t original_nbytes;
#define ERASED_SIZE 64

    _PyMem_DebugCheckAddress(__func__, api->api_id, p);

    data = (uint8_t *)p;
    head = data - 2*SST;
    original_nbytes = read_size_t(head);
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    tail = data + original_nbytes;
#ifdef PYMEM_DEBUG_SERIALNO
    size_t block_serialno = read_size_t(tail + SST);
#endif
#ifndef Py_GIL_DISABLED
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
     */
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
    if (original_nbytes <= sizeof(save)) {
        /* Small block: save it all and dead-fill the whole allocation. */
        memcpy(save, data, original_nbytes);
        memset(data - 2 * SST, PYMEM_DEADBYTE,
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
    }
    else {
        /* Large block: only the first and last ERASED_SIZE bytes are
           saved and dead-filled; the middle is left intact. */
        memcpy(save, data, ERASED_SIZE);
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
    }
#endif

    /* Resize and add decorations. */
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
    if (r == NULL) {
        /* if realloc() failed: rewrite header and footer which have
           just been erased */
        nbytes = original_nbytes;
    }
    else {
        head = r;
#ifdef PYMEM_DEBUG_SERIALNO
        bumpserialno();
        block_serialno = serialno;
#endif
    }
    data = head + 2*SST;

    write_size_t(head, nbytes);
    head[SST] = (uint8_t)api->api_id;
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, block_serialno);
#endif

#ifndef Py_GIL_DISABLED
    /* Restore saved bytes. */
    if (original_nbytes <= sizeof(save)) {
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
    }
    else {
        size_t i = original_nbytes - ERASED_SIZE;
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
        if (nbytes > i) {
            memcpy(data + i, &save[ERASED_SIZE],
                   Py_MIN(nbytes - i, ERASED_SIZE));
        }
    }
#endif

    if (r == NULL) {
        return NULL;
    }

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
               nbytes - original_nbytes);
    }

    return data;
}
3272
3273
/* Fatal-error if the debug allocator is entered without an attached
 * thread state (i.e. without holding the GIL in the default build). */
static inline void
_PyMem_DebugCheckGIL(const char *func)
{
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate == NULL) {
#ifndef Py_GIL_DISABLED
        _Py_FatalErrorFunc(func,
                           "Python memory allocator called "
                           "without holding the GIL");
#else
        _Py_FatalErrorFunc(func,
                           "Python memory allocator called "
                           "without an active thread state. "
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
#endif
    }
}
3290
3291
void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    /* Thread-state-checked entry point for the debug malloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawMalloc(ctx, nbytes);
}
3297
3298
void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{
    /* Thread-state-checked entry point for the debug calloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}
3304
3305
3306
void
_PyMem_DebugFree(void *ctx, void *ptr)
{
    /* Thread-state-checked entry point for the debug free. */
    _PyMem_DebugCheckGIL(__func__);
    _PyMem_DebugRawFree(ctx, ptr);
}
3312
3313
3314
void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
    /* Thread-state-checked entry point for the debug realloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
}
3320
3321
/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id, is also checked.
 */
static void
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
{
    assert(p != NULL);

    const uint8_t *q = (const uint8_t *)p;
    size_t nbytes;
    const uint8_t *tail;
    int i;
    char id;

    /* Check the API id */
    id = (char)q[-SST];
    if (id != api) {
        _PyObject_DebugDumpAddress(p);
        _Py_FatalErrorFormat(func,
                             "bad ID: Allocated using API '%c', "
                             "verified using API '%c'",
                             id, api);
    }

    /* Check the stuff at the start of p first:  if there's underwrite
     * corruption, the number-of-bytes field may be nuts, and checking
     * the tail could lead to a segfault then.
     */
    for (i = SST-1; i >= 1; --i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            _PyObject_DebugDumpAddress(p);
            _Py_FatalErrorFunc(func, "bad leading pad byte");
        }
    }

    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            _PyObject_DebugDumpAddress(p);
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
        }
    }
}
3367
3368
/* Display info to stderr about the memory block at p. */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    const uint8_t *tail;
    size_t nbytes;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
            PYMEM_FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uint8_t byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "       may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                PYMEM_FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uint8_t byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

#ifdef PYMEM_DEBUG_SERIALNO
    size_t serial = read_size_t(tail + SST);
    fprintf(stderr,
            "    The block was made by call #%zu to debug malloc/realloc.\n",
            serial);
#endif

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
    fputc('\n', stderr);

    fflush(stderr);
    _PyMem_DumpTraceback(fileno(stderr), p);
}
3476
3477
3478
/* Print one statistics line to `out`: `msg`, space-padded to column 35,
 * then "=", then `value` right-aligned in a 21-character field with
 * thousands separators and a trailing newline.  Returns `value` unchanged
 * so callers can accumulate running totals.
 */
static size_t
printone(FILE *out, const char* msg, size_t value)
{
    const size_t origvalue = value;

    /* Emit the label, space-padded out to column 35, then '='. */
    fputs(msg, out);
    for (int col = (int)strlen(msg); col < 35; ++col) {
        fputc(' ', out);
    }
    fputc('=', out);

    /* Build the numeric field backwards from the least significant digit,
     * inserting a comma after every completed group of three digits. */
    char field[100];
    int pos = 22;
    field[pos--] = '\0';
    field[pos--] = '\n';
    int group = 3;              /* digits left in the current 3-digit group */
    for (;;) {
        size_t rest = value / 10;
        field[pos--] = (char)('0' + (unsigned int)(value - rest * 10));
        value = rest;
        if (--group == 0 && value && pos >= 0) {
            field[pos--] = ',';
            group = 3;
        }
        if (!value || pos < 0) {
            break;
        }
    }
    /* Left-pad the remainder of the fixed-width field with spaces. */
    while (pos >= 0) {
        field[pos--] = ' ';
    }
    fputs(field, out);

    return origvalue;
}
3513
3514
/* Print a one-line summary for a fixed-size-block allocator: how many
 * blocks of `block_name` exist, how big each is, and the total byte count.
 * Output goes through printone() so it lines up with the other stats.
 */
void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char description[128];
    char label[128];

    /* e.g. "7 foos * 16 bytes each", right-justified in a 48-char label. */
    PyOS_snprintf(description, sizeof(description),
                  "%d %ss * %zd bytes each",
                  num_blocks, block_name, sizeof_block);
    PyOS_snprintf(label, sizeof(label), "%48s ", description);
    (void)printone(out, label, num_blocks * sizeof_block);
}
3527
3528
// Return true if the obmalloc state structure is heap allocated,
3529
// by PyMem_RawCalloc().  For the main interpreter, this structure
3530
// allocated in the BSS.  Allocating that way gives some memory savings
3531
// and a small performance win (at least on a demand paged OS).  On
3532
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3533
// memory is for the arena_map_top array.  Since normally only one entry
3534
// of that array is used, only one page of resident memory is actually
3535
// used, rather than the full 256 kB.
3536
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
/* Use #ifdef for consistency with every other WITH_PYMALLOC guard in this
   file (and so the check still works if the macro is defined without a
   value, which would make `#if WITH_PYMALLOC` a preprocessing error). */
#ifdef WITH_PYMALLOC
    /* The main interpreter (and subinterpreters that share its obmalloc)
       point at the statically allocated obmalloc_state_main; any other
       state was heap-allocated by _PyMem_init_obmalloc(). */
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
#else
    return false;
#endif
}
3544
3545
#ifdef WITH_PYMALLOC
3546
/* Initialize interp->obmalloc->pools (the usedpools table).  Must run
 * before any obmalloc alloc/free function is called for this state.
 */
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
    // initialize the obmalloc->pools structure.  This must be done
    // before the obmalloc alloc/free functions can be called.
    // _obmalloc_pools_INIT is used as an initializer expression, so the
    // table is built in a temporary array and then copied into place.
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
        _obmalloc_pools_INIT(interp->obmalloc->pools);
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
3555
#endif /* WITH_PYMALLOC */
3556
3557
/* Set up the obmalloc state for `interp`.
 *
 * The main interpreter (and any subinterpreter flagged to share its
 * allocator) uses the statically allocated obmalloc_state_main; all other
 * subinterpreters get a freshly calloc'ed state of their own.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
    /* Initialize obmalloc, but only for subinterpreters,
       since the main interpreter is initialized statically. */
    bool share_main = _Py_IsMainInterpreter(interp)
        || _PyInterpreterState_HasFeature(interp,
                                          Py_RTFLAGS_USE_MAIN_OBMALLOC);
    if (share_main) {
        interp->obmalloc = &obmalloc_state_main;
        /* The shared static state is initialized at most once. */
        if (!obmalloc_state_initialized) {
            init_obmalloc_pools(interp);
            obmalloc_state_initialized = true;
        }
        return 0;
    }

    /* Independent subinterpreter: give it its own heap-allocated state. */
    interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
    if (interp->obmalloc == NULL) {
        return -1;
    }
    init_obmalloc_pools(interp);
#endif /* WITH_PYMALLOC */
    return 0; // success
}
3580
3581
3582
#ifdef WITH_PYMALLOC
3583
3584
/* Release all memory held by this interpreter's obmalloc state: every
 * arena, the arena-pointer array, and the interior nodes of the arena
 * address radix tree.
 *
 * NOTE(review): `state` appears unused, but the maxarenas/allarenas/
 * arena_map_* identifiers below are presumably accessor macros that expand
 * to members of it -- confirm in pycore_obmalloc.h.
 */
static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
    OMState *state = interp->obmalloc;
    for (uint i = 0; i < maxarenas; ++i) {
        // free each obmalloc memory arena
        // NOTE(review): unallocated arena slots have address 0; this relies
        // on the arena free hook tolerating a NULL address -- confirm.
        struct arena_object *ao = &allarenas[i];
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
    }
    // free the array containing pointers to all arenas
    PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
    // Free the middle and bottom nodes of the radix tree.  These are allocated
    // by arena_map_mark_used() but not freed when arenas are freed.
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
         if (mid == NULL) {
             continue;
         }
         // Free every bottom node hanging off this mid node first.
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
            if (bot == NULL) {
                continue;
            }
            PyMem_RawFree(bot);
         }
         PyMem_RawFree(mid);
    }
#endif
#endif
}
3617
3618
#ifdef Py_DEBUG
3619
/* Is target in the list?  The list is traversed via the nextpool pointers.
3620
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3621
 * list, else 0.
3622
 */
3623
static int
3624
pool_is_in_list(const poolp target, poolp list)
3625
{
3626
    poolp origlist = list;
3627
    assert(target != NULL);
3628
    if (list == NULL)
3629
        return 0;
3630
    do {
3631
        if (target == list)
3632
            return 1;
3633
        list = list->nextpool;
3634
    } while (list != NULL && list != origlist);
3635
    return 0;
3636
}
3637
#endif
3638
3639
#ifdef WITH_MIMALLOC
3640
struct _alloc_stats {
3641
    size_t allocated_blocks;
3642
    size_t allocated_bytes;
3643
    size_t allocated_with_overhead;
3644
    size_t bytes_reserved;
3645
    size_t bytes_committed;
3646
};
3647
3648
static bool _collect_alloc_stats(
3649
    const mi_heap_t* heap, const mi_heap_area_t* area,
3650
    void* block, size_t block_size, void* arg)
3651
0
{
3652
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3653
0
    stats->allocated_blocks += area->used;
3654
0
    stats->allocated_bytes += area->used * area->block_size;
3655
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3656
0
    stats->bytes_reserved += area->reserved;
3657
0
    stats->bytes_committed += area->committed;
3658
0
    return 1;
3659
0
}
3660
3661
static void
3662
py_mimalloc_print_stats(FILE *out)
3663
0
{
3664
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3665
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3666
0
    fprintf(out, "Medium block threshold = %zu\n",
3667
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3668
0
    fprintf(out, "Large object max size = %zu\n",
3669
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3670
3671
0
    mi_heap_t *heap = mi_heap_get_default();
3672
0
    struct _alloc_stats stats;
3673
0
    memset(&stats, 0, sizeof(stats));
3674
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3675
3676
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3677
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3678
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3679
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3680
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3681
0
}
3682
#endif
3683
3684
3685
/* Print a full pymalloc usage report to `out`: per-size-class pool/block
 * counts, arena counts, and a byte-level accounting of how every arena
 * byte is used (allocated, available, pool headers, quantization waste,
 * alignment padding).  In Py_DEBUG builds this also cross-checks the pool
 * lists via pool_is_in_list().
 */
static void
pymalloc_print_stats(FILE *out)
{
    /* NOTE(review): maxarenas/allarenas/usedpools/narenas_* below are
       presumably accessor macros over `state` -- confirm in
       pycore_obmalloc.h. */
    OMState *state = get_state();

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    /* Zero the per-class counters before the arena walk. */
    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uintptr_t base = allarenas[i].address;

        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += allarenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            /* The partial pool before the first aligned boundary is pure
             * alignment padding. */
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena, up to the high-water mark of
         * pools carved out so far (pool_address) */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, allarenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            /* A pool with free blocks must be on the used list for its
             * class; the table index 2*sz mirrors usedpools' layout. */
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    /* Per-size-class table. */
    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            /* No pools for this class: it must have no blocks either. */
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        /* Space at the end of each pool too small for another block. */
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    /* The categories above must exactly account for every arena byte. */
    assert(narenas * ARENA_SIZE == total);

#if WITH_PYMALLOC_RADIX_TREE
    /* Radix-tree (arena address map) overhead. */
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

}
3841
3842
/* Print summary info to "out" about the state of pymalloc's structures.
3843
 * In Py_DEBUG mode, also perform some expensive internal consistency
3844
 * checks.
3845
 *
3846
 * Return 0 if the memory debug hooks are not installed or no statistics was
3847
 * written into out, return 1 otherwise.
3848
 */
3849
int
_PyObject_DebugMallocStats(FILE *out)
{
    /* Dispatch to whichever allocator is active at runtime; report
     * whether anything was written. */
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        py_mimalloc_print_stats(out);
        return 1;
    }
#endif
    if (_PyMem_PymallocEnabled()) {
        pymalloc_print_stats(out);
        return 1;
    }
    return 0;
}
3867
3868
#endif /* #ifdef WITH_PYMALLOC */