Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#ifdef WITH_MIMALLOC
18
// Forward declarations of functions used in our mimalloc modifications
19
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
20
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
21
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
22
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
23
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
24
#  include "pycore_mimalloc.h"
25
#  include "mimalloc/static.c"
26
#  include "mimalloc/internal.h"  // for stats
27
#endif
28
29
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
30
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
31
#endif
32
33
#undef  uint
34
1.92G
#define uint pymem_uint
35
36
37
/* Defined in tracemalloc.c */
38
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
39
40
static void _PyObject_DebugDumpAddress(const void *p);
41
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
42
43
44
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
45
static void set_up_debug_hooks_unlocked(void);
46
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
47
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
49
50
/***************************************/
51
/* low-level allocator implementations */
52
/***************************************/
53
54
/* the default raw allocator (wraps malloc) */
55
56
void *
57
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
58
245M
{
59
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
60
       for malloc(0), which would be treated as an error. Some platforms would
61
       return a pointer with no memory behind it, which would break pymalloc.
62
       To solve these problems, allocate an extra byte. */
63
245M
    if (size == 0)
64
37.4M
        size = 1;
65
245M
    return malloc(size);
66
245M
}
67
68
void *
69
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
70
60.8k
{
71
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
72
       for calloc(0, 0), which would be treated as an error. Some platforms
73
       would return a pointer with no memory behind it, which would break
74
       pymalloc.  To solve these problems, allocate an extra byte. */
75
60.8k
    if (nelem == 0 || elsize == 0) {
76
2
        nelem = 1;
77
2
        elsize = 1;
78
2
    }
79
60.8k
    return calloc(nelem, elsize);
80
60.8k
}
81
82
void *
83
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
84
9.05M
{
85
9.05M
    if (size == 0)
86
0
        size = 1;
87
9.05M
    return realloc(ptr, size);
88
9.05M
}
89
90
void
91
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
92
245M
{
93
245M
    free(ptr);
94
245M
}
95
96
#ifdef WITH_MIMALLOC
97
98
static void
99
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
100
0
{
101
#ifdef Py_GIL_DISABLED
102
    // Clear the QSBR goal and remove the page from the QSBR linked list.
103
    page->qsbr_goal = 0;
104
    if (page->qsbr_node.next != NULL) {
105
        llist_remove(&page->qsbr_node);
106
    }
107
#endif
108
0
}
109
110
// Check if an empty, newly reclaimed page is safe to free now.
111
static bool
112
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
113
0
{
114
0
    assert(mi_page_all_free(page));
115
#ifdef Py_GIL_DISABLED
116
    assert(page->qsbr_node.next == NULL);
117
    if (page->use_qsbr && page->qsbr_goal != 0) {
118
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
119
        if (tstate == NULL) {
120
            return false;
121
        }
122
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
123
    }
124
#endif
125
0
    return true;
126
127
0
}
128
129
#ifdef Py_GIL_DISABLED
130
131
// If we are deferring collection of more than this amount of memory for
132
// mimalloc pages, advance the write sequence.  Advancing allows these
133
// pages to be re-used in a different thread or for a different size class.
134
#define QSBR_PAGE_MEM_LIMIT 4096*20
135
136
// Return true if the global write sequence should be advanced for a mimalloc
137
// page that is deferred from collection.
138
static bool
139
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
140
{
141
    size_t bsize = mi_page_block_size(page);
142
    size_t page_size = page->capacity*bsize;
143
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
144
        qsbr->deferred_page_memory = 0;
145
        return true;
146
    }
147
    qsbr->deferred_page_memory += page_size;
148
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
149
        qsbr->deferred_page_memory = 0;
150
        return true;
151
    }
152
    return false;
153
}
154
#endif
155
156
static bool
157
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
158
0
{
159
#ifdef Py_GIL_DISABLED
160
    assert(mi_page_all_free(page));
161
    if (page->use_qsbr) {
162
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
163
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
164
            _PyMem_mi_page_clear_qsbr(page);
165
            _mi_page_free(page, pq, force);
166
            return true;
167
        }
168
169
        _PyMem_mi_page_clear_qsbr(page);
170
        page->retire_expire = 0;
171
172
        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
173
            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
174
        }
175
        else {
176
            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
177
        }
178
179
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
180
        return false;
181
    }
182
#endif
183
0
    _mi_page_free(page, pq, force);
184
0
    return true;
185
0
}
186
187
static void
188
_PyMem_mi_page_reclaimed(mi_page_t *page)
189
0
{
190
#ifdef Py_GIL_DISABLED
191
    assert(page->qsbr_node.next == NULL);
192
    if (page->qsbr_goal != 0) {
193
        if (mi_page_all_free(page)) {
194
            assert(page->qsbr_node.next == NULL);
195
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
196
            page->retire_expire = 0;
197
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
198
        }
199
        else {
200
            page->qsbr_goal = 0;
201
        }
202
    }
203
#endif
204
0
}
205
206
static void
207
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    if (!heap->page_use_qsbr) {
211
        return;
212
    }
213
214
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
215
    struct llist_node *head = &tstate->mimalloc.page_list;
216
    if (llist_empty(head)) {
217
        return;
218
    }
219
220
    struct llist_node *node;
221
    llist_for_each_safe(node, head) {
222
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
223
        if (!mi_page_all_free(page)) {
224
            // We allocated from this page some point after the delayed free
225
            _PyMem_mi_page_clear_qsbr(page);
226
            continue;
227
        }
228
229
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
230
            return;
231
        }
232
233
        _PyMem_mi_page_clear_qsbr(page);
234
        _mi_page_free(page, mi_page_queue_of(page), false);
235
    }
236
#endif
237
0
}
238
239
void *
240
_PyMem_MiMalloc(void *ctx, size_t size)
241
0
{
242
#ifdef Py_GIL_DISABLED
243
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
244
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
245
    return mi_heap_malloc(heap, size);
246
#else
247
0
    return mi_malloc(size);
248
0
#endif
249
0
}
250
251
void *
252
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
253
0
{
254
#ifdef Py_GIL_DISABLED
255
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
256
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
257
    return mi_heap_calloc(heap, nelem, elsize);
258
#else
259
0
    return mi_calloc(nelem, elsize);
260
0
#endif
261
0
}
262
263
void *
264
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
265
0
{
266
#ifdef Py_GIL_DISABLED
267
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
268
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
269
    return mi_heap_realloc(heap, ptr, size);
270
#else
271
0
    return mi_realloc(ptr, size);
272
0
#endif
273
0
}
274
275
void
276
_PyMem_MiFree(void *ctx, void *ptr)
277
0
{
278
0
    mi_free(ptr);
279
0
}
280
281
void *
282
_PyObject_MiMalloc(void *ctx, size_t nbytes)
283
0
{
284
#ifdef Py_GIL_DISABLED
285
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
286
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
287
    return mi_heap_malloc(heap, nbytes);
288
#else
289
0
    return mi_malloc(nbytes);
290
0
#endif
291
0
}
292
293
void *
294
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
295
0
{
296
#ifdef Py_GIL_DISABLED
297
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
298
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
299
    return mi_heap_calloc(heap, nelem, elsize);
300
#else
301
0
    return mi_calloc(nelem, elsize);
302
0
#endif
303
0
}
304
305
306
void *
307
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
308
0
{
309
#ifdef Py_GIL_DISABLED
310
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
311
    // Implement our own realloc logic so that we can copy PyObject header
312
    // in a thread-safe way.
313
    size_t size = mi_usable_size(ptr);
314
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
315
        return ptr;
316
    }
317
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    void* newp = mi_heap_malloc(heap, nbytes);
320
    if (newp == NULL) {
321
        return NULL;
322
    }
323
324
    // Free threaded Python allows access from other threads to the PyObject reference count
325
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
326
    // These fields are typically initialized by PyObject_Init() using relaxed
327
    // atomic stores. We need to copy these fields in a thread-safe way here.
328
    // We use the "debug_offset" to determine how many bytes to copy -- it
329
    // includes the PyObject header and plus any extra pre-headers.
330
    size_t offset = heap->debug_offset;
331
    assert(offset % sizeof(void*) == 0);
332
333
    size_t copy_size = (size < nbytes ? size : nbytes);
334
    if (copy_size >= offset) {
335
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
336
            // Use memcpy to avoid strict-aliasing issues. However, we probably
337
            // still have unavoidable strict-aliasing issues with
338
            // _Py_atomic_store_ptr_relaxed here.
339
            void *word;
340
            memcpy(&word, (char*)ptr + i, sizeof(void*));
341
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
342
        }
343
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
344
    }
345
    else {
346
        _mi_memcpy(newp, ptr, copy_size);
347
    }
348
    mi_free(ptr);
349
    return newp;
350
#else
351
0
    return mi_realloc(ptr, nbytes);
352
0
#endif
353
0
}
354
355
void
356
_PyObject_MiFree(void *ctx, void *ptr)
357
0
{
358
0
    mi_free(ptr);
359
0
}
360
361
void *
362
_PyMem_MiRawMalloc(void *ctx, size_t size)
363
0
{
364
0
    return mi_malloc(size);
365
0
}
366
367
void *
368
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
369
0
{
370
0
    return mi_calloc(nelem, elsize);
371
0
}
372
373
void *
374
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
375
0
{
376
0
    return mi_realloc(ptr, size);
377
0
}
378
379
void
380
_PyMem_MiRawFree(void *ctx, void *ptr)
381
0
{
382
0
    mi_free(ptr);
383
0
}
384
#endif // WITH_MIMALLOC
385
386
387
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
388
389
390
#ifdef WITH_MIMALLOC
391
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
392
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
393
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
394
#endif
395
396
/* the pymalloc allocator */
397
398
// The actual implementation is further down.
399
400
#if defined(WITH_PYMALLOC)
401
void* _PyObject_Malloc(void *ctx, size_t size);
402
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
403
void _PyObject_Free(void *ctx, void *p);
404
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
405
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
406
#endif  // WITH_PYMALLOC
407
408
#if defined(Py_GIL_DISABLED)
409
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
410
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
411
#  define PYMEM_ALLOC MIMALLOC_ALLOC
412
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
413
#elif defined(WITH_PYMALLOC)
414
0
#  define PYRAW_ALLOC MALLOC_ALLOC
415
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
416
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
417
#else
418
#  define PYRAW_ALLOC MALLOC_ALLOC
419
#  define PYMEM_ALLOC MALLOC_ALLOC
420
#  define PYOBJ_ALLOC MALLOC_ALLOC
421
#endif
422
423
424
/* the default debug allocators */
425
426
// The actual implementation is further down.
427
428
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
429
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
430
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
431
void _PyMem_DebugRawFree(void *ctx, void *ptr);
432
433
void* _PyMem_DebugMalloc(void *ctx, size_t size);
434
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
435
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
436
void _PyMem_DebugFree(void *ctx, void *p);
437
438
#define PYDBGRAW_ALLOC \
439
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
440
#define PYDBGMEM_ALLOC \
441
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
442
#define PYDBGOBJ_ALLOC \
443
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
444
445
/* default raw allocator (not swappable) */
446
447
void *
448
_PyMem_DefaultRawMalloc(size_t size)
449
256
{
450
#ifdef Py_DEBUG
451
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
452
#else
453
256
    return _PyMem_RawMalloc(NULL, size);
454
256
#endif
455
256
}
456
457
void *
458
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
459
0
{
460
#ifdef Py_DEBUG
461
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
462
#else
463
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
464
0
#endif
465
0
}
466
467
void *
468
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
469
0
{
470
#ifdef Py_DEBUG
471
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
472
#else
473
0
    return _PyMem_RawRealloc(NULL, ptr, size);
474
0
#endif
475
0
}
476
477
void
478
_PyMem_DefaultRawFree(void *ptr)
479
288
{
480
#ifdef Py_DEBUG
481
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
482
#else
483
288
    _PyMem_RawFree(NULL, ptr);
484
288
#endif
485
288
}
486
487
wchar_t*
488
_PyMem_DefaultRawWcsdup(const wchar_t *str)
489
192
{
490
192
    assert(str != NULL);
491
492
192
    size_t len = wcslen(str);
493
192
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
494
0
        return NULL;
495
0
    }
496
497
192
    size_t size = (len + 1) * sizeof(wchar_t);
498
192
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
499
192
    if (str2 == NULL) {
500
0
        return NULL;
501
0
    }
502
503
192
    memcpy(str2, str, size);
504
192
    return str2;
505
192
}
506
507
/* the low-level virtual memory allocator */
508
509
#ifdef WITH_PYMALLOC
510
#  ifdef MS_WINDOWS
511
#    include <windows.h>
512
#  elif defined(HAVE_MMAP)
513
#    include <sys/mman.h>
514
#    ifdef MAP_ANONYMOUS
515
#      define ARENAS_USE_MMAP
516
#    endif
517
#  endif
518
#endif
519
520
/* Return the system's default huge page size in bytes, or 0 if it
521
 * cannot be determined.  The result is cached after the first call.
522
 *
523
 * This is Linux-only (/proc/meminfo).  On other systems that define
524
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
525
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
526
static size_t
527
_pymalloc_system_hugepage_size(void)
528
{
529
    static size_t hp_size = 0;
530
    static int initialized = 0;
531
532
    if (initialized) {
533
        return hp_size;
534
    }
535
536
#ifdef __linux__
537
    FILE *f = fopen("/proc/meminfo", "r");
538
    if (f != NULL) {
539
        char line[256];
540
        while (fgets(line, sizeof(line), f)) {
541
            unsigned long size_kb;
542
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
543
                hp_size = (size_t)size_kb * 1024;
544
                break;
545
            }
546
        }
547
        fclose(f);
548
    }
549
#endif
550
551
    initialized = 1;
552
    return hp_size;
553
}
554
#endif
555
556
void *
557
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
558
258k
{
559
#ifdef MS_WINDOWS
560
#  ifdef PYMALLOC_USE_HUGEPAGES
561
    if (_PyRuntime.allocators.use_hugepages) {
562
        SIZE_T lp_size = GetLargePageMinimum();
563
        if (lp_size > 0 && size % lp_size == 0) {
564
            void *ptr = VirtualAlloc(NULL, size,
565
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
566
                            PAGE_READWRITE);
567
            if (ptr != NULL)
568
                return ptr;
569
        }
570
    }
571
    /* Fall back to regular pages */
572
#  endif
573
    return VirtualAlloc(NULL, size,
574
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
575
#elif defined(ARENAS_USE_MMAP)
576
    void *ptr;
577
#  ifdef PYMALLOC_USE_HUGEPAGES
578
#    ifdef MAP_HUGETLB
579
    if (_PyRuntime.allocators.use_hugepages) {
580
        size_t hp_size = _pymalloc_system_hugepage_size();
581
        /* Only use huge pages if the arena size is a multiple of the
582
         * system's default huge page size.  When the arena is smaller
583
         * than the huge page, mmap still succeeds but silently
584
         * allocates an entire huge page; the subsequent munmap with
585
         * the smaller arena size then fails with EINVAL, leaking
586
         * all of that memory. */
587
        if (hp_size > 0 && size % hp_size == 0) {
588
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
589
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
590
            if (ptr != MAP_FAILED) {
591
                assert(ptr != NULL);
592
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
593
                return ptr;
594
            }
595
        }
596
    }
597
    /* Fall back to regular pages */
598
#    endif
599
#  endif
600
258k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
601
258k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
602
258k
    if (ptr == MAP_FAILED)
603
0
        return NULL;
604
258k
    assert(ptr != NULL);
605
258k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
606
258k
    return ptr;
607
#else
608
    return malloc(size);
609
#endif
610
258k
}
611
612
void
613
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
614
#if defined(ARENAS_USE_MMAP)
615
    size_t size
616
#else
617
    size_t Py_UNUSED(size)
618
#endif
619
)
620
258k
{
621
#ifdef MS_WINDOWS
622
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
623
    if (ptr == NULL) {
624
        return;
625
    }
626
    VirtualFree(ptr, 0, MEM_RELEASE);
627
#elif defined(ARENAS_USE_MMAP)
628
    /* Unlike free(), munmap() does not special-case NULL to noop. */
629
258k
    if (ptr == NULL) {
630
0
        return;
631
0
    }
632
258k
    munmap(ptr, size);
633
#else
634
    free(ptr);
635
#endif
636
258k
}
637
638
/*******************************************/
639
/* end low-level allocator implementations */
640
/*******************************************/
641
642
643
64
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
644
999M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
645
2.18G
#define _PyMem (_PyRuntime.allocators.standard.mem)
646
5.77G
#define _PyObject (_PyRuntime.allocators.standard.obj)
647
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
648
1.03M
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
649
650
651
/***************************/
652
/* managing the allocators */
653
/***************************/
654
655
static int
656
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
657
                               PyMemAllocatorEx *old_alloc)
658
0
{
659
0
    if (old_alloc != NULL) {
660
0
        get_allocator_unlocked(domain, old_alloc);
661
0
    }
662
663
664
0
    PyMemAllocatorEx new_alloc;
665
0
    switch(domain)
666
0
    {
667
0
    case PYMEM_DOMAIN_RAW:
668
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
669
0
        break;
670
0
    case PYMEM_DOMAIN_MEM:
671
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
672
0
        break;
673
0
    case PYMEM_DOMAIN_OBJ:
674
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
675
0
        break;
676
0
    default:
677
        /* unknown domain */
678
0
        return -1;
679
0
    }
680
0
    set_allocator_unlocked(domain, &new_alloc);
681
0
    if (debug) {
682
0
        set_up_debug_hooks_domain_unlocked(domain);
683
0
    }
684
0
    return 0;
685
0
}
686
687
688
#ifdef Py_DEBUG
689
static const int pydebug = 1;
690
#else
691
static const int pydebug = 0;
692
#endif
693
694
int
695
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
696
0
{
697
0
    if (name == NULL || *name == '\0') {
698
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
699
           nameions): use default memory allocators */
700
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
701
0
    }
702
0
    else if (strcmp(name, "default") == 0) {
703
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
704
0
    }
705
0
    else if (strcmp(name, "debug") == 0) {
706
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
707
0
    }
708
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
709
0
    else if (strcmp(name, "pymalloc") == 0) {
710
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
711
0
    }
712
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
713
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
714
0
    }
715
0
#endif
716
0
#ifdef WITH_MIMALLOC
717
0
    else if (strcmp(name, "mimalloc") == 0) {
718
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
719
0
    }
720
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
721
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
722
0
    }
723
0
#endif
724
0
#ifndef Py_GIL_DISABLED
725
0
    else if (strcmp(name, "malloc") == 0) {
726
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
727
0
    }
728
0
    else if (strcmp(name, "malloc_debug") == 0) {
729
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
730
0
    }
731
0
#endif
732
0
    else {
733
        /* unknown allocator */
734
0
        return -1;
735
0
    }
736
0
    return 0;
737
0
}
738
739
740
static int
741
set_up_allocators_unlocked(PyMemAllocatorName allocator)
742
0
{
743
0
    switch (allocator) {
744
0
    case PYMEM_ALLOCATOR_NOT_SET:
745
        /* do nothing */
746
0
        break;
747
748
0
    case PYMEM_ALLOCATOR_DEFAULT:
749
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
750
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
751
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
752
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
753
0
        break;
754
755
0
    case PYMEM_ALLOCATOR_DEBUG:
756
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
757
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
758
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
759
0
        _PyRuntime.allocators.is_debug_enabled = 1;
760
0
        break;
761
762
0
#ifdef WITH_PYMALLOC
763
0
    case PYMEM_ALLOCATOR_PYMALLOC:
764
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
765
0
    {
766
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
767
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
768
769
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
770
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
771
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
772
773
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
774
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
775
0
        if (is_debug) {
776
0
            set_up_debug_hooks_unlocked();
777
0
        }
778
0
        break;
779
0
    }
780
0
#endif
781
0
#ifdef WITH_MIMALLOC
782
0
    case PYMEM_ALLOCATOR_MIMALLOC:
783
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
784
0
    {
785
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
786
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
787
788
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
789
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
790
791
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
792
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
793
794
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
795
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
796
0
        if (is_debug) {
797
0
            set_up_debug_hooks_unlocked();
798
0
        }
799
800
0
        break;
801
0
    }
802
0
#endif
803
804
0
    case PYMEM_ALLOCATOR_MALLOC:
805
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
806
0
    {
807
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
808
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
809
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
810
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
811
812
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
813
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
814
0
        if (is_debug) {
815
0
            set_up_debug_hooks_unlocked();
816
0
        }
817
0
        break;
818
0
    }
819
820
0
    default:
821
        /* unknown allocator */
822
0
        return -1;
823
0
    }
824
825
0
    return 0;
826
0
}
827
828
int
829
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
830
0
{
831
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
832
0
    int res = set_up_allocators_unlocked(allocator);
833
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
834
0
    return res;
835
0
}
836
837
838
static int
839
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
840
0
{
841
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
842
0
}
843
844
845
static const char*
846
get_current_allocator_name_unlocked(void)
847
0
{
848
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
849
0
#ifdef WITH_PYMALLOC
850
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
851
0
#endif
852
0
#ifdef WITH_MIMALLOC
853
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
854
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
855
0
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
856
0
#endif
857
858
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
859
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
860
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
861
0
    {
862
0
        return "malloc";
863
0
    }
864
0
#ifdef WITH_PYMALLOC
865
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
866
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
867
0
        pymemallocator_eq(&_PyObject, &pymalloc))
868
0
    {
869
0
        return "pymalloc";
870
0
    }
871
0
#endif
872
0
#ifdef WITH_MIMALLOC
873
0
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
874
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
875
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
876
0
    {
877
0
        return "mimalloc";
878
0
    }
879
0
#endif
880
881
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
882
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
883
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
884
885
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
886
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
887
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
888
0
    {
889
        /* Debug hooks installed */
890
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
891
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
892
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
893
0
        {
894
0
            return "malloc_debug";
895
0
        }
896
0
#ifdef WITH_PYMALLOC
897
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
898
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
899
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
900
0
        {
901
0
            return "pymalloc_debug";
902
0
        }
903
0
#endif
904
0
#ifdef WITH_MIMALLOC
905
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
906
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
907
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
908
0
        {
909
0
            return "mimalloc_debug";
910
0
        }
911
0
#endif
912
0
    }
913
0
    return NULL;
914
0
}
915
916
const char*
917
_PyMem_GetCurrentAllocatorName(void)
918
0
{
919
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
920
0
    const char *name = get_current_allocator_name_unlocked();
921
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
922
0
    return name;
923
0
}
924
925
926
int
927
_PyMem_DebugEnabled(void)
928
0
{
929
0
    return _PyRuntime.allocators.is_debug_enabled;
930
0
}
931
932
#ifdef WITH_PYMALLOC
933
static int
934
_PyMem_PymallocEnabled(void)
935
0
{
936
0
    if (_PyMem_DebugEnabled()) {
937
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
938
0
    }
939
0
    else {
940
0
        return (_PyObject.malloc == _PyObject_Malloc);
941
0
    }
942
0
}
943
944
#ifdef WITH_MIMALLOC
945
static int
946
_PyMem_MimallocEnabled(void)
947
0
{
948
#ifdef Py_GIL_DISABLED
949
    return 1;
950
#else
951
0
    if (_PyMem_DebugEnabled()) {
952
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
953
0
    }
954
0
    else {
955
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
956
0
    }
957
0
#endif
958
0
}
959
#endif  // WITH_MIMALLOC
960
961
#endif  // WITH_PYMALLOC
962
963
964
static void
965
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
966
0
{
967
0
    PyMemAllocatorEx alloc;
968
969
0
    if (domain == PYMEM_DOMAIN_RAW) {
970
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
971
0
            return;
972
0
        }
973
974
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
975
0
        alloc.ctx = &_PyMem_Debug.raw;
976
0
        alloc.malloc = _PyMem_DebugRawMalloc;
977
0
        alloc.calloc = _PyMem_DebugRawCalloc;
978
0
        alloc.realloc = _PyMem_DebugRawRealloc;
979
0
        alloc.free = _PyMem_DebugRawFree;
980
0
        set_allocator_unlocked(domain, &alloc);
981
0
    }
982
0
    else if (domain == PYMEM_DOMAIN_MEM) {
983
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
984
0
            return;
985
0
        }
986
987
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
988
0
        alloc.ctx = &_PyMem_Debug.mem;
989
0
        alloc.malloc = _PyMem_DebugMalloc;
990
0
        alloc.calloc = _PyMem_DebugCalloc;
991
0
        alloc.realloc = _PyMem_DebugRealloc;
992
0
        alloc.free = _PyMem_DebugFree;
993
0
        set_allocator_unlocked(domain, &alloc);
994
0
    }
995
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
996
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
997
0
            return;
998
0
        }
999
1000
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1001
0
        alloc.ctx = &_PyMem_Debug.obj;
1002
0
        alloc.malloc = _PyMem_DebugMalloc;
1003
0
        alloc.calloc = _PyMem_DebugCalloc;
1004
0
        alloc.realloc = _PyMem_DebugRealloc;
1005
0
        alloc.free = _PyMem_DebugFree;
1006
0
        set_allocator_unlocked(domain, &alloc);
1007
0
    }
1008
0
}
1009
1010
1011
static void
1012
set_up_debug_hooks_unlocked(void)
1013
0
{
1014
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1015
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1016
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1017
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1018
0
}
1019
1020
void
1021
PyMem_SetupDebugHooks(void)
1022
0
{
1023
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1024
0
    set_up_debug_hooks_unlocked();
1025
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1026
0
}
1027
1028
static void
1029
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1030
32
{
1031
32
    switch(domain)
1032
32
    {
1033
32
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1034
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1035
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1036
0
    default:
1037
        /* unknown domain: set all attributes to NULL */
1038
0
        allocator->ctx = NULL;
1039
0
        allocator->malloc = NULL;
1040
0
        allocator->calloc = NULL;
1041
0
        allocator->realloc = NULL;
1042
0
        allocator->free = NULL;
1043
32
    }
1044
32
}
1045
1046
static void
1047
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1048
0
{
1049
0
    switch(domain)
1050
0
    {
1051
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1052
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1053
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1054
    /* ignore unknown domain */
1055
0
    }
1056
0
}
1057
1058
void
1059
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1060
32
{
1061
32
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1062
32
    get_allocator_unlocked(domain, allocator);
1063
32
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1064
32
}
1065
1066
void
1067
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1068
0
{
1069
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1070
0
    set_allocator_unlocked(domain, allocator);
1071
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1072
0
}
1073
1074
void
1075
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1076
0
{
1077
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1078
0
    *allocator = _PyObject_Arena;
1079
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1080
0
}
1081
1082
void
1083
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1084
0
{
1085
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1086
0
    _PyObject_Arena = *allocator;
1087
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1088
0
}
1089
1090
1091
/* Note that there is a possible, but very unlikely, race in any place
1092
 * below where we call one of the allocator functions.  We access two
1093
 * fields in each case:  "malloc", etc. and "ctx".
1094
 *
1095
 * It is unlikely that the allocator will be changed while one of those
1096
 * calls is happening, much less in that very narrow window.
1097
 * Furthermore, the likelihood of a race is drastically reduced by the
1098
 * fact that the allocator may not be changed after runtime init
1099
 * (except with a wrapper).
1100
 *
1101
 * With the above in mind, we currently don't worry about locking
1102
 * around these uses of the runtime-global allocators state. */
1103
1104
1105
/*************************/
1106
/* the "arena" allocator */
1107
/*************************/
1108
1109
void *
1110
_PyObject_VirtualAlloc(size_t size)
1111
252k
{
1112
252k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
1113
252k
}
1114
1115
void
1116
_PyObject_VirtualFree(void *obj, size_t size)
1117
251k
{
1118
251k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
1119
251k
}
1120
1121
1122
/***********************/
1123
/* the "raw" allocator */
1124
/***********************/
1125
1126
void *
1127
PyMem_RawMalloc(size_t size)
1128
245M
{
1129
    /*
1130
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1131
     * Most python internals blindly use a signed Py_ssize_t to track
1132
     * things without checking for overflows or negatives.
1133
     * As size_t is unsigned, checking for size < 0 is not required.
1134
     */
1135
245M
    if (size > (size_t)PY_SSIZE_T_MAX)
1136
0
        return NULL;
1137
245M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1138
245M
}
1139
1140
void *
1141
PyMem_RawCalloc(size_t nelem, size_t elsize)
1142
60.8k
{
1143
    /* see PyMem_RawMalloc() */
1144
60.8k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1145
0
        return NULL;
1146
60.8k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1147
60.8k
}
1148
1149
void*
1150
PyMem_RawRealloc(void *ptr, size_t new_size)
1151
9.05M
{
1152
    /* see PyMem_RawMalloc() */
1153
9.05M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1154
0
        return NULL;
1155
9.05M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1156
9.05M
}
1157
1158
void PyMem_RawFree(void *ptr)
1159
245M
{
1160
245M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1161
245M
}
1162
1163
1164
/***********************/
1165
/* the "mem" allocator */
1166
/***********************/
1167
1168
void *
1169
PyMem_Malloc(size_t size)
1170
247M
{
1171
    /* see PyMem_RawMalloc() */
1172
247M
    if (size > (size_t)PY_SSIZE_T_MAX)
1173
0
        return NULL;
1174
247M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1175
247M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1176
247M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1177
247M
    OBJECT_STAT_INC(allocations);
1178
247M
    return _PyMem.malloc(_PyMem.ctx, size);
1179
247M
}
1180
1181
void *
1182
PyMem_Calloc(size_t nelem, size_t elsize)
1183
46.9M
{
1184
    /* see PyMem_RawMalloc() */
1185
46.9M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1186
0
        return NULL;
1187
46.9M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1188
46.9M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1189
46.9M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1190
46.9M
    OBJECT_STAT_INC(allocations);
1191
46.9M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1192
46.9M
}
1193
1194
void *
1195
PyMem_Realloc(void *ptr, size_t new_size)
1196
268M
{
1197
    /* see PyMem_RawMalloc() */
1198
268M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1199
0
        return NULL;
1200
268M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1201
268M
}
1202
1203
void
1204
PyMem_Free(void *ptr)
1205
527M
{
1206
527M
    OBJECT_STAT_INC(frees);
1207
527M
    _PyMem.free(_PyMem.ctx, ptr);
1208
527M
}
1209
1210
1211
/***************************/
1212
/* pymem utility functions */
1213
/***************************/
1214
1215
wchar_t*
1216
_PyMem_RawWcsdup(const wchar_t *str)
1217
1.28k
{
1218
1.28k
    assert(str != NULL);
1219
1220
1.28k
    size_t len = wcslen(str);
1221
1.28k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1222
0
        return NULL;
1223
0
    }
1224
1225
1.28k
    size_t size = (len + 1) * sizeof(wchar_t);
1226
1.28k
    wchar_t *str2 = PyMem_RawMalloc(size);
1227
1.28k
    if (str2 == NULL) {
1228
0
        return NULL;
1229
0
    }
1230
1231
1.28k
    memcpy(str2, str, size);
1232
1.28k
    return str2;
1233
1.28k
}
1234
1235
char *
1236
_PyMem_RawStrdup(const char *str)
1237
96
{
1238
96
    assert(str != NULL);
1239
96
    size_t size = strlen(str) + 1;
1240
96
    char *copy = PyMem_RawMalloc(size);
1241
96
    if (copy == NULL) {
1242
0
        return NULL;
1243
0
    }
1244
96
    memcpy(copy, str, size);
1245
96
    return copy;
1246
96
}
1247
1248
char *
1249
_PyMem_Strdup(const char *str)
1250
0
{
1251
0
    assert(str != NULL);
1252
0
    size_t size = strlen(str) + 1;
1253
0
    char *copy = PyMem_Malloc(size);
1254
0
    if (copy == NULL) {
1255
0
        return NULL;
1256
0
    }
1257
0
    memcpy(copy, str, size);
1258
0
    return copy;
1259
0
}
1260
1261
/***********************************************/
1262
/* Delayed freeing support for Py_GIL_DISABLED */
1263
/***********************************************/
1264
1265
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1266
#define WORK_ITEMS_PER_CHUNK 254
1267
1268
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1269
struct _mem_work_item {
1270
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1271
    uint64_t qsbr_goal;
1272
};
1273
1274
// A fixed-size buffer of pointers to be freed
1275
struct _mem_work_chunk {
1276
    // Linked list node of chunks in queue
1277
    struct llist_node node;
1278
1279
    Py_ssize_t rd_idx;  // index of next item to read
1280
    Py_ssize_t wr_idx;  // index of next item to write
1281
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1282
};
1283
1284
static int
1285
work_item_should_decref(uintptr_t ptr)
1286
0
{
1287
0
    return ptr & 0x01;
1288
0
}
1289
1290
static void
1291
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1292
0
{
1293
0
    if (work_item_should_decref(ptr)) {
1294
0
        PyObject *obj = (PyObject *)(ptr - 1);
1295
#ifdef Py_GIL_DISABLED
1296
        if (cb == NULL) {
1297
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1298
            Py_DECREF(obj);
1299
            return;
1300
        }
1301
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1302
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1303
        if (refcount == 0) {
1304
            cb(obj, state);
1305
        }
1306
#else
1307
0
        Py_DECREF(obj);
1308
0
#endif
1309
0
    }
1310
0
    else {
1311
0
        PyMem_Free((void *)ptr);
1312
0
    }
1313
0
}
1314
1315
1316
#ifdef Py_GIL_DISABLED
1317
1318
// For deferred advance on free: the number of deferred items before advancing
1319
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1320
// want to process a chunk before it overflows.
1321
#define QSBR_DEFERRED_LIMIT 127
1322
1323
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1324
// helps limit memory usage due to QSBR delaying frees too long.
1325
#define QSBR_FREE_MEM_LIMIT 1024*1024
1326
1327
// Return true if the global write sequence should be advanced for a deferred
1328
// memory free.
1329
static bool
1330
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1331
{
1332
    if (size > QSBR_FREE_MEM_LIMIT) {
1333
        qsbr->deferred_count = 0;
1334
        qsbr->deferred_memory = 0;
1335
        qsbr->should_process = true;
1336
        return true;
1337
    }
1338
    qsbr->deferred_count++;
1339
    qsbr->deferred_memory += size;
1340
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1341
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1342
        qsbr->deferred_count = 0;
1343
        qsbr->deferred_memory = 0;
1344
        qsbr->should_process = true;
1345
        return true;
1346
    }
1347
    return false;
1348
}
1349
#endif
1350
1351
static void
1352
free_delayed(uintptr_t ptr, size_t size)
1353
0
{
1354
0
#ifndef Py_GIL_DISABLED
1355
0
    free_work_item(ptr, NULL, NULL);
1356
#else
1357
    PyInterpreterState *interp = _PyInterpreterState_GET();
1358
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1359
        interp->stoptheworld.world_stopped)
1360
    {
1361
        // Free immediately during interpreter shutdown or if the world is
1362
        // stopped.
1363
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1364
        free_work_item(ptr, NULL, NULL);
1365
        return;
1366
    }
1367
1368
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1369
    struct llist_node *head = &tstate->mem_free_queue;
1370
1371
    struct _mem_work_chunk *buf = NULL;
1372
    if (!llist_empty(head)) {
1373
        // Try to re-use the last buffer
1374
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1375
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1376
            // already full
1377
            buf = NULL;
1378
        }
1379
    }
1380
1381
    if (buf == NULL) {
1382
        buf = PyMem_Calloc(1, sizeof(*buf));
1383
        if (buf != NULL) {
1384
            llist_insert_tail(head, &buf->node);
1385
        }
1386
    }
1387
1388
    if (buf == NULL) {
1389
        // failed to allocate a buffer, free immediately
1390
        PyObject *to_dealloc = NULL;
1391
        _PyEval_StopTheWorld(tstate->base.interp);
1392
        if (work_item_should_decref(ptr)) {
1393
            PyObject *obj = (PyObject *)(ptr - 1);
1394
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1395
            if (refcount == 0) {
1396
                to_dealloc = obj;
1397
            }
1398
        }
1399
        else {
1400
            PyMem_Free((void *)ptr);
1401
        }
1402
        _PyEval_StartTheWorld(tstate->base.interp);
1403
        if (to_dealloc != NULL) {
1404
            _Py_Dealloc(to_dealloc);
1405
        }
1406
        return;
1407
    }
1408
1409
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1410
    uint64_t seq;
1411
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1412
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1413
    }
1414
    else {
1415
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1416
    }
1417
    buf->array[buf->wr_idx].ptr = ptr;
1418
    buf->array[buf->wr_idx].qsbr_goal = seq;
1419
    buf->wr_idx++;
1420
1421
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1422
        // Normally the processing of delayed items is done from the eval
1423
        // breaker.  Processing here is a safety measure to ensure too much
1424
        // work does not accumulate.
1425
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1426
    }
1427
#endif
1428
0
}
1429
1430
void
1431
_PyMem_FreeDelayed(void *ptr, size_t size)
1432
0
{
1433
0
    assert(!((uintptr_t)ptr & 0x01));
1434
0
    if (ptr != NULL) {
1435
0
        free_delayed((uintptr_t)ptr, size);
1436
0
    }
1437
0
}
1438
1439
#ifdef Py_GIL_DISABLED
1440
void
1441
_PyObject_XDecRefDelayed(PyObject *ptr)
1442
{
1443
    assert(!((uintptr_t)ptr & 0x01));
1444
    if (ptr != NULL) {
1445
        // We use 0 as the size since we don't have an easy way to know the
1446
        // actual size.  If we are freeing many objects, the write sequence
1447
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1448
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1449
    }
1450
}
1451
#endif
1452
1453
#ifdef Py_GIL_DISABLED
1454
void
1455
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1456
{
1457
    PyObject *old = *ptr;
1458
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1459
    if (old == NULL) {
1460
        return;
1461
    }
1462
    if (!_Py_IsImmortal(old)) {
1463
         _PyObject_XDecRefDelayed(old);
1464
    }
1465
}
1466
#endif
1467
1468
static struct _mem_work_chunk *
1469
work_queue_first(struct llist_node *head)
1470
0
{
1471
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1472
0
}
1473
1474
static void
1475
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1476
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1477
0
{
1478
0
    while (!llist_empty(head)) {
1479
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1480
1481
0
        if (buf->rd_idx < buf->wr_idx) {
1482
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1483
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1484
0
                return;
1485
0
            }
1486
1487
0
            buf->rd_idx++;
1488
            // NB: free_work_item may re-enter or execute arbitrary code
1489
0
            free_work_item(item->ptr, cb, state);
1490
0
            continue;
1491
0
        }
1492
1493
0
        assert(buf->rd_idx == buf->wr_idx);
1494
0
        if (keep_empty && buf->node.next == head) {
1495
            // Keep the last buffer in the queue to reduce re-allocations
1496
0
            buf->rd_idx = buf->wr_idx = 0;
1497
0
            return;
1498
0
        }
1499
1500
0
        llist_remove(&buf->node);
1501
0
        PyMem_Free(buf);
1502
0
    }
1503
0
}
1504
1505
static void
1506
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1507
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1508
                     void *state)
1509
0
{
1510
0
    assert(PyMutex_IsLocked(&queue->mutex));
1511
0
    process_queue(&queue->head, tstate, false, cb, state);
1512
1513
0
    int more_work = !llist_empty(&queue->head);
1514
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1515
0
}
1516
1517
static void
1518
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1519
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1520
                           void *state)
1521
0
{
1522
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1523
0
        return;
1524
0
    }
1525
1526
    // Try to acquire the lock, but don't block if it's already held.
1527
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1528
0
        process_interp_queue(queue, tstate, cb, state);
1529
0
        PyMutex_Unlock(&queue->mutex);
1530
0
    }
1531
0
}
1532
1533
void
1534
_PyMem_ProcessDelayed(PyThreadState *tstate)
1535
0
{
1536
0
    PyInterpreterState *interp = tstate->interp;
1537
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1538
1539
0
    tstate_impl->qsbr->should_process = false;
1540
1541
    // Process thread-local work
1542
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1543
1544
    // Process shared interpreter work
1545
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1546
0
}
1547
1548
void
1549
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1550
0
{
1551
0
    PyInterpreterState *interp = tstate->interp;
1552
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1553
1554
    // Process thread-local work
1555
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1556
1557
    // Process shared interpreter work
1558
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1559
0
}
1560
1561
void
1562
_PyMem_AbandonDelayed(PyThreadState *tstate)
1563
0
{
1564
0
    PyInterpreterState *interp = tstate->interp;
1565
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1566
1567
0
    if (llist_empty(queue)) {
1568
0
        return;
1569
0
    }
1570
1571
    // Check if the queue contains one empty buffer
1572
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1573
0
    if (buf->rd_idx == buf->wr_idx) {
1574
0
        llist_remove(&buf->node);
1575
0
        PyMem_Free(buf);
1576
0
        assert(llist_empty(queue));
1577
0
        return;
1578
0
    }
1579
1580
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1581
1582
    // Merge the thread's work queue into the interpreter's work queue.
1583
0
    llist_concat(&interp->mem_free_queue.head, queue);
1584
1585
    // Process the merged queue now (see gh-130794).
1586
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1587
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1588
1589
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1590
1591
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1592
0
}
1593
1594
void
1595
_PyMem_FiniDelayed(PyInterpreterState *interp)
1596
0
{
1597
0
    struct llist_node *head = &interp->mem_free_queue.head;
1598
0
    while (!llist_empty(head)) {
1599
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1600
1601
0
        if (buf->rd_idx < buf->wr_idx) {
1602
            // Free the remaining items immediately. There should be no other
1603
            // threads accessing the memory at this point during shutdown.
1604
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1605
0
            buf->rd_idx++;
1606
            // NB: free_work_item may re-enter or execute arbitrary code
1607
0
            free_work_item(item->ptr, NULL, NULL);
1608
0
            continue;
1609
0
        }
1610
1611
0
        llist_remove(&buf->node);
1612
0
        PyMem_Free(buf);
1613
0
    }
1614
0
}
1615
1616
/**************************/
1617
/* the "object" allocator */
1618
/**************************/
1619
1620
void *
1621
PyObject_Malloc(size_t size)
1622
1.40G
{
1623
    /* see PyMem_RawMalloc() */
1624
1.40G
    if (size > (size_t)PY_SSIZE_T_MAX)
1625
0
        return NULL;
1626
1.40G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1627
1.40G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1628
1.40G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1629
1.40G
    OBJECT_STAT_INC(allocations);
1630
1.40G
    return _PyObject.malloc(_PyObject.ctx, size);
1631
1.40G
}
1632
1633
void *
1634
PyObject_Calloc(size_t nelem, size_t elsize)
1635
0
{
1636
    /* see PyMem_RawMalloc() */
1637
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1638
0
        return NULL;
1639
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1640
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1641
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1642
0
    OBJECT_STAT_INC(allocations);
1643
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1644
0
}
1645
1646
void *
1647
PyObject_Realloc(void *ptr, size_t new_size)
1648
76.0M
{
1649
    /* see PyMem_RawMalloc() */
1650
76.0M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1651
0
        return NULL;
1652
76.0M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1653
76.0M
}
1654
1655
void
1656
PyObject_Free(void *ptr)
1657
1.40G
{
1658
1.40G
    OBJECT_STAT_INC(frees);
1659
1.40G
    _PyObject.free(_PyObject.ctx, ptr);
1660
1.40G
}
1661
1662
1663
/* Use __builtin_expect() where available to reduce overhead of
1664
   the valgrind checks */
1665
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1666
11.9G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1667
5.40G
#  define LIKELY(value) __builtin_expect((value), 1)
1668
#else
1669
#  define UNLIKELY(value) (value)
1670
#  define LIKELY(value) (value)
1671
#endif
1672
1673
#ifdef WITH_PYMALLOC
1674
1675
#ifdef WITH_VALGRIND
1676
#include <valgrind/valgrind.h>
1677
1678
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1679
static int running_on_valgrind = -1;
1680
#endif
1681
1682
typedef struct _obmalloc_state OMState;
1683
1684
/* obmalloc state for main interpreter and shared by all interpreters without
1685
 * their own obmalloc state.  By not explicitly initializing this structure, it
1686
 * will be allocated in the BSS which is a small performance win.  The radix
1687
 * tree arrays are fairly large but are sparsely used.  */
1688
static struct _obmalloc_state obmalloc_state_main;
1689
static bool obmalloc_state_initialized;
1690
1691
static inline int
1692
has_own_state(PyInterpreterState *interp)
1693
0
{
1694
0
    return (_Py_IsMainInterpreter(interp) ||
1695
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1696
0
            _Py_IsMainInterpreterFinalizing(interp));
1697
0
}
1698
1699
static inline OMState *
1700
get_state(void)
1701
4.13G
{
1702
4.13G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1703
4.13G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1704
4.13G
    return interp->obmalloc;
1705
4.13G
}
1706
1707
// These macros all rely on a local "state" variable.
1708
1.92G
#define usedpools (state->pools.used)
1709
3.67M
#define allarenas (state->mgmt.arenas)
1710
352
#define maxarenas (state->mgmt.maxarenas)
1711
32.9k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1712
31.9M
#define usable_arenas (state->mgmt.usable_arenas)
1713
22.2M
#define nfp2lasta (state->mgmt.nfp2lasta)
1714
20.8k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1715
6.78k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1716
7.78k
#define narenas_highwater (state->mgmt.narenas_highwater)
1717
488M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1718
1719
#ifdef WITH_MIMALLOC
1720
static bool count_blocks(
1721
    const mi_heap_t* heap, const mi_heap_area_t* area,
1722
    void* block, size_t block_size, void* allocated_blocks)
1723
0
{
1724
0
    *(size_t *)allocated_blocks += area->used;
1725
0
    return 1;
1726
0
}
1727
1728
static Py_ssize_t
1729
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1730
0
{
1731
0
    size_t allocated_blocks = 0;
1732
#ifdef Py_GIL_DISABLED
1733
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1734
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1735
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1736
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1737
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1738
        }
1739
    }
1740
1741
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1742
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1743
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1744
                                        &allocated_blocks);
1745
    }
1746
#else
1747
    // TODO(sgross): this only counts the current thread's blocks.
1748
0
    mi_heap_t *heap = mi_heap_get_default();
1749
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1750
0
#endif
1751
0
    return allocated_blocks;
1752
0
}
1753
#endif
1754
1755
Py_ssize_t
1756
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1757
0
{
1758
0
#ifdef WITH_MIMALLOC
1759
0
    if (_PyMem_MimallocEnabled()) {
1760
0
        return get_mimalloc_allocated_blocks(interp);
1761
0
    }
1762
0
#endif
1763
1764
#ifdef Py_DEBUG
1765
    assert(has_own_state(interp));
1766
#else
1767
0
    if (!has_own_state(interp)) {
1768
0
        _Py_FatalErrorFunc(__func__,
1769
0
                           "the interpreter doesn't have its own allocator");
1770
0
    }
1771
0
#endif
1772
0
    OMState *state = interp->obmalloc;
1773
1774
0
    if (state == NULL) {
1775
0
        return 0;
1776
0
    }
1777
1778
0
    Py_ssize_t n = raw_allocated_blocks;
1779
    /* add up allocated blocks for used pools */
1780
0
    for (uint i = 0; i < maxarenas; ++i) {
1781
        /* Skip arenas which are not allocated. */
1782
0
        if (allarenas[i].address == 0) {
1783
0
            continue;
1784
0
        }
1785
1786
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1787
1788
        /* visit every pool in the arena */
1789
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1790
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1791
0
            poolp p = (poolp)base;
1792
0
            n += p->ref.count;
1793
0
        }
1794
0
    }
1795
0
    return n;
1796
0
}
1797
1798
static void free_obmalloc_arenas(PyInterpreterState *interp);
1799
1800
void
1801
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1802
0
{
1803
0
#ifdef WITH_MIMALLOC
1804
0
    if (_PyMem_MimallocEnabled()) {
1805
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1806
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1807
0
        return;
1808
0
    }
1809
0
#endif
1810
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1811
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1812
0
        assert(has_own_state(interp) || leaked == 0);
1813
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1814
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1815
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1816
            // then some of the memory allocated by obmalloc has not been
1817
            // freed.  It might be safe to free the arenas in that case but
1818
            // it's possible that extension modules are still using that
1819
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1820
            // should be warning when this happens.  It should be possible to
1821
            // use a tool like "-fsanitize=address" to track down these leaks.
1822
0
            free_obmalloc_arenas(interp);
1823
0
        }
1824
0
    }
1825
0
}
1826
1827
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1828
1829
/* We preserve the number of blocks leaked during runtime finalization,
1830
   so they can be reported if the runtime is initialized again. */
1831
// XXX We don't lose any information by dropping this,
1832
// so we should consider doing so.
1833
static Py_ssize_t last_final_leaks = 0;
1834
1835
void
1836
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1837
0
{
1838
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1839
0
    runtime->obmalloc.interpreter_leaks = 0;
1840
0
}
1841
1842
static Py_ssize_t
1843
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1844
0
{
1845
0
    Py_ssize_t total = 0;
1846
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1847
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1848
0
        if (interp == NULL) {
1849
            /* We are at the very end of runtime finalization.
1850
               We can't rely on finalizing->interp since that thread
1851
               state is probably already freed, so we don't worry
1852
               about it. */
1853
0
            assert(PyInterpreterState_Head() == NULL);
1854
0
        }
1855
0
        else {
1856
0
            assert(interp != NULL);
1857
            /* It is probably the last interpreter but not necessarily. */
1858
0
            assert(PyInterpreterState_Next(interp) == NULL);
1859
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1860
0
        }
1861
0
    }
1862
0
    else {
1863
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1864
0
        HEAD_LOCK(runtime);
1865
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1866
0
        assert(interp != NULL);
1867
#ifdef Py_DEBUG
1868
        int got_main = 0;
1869
#endif
1870
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1871
#ifdef Py_DEBUG
1872
            if (_Py_IsMainInterpreter(interp)) {
1873
                assert(!got_main);
1874
                got_main = 1;
1875
                assert(has_own_state(interp));
1876
            }
1877
#endif
1878
0
            if (has_own_state(interp)) {
1879
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1880
0
            }
1881
0
        }
1882
0
        HEAD_UNLOCK(runtime);
1883
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1884
#ifdef Py_DEBUG
1885
        assert(got_main);
1886
#endif
1887
0
    }
1888
0
    total += runtime->obmalloc.interpreter_leaks;
1889
0
    total += last_final_leaks;
1890
0
    return total;
1891
0
}
1892
1893
Py_ssize_t
1894
_Py_GetGlobalAllocatedBlocks(void)
1895
0
{
1896
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1897
0
}
1898
1899
#if WITH_PYMALLOC_RADIX_TREE
1900
/*==========================================================================*/
1901
/* radix tree for tracking arena usage. */
1902
1903
6.11G
#define arena_map_root (state->usage.arena_map_root)
1904
#ifdef USE_INTERIOR_NODES
1905
32
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1906
32
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1907
#endif
1908
1909
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1910
 * it cannot be created */
1911
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1912
arena_map_get(OMState *state, pymem_block *p, int create)
1913
2.12G
{
1914
2.12G
#ifdef USE_INTERIOR_NODES
1915
    /* sanity check that IGNORE_BITS is correct */
1916
2.12G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1917
2.12G
    int i1 = MAP_TOP_INDEX(p);
1918
2.12G
    if (arena_map_root.ptrs[i1] == NULL) {
1919
32
        if (!create) {
1920
0
            return NULL;
1921
0
        }
1922
32
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1923
32
        if (n == NULL) {
1924
0
            return NULL;
1925
0
        }
1926
32
        arena_map_root.ptrs[i1] = n;
1927
32
        arena_map_mid_count++;
1928
32
    }
1929
2.12G
    int i2 = MAP_MID_INDEX(p);
1930
2.12G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1931
253M
        if (!create) {
1932
253M
            return NULL;
1933
253M
        }
1934
32
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1935
32
        if (n == NULL) {
1936
0
            return NULL;
1937
0
        }
1938
32
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1939
32
        arena_map_bot_count++;
1940
32
    }
1941
1.86G
    return arena_map_root.ptrs[i1]->ptrs[i2];
1942
#else
1943
    return &arena_map_root;
1944
#endif
1945
2.12G
}
1946
1947
1948
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1949
 * away 24 bits of the address.  That reduces the space requirement of
1950
 * the tree compared to similar radix tree page-map schemes.  In
1951
 * exchange for slashing the space requirement, it needs more
1952
 * computation to check an address.
1953
 *
1954
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1955
 * explain in decimal so let's say that the arena size is 100 bytes.
1956
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1957
 * pointer address is inside an actual arena, we have to check two ideal
1958
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1959
 * 300.  In the rare case that an arena is aligned in the ideal way
1960
 * (e.g. base address of arena is 200) then we only have to check one
1961
 * ideal address.
1962
 *
1963
 * The tree nodes for 200 and 300 both store the address of arena.
1964
 * There are two cases: the arena starts at a lower ideal arena and
1965
 * extends to this one, or the arena starts in this arena and extends to
1966
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1967
 * these two cases.
1968
 */
1969
1970
1971
/* mark or unmark addresses covered by arena */
1972
static int
1973
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1974
13.0k
{
1975
    /* sanity check that IGNORE_BITS is correct */
1976
13.0k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1977
13.0k
    arena_map_bot_t *n_hi = arena_map_get(
1978
13.0k
            state, (pymem_block *)arena_base, is_used);
1979
13.0k
    if (n_hi == NULL) {
1980
0
        assert(is_used); /* otherwise node should already exist */
1981
0
        return 0; /* failed to allocate space for node */
1982
0
    }
1983
13.0k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1984
13.0k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1985
13.0k
    if (tail == 0) {
1986
        /* is ideal arena address */
1987
79
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1988
79
    }
1989
12.9k
    else {
1990
        /* arena_base address is not ideal (aligned to arena size) and
1991
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1992
         * for the next arena.  Note that it might be in different MAP_TOP
1993
         * and MAP_MID nodes as well so we need to call arena_map_get()
1994
         * again (do the full tree traversal).
1995
         */
1996
12.9k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1997
12.9k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1998
        /* If arena_base is a legit arena address, so is arena_base_next - 1
1999
         * (last address in arena).  If arena_base_next overflows then it
2000
         * must overflow to 0.  However, that would mean arena_base was
2001
         * "ideal" and we should not be in this case. */
2002
12.9k
        assert(arena_base < arena_base_next);
2003
12.9k
        arena_map_bot_t *n_lo = arena_map_get(
2004
12.9k
                state, (pymem_block *)arena_base_next, is_used);
2005
12.9k
        if (n_lo == NULL) {
2006
0
            assert(is_used); /* otherwise should already exist */
2007
0
            n_hi->arenas[i3].tail_hi = 0;
2008
0
            return 0; /* failed to allocate space for node */
2009
0
        }
2010
12.9k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
2011
12.9k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2012
12.9k
    }
2013
13.0k
    return 1;
2014
13.0k
}
2015
2016
/* Return true if 'p' is a pointer inside an obmalloc arena.
2017
 * _PyObject_Free() calls this so it needs to be very fast. */
2018
static int
2019
arena_map_is_used(OMState *state, pymem_block *p)
2020
2.12G
{
2021
2.12G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
2022
2.12G
    if (n == NULL) {
2023
253M
        return 0;
2024
253M
    }
2025
1.86G
    int i3 = MAP_BOT_INDEX(p);
2026
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2027
1.86G
    int32_t hi = n->arenas[i3].tail_hi;
2028
1.86G
    int32_t lo = n->arenas[i3].tail_lo;
2029
1.86G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2030
1.86G
    return (tail < lo) || (tail >= hi && hi != 0);
2031
2.12G
}
2032
2033
/* end of radix tree logic */
2034
/*==========================================================================*/
2035
#endif /* WITH_PYMALLOC_RADIX_TREE */
2036
2037
2038
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
2039
 * allocate a new arena, and return the address of an arena_object
2040
 * describing the new arena.  It's expected that the caller will set
2041
 * `usable_arenas` to the return value.
2042
 */
2043
static struct arena_object*
2044
new_arena(OMState *state)
2045
6.78k
{
2046
6.78k
    struct arena_object* arenaobj;
2047
6.78k
    uint excess;        /* number of bytes above pool alignment */
2048
6.78k
    void *address;
2049
2050
6.78k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
2051
6.78k
    if (debug_stats == -1) {
2052
32
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
2053
32
        debug_stats = (opt != NULL && *opt != '\0');
2054
32
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
2055
32
    }
2056
6.78k
    if (debug_stats) {
2057
0
        _PyObject_DebugMallocStats(stderr);
2058
0
    }
2059
2060
6.78k
    if (unused_arena_objects == NULL) {
2061
64
        uint i;
2062
64
        uint numarenas;
2063
64
        size_t nbytes;
2064
2065
        /* Double the number of arena objects on each allocation.
2066
         * Note that it's possible for `numarenas` to overflow.
2067
         */
2068
64
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
2069
64
        if (numarenas <= maxarenas)
2070
0
            return NULL;                /* overflow */
2071
#if SIZEOF_SIZE_T <= SIZEOF_INT
2072
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
2073
            return NULL;                /* overflow */
2074
#endif
2075
64
        nbytes = numarenas * sizeof(*allarenas);
2076
64
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
2077
64
        if (arenaobj == NULL)
2078
0
            return NULL;
2079
64
        allarenas = arenaobj;
2080
2081
        /* We might need to fix pointers that were copied.  However,
2082
         * new_arena only gets called when all the pages in the
2083
         * previous arenas are full.  Thus, there are *no* pointers
2084
         * into the old array. Thus, we don't have to worry about
2085
         * invalid pointers.  Just to be sure, some asserts:
2086
         */
2087
64
        assert(usable_arenas == NULL);
2088
64
        assert(unused_arena_objects == NULL);
2089
2090
        /* Put the new arenas on the unused_arena_objects list. */
2091
1.84k
        for (i = maxarenas; i < numarenas; ++i) {
2092
1.77k
            allarenas[i].address = 0;              /* mark as unassociated */
2093
1.77k
            allarenas[i].nextarena = i < numarenas - 1 ?
2094
1.77k
                                        &allarenas[i+1] : NULL;
2095
1.77k
        }
2096
2097
        /* Update globals. */
2098
64
        unused_arena_objects = &allarenas[maxarenas];
2099
64
        maxarenas = numarenas;
2100
64
    }
2101
2102
    /* Take the next available arena object off the head of the list. */
2103
6.78k
    assert(unused_arena_objects != NULL);
2104
6.78k
    arenaobj = unused_arena_objects;
2105
6.78k
    unused_arena_objects = arenaobj->nextarena;
2106
6.78k
    assert(arenaobj->address == 0);
2107
6.78k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2108
6.78k
#if WITH_PYMALLOC_RADIX_TREE
2109
6.78k
    if (address != NULL) {
2110
6.78k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2111
            /* marking arena in radix tree failed, abort */
2112
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2113
0
            address = NULL;
2114
0
        }
2115
6.78k
    }
2116
6.78k
#endif
2117
6.78k
    if (address == NULL) {
2118
        /* The allocation failed: return NULL after putting the
2119
         * arenaobj back.
2120
         */
2121
0
        arenaobj->nextarena = unused_arena_objects;
2122
0
        unused_arena_objects = arenaobj;
2123
0
        return NULL;
2124
0
    }
2125
6.78k
    arenaobj->address = (uintptr_t)address;
2126
2127
6.78k
    ++narenas_currently_allocated;
2128
6.78k
    ++ntimes_arena_allocated;
2129
6.78k
    if (narenas_currently_allocated > narenas_highwater)
2130
1.00k
        narenas_highwater = narenas_currently_allocated;
2131
6.78k
    arenaobj->freepools = NULL;
2132
    /* pool_address <- first pool-aligned address in the arena
2133
       nfreepools <- number of whole pools that fit after alignment */
2134
6.78k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2135
6.78k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2136
6.78k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2137
6.78k
    if (excess != 0) {
2138
5.36k
        --arenaobj->nfreepools;
2139
5.36k
        arenaobj->pool_address += POOL_SIZE - excess;
2140
5.36k
    }
2141
6.78k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2142
2143
6.78k
    return arenaobj;
2144
6.78k
}
2145
2146
2147
2148
#if WITH_PYMALLOC_RADIX_TREE
2149
/* Return true if and only if P is an address that was allocated by
2150
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2151
 */
2152
static bool
2153
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2154
2.12G
{
2155
2.12G
    return arena_map_is_used(state, p);
2156
2.12G
}
2157
#else
2158
/*
2159
address_in_range(P, POOL)
2160
2161
Return true if and only if P is an address that was allocated by pymalloc.
2162
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2163
(the caller is asked to compute this because the macro expands POOL more than
2164
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2165
variable and pass the latter to the macro; because address_in_range is
2166
called on every alloc/realloc/free, micro-efficiency is important here).
2167
2168
Tricky:  Let B be the arena base address associated with the pool, B =
2169
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2170
2171
    B <= P < B + ARENA_SIZE
2172
2173
Subtracting B throughout, this is true iff
2174
2175
    0 <= P-B < ARENA_SIZE
2176
2177
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2178
2179
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2180
before the first arena has been allocated.  `arenas` is still NULL in that
2181
case.  We're relying on that maxarenas is also 0 in that case, so that
2182
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2183
into a NULL arenas.
2184
2185
Details:  given P and POOL, the arena_object corresponding to P is AO =
2186
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2187
stores, etc), POOL is the correct address of P's pool, AO.address is the
2188
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2189
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2190
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2191
controls P.
2192
2193
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2194
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2195
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2196
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2197
control P.
2198
2199
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2200
allocated arena, obmalloc controls all the memory in slice AO.address :
2201
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2202
so P doesn't lie in that slice, so the macro correctly reports that P is not
2203
controlled by obmalloc.
2204
2205
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2206
arena_object (one not currently associated with an allocated arena),
2207
AO.address is 0, and the second test in the macro reduces to:
2208
2209
    P < ARENA_SIZE
2210
2211
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2212
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2213
of the test still passes, and the third clause (AO.address != 0) is necessary
2214
to get the correct result:  AO.address is 0 in this case, so the macro
2215
correctly reports that P is not controlled by obmalloc (despite that P lies in
2216
slice AO.address : AO.address + ARENA_SIZE).
2217
2218
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2219
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2220
corresponded to a currently-allocated arena, so the "P is not controlled by
2221
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2222
was impossible.
2223
2224
Note that the logic is excruciating, and reading up possibly uninitialized
2225
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2226
creates problems for some memory debuggers.  The overwhelming advantage is
2227
that this test determines whether an arbitrary address is controlled by
2228
obmalloc in a small constant time, independent of the number of arenas
2229
obmalloc controls.  Since this test is needed at every entry point, it's
2230
extremely desirable that it be this fast.
2231
*/
2232
2233
static bool _Py_NO_SANITIZE_ADDRESS
2234
            _Py_NO_SANITIZE_THREAD
2235
            _Py_NO_SANITIZE_MEMORY
2236
address_in_range(OMState *state, void *p, poolp pool)
2237
{
2238
    // Since address_in_range may be reading from memory which was not allocated
2239
    // by Python, it is important that pool->arenaindex is read only once, as
2240
    // another thread may be concurrently modifying the value without holding
2241
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2242
    // only once.
2243
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2244
    return arenaindex < maxarenas &&
2245
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2246
        allarenas[arenaindex].address != 0;
2247
}
2248
2249
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2250
2251
/*==========================================================================*/
2252
2253
// Called when freelist is exhausted.  Extend the freelist if there is
2254
// space for a block.  Otherwise, remove this pool from usedpools.
2255
static void
2256
pymalloc_pool_extend(poolp pool, uint size)
2257
391M
{
2258
391M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2259
        /* There is room for another block. */
2260
241M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2261
241M
        pool->nextoffset += INDEX2SIZE(size);
2262
241M
        *(pymem_block **)(pool->freeblock) = NULL;
2263
241M
        return;
2264
241M
    }
2265
2266
    /* Pool is full, unlink from used pools. */
2267
149M
    poolp next;
2268
149M
    next = pool->nextpool;
2269
149M
    pool = pool->prevpool;
2270
149M
    next->prevpool = pool;
2271
149M
    pool->nextpool = next;
2272
149M
}
2273
2274
/* called when pymalloc_alloc can not allocate a block from usedpool.
2275
 * This function takes new pool and allocate a block from it.
2276
 */
2277
static void*
2278
allocate_from_new_pool(OMState *state, uint size)
2279
3.27M
{
2280
    /* There isn't a pool of the right size class immediately
2281
     * available:  use a free pool.
2282
     */
2283
3.27M
    if (UNLIKELY(usable_arenas == NULL)) {
2284
        /* No arena has a free pool:  allocate a new arena. */
2285
#ifdef WITH_MEMORY_LIMITS
2286
        if (narenas_currently_allocated >= MAX_ARENAS) {
2287
            return NULL;
2288
        }
2289
#endif
2290
6.78k
        usable_arenas = new_arena(state);
2291
6.78k
        if (usable_arenas == NULL) {
2292
0
            return NULL;
2293
0
        }
2294
6.78k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2295
6.78k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2296
6.78k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2297
6.78k
    }
2298
3.27M
    assert(usable_arenas->address != 0);
2299
2300
    /* This arena already had the smallest nfreepools value, so decreasing
2301
     * nfreepools doesn't change that, and we don't need to rearrange the
2302
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2303
     * we need to remove its arena_object from usable_arenas.
2304
     */
2305
3.27M
    assert(usable_arenas->nfreepools > 0);
2306
3.27M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2307
        /* It's the last of this size, so there won't be any. */
2308
3.25M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2309
3.25M
    }
2310
    /* If any free pools will remain, it will be the new smallest. */
2311
3.27M
    if (usable_arenas->nfreepools > 1) {
2312
3.07M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2313
3.07M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2314
3.07M
    }
2315
2316
    /* Try to get a cached free pool. */
2317
3.27M
    poolp pool = usable_arenas->freepools;
2318
3.27M
    if (LIKELY(pool != NULL)) {
2319
        /* Unlink from cached pools. */
2320
2.85M
        usable_arenas->freepools = pool->nextpool;
2321
2.85M
        usable_arenas->nfreepools--;
2322
2.85M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2323
            /* Wholly allocated:  remove. */
2324
194k
            assert(usable_arenas->freepools == NULL);
2325
194k
            assert(usable_arenas->nextarena == NULL ||
2326
194k
                   usable_arenas->nextarena->prevarena ==
2327
194k
                   usable_arenas);
2328
194k
            usable_arenas = usable_arenas->nextarena;
2329
194k
            if (usable_arenas != NULL) {
2330
191k
                usable_arenas->prevarena = NULL;
2331
191k
                assert(usable_arenas->address != 0);
2332
191k
            }
2333
194k
        }
2334
2.65M
        else {
2335
            /* nfreepools > 0:  it must be that freepools
2336
             * isn't NULL, or that we haven't yet carved
2337
             * off all the arena's pools for the first
2338
             * time.
2339
             */
2340
2.65M
            assert(usable_arenas->freepools != NULL ||
2341
2.65M
                   usable_arenas->pool_address <=
2342
2.65M
                   (pymem_block*)usable_arenas->address +
2343
2.65M
                       ARENA_SIZE - POOL_SIZE);
2344
2.65M
        }
2345
2.85M
    }
2346
422k
    else {
2347
        /* Carve off a new pool. */
2348
422k
        assert(usable_arenas->nfreepools > 0);
2349
422k
        assert(usable_arenas->freepools == NULL);
2350
422k
        pool = (poolp)usable_arenas->pool_address;
2351
422k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2352
422k
                                 ARENA_SIZE - POOL_SIZE);
2353
422k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2354
422k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2355
422k
        pool->szidx = DUMMY_SIZE_IDX;
2356
422k
        usable_arenas->pool_address += POOL_SIZE;
2357
422k
        --usable_arenas->nfreepools;
2358
2359
422k
        if (usable_arenas->nfreepools == 0) {
2360
6.57k
            assert(usable_arenas->nextarena == NULL ||
2361
6.57k
                   usable_arenas->nextarena->prevarena ==
2362
6.57k
                   usable_arenas);
2363
            /* Unlink the arena:  it is completely allocated. */
2364
6.57k
            usable_arenas = usable_arenas->nextarena;
2365
6.57k
            if (usable_arenas != NULL) {
2366
320
                usable_arenas->prevarena = NULL;
2367
320
                assert(usable_arenas->address != 0);
2368
320
            }
2369
6.57k
        }
2370
422k
    }
2371
2372
    /* Frontlink to used pools. */
2373
3.27M
    pymem_block *bp;
2374
3.27M
    poolp next = usedpools[size + size]; /* == prev */
2375
3.27M
    pool->nextpool = next;
2376
3.27M
    pool->prevpool = next;
2377
3.27M
    next->nextpool = pool;
2378
3.27M
    next->prevpool = pool;
2379
3.27M
    pool->ref.count = 1;
2380
3.27M
    if (pool->szidx == size) {
2381
        /* Luckily, this pool last contained blocks
2382
         * of the same size class, so its header
2383
         * and free list are already initialized.
2384
         */
2385
2.00M
        bp = pool->freeblock;
2386
2.00M
        assert(bp != NULL);
2387
2.00M
        pool->freeblock = *(pymem_block **)bp;
2388
2.00M
        return bp;
2389
2.00M
    }
2390
    /*
2391
     * Initialize the pool header, set up the free list to
2392
     * contain just the second block, and return the first
2393
     * block.
2394
     */
2395
1.26M
    pool->szidx = size;
2396
1.26M
    size = INDEX2SIZE(size);
2397
1.26M
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2398
1.26M
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2399
1.26M
    pool->maxnextoffset = POOL_SIZE - size;
2400
1.26M
    pool->freeblock = bp + size;
2401
1.26M
    *(pymem_block **)(pool->freeblock) = NULL;
2402
1.26M
    return bp;
2403
3.27M
}
2404
2405
/* pymalloc allocator
2406
2407
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2408
2409
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2410
   requests, on error in the code below (as a last chance to serve the request)
2411
   or when the max memory limit has been reached.
2412
*/
2413
static inline void*
2414
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2415
2.01G
{
2416
#ifdef WITH_VALGRIND
2417
    if (UNLIKELY(running_on_valgrind == -1)) {
2418
        running_on_valgrind = RUNNING_ON_VALGRIND;
2419
    }
2420
    if (UNLIKELY(running_on_valgrind)) {
2421
        return NULL;
2422
    }
2423
#endif
2424
2425
2.01G
    if (UNLIKELY(nbytes == 0)) {
2426
37.4M
        return NULL;
2427
37.4M
    }
2428
1.97G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2429
206M
        return NULL;
2430
206M
    }
2431
2432
1.77G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2433
1.77G
    poolp pool = usedpools[size + size];
2434
1.77G
    pymem_block *bp;
2435
2436
1.77G
    if (LIKELY(pool != pool->nextpool)) {
2437
        /*
2438
         * There is a used pool for this size class.
2439
         * Pick up the head block of its free list.
2440
         */
2441
1.76G
        ++pool->ref.count;
2442
1.76G
        bp = pool->freeblock;
2443
1.76G
        assert(bp != NULL);
2444
2445
1.76G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2446
            // Reached the end of the free list, try to extend it.
2447
391M
            pymalloc_pool_extend(pool, size);
2448
391M
        }
2449
1.76G
    }
2450
3.27M
    else {
2451
        /* There isn't a pool of the right size class immediately
2452
         * available:  use a free pool.
2453
         */
2454
3.27M
        bp = allocate_from_new_pool(state, size);
2455
3.27M
    }
2456
2457
1.77G
    return (void *)bp;
2458
1.97G
}
2459
2460
2461
void *
2462
_PyObject_Malloc(void *ctx, size_t nbytes)
2463
1.96G
{
2464
1.96G
    OMState *state = get_state();
2465
1.96G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2466
1.96G
    if (LIKELY(ptr != NULL)) {
2467
1.72G
        return ptr;
2468
1.72G
    }
2469
2470
244M
    ptr = PyMem_RawMalloc(nbytes);
2471
244M
    if (ptr != NULL) {
2472
244M
        raw_allocated_blocks++;
2473
244M
    }
2474
244M
    return ptr;
2475
1.96G
}
2476
2477
2478
void *
2479
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2480
46.9M
{
2481
46.9M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2482
46.9M
    size_t nbytes = nelem * elsize;
2483
2484
46.9M
    OMState *state = get_state();
2485
46.9M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2486
46.9M
    if (LIKELY(ptr != NULL)) {
2487
46.8M
        memset(ptr, 0, nbytes);
2488
46.8M
        return ptr;
2489
46.8M
    }
2490
2491
60.2k
    ptr = PyMem_RawCalloc(nelem, elsize);
2492
60.2k
    if (ptr != NULL) {
2493
60.2k
        raw_allocated_blocks++;
2494
60.2k
    }
2495
60.2k
    return ptr;
2496
46.9M
}
2497
2498
2499
static void
2500
insert_to_usedpool(OMState *state, poolp pool)
2501
149M
{
2502
149M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2503
2504
149M
    uint size = pool->szidx;
2505
149M
    poolp next = usedpools[size + size];
2506
149M
    poolp prev = next->prevpool;
2507
2508
    /* insert pool before next:   prev <-> pool <-> next */
2509
149M
    pool->nextpool = next;
2510
149M
    pool->prevpool = prev;
2511
149M
    next->prevpool = pool;
2512
149M
    prev->nextpool = pool;
2513
149M
}
2514
2515
static void
2516
insert_to_freepool(OMState *state, poolp pool)
2517
3.24M
{
2518
3.24M
    poolp next = pool->nextpool;
2519
3.24M
    poolp prev = pool->prevpool;
2520
3.24M
    next->prevpool = prev;
2521
3.24M
    prev->nextpool = next;
2522
2523
    /* Link the pool to freepools.  This is a singly-linked
2524
     * list, and pool->prevpool isn't used there.
2525
     */
2526
3.24M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2527
3.24M
    pool->nextpool = ao->freepools;
2528
3.24M
    ao->freepools = pool;
2529
3.24M
    uint nf = ao->nfreepools;
2530
    /* If this is the rightmost arena with this number of free pools,
2531
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2532
     * are no arenas in usable_arenas with that value.
2533
     */
2534
3.24M
    struct arena_object* lastnf = nfp2lasta[nf];
2535
3.24M
    assert((nf == 0 && lastnf == NULL) ||
2536
3.24M
           (nf > 0 &&
2537
3.24M
            lastnf != NULL &&
2538
3.24M
            lastnf->nfreepools == nf &&
2539
3.24M
            (lastnf->nextarena == NULL ||
2540
3.24M
             nf < lastnf->nextarena->nfreepools)));
2541
3.24M
    if (lastnf == ao) {  /* it is the rightmost */
2542
2.97M
        struct arena_object* p = ao->prevarena;
2543
2.97M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2544
2.97M
    }
2545
3.24M
    ao->nfreepools = ++nf;
2546
2547
    /* All the rest is arena management.  We just freed
2548
     * a pool, and there are 4 cases for arena mgmt:
2549
     * 1. If all the pools are free, return the arena to
2550
     *    the system free().  Except if this is the last
2551
     *    arena in the list, keep it to avoid thrashing:
2552
     *    keeping one wholly free arena in the list avoids
2553
     *    pathological cases where a simple loop would
2554
     *    otherwise provoke needing to allocate and free an
2555
     *    arena on every iteration.  See bpo-37257.
2556
     * 2. If this is the only free pool in the arena,
2557
     *    add the arena back to the `usable_arenas` list.
2558
     * 3. If the "next" arena has a smaller count of free
2559
     *    pools, we have to "slide this arena right" to
2560
     *    restore that usable_arenas is sorted in order of
2561
     *    nfreepools.
2562
     * 4. Else there's nothing more to do.
2563
     */
2564
3.24M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2565
        /* Case 1.  First unlink ao from usable_arenas.
2566
         */
2567
6.27k
        assert(ao->prevarena == NULL ||
2568
6.27k
               ao->prevarena->address != 0);
2569
6.27k
        assert(ao ->nextarena == NULL ||
2570
6.27k
               ao->nextarena->address != 0);
2571
2572
        /* Fix the pointer in the prevarena, or the
2573
         * usable_arenas pointer.
2574
         */
2575
6.27k
        if (ao->prevarena == NULL) {
2576
1.58k
            usable_arenas = ao->nextarena;
2577
1.58k
            assert(usable_arenas == NULL ||
2578
1.58k
                   usable_arenas->address != 0);
2579
1.58k
        }
2580
4.68k
        else {
2581
4.68k
            assert(ao->prevarena->nextarena == ao);
2582
4.68k
            ao->prevarena->nextarena =
2583
4.68k
                ao->nextarena;
2584
4.68k
        }
2585
        /* Fix the pointer in the nextarena. */
2586
6.27k
        if (ao->nextarena != NULL) {
2587
6.27k
            assert(ao->nextarena->prevarena == ao);
2588
6.27k
            ao->nextarena->prevarena =
2589
6.27k
                ao->prevarena;
2590
6.27k
        }
2591
        /* Record that this arena_object slot is
2592
         * available to be reused.
2593
         */
2594
6.27k
        ao->nextarena = unused_arena_objects;
2595
6.27k
        unused_arena_objects = ao;
2596
2597
6.27k
#if WITH_PYMALLOC_RADIX_TREE
2598
        /* mark arena region as not under control of obmalloc */
2599
6.27k
        arena_map_mark_used(state, ao->address, 0);
2600
6.27k
#endif
2601
2602
        /* Free the entire arena. */
2603
6.27k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2604
6.27k
                             (void *)ao->address, ARENA_SIZE);
2605
6.27k
        ao->address = 0;                        /* mark unassociated */
2606
6.27k
        --narenas_currently_allocated;
2607
2608
6.27k
        return;
2609
6.27k
    }
2610
2611
3.24M
    if (nf == 1) {
2612
        /* Case 2.  Put ao at the head of
2613
         * usable_arenas.  Note that because
2614
         * ao->nfreepools was 0 before, ao isn't
2615
         * currently on the usable_arenas list.
2616
         */
2617
200k
        ao->nextarena = usable_arenas;
2618
200k
        ao->prevarena = NULL;
2619
200k
        if (usable_arenas)
2620
198k
            usable_arenas->prevarena = ao;
2621
200k
        usable_arenas = ao;
2622
200k
        assert(usable_arenas->address != 0);
2623
200k
        if (nfp2lasta[1] == NULL) {
2624
192k
            nfp2lasta[1] = ao;
2625
192k
        }
2626
2627
200k
        return;
2628
200k
    }
2629
2630
    /* If this arena is now out of order, we need to keep
2631
     * the list sorted.  The list is kept sorted so that
2632
     * the "most full" arenas are used first, which allows
2633
     * the nearly empty arenas to be completely freed.  In
2634
     * a few un-scientific tests, it seems like this
2635
     * approach allowed a lot more memory to be freed.
2636
     */
2637
    /* If this is the only arena with nf, record that. */
2638
3.04M
    if (nfp2lasta[nf] == NULL) {
2639
2.95M
        nfp2lasta[nf] = ao;
2640
2.95M
    } /* else the rightmost with nf doesn't change */
2641
    /* If this was the rightmost of the old size, it remains in place. */
2642
3.04M
    if (ao == lastnf) {
2643
        /* Case 4.  Nothing to do. */
2644
2.97M
        return;
2645
2.97M
    }
2646
    /* If ao were the only arena in the list, the last block would have
2647
     * gotten us out.
2648
     */
2649
3.04M
    assert(ao->nextarena != NULL);
2650
2651
    /* Case 3:  We have to move the arena towards the end of the list,
2652
     * because it has more free pools than the arena to its right.  It needs
2653
     * to move to follow lastnf.
2654
     * First unlink ao from usable_arenas.
2655
     */
2656
68.2k
    if (ao->prevarena != NULL) {
2657
        /* ao isn't at the head of the list */
2658
51.9k
        assert(ao->prevarena->nextarena == ao);
2659
51.9k
        ao->prevarena->nextarena = ao->nextarena;
2660
51.9k
    }
2661
16.3k
    else {
2662
        /* ao is at the head of the list */
2663
16.3k
        assert(usable_arenas == ao);
2664
16.3k
        usable_arenas = ao->nextarena;
2665
16.3k
    }
2666
68.2k
    ao->nextarena->prevarena = ao->prevarena;
2667
    /* And insert after lastnf. */
2668
68.2k
    ao->prevarena = lastnf;
2669
68.2k
    ao->nextarena = lastnf->nextarena;
2670
68.2k
    if (ao->nextarena != NULL) {
2671
66.6k
        ao->nextarena->prevarena = ao;
2672
66.6k
    }
2673
68.2k
    lastnf->nextarena = ao;
2674
    /* Verify that the swaps worked. */
2675
68.2k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2676
68.2k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2677
68.2k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2678
68.2k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2679
68.2k
           || ao->prevarena->nextarena == ao);
2680
68.2k
}
2681
2682
/* Free a memory block allocated by pymalloc_alloc().
2683
   Return 1 if it was freed.
2684
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2685
static inline int
2686
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2687
2.01G
{
2688
2.01G
    assert(p != NULL);
2689
2690
#ifdef WITH_VALGRIND
2691
    if (UNLIKELY(running_on_valgrind > 0)) {
2692
        return 0;
2693
    }
2694
#endif
2695
2696
2.01G
    poolp pool = POOL_ADDR(p);
2697
2.01G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2698
244M
        return 0;
2699
244M
    }
2700
    /* We allocated this address. */
2701
2702
    /* Link p to the start of the pool's freeblock list.  Since
2703
     * the pool had at least the p block outstanding, the pool
2704
     * wasn't empty (so it's already in a usedpools[] list, or
2705
     * was full and is in no list -- it's not in the freeblocks
2706
     * list in any case).
2707
     */
2708
2.01G
    assert(pool->ref.count > 0);            /* else it was empty */
2709
1.76G
    pymem_block *lastfree = pool->freeblock;
2710
1.76G
    *(pymem_block **)p = lastfree;
2711
1.76G
    pool->freeblock = (pymem_block *)p;
2712
1.76G
    pool->ref.count--;
2713
2714
1.76G
    if (UNLIKELY(lastfree == NULL)) {
2715
        /* Pool was full, so doesn't currently live in any list:
2716
         * link it to the front of the appropriate usedpools[] list.
2717
         * This mimics LRU pool usage for new allocations and
2718
         * targets optimal filling when several pools contain
2719
         * blocks of the same size class.
2720
         */
2721
149M
        insert_to_usedpool(state, pool);
2722
149M
        return 1;
2723
149M
    }
2724
2725
    /* freeblock wasn't NULL, so the pool wasn't full,
2726
     * and the pool is in a usedpools[] list.
2727
     */
2728
1.61G
    if (LIKELY(pool->ref.count != 0)) {
2729
        /* pool isn't empty:  leave it in usedpools */
2730
1.61G
        return 1;
2731
1.61G
    }
2732
2733
    /* Pool is now empty:  unlink from usedpools, and
2734
     * link to the front of freepools.  This ensures that
2735
     * previously freed pools will be allocated later
2736
     * (being not referenced, they are perhaps paged out).
2737
     */
2738
3.24M
    insert_to_freepool(state, pool);
2739
3.24M
    return 1;
2740
1.61G
}
2741
2742
2743
void
2744
_PyObject_Free(void *ctx, void *p)
2745
2.01G
{
2746
    /* PyObject_Free(NULL) has no effect */
2747
2.01G
    if (p == NULL) {
2748
2.07M
        return;
2749
2.07M
    }
2750
2751
2.01G
    OMState *state = get_state();
2752
2.01G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2753
        /* pymalloc didn't allocate this address */
2754
244M
        PyMem_RawFree(p);
2755
244M
        raw_allocated_blocks--;
2756
244M
    }
2757
2.01G
}
2758
2759
2760
/* pymalloc realloc.
2761
2762
   If nbytes==0, then as the Python docs promise, we do not treat this like
2763
   free(p), and return a non-NULL result.
2764
2765
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2766
   newptr_p.
2767
2768
   Return 0 if pymalloc didn't allocated p. */
2769
static int
2770
pymalloc_realloc(OMState *state, void *ctx,
2771
                 void **newptr_p, void *p, size_t nbytes)
2772
111M
{
2773
111M
    void *bp;
2774
111M
    poolp pool;
2775
111M
    size_t size;
2776
2777
111M
    assert(p != NULL);
2778
2779
#ifdef WITH_VALGRIND
2780
    /* Treat running_on_valgrind == -1 the same as 0 */
2781
    if (UNLIKELY(running_on_valgrind > 0)) {
2782
        return 0;
2783
    }
2784
#endif
2785
2786
111M
    pool = POOL_ADDR(p);
2787
111M
    if (!address_in_range(state, p, pool)) {
2788
        /* pymalloc is not managing this block.
2789
2790
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2791
           over this block.  However, if we do, we need to copy the valid data
2792
           from the C-managed block to one of our blocks, and there's no
2793
           portable way to know how much of the memory space starting at p is
2794
           valid.
2795
2796
           As bug 1185883 pointed out the hard way, it's possible that the
2797
           C-managed block is "at the end" of allocated VM space, so that a
2798
           memory fault can occur if we try to copy nbytes bytes starting at p.
2799
           Instead we punt: let C continue to manage this block. */
2800
9.05M
        return 0;
2801
9.05M
    }
2802
2803
    /* pymalloc is in charge of this block */
2804
102M
    size = INDEX2SIZE(pool->szidx);
2805
102M
    if (nbytes <= size) {
2806
        /* The block is staying the same or shrinking.
2807
2808
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2809
           block to a smaller size class, but it wastes memory not to copy it.
2810
2811
           The compromise here is to copy on shrink only if at least 25% of
2812
           size can be shaved off. */
2813
70.1M
        if (4 * nbytes > 3 * size) {
2814
            /* It's the same, or shrinking and new/old > 3/4. */
2815
22.6M
            *newptr_p = p;
2816
22.6M
            return 1;
2817
22.6M
        }
2818
47.5M
        size = nbytes;
2819
47.5M
    }
2820
2821
79.8M
    bp = _PyObject_Malloc(ctx, nbytes);
2822
79.8M
    if (bp != NULL) {
2823
79.8M
        memcpy(bp, p, size);
2824
79.8M
        _PyObject_Free(ctx, p);
2825
79.8M
    }
2826
79.8M
    *newptr_p = bp;
2827
79.8M
    return 1;
2828
102M
}
2829
2830
2831
void *
2832
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2833
344M
{
2834
344M
    void *ptr2;
2835
2836
344M
    if (ptr == NULL) {
2837
232M
        return _PyObject_Malloc(ctx, nbytes);
2838
232M
    }
2839
2840
111M
    OMState *state = get_state();
2841
111M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2842
102M
        return ptr2;
2843
102M
    }
2844
2845
9.05M
    return PyMem_RawRealloc(ptr, nbytes);
2846
111M
}
2847
2848
#else   /* ! WITH_PYMALLOC */
2849
2850
/*==========================================================================*/
2851
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2852
 * only be used by extensions that are compiled with pymalloc enabled. */
2853
2854
Py_ssize_t
2855
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2856
{
2857
    return 0;
2858
}
2859
2860
Py_ssize_t
2861
_Py_GetGlobalAllocatedBlocks(void)
2862
{
2863
    return 0;
2864
}
2865
2866
void
2867
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2868
{
2869
    return;
2870
}
2871
2872
void
2873
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2874
{
2875
    return;
2876
}
2877
2878
#endif /* WITH_PYMALLOC */
2879
2880
2881
/*==========================================================================*/
2882
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2883
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2884
 */
2885
2886
/* Uncomment this define to add the "serialno" field */
2887
/* #define PYMEM_DEBUG_SERIALNO */
2888
2889
#ifdef PYMEM_DEBUG_SERIALNO
2890
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2891
2892
/* serialno is always incremented via calling this routine.  The point is
2893
 * to supply a single place to set a breakpoint.
2894
 */
2895
static void
2896
bumpserialno(void)
2897
{
2898
    ++serialno;
2899
}
2900
#endif
2901
2902
0
#define SST SIZEOF_SIZE_T
2903
2904
#ifdef PYMEM_DEBUG_SERIALNO
2905
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2906
#else
2907
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2908
#endif
2909
2910
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2911
static size_t
2912
read_size_t(const void *p)
2913
0
{
2914
0
    const uint8_t *q = (const uint8_t *)p;
2915
0
    size_t result = *q++;
2916
0
    int i;
2917
2918
0
    for (i = SST; --i > 0; ++q)
2919
0
        result = (result << 8) | *q;
2920
0
    return result;
2921
0
}
2922
2923
/* Write n as a big-endian size_t, MSB at address p, LSB at
2924
 * p + sizeof(size_t) - 1.
2925
 */
2926
static void
2927
write_size_t(void *p, size_t n)
2928
0
{
2929
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2930
0
    int i;
2931
2932
0
    for (i = SST; --i >= 0; --q) {
2933
0
        *q = (uint8_t)(n & 0xff);
2934
0
        n >>= 8;
2935
0
    }
2936
0
}
2937
2938
static void
2939
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2940
               bool is_alloc)
2941
0
{
2942
#ifdef Py_GIL_DISABLED
2943
    if (api->api_id == 'o') {
2944
        // Don't overwrite the first few bytes of a PyObject allocation in the
2945
        // free-threaded build
2946
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2947
        size_t debug_offset;
2948
        if (is_alloc) {
2949
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2950
        }
2951
        else {
2952
            char *alloc = (char *)data - 2*SST;  // start of the allocation
2953
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
2954
        }
2955
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
2956
        if (debug_offset < nbytes) {
2957
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
2958
        }
2959
        return;
2960
    }
2961
#endif
2962
0
    memset(data, c, nbytes);
2963
0
}
2964
2965
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2966
   fills them with useful stuff, here calling the underlying malloc's result p:
2967
2968
p[0: S]
2969
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2970
    to read in a memory dump).
2971
p[S]
2972
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2973
p[S+1: 2*S]
2974
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2975
p[2*S: 2*S+n]
2976
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2977
    Used to catch reference to uninitialized memory.
2978
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2979
    handled the request itself.
2980
p[2*S+n: 2*S+n+S]
2981
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
2982
p[2*S+n+S: 2*S+n+2*S]
2983
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2984
    and _PyMem_DebugRealloc.
2985
    This is a big-endian size_t.
2986
    If "bad memory" is detected later, the serial number gives an
2987
    excellent way to set a breakpoint on the next run, to capture the
2988
    instant at which this block was passed out.
2989
2990
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2991
for 3 * S extra bytes, and omits the last serialno field.
2992
*/
2993
2994
static void *
2995
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2996
0
{
2997
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2998
0
    uint8_t *p;           /* base address of malloc'ed pad block */
2999
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
3000
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3001
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
3002
3003
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3004
        /* integer overflow: can't represent total as a Py_ssize_t */
3005
0
        return NULL;
3006
0
    }
3007
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3008
3009
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
3010
                ^--- p    ^--- data   ^--- tail
3011
       S: nbytes stored as size_t
3012
       I: API identifier (1 byte)
3013
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
3014
       C: Clean bytes used later to store actual data
3015
       N: Serial number stored as size_t
3016
3017
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
3018
       is omitted. */
3019
3020
0
    if (use_calloc) {
3021
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
3022
0
    }
3023
0
    else {
3024
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
3025
0
    }
3026
0
    if (p == NULL) {
3027
0
        return NULL;
3028
0
    }
3029
0
    data = p + 2*SST;
3030
3031
#ifdef PYMEM_DEBUG_SERIALNO
3032
    bumpserialno();
3033
#endif
3034
3035
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
3036
0
    write_size_t(p, nbytes);
3037
0
    p[SST] = (uint8_t)api->api_id;
3038
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3039
3040
0
    if (nbytes > 0 && !use_calloc) {
3041
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
3042
0
    }
3043
3044
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
3045
0
    tail = data + nbytes;
3046
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3047
#ifdef PYMEM_DEBUG_SERIALNO
3048
    write_size_t(tail + SST, serialno);
3049
#endif
3050
3051
0
    return data;
3052
0
}
3053
3054
void *
3055
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
3056
0
{
3057
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3058
0
}
3059
3060
void *
3061
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3062
0
{
3063
0
    size_t nbytes;
3064
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3065
0
    nbytes = nelem * elsize;
3066
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3067
0
}
3068
3069
3070
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3071
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3072
   Then fills the original bytes with PYMEM_DEADBYTE.
3073
   Then calls the underlying free.
3074
*/
3075
void
3076
_PyMem_DebugRawFree(void *ctx, void *p)
3077
0
{
3078
    /* PyMem_Free(NULL) has no effect */
3079
0
    if (p == NULL) {
3080
0
        return;
3081
0
    }
3082
3083
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3084
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
3085
0
    size_t nbytes;
3086
3087
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3088
0
    nbytes = read_size_t(q);
3089
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
3090
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
3091
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
3092
0
    api->alloc.free(api->alloc.ctx, q);
3093
0
}
3094
3095
3096
void *
3097
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3098
0
{
3099
0
    if (p == NULL) {
3100
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3101
0
    }
3102
3103
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3104
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3105
0
    uint8_t *data;        /* pointer to data bytes */
3106
0
    uint8_t *r;
3107
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3108
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3109
0
    size_t original_nbytes;
3110
0
#define ERASED_SIZE 64
3111
3112
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3113
3114
0
    data = (uint8_t *)p;
3115
0
    head = data - 2*SST;
3116
0
    original_nbytes = read_size_t(head);
3117
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3118
        /* integer overflow: can't represent total as a Py_ssize_t */
3119
0
        return NULL;
3120
0
    }
3121
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3122
3123
0
    tail = data + original_nbytes;
3124
#ifdef PYMEM_DEBUG_SERIALNO
3125
    size_t block_serialno = read_size_t(tail + SST);
3126
#endif
3127
0
#ifndef Py_GIL_DISABLED
3128
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
3129
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
3130
     */
3131
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3132
0
    if (original_nbytes <= sizeof(save)) {
3133
0
        memcpy(save, data, original_nbytes);
3134
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3135
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3136
0
    }
3137
0
    else {
3138
0
        memcpy(save, data, ERASED_SIZE);
3139
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3140
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3141
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3142
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3143
0
    }
3144
0
#endif
3145
3146
    /* Resize and add decorations. */
3147
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3148
0
    if (r == NULL) {
3149
        /* if realloc() failed: rewrite header and footer which have
3150
           just been erased */
3151
0
        nbytes = original_nbytes;
3152
0
    }
3153
0
    else {
3154
0
        head = r;
3155
#ifdef PYMEM_DEBUG_SERIALNO
3156
        bumpserialno();
3157
        block_serialno = serialno;
3158
#endif
3159
0
    }
3160
0
    data = head + 2*SST;
3161
3162
0
    write_size_t(head, nbytes);
3163
0
    head[SST] = (uint8_t)api->api_id;
3164
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3165
3166
0
    tail = data + nbytes;
3167
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3168
#ifdef PYMEM_DEBUG_SERIALNO
3169
    write_size_t(tail + SST, block_serialno);
3170
#endif
3171
3172
0
#ifndef Py_GIL_DISABLED
3173
    /* Restore saved bytes. */
3174
0
    if (original_nbytes <= sizeof(save)) {
3175
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3176
0
    }
3177
0
    else {
3178
0
        size_t i = original_nbytes - ERASED_SIZE;
3179
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3180
0
        if (nbytes > i) {
3181
0
            memcpy(data + i, &save[ERASED_SIZE],
3182
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3183
0
        }
3184
0
    }
3185
0
#endif
3186
3187
0
    if (r == NULL) {
3188
0
        return NULL;
3189
0
    }
3190
3191
0
    if (nbytes > original_nbytes) {
3192
        /* growing: mark new extra memory clean */
3193
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3194
0
               nbytes - original_nbytes);
3195
0
    }
3196
3197
0
    return data;
3198
0
}
3199
3200
static inline void
3201
_PyMem_DebugCheckGIL(const char *func)
3202
0
{
3203
0
    PyThreadState *tstate = _PyThreadState_GET();
3204
0
    if (tstate == NULL) {
3205
0
#ifndef Py_GIL_DISABLED
3206
0
        _Py_FatalErrorFunc(func,
3207
0
                           "Python memory allocator called "
3208
0
                           "without holding the GIL");
3209
#else
3210
        _Py_FatalErrorFunc(func,
3211
                           "Python memory allocator called "
3212
                           "without an active thread state. "
3213
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3214
#endif
3215
0
    }
3216
0
}
3217
3218
void *
3219
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3220
0
{
3221
0
    _PyMem_DebugCheckGIL(__func__);
3222
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3223
0
}
3224
3225
void *
3226
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3227
0
{
3228
0
    _PyMem_DebugCheckGIL(__func__);
3229
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3230
0
}
3231
3232
3233
void
3234
_PyMem_DebugFree(void *ctx, void *ptr)
3235
0
{
3236
0
    _PyMem_DebugCheckGIL(__func__);
3237
0
    _PyMem_DebugRawFree(ctx, ptr);
3238
0
}
3239
3240
3241
void *
3242
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3243
0
{
3244
0
    _PyMem_DebugCheckGIL(__func__);
3245
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3246
0
}
3247
3248
/* Check the forbidden bytes on both ends of the memory allocated for p.
3249
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3250
 * and call Py_FatalError to kill the program.
3251
 * The API id, is also checked.
3252
 */
3253
static void
3254
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3255
0
{
3256
0
    assert(p != NULL);
3257
3258
0
    const uint8_t *q = (const uint8_t *)p;
3259
0
    size_t nbytes;
3260
0
    const uint8_t *tail;
3261
0
    int i;
3262
0
    char id;
3263
3264
    /* Check the API id */
3265
0
    id = (char)q[-SST];
3266
0
    if (id != api) {
3267
0
        _PyObject_DebugDumpAddress(p);
3268
0
        _Py_FatalErrorFormat(func,
3269
0
                             "bad ID: Allocated using API '%c', "
3270
0
                             "verified using API '%c'",
3271
0
                             id, api);
3272
0
    }
3273
3274
    /* Check the stuff at the start of p first:  if there's underwrite
3275
     * corruption, the number-of-bytes field may be nuts, and checking
3276
     * the tail could lead to a segfault then.
3277
     */
3278
0
    for (i = SST-1; i >= 1; --i) {
3279
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3280
0
            _PyObject_DebugDumpAddress(p);
3281
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3282
0
        }
3283
0
    }
3284
3285
0
    nbytes = read_size_t(q - 2*SST);
3286
0
    tail = q + nbytes;
3287
0
    for (i = 0; i < SST; ++i) {
3288
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3289
0
            _PyObject_DebugDumpAddress(p);
3290
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3291
0
        }
3292
0
    }
3293
0
}
3294
3295
/* Display info to stderr about the memory block at p. */
3296
static void
3297
_PyObject_DebugDumpAddress(const void *p)
3298
0
{
3299
0
    const uint8_t *q = (const uint8_t *)p;
3300
0
    const uint8_t *tail;
3301
0
    size_t nbytes;
3302
0
    int i;
3303
0
    int ok;
3304
0
    char id;
3305
3306
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3307
0
    if (p == NULL) {
3308
0
        fprintf(stderr, "\n");
3309
0
        return;
3310
0
    }
3311
0
    id = (char)q[-SST];
3312
0
    fprintf(stderr, " API '%c'\n", id);
3313
3314
0
    nbytes = read_size_t(q - 2*SST);
3315
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3316
3317
    /* In case this is nuts, check the leading pad bytes first. */
3318
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3319
0
    ok = 1;
3320
0
    for (i = 1; i <= SST-1; ++i) {
3321
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3322
0
            ok = 0;
3323
0
            break;
3324
0
        }
3325
0
    }
3326
0
    if (ok)
3327
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3328
0
    else {
3329
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3330
0
            PYMEM_FORBIDDENBYTE);
3331
0
        for (i = SST-1; i >= 1; --i) {
3332
0
            const uint8_t byte = *(q-i);
3333
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3334
0
            if (byte != PYMEM_FORBIDDENBYTE)
3335
0
                fputs(" *** OUCH", stderr);
3336
0
            fputc('\n', stderr);
3337
0
        }
3338
3339
0
        fputs("    Because memory is corrupted at the start, the "
3340
0
              "count of bytes requested\n"
3341
0
              "       may be bogus, and checking the trailing pad "
3342
0
              "bytes may segfault.\n", stderr);
3343
0
    }
3344
3345
0
    tail = q + nbytes;
3346
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3347
0
    ok = 1;
3348
0
    for (i = 0; i < SST; ++i) {
3349
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3350
0
            ok = 0;
3351
0
            break;
3352
0
        }
3353
0
    }
3354
0
    if (ok)
3355
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3356
0
    else {
3357
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3358
0
                PYMEM_FORBIDDENBYTE);
3359
0
        for (i = 0; i < SST; ++i) {
3360
0
            const uint8_t byte = tail[i];
3361
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3362
0
                    i, byte);
3363
0
            if (byte != PYMEM_FORBIDDENBYTE)
3364
0
                fputs(" *** OUCH", stderr);
3365
0
            fputc('\n', stderr);
3366
0
        }
3367
0
    }
3368
3369
#ifdef PYMEM_DEBUG_SERIALNO
3370
    size_t serial = read_size_t(tail + SST);
3371
    fprintf(stderr,
3372
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3373
            serial);
3374
#endif
3375
3376
0
    if (nbytes > 0) {
3377
0
        i = 0;
3378
0
        fputs("    Data at p:", stderr);
3379
        /* print up to 8 bytes at the start */
3380
0
        while (q < tail && i < 8) {
3381
0
            fprintf(stderr, " %02x", *q);
3382
0
            ++i;
3383
0
            ++q;
3384
0
        }
3385
        /* and up to 8 at the end */
3386
0
        if (q < tail) {
3387
0
            if (tail - q > 8) {
3388
0
                fputs(" ...", stderr);
3389
0
                q = tail - 8;
3390
0
            }
3391
0
            while (q < tail) {
3392
0
                fprintf(stderr, " %02x", *q);
3393
0
                ++q;
3394
0
            }
3395
0
        }
3396
0
        fputc('\n', stderr);
3397
0
    }
3398
0
    fputc('\n', stderr);
3399
3400
0
    fflush(stderr);
3401
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3402
0
}
3403
3404
3405
static size_t
3406
printone(FILE *out, const char* msg, size_t value)
3407
0
{
3408
0
    int i, k;
3409
0
    char buf[100];
3410
0
    size_t origvalue = value;
3411
3412
0
    fputs(msg, out);
3413
0
    for (i = (int)strlen(msg); i < 35; ++i)
3414
0
        fputc(' ', out);
3415
0
    fputc('=', out);
3416
3417
    /* Write the value with commas. */
3418
0
    i = 22;
3419
0
    buf[i--] = '\0';
3420
0
    buf[i--] = '\n';
3421
0
    k = 3;
3422
0
    do {
3423
0
        size_t nextvalue = value / 10;
3424
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3425
0
        value = nextvalue;
3426
0
        buf[i--] = (char)(digit + '0');
3427
0
        --k;
3428
0
        if (k == 0 && value && i >= 0) {
3429
0
            k = 3;
3430
0
            buf[i--] = ',';
3431
0
        }
3432
0
    } while (value && i >= 0);
3433
3434
0
    while (i >= 0)
3435
0
        buf[i--] = ' ';
3436
0
    fputs(buf, out);
3437
3438
0
    return origvalue;
3439
0
}
3440
3441
void
3442
_PyDebugAllocatorStats(FILE *out,
3443
                       const char *block_name, int num_blocks, size_t sizeof_block)
3444
0
{
3445
0
    char buf1[128];
3446
0
    char buf2[128];
3447
0
    PyOS_snprintf(buf1, sizeof(buf1),
3448
0
                  "%d %ss * %zd bytes each",
3449
0
                  num_blocks, block_name, sizeof_block);
3450
0
    PyOS_snprintf(buf2, sizeof(buf2),
3451
0
                  "%48s ", buf1);
3452
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3453
0
}
3454
3455
// Return true if the obmalloc state structure is heap allocated,
3456
// by PyMem_RawCalloc().  For the main interpreter, this structure
3457
// allocated in the BSS.  Allocating that way gives some memory savings
3458
// and a small performance win (at least on a demand paged OS).  On
3459
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3460
// memory is for the arena_map_top array.  Since normally only one entry
3461
// of that array is used, only one page of resident memory is actually
3462
// used, rather than the full 256 kB.
3463
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3464
0
{
3465
0
#if WITH_PYMALLOC
3466
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3467
#else
3468
    return false;
3469
#endif
3470
0
}
3471
3472
#ifdef WITH_PYMALLOC
3473
static void
3474
init_obmalloc_pools(PyInterpreterState *interp)
3475
32
{
3476
    // initialize the obmalloc->pools structure.  This must be done
3477
    // before the obmalloc alloc/free functions can be called.
3478
32
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3479
32
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3480
32
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3481
32
}
3482
#endif /* WITH_PYMALLOC */
3483
3484
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3485
32
{
3486
32
#ifdef WITH_PYMALLOC
3487
    /* Initialize obmalloc, but only for subinterpreters,
3488
       since the main interpreter is initialized statically. */
3489
32
    if (_Py_IsMainInterpreter(interp)
3490
0
            || _PyInterpreterState_HasFeature(interp,
3491
32
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3492
32
        interp->obmalloc = &obmalloc_state_main;
3493
32
        if (!obmalloc_state_initialized) {
3494
32
            init_obmalloc_pools(interp);
3495
32
            obmalloc_state_initialized = true;
3496
32
        }
3497
32
    } else {
3498
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3499
0
        if (interp->obmalloc == NULL) {
3500
0
            return -1;
3501
0
        }
3502
0
        init_obmalloc_pools(interp);
3503
0
    }
3504
32
#endif /* WITH_PYMALLOC */
3505
32
    return 0; // success
3506
32
}
3507
3508
3509
#ifdef WITH_PYMALLOC
3510
3511
static void
3512
free_obmalloc_arenas(PyInterpreterState *interp)
3513
0
{
3514
0
    OMState *state = interp->obmalloc;
3515
0
    for (uint i = 0; i < maxarenas; ++i) {
3516
        // free each obmalloc memory arena
3517
0
        struct arena_object *ao = &allarenas[i];
3518
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3519
0
                             (void *)ao->address, ARENA_SIZE);
3520
0
    }
3521
    // free the array containing pointers to all arenas
3522
0
    PyMem_RawFree(allarenas);
3523
0
#if WITH_PYMALLOC_RADIX_TREE
3524
0
#ifdef USE_INTERIOR_NODES
3525
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3526
    // by arena_map_mark_used() but not freed when arenas are freed.
3527
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3528
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3529
0
         if (mid == NULL) {
3530
0
             continue;
3531
0
         }
3532
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3533
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3534
0
            if (bot == NULL) {
3535
0
                continue;
3536
0
            }
3537
0
            PyMem_RawFree(bot);
3538
0
         }
3539
0
         PyMem_RawFree(mid);
3540
0
    }
3541
0
#endif
3542
0
#endif
3543
0
}
3544
3545
#ifdef Py_DEBUG
3546
/* Is target in the list?  The list is traversed via the nextpool pointers.
3547
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3548
 * list, else 0.
3549
 */
3550
static int
3551
pool_is_in_list(const poolp target, poolp list)
3552
{
3553
    poolp origlist = list;
3554
    assert(target != NULL);
3555
    if (list == NULL)
3556
        return 0;
3557
    do {
3558
        if (target == list)
3559
            return 1;
3560
        list = list->nextpool;
3561
    } while (list != NULL && list != origlist);
3562
    return 0;
3563
}
3564
#endif
3565
3566
#ifdef WITH_MIMALLOC
3567
struct _alloc_stats {
3568
    size_t allocated_blocks;
3569
    size_t allocated_bytes;
3570
    size_t allocated_with_overhead;
3571
    size_t bytes_reserved;
3572
    size_t bytes_committed;
3573
};
3574
3575
static bool _collect_alloc_stats(
3576
    const mi_heap_t* heap, const mi_heap_area_t* area,
3577
    void* block, size_t block_size, void* arg)
3578
0
{
3579
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3580
0
    stats->allocated_blocks += area->used;
3581
0
    stats->allocated_bytes += area->used * area->block_size;
3582
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3583
0
    stats->bytes_reserved += area->reserved;
3584
0
    stats->bytes_committed += area->committed;
3585
0
    return 1;
3586
0
}
3587
3588
static void
3589
py_mimalloc_print_stats(FILE *out)
3590
0
{
3591
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3592
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3593
0
    fprintf(out, "Medium block threshold = %zu\n",
3594
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3595
0
    fprintf(out, "Large object max size = %zu\n",
3596
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3597
3598
0
    mi_heap_t *heap = mi_heap_get_default();
3599
0
    struct _alloc_stats stats;
3600
0
    memset(&stats, 0, sizeof(stats));
3601
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3602
3603
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3604
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3605
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3606
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3607
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3608
0
}
3609
#endif
3610
3611
3612
static void
3613
pymalloc_print_stats(FILE *out)
3614
0
{
3615
0
    OMState *state = get_state();
3616
3617
0
    uint i;
3618
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3619
    /* # of pools, allocated blocks, and free blocks per class index */
3620
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3621
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3622
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3623
    /* total # of allocated bytes in used and full pools */
3624
0
    size_t allocated_bytes = 0;
3625
    /* total # of available bytes in used pools */
3626
0
    size_t available_bytes = 0;
3627
    /* # of free pools + pools not yet carved out of current arena */
3628
0
    uint numfreepools = 0;
3629
    /* # of bytes for arena alignment padding */
3630
0
    size_t arena_alignment = 0;
3631
    /* # of bytes in used and full pools used for pool_headers */
3632
0
    size_t pool_header_bytes = 0;
3633
    /* # of bytes in used and full pools wasted due to quantization,
3634
     * i.e. the necessarily leftover space at the ends of used and
3635
     * full pools.
3636
     */
3637
0
    size_t quantization = 0;
3638
    /* # of arenas actually allocated. */
3639
0
    size_t narenas = 0;
3640
    /* running total -- should equal narenas * ARENA_SIZE */
3641
0
    size_t total;
3642
0
    char buf[128];
3643
3644
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3645
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3646
3647
0
    for (i = 0; i < numclasses; ++i)
3648
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3649
3650
    /* Because full pools aren't linked to from anything, it's easiest
3651
     * to march over all the arenas.  If we're lucky, most of the memory
3652
     * will be living in full pools -- would be a shame to miss them.
3653
     */
3654
0
    for (i = 0; i < maxarenas; ++i) {
3655
0
        uintptr_t base = allarenas[i].address;
3656
3657
        /* Skip arenas which are not allocated. */
3658
0
        if (allarenas[i].address == (uintptr_t)NULL)
3659
0
            continue;
3660
0
        narenas += 1;
3661
3662
0
        numfreepools += allarenas[i].nfreepools;
3663
3664
        /* round up to pool alignment */
3665
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3666
0
            arena_alignment += POOL_SIZE;
3667
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3668
0
            base += POOL_SIZE;
3669
0
        }
3670
3671
        /* visit every pool in the arena */
3672
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3673
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3674
0
            poolp p = (poolp)base;
3675
0
            const uint sz = p->szidx;
3676
0
            uint freeblocks;
3677
3678
0
            if (p->ref.count == 0) {
3679
                /* currently unused */
3680
#ifdef Py_DEBUG
3681
                assert(pool_is_in_list(p, allarenas[i].freepools));
3682
#endif
3683
0
                continue;
3684
0
            }
3685
0
            ++numpools[sz];
3686
0
            numblocks[sz] += p->ref.count;
3687
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3688
0
            numfreeblocks[sz] += freeblocks;
3689
#ifdef Py_DEBUG
3690
            if (freeblocks > 0)
3691
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3692
#endif
3693
0
        }
3694
0
    }
3695
0
    assert(narenas == narenas_currently_allocated);
3696
3697
0
    fputc('\n', out);
3698
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3699
0
          "-----   ----   ---------   -------------  ------------\n",
3700
0
          out);
3701
3702
0
    for (i = 0; i < numclasses; ++i) {
3703
0
        size_t p = numpools[i];
3704
0
        size_t b = numblocks[i];
3705
0
        size_t f = numfreeblocks[i];
3706
0
        uint size = INDEX2SIZE(i);
3707
0
        if (p == 0) {
3708
0
            assert(b == 0 && f == 0);
3709
0
            continue;
3710
0
        }
3711
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3712
0
                i, size, p, b, f);
3713
0
        allocated_bytes += b * size;
3714
0
        available_bytes += f * size;
3715
0
        pool_header_bytes += p * POOL_OVERHEAD;
3716
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3717
0
    }
3718
0
    fputc('\n', out);
3719
#ifdef PYMEM_DEBUG_SERIALNO
3720
    if (_PyMem_DebugEnabled()) {
3721
        (void)printone(out, "# times object malloc called", serialno);
3722
    }
3723
#endif
3724
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3725
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3726
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3727
0
    (void)printone(out, "# arenas allocated current", narenas);
3728
3729
0
    PyOS_snprintf(buf, sizeof(buf),
3730
0
                  "%zu arenas * %d bytes/arena",
3731
0
                  narenas, ARENA_SIZE);
3732
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3733
3734
0
    fputc('\n', out);
3735
3736
    /* Account for what all of those arena bytes are being used for. */
3737
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3738
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3739
3740
0
    PyOS_snprintf(buf, sizeof(buf),
3741
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3742
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3743
3744
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3745
0
    total += printone(out, "# bytes lost to quantization", quantization);
3746
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3747
0
    (void)printone(out, "Total", total);
3748
0
    assert(narenas * ARENA_SIZE == total);
3749
3750
0
#if WITH_PYMALLOC_RADIX_TREE
3751
0
    fputs("\narena map counts\n", out);
3752
0
#ifdef USE_INTERIOR_NODES
3753
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3754
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3755
0
    fputc('\n', out);
3756
0
#endif
3757
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3758
0
#ifdef USE_INTERIOR_NODES
3759
0
    total += printone(out, "# bytes lost to arena map mid",
3760
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3761
0
    total += printone(out, "# bytes lost to arena map bot",
3762
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3763
0
    (void)printone(out, "Total", total);
3764
0
#endif
3765
0
#endif
3766
3767
0
}
3768
3769
/* Print summary info to "out" about the state of pymalloc's structures.
3770
 * In Py_DEBUG mode, also perform some expensive internal consistency
3771
 * checks.
3772
 *
3773
 * Return 0 if the memory debug hooks are not installed or no statistics was
3774
 * written into out, return 1 otherwise.
3775
 */
3776
int
3777
_PyObject_DebugMallocStats(FILE *out)
3778
0
{
3779
0
#ifdef WITH_MIMALLOC
3780
0
    if (_PyMem_MimallocEnabled()) {
3781
0
        py_mimalloc_print_stats(out);
3782
0
        return 1;
3783
0
    }
3784
0
    else
3785
0
#endif
3786
0
    if (_PyMem_PymallocEnabled()) {
3787
0
        pymalloc_print_stats(out);
3788
0
        return 1;
3789
0
    }
3790
0
    else {
3791
0
        return 0;
3792
0
    }
3793
0
}
3794
3795
#endif /* #ifdef WITH_PYMALLOC */