Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#ifdef WITH_MIMALLOC
18
// Forward declarations of functions used in our mimalloc modifications
19
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
20
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
21
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
22
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
23
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
24
#  include "pycore_mimalloc.h"
25
#  include "mimalloc/static.c"
26
#  include "mimalloc/internal.h"  // for stats
27
#endif
28
29
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
30
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
31
#endif
32
33
#undef  uint
34
1.50G
#define uint pymem_uint
35
36
37
/* Defined in tracemalloc.c */
38
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
39
40
static void _PyObject_DebugDumpAddress(const void *p);
41
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
42
43
44
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
45
static void set_up_debug_hooks_unlocked(void);
46
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
47
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
49
50
/***************************************/
51
/* low-level allocator implementations */
52
/***************************************/
53
54
/* the default raw allocator (wraps malloc) */
55
56
void *
57
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
58
198M
{
59
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
60
       for malloc(0), which would be treated as an error. Some platforms would
61
       return a pointer with no memory behind it, which would break pymalloc.
62
       To solve these problems, allocate an extra byte. */
63
198M
    if (size == 0)
64
33.8M
        size = 1;
65
198M
    return malloc(size);
66
198M
}
67
68
void *
69
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
70
60.5k
{
71
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
72
       for calloc(0, 0), which would be treated as an error. Some platforms
73
       would return a pointer with no memory behind it, which would break
74
       pymalloc.  To solve these problems, allocate an extra byte. */
75
60.5k
    if (nelem == 0 || elsize == 0) {
76
2
        nelem = 1;
77
2
        elsize = 1;
78
2
    }
79
60.5k
    return calloc(nelem, elsize);
80
60.5k
}
81
82
void *
83
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
84
6.82M
{
85
6.82M
    if (size == 0)
86
0
        size = 1;
87
6.82M
    return realloc(ptr, size);
88
6.82M
}
89
90
void
91
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
92
198M
{
93
198M
    free(ptr);
94
198M
}
95
96
#ifdef WITH_MIMALLOC
97
98
static void
99
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
100
0
{
101
#ifdef Py_GIL_DISABLED
102
    // Clear the QSBR goal and remove the page from the QSBR linked list.
103
    page->qsbr_goal = 0;
104
    if (page->qsbr_node.next != NULL) {
105
        llist_remove(&page->qsbr_node);
106
    }
107
#endif
108
0
}
109
110
// Check if an empty, newly reclaimed page is safe to free now.
111
static bool
112
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
113
0
{
114
0
    assert(mi_page_all_free(page));
115
#ifdef Py_GIL_DISABLED
116
    assert(page->qsbr_node.next == NULL);
117
    if (page->use_qsbr && page->qsbr_goal != 0) {
118
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
119
        if (tstate == NULL) {
120
            return false;
121
        }
122
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
123
    }
124
#endif
125
0
    return true;
126
127
0
}
128
129
#ifdef Py_GIL_DISABLED
130
131
// If we are deferring collection of more than this amount of memory for
132
// mimalloc pages, advance the write sequence.  Advancing allows these
133
// pages to be re-used in a different thread or for a different size class.
134
#define QSBR_PAGE_MEM_LIMIT 4096*20
135
136
// Return true if the global write sequence should be advanced for a mimalloc
137
// page that is deferred from collection.
138
static bool
139
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
140
{
141
    size_t bsize = mi_page_block_size(page);
142
    size_t page_size = page->capacity*bsize;
143
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
144
        qsbr->deferred_page_memory = 0;
145
        return true;
146
    }
147
    qsbr->deferred_page_memory += page_size;
148
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
149
        qsbr->deferred_page_memory = 0;
150
        return true;
151
    }
152
    return false;
153
}
154
#endif
155
156
static bool
157
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
158
0
{
159
#ifdef Py_GIL_DISABLED
160
    assert(mi_page_all_free(page));
161
    if (page->use_qsbr) {
162
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
163
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
164
            _PyMem_mi_page_clear_qsbr(page);
165
            _mi_page_free(page, pq, force);
166
            return true;
167
        }
168
169
        _PyMem_mi_page_clear_qsbr(page);
170
        page->retire_expire = 0;
171
172
        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
173
            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
174
        }
175
        else {
176
            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
177
        }
178
179
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
180
        return false;
181
    }
182
#endif
183
0
    _mi_page_free(page, pq, force);
184
0
    return true;
185
0
}
186
187
static void
188
_PyMem_mi_page_reclaimed(mi_page_t *page)
189
0
{
190
#ifdef Py_GIL_DISABLED
191
    assert(page->qsbr_node.next == NULL);
192
    if (page->qsbr_goal != 0) {
193
        if (mi_page_all_free(page)) {
194
            assert(page->qsbr_node.next == NULL);
195
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
196
            page->retire_expire = 0;
197
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
198
        }
199
        else {
200
            page->qsbr_goal = 0;
201
        }
202
    }
203
#endif
204
0
}
205
206
static void
207
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    if (!heap->page_use_qsbr) {
211
        return;
212
    }
213
214
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
215
    struct llist_node *head = &tstate->mimalloc.page_list;
216
    if (llist_empty(head)) {
217
        return;
218
    }
219
220
    struct llist_node *node;
221
    llist_for_each_safe(node, head) {
222
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
223
        if (!mi_page_all_free(page)) {
224
            // We allocated from this page some point after the delayed free
225
            _PyMem_mi_page_clear_qsbr(page);
226
            continue;
227
        }
228
229
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
230
            return;
231
        }
232
233
        _PyMem_mi_page_clear_qsbr(page);
234
        _mi_page_free(page, mi_page_queue_of(page), false);
235
    }
236
#endif
237
0
}
238
239
void *
240
_PyMem_MiMalloc(void *ctx, size_t size)
241
0
{
242
#ifdef Py_GIL_DISABLED
243
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
244
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
245
    return mi_heap_malloc(heap, size);
246
#else
247
0
    return mi_malloc(size);
248
0
#endif
249
0
}
250
251
void *
252
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
253
0
{
254
#ifdef Py_GIL_DISABLED
255
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
256
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
257
    return mi_heap_calloc(heap, nelem, elsize);
258
#else
259
0
    return mi_calloc(nelem, elsize);
260
0
#endif
261
0
}
262
263
void *
264
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
265
0
{
266
#ifdef Py_GIL_DISABLED
267
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
268
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
269
    return mi_heap_realloc(heap, ptr, size);
270
#else
271
0
    return mi_realloc(ptr, size);
272
0
#endif
273
0
}
274
275
void
276
_PyMem_MiFree(void *ctx, void *ptr)
277
0
{
278
0
    mi_free(ptr);
279
0
}
280
281
void *
282
_PyObject_MiMalloc(void *ctx, size_t nbytes)
283
0
{
284
#ifdef Py_GIL_DISABLED
285
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
286
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
287
    return mi_heap_malloc(heap, nbytes);
288
#else
289
0
    return mi_malloc(nbytes);
290
0
#endif
291
0
}
292
293
void *
294
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
295
0
{
296
#ifdef Py_GIL_DISABLED
297
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
298
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
299
    return mi_heap_calloc(heap, nelem, elsize);
300
#else
301
0
    return mi_calloc(nelem, elsize);
302
0
#endif
303
0
}
304
305
306
void *
307
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
308
0
{
309
#ifdef Py_GIL_DISABLED
310
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
311
    // Implement our own realloc logic so that we can copy PyObject header
312
    // in a thread-safe way.
313
    size_t size = mi_usable_size(ptr);
314
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
315
        return ptr;
316
    }
317
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    void* newp = mi_heap_malloc(heap, nbytes);
320
    if (newp == NULL) {
321
        return NULL;
322
    }
323
324
    // Free threaded Python allows access from other threads to the PyObject reference count
325
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
326
    // These fields are typically initialized by PyObject_Init() using relaxed
327
    // atomic stores. We need to copy these fields in a thread-safe way here.
328
    // We use the "debug_offset" to determine how many bytes to copy -- it
329
    // includes the PyObject header and plus any extra pre-headers.
330
    size_t offset = heap->debug_offset;
331
    assert(offset % sizeof(void*) == 0);
332
333
    size_t copy_size = (size < nbytes ? size : nbytes);
334
    if (copy_size >= offset) {
335
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
336
            // Use memcpy to avoid strict-aliasing issues. However, we probably
337
            // still have unavoidable strict-aliasing issues with
338
            // _Py_atomic_store_ptr_relaxed here.
339
            void *word;
340
            memcpy(&word, (char*)ptr + i, sizeof(void*));
341
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
342
        }
343
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
344
    }
345
    else {
346
        _mi_memcpy(newp, ptr, copy_size);
347
    }
348
    mi_free(ptr);
349
    return newp;
350
#else
351
0
    return mi_realloc(ptr, nbytes);
352
0
#endif
353
0
}
354
355
void
356
_PyObject_MiFree(void *ctx, void *ptr)
357
0
{
358
0
    mi_free(ptr);
359
0
}
360
361
#endif // WITH_MIMALLOC
362
363
364
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
365
366
367
#ifdef WITH_MIMALLOC
368
0
#  define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
369
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
370
#endif
371
372
/* the pymalloc allocator */
373
374
// The actual implementation is further down.
375
376
#if defined(WITH_PYMALLOC)
377
void* _PyObject_Malloc(void *ctx, size_t size);
378
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
379
void _PyObject_Free(void *ctx, void *p);
380
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
381
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
382
#endif  // WITH_PYMALLOC
383
384
#if defined(Py_GIL_DISABLED)
385
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
386
#  define PYRAW_ALLOC MALLOC_ALLOC
387
#  define PYMEM_ALLOC MIMALLOC_ALLOC
388
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
389
#elif defined(WITH_PYMALLOC)
390
0
#  define PYRAW_ALLOC MALLOC_ALLOC
391
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
392
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
393
#else
394
#  define PYRAW_ALLOC MALLOC_ALLOC
395
#  define PYMEM_ALLOC MALLOC_ALLOC
396
#  define PYOBJ_ALLOC MALLOC_ALLOC
397
#endif
398
399
400
/* the default debug allocators */
401
402
// The actual implementation is further down.
403
404
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
405
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
406
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
407
void _PyMem_DebugRawFree(void *ctx, void *ptr);
408
409
void* _PyMem_DebugMalloc(void *ctx, size_t size);
410
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
411
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
412
void _PyMem_DebugFree(void *ctx, void *p);
413
414
#define PYDBGRAW_ALLOC \
415
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
416
#define PYDBGMEM_ALLOC \
417
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
418
#define PYDBGOBJ_ALLOC \
419
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
420
421
/* default raw allocator (not swappable) */
422
423
void *
424
_PyMem_DefaultRawMalloc(size_t size)
425
256
{
426
#ifdef Py_DEBUG
427
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
428
#else
429
256
    return _PyMem_RawMalloc(NULL, size);
430
256
#endif
431
256
}
432
433
void *
434
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
435
0
{
436
#ifdef Py_DEBUG
437
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
438
#else
439
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
440
0
#endif
441
0
}
442
443
void *
444
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
445
0
{
446
#ifdef Py_DEBUG
447
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
448
#else
449
0
    return _PyMem_RawRealloc(NULL, ptr, size);
450
0
#endif
451
0
}
452
453
void
454
_PyMem_DefaultRawFree(void *ptr)
455
288
{
456
#ifdef Py_DEBUG
457
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
458
#else
459
288
    _PyMem_RawFree(NULL, ptr);
460
288
#endif
461
288
}
462
463
wchar_t*
464
_PyMem_DefaultRawWcsdup(const wchar_t *str)
465
192
{
466
192
    assert(str != NULL);
467
468
192
    size_t len = wcslen(str);
469
192
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
470
0
        return NULL;
471
0
    }
472
473
192
    size_t size = (len + 1) * sizeof(wchar_t);
474
192
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
475
192
    if (str2 == NULL) {
476
0
        return NULL;
477
0
    }
478
479
192
    memcpy(str2, str, size);
480
192
    return str2;
481
192
}
482
483
/* the low-level virtual memory allocator */
484
485
#ifdef WITH_PYMALLOC
486
#  ifdef MS_WINDOWS
487
#    include <windows.h>
488
#  elif defined(HAVE_MMAP)
489
#    include <sys/mman.h>
490
#    ifdef MAP_ANONYMOUS
491
#      define ARENAS_USE_MMAP
492
#    endif
493
#  endif
494
#endif
495
496
/* Return the system's default huge page size in bytes, or 0 if it
497
 * cannot be determined.  The result is cached after the first call.
498
 *
499
 * This is Linux-only (/proc/meminfo).  On other systems that define
500
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
501
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
502
static size_t
503
_pymalloc_system_hugepage_size(void)
504
{
505
    static size_t hp_size = 0;
506
    static int initialized = 0;
507
508
    if (initialized) {
509
        return hp_size;
510
    }
511
512
#ifdef __linux__
513
    FILE *f = fopen("/proc/meminfo", "r");
514
    if (f != NULL) {
515
        char line[256];
516
        while (fgets(line, sizeof(line), f)) {
517
            unsigned long size_kb;
518
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
519
                hp_size = (size_t)size_kb * 1024;
520
                break;
521
            }
522
        }
523
        fclose(f);
524
    }
525
#endif
526
527
    initialized = 1;
528
    return hp_size;
529
}
530
#endif
531
532
void *
533
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
534
197k
{
535
#ifdef MS_WINDOWS
536
#  ifdef PYMALLOC_USE_HUGEPAGES
537
    if (_PyRuntime.allocators.use_hugepages) {
538
        SIZE_T lp_size = GetLargePageMinimum();
539
        if (lp_size > 0 && size % lp_size == 0) {
540
            void *ptr = VirtualAlloc(NULL, size,
541
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
542
                            PAGE_READWRITE);
543
            if (ptr != NULL)
544
                return ptr;
545
        }
546
    }
547
    /* Fall back to regular pages */
548
#  endif
549
    return VirtualAlloc(NULL, size,
550
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
551
#elif defined(ARENAS_USE_MMAP)
552
    void *ptr;
553
#  ifdef PYMALLOC_USE_HUGEPAGES
554
#    ifdef MAP_HUGETLB
555
    if (_PyRuntime.allocators.use_hugepages) {
556
        size_t hp_size = _pymalloc_system_hugepage_size();
557
        /* Only use huge pages if the arena size is a multiple of the
558
         * system's default huge page size.  When the arena is smaller
559
         * than the huge page, mmap still succeeds but silently
560
         * allocates an entire huge page; the subsequent munmap with
561
         * the smaller arena size then fails with EINVAL, leaking
562
         * all of that memory. */
563
        if (hp_size > 0 && size % hp_size == 0) {
564
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
565
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
566
            if (ptr != MAP_FAILED) {
567
                assert(ptr != NULL);
568
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
569
                return ptr;
570
            }
571
        }
572
    }
573
    /* Fall back to regular pages */
574
#    endif
575
#  endif
576
197k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
577
197k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
578
197k
    if (ptr == MAP_FAILED)
579
0
        return NULL;
580
197k
    assert(ptr != NULL);
581
197k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
582
197k
    return ptr;
583
#else
584
    return malloc(size);
585
#endif
586
197k
}
587
588
void
589
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
590
#if defined(ARENAS_USE_MMAP)
591
    size_t size
592
#else
593
    size_t Py_UNUSED(size)
594
#endif
595
)
596
196k
{
597
#ifdef MS_WINDOWS
598
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
599
    if (ptr == NULL) {
600
        return;
601
    }
602
    VirtualFree(ptr, 0, MEM_RELEASE);
603
#elif defined(ARENAS_USE_MMAP)
604
    /* Unlike free(), munmap() does not special-case NULL to noop. */
605
196k
    if (ptr == NULL) {
606
0
        return;
607
0
    }
608
196k
    munmap(ptr, size);
609
#else
610
    free(ptr);
611
#endif
612
196k
}
613
614
/*******************************************/
615
/* end low-level allocator implementations */
616
/*******************************************/
617
618
619
64
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
620
808M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
621
1.77G
#define _PyMem (_PyRuntime.allocators.standard.mem)
622
4.42G
#define _PyObject (_PyRuntime.allocators.standard.obj)
623
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
624
787k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
625
626
627
/***************************/
628
/* managing the allocators */
629
/***************************/
630
631
static int
632
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
633
                               PyMemAllocatorEx *old_alloc)
634
0
{
635
0
    if (old_alloc != NULL) {
636
0
        get_allocator_unlocked(domain, old_alloc);
637
0
    }
638
639
640
0
    PyMemAllocatorEx new_alloc;
641
0
    switch(domain)
642
0
    {
643
0
    case PYMEM_DOMAIN_RAW:
644
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
645
0
        break;
646
0
    case PYMEM_DOMAIN_MEM:
647
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
648
0
        break;
649
0
    case PYMEM_DOMAIN_OBJ:
650
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
651
0
        break;
652
0
    default:
653
        /* unknown domain */
654
0
        return -1;
655
0
    }
656
0
    set_allocator_unlocked(domain, &new_alloc);
657
0
    if (debug) {
658
0
        set_up_debug_hooks_domain_unlocked(domain);
659
0
    }
660
0
    return 0;
661
0
}
662
663
664
#ifdef Py_DEBUG
665
static const int pydebug = 1;
666
#else
667
static const int pydebug = 0;
668
#endif
669
670
int
671
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
672
0
{
673
0
    if (name == NULL || *name == '\0') {
674
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
675
           options): use default memory allocators */
676
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
677
0
    }
678
0
    else if (strcmp(name, "default") == 0) {
679
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
680
0
    }
681
0
    else if (strcmp(name, "debug") == 0) {
682
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
683
0
    }
684
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
685
0
    else if (strcmp(name, "pymalloc") == 0) {
686
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
687
0
    }
688
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
689
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
690
0
    }
691
0
#endif
692
0
#ifdef WITH_MIMALLOC
693
0
    else if (strcmp(name, "mimalloc") == 0) {
694
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
695
0
    }
696
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
697
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
698
0
    }
699
0
#endif
700
0
#ifndef Py_GIL_DISABLED
701
0
    else if (strcmp(name, "malloc") == 0) {
702
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
703
0
    }
704
0
    else if (strcmp(name, "malloc_debug") == 0) {
705
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
706
0
    }
707
0
#endif
708
0
    else {
709
        /* unknown allocator */
710
0
        return -1;
711
0
    }
712
0
    return 0;
713
0
}
714
715
716
static int
717
set_up_allocators_unlocked(PyMemAllocatorName allocator)
718
0
{
719
0
    switch (allocator) {
720
0
    case PYMEM_ALLOCATOR_NOT_SET:
721
        /* do nothing */
722
0
        break;
723
724
0
    case PYMEM_ALLOCATOR_DEFAULT:
725
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
726
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
727
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
728
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
729
0
        break;
730
731
0
    case PYMEM_ALLOCATOR_DEBUG:
732
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
733
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
734
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
735
0
        _PyRuntime.allocators.is_debug_enabled = 1;
736
0
        break;
737
738
0
#ifdef WITH_PYMALLOC
739
0
    case PYMEM_ALLOCATOR_PYMALLOC:
740
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
741
0
    {
742
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
743
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
744
745
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
746
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
747
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
748
749
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
750
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
751
0
        if (is_debug) {
752
0
            set_up_debug_hooks_unlocked();
753
0
        }
754
0
        break;
755
0
    }
756
0
#endif
757
0
#ifdef WITH_MIMALLOC
758
0
    case PYMEM_ALLOCATOR_MIMALLOC:
759
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
760
0
    {
761
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
762
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
763
764
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
765
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
766
767
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
768
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
769
770
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
771
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
772
0
        if (is_debug) {
773
0
            set_up_debug_hooks_unlocked();
774
0
        }
775
776
0
        break;
777
0
    }
778
0
#endif
779
780
0
    case PYMEM_ALLOCATOR_MALLOC:
781
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
782
0
    {
783
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
784
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
785
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
786
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
787
788
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
789
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
790
0
        if (is_debug) {
791
0
            set_up_debug_hooks_unlocked();
792
0
        }
793
0
        break;
794
0
    }
795
796
0
    default:
797
        /* unknown allocator */
798
0
        return -1;
799
0
    }
800
801
0
    return 0;
802
0
}
803
804
int
805
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
806
0
{
807
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
808
0
    int res = set_up_allocators_unlocked(allocator);
809
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
810
0
    return res;
811
0
}
812
813
814
static int
815
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
816
0
{
817
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
818
0
}
819
820
821
static const char*
822
get_current_allocator_name_unlocked(void)
823
0
{
824
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
825
0
#ifdef WITH_PYMALLOC
826
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
827
0
#endif
828
0
#ifdef WITH_MIMALLOC
829
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
830
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
831
0
#endif
832
833
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
834
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
835
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
836
0
    {
837
0
        return "malloc";
838
0
    }
839
0
#ifdef WITH_PYMALLOC
840
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
841
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
842
0
        pymemallocator_eq(&_PyObject, &pymalloc))
843
0
    {
844
0
        return "pymalloc";
845
0
    }
846
0
#endif
847
0
#ifdef WITH_MIMALLOC
848
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
849
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
850
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
851
0
    {
852
0
        return "mimalloc";
853
0
    }
854
0
#endif
855
856
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
857
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
858
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
859
860
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
861
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
862
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
863
0
    {
864
        /* Debug hooks installed */
865
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
866
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
867
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
868
0
        {
869
0
            return "malloc_debug";
870
0
        }
871
0
#ifdef WITH_PYMALLOC
872
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
873
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
874
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
875
0
        {
876
0
            return "pymalloc_debug";
877
0
        }
878
0
#endif
879
0
#ifdef WITH_MIMALLOC
880
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
881
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
882
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
883
0
        {
884
0
            return "mimalloc_debug";
885
0
        }
886
0
#endif
887
0
    }
888
0
    return NULL;
889
0
}
890
891
const char*
892
_PyMem_GetCurrentAllocatorName(void)
893
0
{
894
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
895
0
    const char *name = get_current_allocator_name_unlocked();
896
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
897
0
    return name;
898
0
}
899
900
901
int
902
_PyMem_DebugEnabled(void)
903
0
{
904
0
    return _PyRuntime.allocators.is_debug_enabled;
905
0
}
906
907
#ifdef WITH_PYMALLOC
908
static int
909
_PyMem_PymallocEnabled(void)
910
0
{
911
0
    if (_PyMem_DebugEnabled()) {
912
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
913
0
    }
914
0
    else {
915
0
        return (_PyObject.malloc == _PyObject_Malloc);
916
0
    }
917
0
}
918
919
#ifdef WITH_MIMALLOC
920
static int
921
_PyMem_MimallocEnabled(void)
922
0
{
923
#ifdef Py_GIL_DISABLED
924
    return 1;
925
#else
926
0
    if (_PyMem_DebugEnabled()) {
927
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
928
0
    }
929
0
    else {
930
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
931
0
    }
932
0
#endif
933
0
}
934
#endif  // WITH_MIMALLOC
935
936
#endif  // WITH_PYMALLOC
937
938
939
/* Install the debug wrapper allocator for one domain.  The previously
   installed allocator is saved into the matching _PyMem_Debug.<domain>
   slot so the wrapper can forward to it.  No-op when the wrapper is
   already installed for that domain.  Caller holds ALLOCATORS_MUTEX. */
static void
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
{
    PyMemAllocatorEx alloc;

    if (domain == PYMEM_DOMAIN_RAW) {
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
            /* Already hooked. */
            return;
        }

        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
        alloc.ctx = &_PyMem_Debug.raw;
        /* The raw domain uses the DebugRaw* variants of the hooks. */
        alloc.malloc = _PyMem_DebugRawMalloc;
        alloc.calloc = _PyMem_DebugRawCalloc;
        alloc.realloc = _PyMem_DebugRawRealloc;
        alloc.free = _PyMem_DebugRawFree;
        set_allocator_unlocked(domain, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_MEM) {
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
            /* Already hooked. */
            return;
        }

        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
        alloc.ctx = &_PyMem_Debug.mem;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        set_allocator_unlocked(domain, &alloc);
    }
    else if (domain == PYMEM_DOMAIN_OBJ)  {
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
            /* Already hooked. */
            return;
        }

        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
        alloc.ctx = &_PyMem_Debug.obj;
        alloc.malloc = _PyMem_DebugMalloc;
        alloc.calloc = _PyMem_DebugCalloc;
        alloc.realloc = _PyMem_DebugRealloc;
        alloc.free = _PyMem_DebugFree;
        set_allocator_unlocked(domain, &alloc);
    }
}
984
985
986
static void
987
set_up_debug_hooks_unlocked(void)
988
0
{
989
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
990
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
991
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
992
0
    _PyRuntime.allocators.is_debug_enabled = 1;
993
0
}
994
995
void
996
PyMem_SetupDebugHooks(void)
997
0
{
998
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
999
0
    set_up_debug_hooks_unlocked();
1000
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1001
0
}
1002
1003
static void
1004
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1005
32
{
1006
32
    switch(domain)
1007
32
    {
1008
32
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1009
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1010
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1011
0
    default:
1012
        /* unknown domain: set all attributes to NULL */
1013
0
        allocator->ctx = NULL;
1014
0
        allocator->malloc = NULL;
1015
0
        allocator->calloc = NULL;
1016
0
        allocator->realloc = NULL;
1017
0
        allocator->free = NULL;
1018
32
    }
1019
32
}
1020
1021
static void
1022
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1023
0
{
1024
0
    switch(domain)
1025
0
    {
1026
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1027
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1028
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1029
    /* ignore unknown domain */
1030
0
    }
1031
0
}
1032
1033
void
1034
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1035
32
{
1036
32
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1037
32
    get_allocator_unlocked(domain, allocator);
1038
32
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1039
32
}
1040
1041
void
1042
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1043
0
{
1044
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1045
0
    set_allocator_unlocked(domain, allocator);
1046
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1047
0
}
1048
1049
void
1050
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1051
0
{
1052
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1053
0
    *allocator = _PyObject_Arena;
1054
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1055
0
}
1056
1057
void
1058
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1059
0
{
1060
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1061
0
    _PyObject_Arena = *allocator;
1062
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1063
0
}
1064
1065
1066
/* Note that there is a possible, but very unlikely, race in any place
1067
 * below where we call one of the allocator functions.  We access two
1068
 * fields in each case:  "malloc", etc. and "ctx".
1069
 *
1070
 * It is unlikely that the allocator will be changed while one of those
1071
 * calls is happening, much less in that very narrow window.
1072
 * Furthermore, the likelihood of a race is drastically reduced by the
1073
 * fact that the allocator may not be changed after runtime init
1074
 * (except with a wrapper).
1075
 *
1076
 * With the above in mind, we currently don't worry about locking
1077
 * around these uses of the runtime-global allocators state. */
1078
1079
1080
/*************************/
1081
/* the "arena" allocator */
1082
/*************************/
1083
1084
void *
1085
_PyObject_VirtualAlloc(size_t size)
1086
193k
{
1087
193k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
1088
193k
}
1089
1090
void
1091
_PyObject_VirtualFree(void *obj, size_t size)
1092
193k
{
1093
193k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
1094
193k
}
1095
1096
1097
/***********************/
1098
/* the "raw" allocator */
1099
/***********************/
1100
1101
void *
1102
PyMem_RawMalloc(size_t size)
1103
198M
{
1104
    /*
1105
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1106
     * Most python internals blindly use a signed Py_ssize_t to track
1107
     * things without checking for overflows or negatives.
1108
     * As size_t is unsigned, checking for size < 0 is not required.
1109
     */
1110
198M
    if (size > (size_t)PY_SSIZE_T_MAX)
1111
0
        return NULL;
1112
198M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1113
198M
}
1114
1115
void *
1116
PyMem_RawCalloc(size_t nelem, size_t elsize)
1117
60.5k
{
1118
    /* see PyMem_RawMalloc() */
1119
60.5k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1120
0
        return NULL;
1121
60.5k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1122
60.5k
}
1123
1124
void*
1125
PyMem_RawRealloc(void *ptr, size_t new_size)
1126
6.82M
{
1127
    /* see PyMem_RawMalloc() */
1128
6.82M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1129
0
        return NULL;
1130
6.82M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1131
6.82M
}
1132
1133
void PyMem_RawFree(void *ptr)
1134
198M
{
1135
198M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1136
198M
}
1137
1138
1139
/***********************/
1140
/* the "mem" allocator */
1141
/***********************/
1142
1143
void *
1144
PyMem_Malloc(size_t size)
1145
204M
{
1146
    /* see PyMem_RawMalloc() */
1147
204M
    if (size > (size_t)PY_SSIZE_T_MAX)
1148
0
        return NULL;
1149
204M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1150
204M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1151
204M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1152
204M
    OBJECT_STAT_INC(allocations);
1153
204M
    return _PyMem.malloc(_PyMem.ctx, size);
1154
204M
}
1155
1156
void *
1157
PyMem_Calloc(size_t nelem, size_t elsize)
1158
41.4M
{
1159
    /* see PyMem_RawMalloc() */
1160
41.4M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1161
0
        return NULL;
1162
41.4M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1163
41.4M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1164
41.4M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1165
41.4M
    OBJECT_STAT_INC(allocations);
1166
41.4M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1167
41.4M
}
1168
1169
void *
1170
PyMem_Realloc(void *ptr, size_t new_size)
1171
210M
{
1172
    /* see PyMem_RawMalloc() */
1173
210M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1174
0
        return NULL;
1175
210M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1176
210M
}
1177
1178
void
1179
PyMem_Free(void *ptr)
1180
430M
{
1181
430M
    OBJECT_STAT_INC(frees);
1182
430M
    _PyMem.free(_PyMem.ctx, ptr);
1183
430M
}
1184
1185
1186
/***************************/
1187
/* pymem utility functions */
1188
/***************************/
1189
1190
wchar_t*
1191
_PyMem_RawWcsdup(const wchar_t *str)
1192
1.28k
{
1193
1.28k
    assert(str != NULL);
1194
1195
1.28k
    size_t len = wcslen(str);
1196
1.28k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1197
0
        return NULL;
1198
0
    }
1199
1200
1.28k
    size_t size = (len + 1) * sizeof(wchar_t);
1201
1.28k
    wchar_t *str2 = PyMem_RawMalloc(size);
1202
1.28k
    if (str2 == NULL) {
1203
0
        return NULL;
1204
0
    }
1205
1206
1.28k
    memcpy(str2, str, size);
1207
1.28k
    return str2;
1208
1.28k
}
1209
1210
/* strdup() built on PyMem_RawMalloc.  Returns NULL on allocation
   failure; the caller owns (and must PyMem_RawFree) the result. */
char *
_PyMem_RawStrdup(const char *str)
{
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;   /* include the terminating NUL */
    char *dup = PyMem_RawMalloc(nbytes);
    if (dup == NULL) {
        return NULL;
    }
    memcpy(dup, str, nbytes);
    return dup;
}
1222
1223
/* strdup() built on PyMem_Malloc.  Returns NULL on allocation failure;
   the caller owns (and must PyMem_Free) the result. */
char *
_PyMem_Strdup(const char *str)
{
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;   /* include the terminating NUL */
    char *dup = PyMem_Malloc(nbytes);
    if (dup == NULL) {
        return NULL;
    }
    memcpy(dup, str, nbytes);
    return dup;
}
1235
1236
/***********************************************/
1237
/* Delayed freeing support for Py_GIL_DISABLED */
1238
/***********************************************/
1239
1240
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
#define WORK_ITEMS_PER_CHUNK 254

// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
struct _mem_work_item {
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
    uint64_t qsbr_goal;  // QSBR shared sequence value that must be reached
                         // before `ptr` may actually be released
};

// A fixed-size buffer of pointers to be freed
struct _mem_work_chunk {
    // Linked list node of chunks in queue
    struct llist_node node;

    Py_ssize_t rd_idx;  // index of next item to read
    Py_ssize_t wr_idx;  // index of next item to write
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
};
1258
1259
/* A queued work-item pointer carries a tag in its lowest bit: 1 means
   "decref this PyObject", 0 means "free this memory".  Return the tag. */
static int
work_item_should_decref(uintptr_t ptr)
{
    return (int)(ptr & (uintptr_t)0x01);
}
1264
1265
/* Release one queued work item.  `ptr` is a tagged pointer: low bit set
   means it is a PyObject to decref (tag stripped below), otherwise it is
   plain memory handed to PyMem_Free().  On free-threaded builds, `cb`
   (with `state`) replaces _Py_Dealloc when a merged refcount hits zero
   while the world is stopped; cb == NULL means a normal decref. */
static void
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
{
    if (work_item_should_decref(ptr)) {
        /* Strip the tag bit to recover the object pointer. */
        PyObject *obj = (PyObject *)(ptr - 1);
#ifdef Py_GIL_DISABLED
        if (cb == NULL) {
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
            Py_DECREF(obj);
            return;
        }
        /* Callback path is only used during a stop-the-world pause. */
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
        if (refcount == 0) {
            cb(obj, state);
        }
#else
        Py_DECREF(obj);
#endif
    }
    else {
        PyMem_Free((void *)ptr);
    }
}
1289
1290
1291
#ifdef Py_GIL_DISABLED
1292
1293
// For deferred advance on free: the number of deferred items before advancing
1294
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1295
// want to process a chunk before it overflows.
1296
#define QSBR_DEFERRED_LIMIT 127
1297
1298
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1299
// helps limit memory usage due to QSBR delaying frees too long.
1300
#define QSBR_FREE_MEM_LIMIT 1024*1024
1301
1302
// Return true if the global write sequence should be advanced for a deferred
1303
// memory free.
1304
// Return true if the global write sequence should be advanced for a deferred
// memory free.
static bool
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
{
    // A single free larger than the limit advances immediately and resets
    // the accumulated counters.
    if (size > QSBR_FREE_MEM_LIMIT) {
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    qsbr->deferred_count++;
    qsbr->deferred_memory += size;
    // Otherwise advance once enough items or enough bytes have accumulated
    // since the last advance.
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    return false;
}
1324
#endif
1325
1326
/* Queue a tagged pointer (see work_item_should_decref) to be released
   once all threads have passed the next QSBR sequence point.  On builds
   with the GIL the free happens immediately.  Falls back to an
   immediate (stop-the-world) release when queueing is impossible. */
static void
free_delayed(uintptr_t ptr, size_t size)
{
#ifndef Py_GIL_DISABLED
    free_work_item(ptr, NULL, NULL);
#else
    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
        interp->stoptheworld.world_stopped)
    {
        // Free immediately during interpreter shutdown or if the world is
        // stopped.
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
        free_work_item(ptr, NULL, NULL);
        return;
    }

    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    struct llist_node *head = &tstate->mem_free_queue;

    struct _mem_work_chunk *buf = NULL;
    if (!llist_empty(head)) {
        // Try to re-use the last buffer
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
            // already full
            buf = NULL;
        }
    }

    if (buf == NULL) {
        buf = PyMem_Calloc(1, sizeof(*buf));
        if (buf != NULL) {
            llist_insert_tail(head, &buf->node);
        }
    }

    if (buf == NULL) {
        // failed to allocate a buffer, free immediately
        PyObject *to_dealloc = NULL;
        _PyEval_StopTheWorld(tstate->base.interp);
        if (work_item_should_decref(ptr)) {
            // Strip the tag bit to recover the object pointer.
            PyObject *obj = (PyObject *)(ptr - 1);
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
            if (refcount == 0) {
                to_dealloc = obj;
            }
        }
        else {
            PyMem_Free((void *)ptr);
        }
        _PyEval_StartTheWorld(tstate->base.interp);
        // Deallocate only after restarting the world: _Py_Dealloc may run
        // arbitrary code.
        if (to_dealloc != NULL) {
            _Py_Dealloc(to_dealloc);
        }
        return;
    }

    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
    uint64_t seq;
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
    }
    else {
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
    }
    buf->array[buf->wr_idx].ptr = ptr;
    buf->array[buf->wr_idx].qsbr_goal = seq;
    buf->wr_idx++;

    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
        // Normally the processing of delayed items is done from the eval
        // breaker.  Processing here is a safety measure to ensure too much
        // work does not accumulate.
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
    }
#endif
}
1404
1405
void
1406
_PyMem_FreeDelayed(void *ptr, size_t size)
1407
0
{
1408
0
    assert(!((uintptr_t)ptr & 0x01));
1409
0
    if (ptr != NULL) {
1410
0
        free_delayed((uintptr_t)ptr, size);
1411
0
    }
1412
0
}
1413
1414
#ifdef Py_GIL_DISABLED
1415
void
1416
_PyObject_XDecRefDelayed(PyObject *ptr)
1417
{
1418
    assert(!((uintptr_t)ptr & 0x01));
1419
    if (ptr != NULL) {
1420
        // We use 0 as the size since we don't have an easy way to know the
1421
        // actual size.  If we are freeing many objects, the write sequence
1422
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1423
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1424
    }
1425
}
1426
#endif
1427
1428
#ifdef Py_GIL_DISABLED
1429
void
1430
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1431
{
1432
    PyObject *old = *ptr;
1433
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1434
    if (old == NULL) {
1435
        return;
1436
    }
1437
    if (!_Py_IsImmortal(old)) {
1438
         _PyObject_XDecRefDelayed(old);
1439
    }
1440
}
1441
#endif
1442
1443
static struct _mem_work_chunk *
1444
work_queue_first(struct llist_node *head)
1445
0
{
1446
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1447
0
}
1448
1449
/* Drain as many items from `head` as QSBR allows: items are released in
   order until one whose qsbr_goal has not yet been reached is found
   (then we stop — later items have later goals).  Fully drained chunks
   are freed, except that when `keep_empty` is true the last chunk is
   kept (reset) to avoid re-allocating it.  `cb`/`state` are forwarded
   to free_work_item(). */
static void
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
              bool keep_empty, delayed_dealloc_cb cb, void *state)
{
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
                /* Goal not reached yet; later items can't be ready either. */
                return;
            }

            /* Advance rd_idx before releasing: free_work_item may re-enter
               this queue. */
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, cb, state);
            continue;
        }

        assert(buf->rd_idx == buf->wr_idx);
        if (keep_empty && buf->node.next == head) {
            // Keep the last buffer in the queue to reduce re-allocations
            buf->rd_idx = buf->wr_idx = 0;
            return;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1479
1480
static void
1481
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1482
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1483
                     void *state)
1484
0
{
1485
0
    assert(PyMutex_IsLocked(&queue->mutex));
1486
0
    process_queue(&queue->head, tstate, false, cb, state);
1487
1488
0
    int more_work = !llist_empty(&queue->head);
1489
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1490
0
}
1491
1492
static void
1493
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1494
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1495
                           void *state)
1496
0
{
1497
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1498
0
        return;
1499
0
    }
1500
1501
    // Try to acquire the lock, but don't block if it's already held.
1502
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1503
0
        process_interp_queue(queue, tstate, cb, state);
1504
0
        PyMutex_Unlock(&queue->mutex);
1505
0
    }
1506
0
}
1507
1508
void
1509
_PyMem_ProcessDelayed(PyThreadState *tstate)
1510
0
{
1511
0
    PyInterpreterState *interp = tstate->interp;
1512
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1513
1514
0
    tstate_impl->qsbr->should_process = false;
1515
1516
    // Process thread-local work
1517
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1518
1519
    // Process shared interpreter work
1520
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1521
0
}
1522
1523
void
1524
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1525
0
{
1526
0
    PyInterpreterState *interp = tstate->interp;
1527
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1528
1529
    // Process thread-local work
1530
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1531
1532
    // Process shared interpreter work
1533
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1534
0
}
1535
1536
/* Hand off `tstate`'s pending delayed frees to its interpreter (used
   when the thread state is being detached/abandoned), then drain the
   merged queue.  The thread's own queue is guaranteed empty on return. */
void
_PyMem_AbandonDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;

    if (llist_empty(queue)) {
        return;
    }

    // Check if the queue contains one empty buffer
    struct _mem_work_chunk *buf = work_queue_first(queue);
    if (buf->rd_idx == buf->wr_idx) {
        /* Only the kept-for-reuse empty chunk remains; just drop it. */
        llist_remove(&buf->node);
        PyMem_Free(buf);
        assert(llist_empty(queue));
        return;
    }

    PyMutex_Lock(&interp->mem_free_queue.mutex);

    // Merge the thread's work queue into the interpreter's work queue.
    llist_concat(&interp->mem_free_queue.head, queue);

    // Process the merged queue now (see gh-130794).
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);

    PyMutex_Unlock(&interp->mem_free_queue.mutex);

    assert(llist_empty(queue));  // the thread's queue is now empty
}
1568
1569
/* Release everything left in the interpreter's delayed-free queue at
   interpreter shutdown, without waiting for QSBR goals. */
void
_PyMem_FiniDelayed(PyInterpreterState *interp)
{
    struct llist_node *head = &interp->mem_free_queue.head;
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            // Free the remaining items immediately. There should be no other
            // threads accessing the memory at this point during shutdown.
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, NULL, NULL);
            continue;
        }

        /* Chunk fully drained; release it. */
        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1590
1591
/**************************/
1592
/* the "object" allocator */
1593
/**************************/
1594
1595
void *
1596
PyObject_Malloc(size_t size)
1597
1.08G
{
1598
    /* see PyMem_RawMalloc() */
1599
1.08G
    if (size > (size_t)PY_SSIZE_T_MAX)
1600
0
        return NULL;
1601
1.08G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1602
1.08G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1603
1.08G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1604
1.08G
    OBJECT_STAT_INC(allocations);
1605
1.08G
    return _PyObject.malloc(_PyObject.ctx, size);
1606
1.08G
}
1607
1608
void *
1609
PyObject_Calloc(size_t nelem, size_t elsize)
1610
0
{
1611
    /* see PyMem_RawMalloc() */
1612
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1613
0
        return NULL;
1614
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1615
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1616
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1617
0
    OBJECT_STAT_INC(allocations);
1618
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1619
0
}
1620
1621
void *
1622
PyObject_Realloc(void *ptr, size_t new_size)
1623
51.4M
{
1624
    /* see PyMem_RawMalloc() */
1625
51.4M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1626
0
        return NULL;
1627
51.4M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1628
51.4M
}
1629
1630
void
1631
PyObject_Free(void *ptr)
1632
1.08G
{
1633
1.08G
    OBJECT_STAT_INC(frees);
1634
1.08G
    _PyObject.free(_PyObject.ctx, ptr);
1635
1.08G
}
1636
1637
1638
/* Use __builtin_expect() where available to reduce overhead of
1639
   the valgrind checks */
1640
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1641
9.25G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1642
4.16G
#  define LIKELY(value) __builtin_expect((value), 1)
1643
#else
1644
#  define UNLIKELY(value) (value)
1645
#  define LIKELY(value) (value)
1646
#endif
1647
1648
#ifdef WITH_PYMALLOC
1649
1650
#ifdef WITH_VALGRIND
1651
#include <valgrind/valgrind.h>
1652
1653
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1654
static int running_on_valgrind = -1;
1655
#endif
1656
1657
typedef struct _obmalloc_state OMState;
1658
1659
/* obmalloc state for main interpreter and shared by all interpreters without
1660
 * their own obmalloc state.  By not explicitly initializing this structure, it
1661
 * will be allocated in the BSS which is a small performance win.  The radix
1662
 * tree arrays are fairly large but are sparsely used.  */
1663
static struct _obmalloc_state obmalloc_state_main;
1664
static bool obmalloc_state_initialized;
1665
1666
static inline int
1667
has_own_state(PyInterpreterState *interp)
1668
0
{
1669
0
    return (_Py_IsMainInterpreter(interp) ||
1670
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1671
0
            _Py_IsMainInterpreterFinalizing(interp));
1672
0
}
1673
1674
static inline OMState *
1675
get_state(void)
1676
3.20G
{
1677
3.20G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1678
3.20G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1679
3.20G
    return interp->obmalloc;
1680
3.20G
}
1681
1682
// These macros all rely on a local "state" variable.
1683
1.50G
#define usedpools (state->pools.used)
1684
2.34M
#define allarenas (state->mgmt.arenas)
1685
286
#define maxarenas (state->mgmt.maxarenas)
1686
18.6k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1687
20.5M
#define usable_arenas (state->mgmt.usable_arenas)
1688
14.3M
#define nfp2lasta (state->mgmt.nfp2lasta)
1689
11.9k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1690
3.90k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1691
4.59k
#define narenas_highwater (state->mgmt.narenas_highwater)
1692
394M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1693
1694
#ifdef WITH_MIMALLOC
1695
static bool count_blocks(
1696
    const mi_heap_t* heap, const mi_heap_area_t* area,
1697
    void* block, size_t block_size, void* allocated_blocks)
1698
0
{
1699
0
    *(size_t *)allocated_blocks += area->used;
1700
0
    return 1;
1701
0
}
1702
1703
/* Count blocks still allocated from mimalloc heaps for `interp`.  On
   free-threaded builds this walks every thread state's heaps plus the
   abandoned-heap pool; otherwise only the current thread's default heap
   is visited (see TODO below). */
static Py_ssize_t
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
{
    size_t allocated_blocks = 0;
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
        }
    }

    /* Heaps abandoned by exited threads are tracked in a per-interpreter
       pool; include their blocks too. */
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
                                        &allocated_blocks);
    }
#else
    // TODO(sgross): this only counts the current thread's blocks.
    mi_heap_t *heap = mi_heap_get_default();
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
#endif
    return allocated_blocks;
}
1728
#endif
1729
1730
/* Number of currently allocated blocks owned by `interp`: the raw
   (oversized) allocations plus, for every allocated arena, the per-pool
   reference counts.  With mimalloc enabled, delegates to the mimalloc
   counter instead.  Fatal error if `interp` shares the main
   interpreter's obmalloc state (release builds). */
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        return get_mimalloc_allocated_blocks(interp);
    }
#endif

#ifdef Py_DEBUG
    assert(has_own_state(interp));
#else
    if (!has_own_state(interp)) {
        _Py_FatalErrorFunc(__func__,
                           "the interpreter doesn't have its own allocator");
    }
#endif
    OMState *state = interp->obmalloc;

    if (state == NULL) {
        /* obmalloc never initialized (or already freed): nothing allocated. */
        return 0;
    }

    Py_ssize_t n = raw_allocated_blocks;
    /* add up allocated blocks for used pools */
    for (uint i = 0; i < maxarenas; ++i) {
        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == 0) {
            continue;
        }

        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            n += p->ref.count;
        }
    }
    return n;
}
1772
1773
static void free_obmalloc_arenas(PyInterpreterState *interp);
1774
1775
/* At interpreter finalization: record any still-allocated blocks as
   leaks on the runtime, and release the obmalloc arenas when it is safe
   to do so (heap-allocated state and zero leaks). */
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        return;
    }
#endif
    if (has_own_state(interp) && interp->obmalloc != NULL) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        assert(has_own_state(interp) || leaked == 0);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
            // then some of the memory allocated by obmalloc has not been
            // freed.  It might be safe to free the arenas in that case but
            // it's possible that extension modules are still using that
            // memory.  So, it is safer to not free and to leak.  Perhaps there
            // should be warning when this happens.  It should be possible to
            // use a tool like "-fsanitize=address" to track down these leaks.
            free_obmalloc_arenas(interp);
        }
    }
}
1801
1802
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1803
1804
/* We preserve the number of blocks leaked during runtime finalization,
1805
   so they can be reported if the runtime is initialized again. */
1806
// XXX We don't lose any information by dropping this,
1807
// so we should consider doing so.
1808
static Py_ssize_t last_final_leaks = 0;
1809
1810
void
1811
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1812
0
{
1813
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1814
0
    runtime->obmalloc.interpreter_leaks = 0;
1815
0
}
1816
1817
/* Total blocks allocated across the whole runtime: per-interpreter
   counts (only interpreters with their own obmalloc state), plus leaks
   recorded by finalized interpreters, plus leaks left over from a prior
   runtime lifecycle.  During runtime finalization only the main
   interpreter is inspected; otherwise all interpreters are walked under
   a stop-the-world pause and the head lock. */
static Py_ssize_t
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
{
    Py_ssize_t total = 0;
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
        PyInterpreterState *interp = _PyInterpreterState_Main();
        if (interp == NULL) {
            /* We are at the very end of runtime finalization.
               We can't rely on finalizing->interp since that thread
               state is probably already freed, so we don't worry
               about it. */
            assert(PyInterpreterState_Head() == NULL);
        }
        else {
            assert(interp != NULL);
            /* It is probably the last interpreter but not necessarily. */
            assert(PyInterpreterState_Next(interp) == NULL);
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
        }
    }
    else {
        /* Pause all threads so the per-interpreter walk sees a stable
           view of the allocator state. */
        _PyEval_StopTheWorldAll(&_PyRuntime);
        HEAD_LOCK(runtime);
        PyInterpreterState *interp = PyInterpreterState_Head();
        assert(interp != NULL);
#ifdef Py_DEBUG
        int got_main = 0;
#endif
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
#ifdef Py_DEBUG
            if (_Py_IsMainInterpreter(interp)) {
                assert(!got_main);
                got_main = 1;
                assert(has_own_state(interp));
            }
#endif
            if (has_own_state(interp)) {
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
            }
        }
        HEAD_UNLOCK(runtime);
        _PyEval_StartTheWorldAll(&_PyRuntime);
#ifdef Py_DEBUG
        assert(got_main);
#endif
    }
    total += runtime->obmalloc.interpreter_leaks;
    total += last_final_leaks;
    return total;
}
1867
1868
/* Public entry point: total allocated obmalloc blocks for the whole
 * runtime (see get_num_global_allocated_blocks for the accounting). */
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    return get_num_global_allocated_blocks(&_PyRuntime);
}
1873
1874
#if WITH_PYMALLOC_RADIX_TREE
1875
/*==========================================================================*/
1876
/* radix tree for tracking arena usage. */
1877
1878
4.71G
#define arena_map_root (state->usage.arena_map_root)
1879
#ifdef USE_INTERIOR_NODES
1880
32
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1881
32
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1882
#endif
1883
1884
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
 * it cannot be created */
static inline Py_ALWAYS_INLINE arena_map_bot_t *
arena_map_get(OMState *state, pymem_block *p, int create)
{
#ifdef USE_INTERIOR_NODES
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
    /* Walk top -> mid -> bottom, creating interior nodes on demand when
     * 'create' is true.  Nodes are calloc'ed so fresh entries are NULL/0. */
    int i1 = MAP_TOP_INDEX(p);
    if (arena_map_root.ptrs[i1] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
        if (n == NULL) {
            return NULL;  /* allocation failure; caller must cope */
        }
        arena_map_root.ptrs[i1] = n;
        arena_map_mid_count++;
    }
    int i2 = MAP_MID_INDEX(p);
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
        arena_map_bot_count++;
    }
    return arena_map_root.ptrs[i1]->ptrs[i2];
#else
    /* Single-level tree: the root *is* the bottom node. */
    return &arena_map_root;
#endif
}
1921
1922
1923
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1924
 * away 24 bits of the address.  That reduces the space requirement of
1925
 * the tree compared to similar radix tree page-map schemes.  In
1926
 * exchange for slashing the space requirement, it needs more
1927
 * computation to check an address.
1928
 *
1929
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1930
 * explain in decimal so let's say that the arena size is 100 bytes.
1931
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1932
 * pointer address is inside an actual arena, we have to check two ideal
1933
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1934
 * 300.  In the rare case that an arena is aligned in the ideal way
1935
 * (e.g. base address of arena is 200) then we only have to check one
1936
 * ideal address.
1937
 *
1938
 * The tree nodes for 200 and 300 both store the address of arena.
1939
 * There are two cases: the arena starts at a lower ideal arena and
1940
 * extends to this one, or the arena starts in this arena and extends to
1941
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1942
 * these two cases.
1943
 */
1944
1945
1946
/* mark or unmark addresses covered by arena */
/* Returns 1 on success, 0 if a radix-tree node could not be allocated
 * (only possible when is_used is true).  An arena whose base is not
 * aligned to ARENA_SIZE straddles two "ideal" arena slots, so both the
 * tail_hi of its own slot and the tail_lo of the next slot are updated
 * (see the coverage-tracking comment above). */
static int
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
{
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
    arena_map_bot_t *n_hi = arena_map_get(
            state, (pymem_block *)arena_base, is_used);
    if (n_hi == NULL) {
        assert(is_used); /* otherwise node should already exist */
        return 0; /* failed to allocate space for node */
    }
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
    if (tail == 0) {
        /* is ideal arena address */
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
    }
    else {
        /* arena_base address is not ideal (aligned to arena size) and
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
         * for the next arena.  Note that it might be in different MAP_TOP
         * and MAP_MID nodes as well so we need to call arena_map_get()
         * again (do the full tree traversal).
         */
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
        /* If arena_base is a legit arena address, so is arena_base_next - 1
         * (last address in arena).  If arena_base_next overflows then it
         * must overflow to 0.  However, that would mean arena_base was
         * "ideal" and we should not be in this case. */
        assert(arena_base < arena_base_next);
        arena_map_bot_t *n_lo = arena_map_get(
                state, (pymem_block *)arena_base_next, is_used);
        if (n_lo == NULL) {
            assert(is_used); /* otherwise should already exist */
            /* Roll back the tail_hi write so the tree stays consistent. */
            n_hi->arenas[i3].tail_hi = 0;
            return 0; /* failed to allocate space for node */
        }
        int i3_next = MAP_BOT_INDEX(arena_base_next);
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
    }
    return 1;
}
1990
1991
/* Return true if 'p' is a pointer inside an obmalloc arena.
1992
 * _PyObject_Free() calls this so it needs to be very fast. */
1993
static int
1994
arena_map_is_used(OMState *state, pymem_block *p)
1995
1.63G
{
1996
1.63G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
1997
1.63G
    if (n == NULL) {
1998
204M
        return 0;
1999
204M
    }
2000
1.43G
    int i3 = MAP_BOT_INDEX(p);
2001
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2002
1.43G
    int32_t hi = n->arenas[i3].tail_hi;
2003
1.43G
    int32_t lo = n->arenas[i3].tail_lo;
2004
1.43G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2005
1.43G
    return (tail < lo) || (tail >= hi && hi != 0);
2006
1.63G
}
2007
2008
/* end of radix tree logic */
2009
/*==========================================================================*/
2010
#endif /* WITH_PYMALLOC_RADIX_TREE */
2011
2012
2013
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(OMState *state)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;

    /* Lazily resolve PYTHONMALLOCSTATS (-1 means "not checked yet") and
     * cache the answer in the runtime state. */
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
    if (debug_stats == -1) {
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
    }
    if (debug_stats) {
        _PyObject_DebugMallocStats(stderr);
    }

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*allarenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        allarenas = arenaobj;

        /* We might need to fix pointers that were copied.  However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full.  Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers.  Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            allarenas[i].address = 0;              /* mark as unassociated */
            allarenas[i].nextarena = i < numarenas - 1 ?
                                        &allarenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &allarenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
#if WITH_PYMALLOC_RADIX_TREE
    if (address != NULL) {
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
            /* marking arena in radix tree failed, abort */
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
            address = NULL;
        }
    }
#endif
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        /* Arena start isn't pool-aligned: skip ahead to the first
         * pool boundary, sacrificing one pool's worth of space. */
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}
2120
2121
2122
2123
#if WITH_PYMALLOC_RADIX_TREE
2124
/* Return true if and only if P is an address that was allocated by
2125
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2126
 */
2127
static bool
2128
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2129
1.63G
{
2130
1.63G
    return arena_map_is_used(state, p);
2131
1.63G
}
2132
#else
2133
/*
2134
address_in_range(P, POOL)
2135
2136
Return true if and only if P is an address that was allocated by pymalloc.
2137
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2138
(the caller is asked to compute this because the macro expands POOL more than
2139
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2140
variable and pass the latter to the macro; because address_in_range is
2141
called on every alloc/realloc/free, micro-efficiency is important here).
2142
2143
Tricky:  Let B be the arena base address associated with the pool, B =
2144
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2145
2146
    B <= P < B + ARENA_SIZE
2147
2148
Subtracting B throughout, this is true iff
2149
2150
    0 <= P-B < ARENA_SIZE
2151
2152
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2153
2154
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2155
before the first arena has been allocated.  `arenas` is still NULL in that
2156
case.  We're relying on that maxarenas is also 0 in that case, so that
2157
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2158
into a NULL arenas.
2159
2160
Details:  given P and POOL, the arena_object corresponding to P is AO =
2161
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2162
stores, etc), POOL is the correct address of P's pool, AO.address is the
2163
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2164
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2165
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2166
controls P.
2167
2168
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2169
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2170
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2171
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2172
control P.
2173
2174
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2175
allocated arena, obmalloc controls all the memory in slice AO.address :
2176
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2177
so P doesn't lie in that slice, so the macro correctly reports that P is not
2178
controlled by obmalloc.
2179
2180
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2181
arena_object (one not currently associated with an allocated arena),
2182
AO.address is 0, and the second test in the macro reduces to:
2183
2184
    P < ARENA_SIZE
2185
2186
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2187
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2188
of the test still passes, and the third clause (AO.address != 0) is necessary
2189
to get the correct result:  AO.address is 0 in this case, so the macro
2190
correctly reports that P is not controlled by obmalloc (despite that P lies in
2191
slice AO.address : AO.address + ARENA_SIZE).
2192
2193
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2194
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2195
corresponded to a currently-allocated arena, so the "P is not controlled by
2196
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2197
was impossible.
2198
2199
Note that the logic is excruciating, and reading up possibly uninitialized
2200
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2201
creates problems for some memory debuggers.  The overwhelming advantage is
2202
that this test determines whether an arbitrary address is controlled by
2203
obmalloc in a small constant time, independent of the number of arenas
2204
obmalloc controls.  Since this test is needed at every entry point, it's
2205
extremely desirable that it be this fast.
2206
*/
2207
2208
/* Non-radix-tree variant: decide membership from the pool header's
 * arenaindex (see the long correctness argument above).  The sanitizer
 * suppressions are required because this intentionally reads memory that
 * may not have been allocated by Python. */
static bool _Py_NO_SANITIZE_ADDRESS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(OMState *state, void *p, poolp pool)
{
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    /* Order matters: the index bound check must come first so the array
     * access is safe; the address != 0 check rejects unassociated arenas. */
    return arenaindex < maxarenas &&
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
        allarenas[arenaindex].address != 0;
}
2223
2224
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2225
2226
/*==========================================================================*/
2227
2228
// Called when freelist is exhausted.  Extend the freelist if there is
// space for a block.  Otherwise, remove this pool from usedpools.
static void
pymalloc_pool_extend(poolp pool, uint size)
{
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
        /* There is room for another block. */
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
        pool->nextoffset += INDEX2SIZE(size);
        /* The new block becomes the (single-entry) freelist terminator. */
        *(pymem_block **)(pool->freeblock) = NULL;
        return;
    }

    /* Pool is full, unlink from used pools. */
    /* Note the reuse of 'pool' here: after `pool = pool->prevpool` it
     * names the predecessor, and the two stores splice this pool out of
     * the doubly-linked usedpools ring. */
    poolp next;
    next = pool->nextpool;
    pool = pool->prevpool;
    next->prevpool = pool;
    pool->nextpool = next;
}
2248
2249
/* called when pymalloc_alloc can not allocate a block from usedpool.
 * This function takes new pool and allocate a block from it.
 */
/* Returns the first block of a pool newly dedicated to size class
 * 'size', or NULL if no arena could be allocated.  Maintains the
 * usable_arenas ordering invariant (sorted by nfreepools) and the
 * nfp2lasta[] index of the rightmost arena per free-pool count. */
static void*
allocate_from_new_pool(OMState *state, uint size)
{
    /* There isn't a pool of the right size class immediately
     * available:  use a free pool.
     */
    if (UNLIKELY(usable_arenas == NULL)) {
        /* No arena has a free pool:  allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
        if (narenas_currently_allocated >= MAX_ARENAS) {
            return NULL;
        }
#endif
        usable_arenas = new_arena(state);
        if (usable_arenas == NULL) {
            return NULL;
        }
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
    }
    assert(usable_arenas->address != 0);

    /* This arena already had the smallest nfreepools value, so decreasing
     * nfreepools doesn't change that, and we don't need to rearrange the
     * usable_arenas list.  However, if the arena becomes wholly allocated,
     * we need to remove its arena_object from usable_arenas.
     */
    assert(usable_arenas->nfreepools > 0);
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
        /* It's the last of this size, so there won't be any. */
        nfp2lasta[usable_arenas->nfreepools] = NULL;
    }
    /* If any free pools will remain, it will be the new smallest. */
    if (usable_arenas->nfreepools > 1) {
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
    }

    /* Try to get a cached free pool. */
    poolp pool = usable_arenas->freepools;
    if (LIKELY(pool != NULL)) {
        /* Unlink from cached pools. */
        usable_arenas->freepools = pool->nextpool;
        usable_arenas->nfreepools--;
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
            /* Wholly allocated:  remove. */
            assert(usable_arenas->freepools == NULL);
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
        else {
            /* nfreepools > 0:  it must be that freepools
             * isn't NULL, or that we haven't yet carved
             * off all the arena's pools for the first
             * time.
             */
            assert(usable_arenas->freepools != NULL ||
                   usable_arenas->pool_address <=
                   (pymem_block*)usable_arenas->address +
                       ARENA_SIZE - POOL_SIZE);
        }
    }
    else {
        /* Carve off a new pool. */
        assert(usable_arenas->nfreepools > 0);
        assert(usable_arenas->freepools == NULL);
        pool = (poolp)usable_arenas->pool_address;
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
                                 ARENA_SIZE - POOL_SIZE);
        pool->arenaindex = (uint)(usable_arenas - allarenas);
        assert(&allarenas[pool->arenaindex] == usable_arenas);
        /* DUMMY_SIZE_IDX forces the "initialize header" path below. */
        pool->szidx = DUMMY_SIZE_IDX;
        usable_arenas->pool_address += POOL_SIZE;
        --usable_arenas->nfreepools;

        if (usable_arenas->nfreepools == 0) {
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            /* Unlink the arena:  it is completely allocated. */
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
    }

    /* Frontlink to used pools. */
    pymem_block *bp;
    poolp next = usedpools[size + size]; /* == prev */
    pool->nextpool = next;
    pool->prevpool = next;
    next->nextpool = pool;
    next->prevpool = pool;
    pool->ref.count = 1;
    if (pool->szidx == size) {
        /* Luckily, this pool last contained blocks
         * of the same size class, so its header
         * and free list are already initialized.
         */
        bp = pool->freeblock;
        assert(bp != NULL);
        pool->freeblock = *(pymem_block **)bp;
        return bp;
    }
    /*
     * Initialize the pool header, set up the free list to
     * contain just the second block, and return the first
     * block.
     */
    pool->szidx = size;
    size = INDEX2SIZE(size);
    bp = (pymem_block *)pool + POOL_OVERHEAD;
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
    pool->maxnextoffset = POOL_SIZE - size;
    pool->freeblock = bp + size;
    *(pymem_block **)(pool->freeblock) = NULL;
    return bp;
}
2379
2380
/* pymalloc allocator

   Return a pointer to newly allocated memory if pymalloc allocated memory.

   Return NULL if pymalloc failed to allocate the memory block: on bigger
   requests, on error in the code below (as a last chance to serve the request)
   or when the max memory limit has been reached.
*/
static inline void*
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
{
#ifdef WITH_VALGRIND
    /* Under valgrind, bypass pymalloc entirely so valgrind can track
     * every allocation (running_on_valgrind == -1 means "unknown"). */
    if (UNLIKELY(running_on_valgrind == -1)) {
        running_on_valgrind = RUNNING_ON_VALGRIND;
    }
    if (UNLIKELY(running_on_valgrind)) {
        return NULL;
    }
#endif

    /* Zero-byte and large requests are delegated to the raw allocator
     * by returning NULL. */
    if (UNLIKELY(nbytes == 0)) {
        return NULL;
    }
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
        return NULL;
    }

    /* Map the request size to its size-class index; usedpools is a ring
     * per size class (the [size + size] indexing is the usedpools trick). */
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
    poolp pool = usedpools[size + size];
    pymem_block *bp;

    if (LIKELY(pool != pool->nextpool)) {
        /*
         * There is a used pool for this size class.
         * Pick up the head block of its free list.
         */
        ++pool->ref.count;
        bp = pool->freeblock;
        assert(bp != NULL);

        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
            // Reached the end of the free list, try to extend it.
            pymalloc_pool_extend(pool, size);
        }
    }
    else {
        /* There isn't a pool of the right size class immediately
         * available:  use a free pool.
         */
        bp = allocate_from_new_pool(state, size);
    }

    return (void *)bp;
}
2434
2435
2436
void *
2437
_PyObject_Malloc(void *ctx, size_t nbytes)
2438
1.52G
{
2439
1.52G
    OMState *state = get_state();
2440
1.52G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2441
1.52G
    if (LIKELY(ptr != NULL)) {
2442
1.32G
        return ptr;
2443
1.32G
    }
2444
2445
197M
    ptr = PyMem_RawMalloc(nbytes);
2446
197M
    if (ptr != NULL) {
2447
197M
        raw_allocated_blocks++;
2448
197M
    }
2449
197M
    return ptr;
2450
1.52G
}
2451
2452
2453
void *
2454
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2455
41.4M
{
2456
41.4M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2457
41.4M
    size_t nbytes = nelem * elsize;
2458
2459
41.4M
    OMState *state = get_state();
2460
41.4M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2461
41.4M
    if (LIKELY(ptr != NULL)) {
2462
41.3M
        memset(ptr, 0, nbytes);
2463
41.3M
        return ptr;
2464
41.3M
    }
2465
2466
59.8k
    ptr = PyMem_RawCalloc(nelem, elsize);
2467
59.8k
    if (ptr != NULL) {
2468
59.8k
        raw_allocated_blocks++;
2469
59.8k
    }
2470
59.8k
    return ptr;
2471
41.4M
}
2472
2473
2474
static void
2475
insert_to_usedpool(OMState *state, poolp pool)
2476
131M
{
2477
131M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2478
2479
131M
    uint size = pool->szidx;
2480
131M
    poolp next = usedpools[size + size];
2481
131M
    poolp prev = next->prevpool;
2482
2483
    /* insert pool before next:   prev <-> pool <-> next */
2484
131M
    pool->nextpool = next;
2485
131M
    pool->prevpool = prev;
2486
131M
    next->prevpool = pool;
2487
131M
    prev->nextpool = pool;
2488
131M
}
2489
2490
static void
2491
insert_to_freepool(OMState *state, poolp pool)
2492
2.09M
{
2493
2.09M
    poolp next = pool->nextpool;
2494
2.09M
    poolp prev = pool->prevpool;
2495
2.09M
    next->prevpool = prev;
2496
2.09M
    prev->nextpool = next;
2497
2498
    /* Link the pool to freepools.  This is a singly-linked
2499
     * list, and pool->prevpool isn't used there.
2500
     */
2501
2.09M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2502
2.09M
    pool->nextpool = ao->freepools;
2503
2.09M
    ao->freepools = pool;
2504
2.09M
    uint nf = ao->nfreepools;
2505
    /* If this is the rightmost arena with this number of free pools,
2506
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2507
     * are no arenas in usable_arenas with that value.
2508
     */
2509
2.09M
    struct arena_object* lastnf = nfp2lasta[nf];
2510
2.09M
    assert((nf == 0 && lastnf == NULL) ||
2511
2.09M
           (nf > 0 &&
2512
2.09M
            lastnf != NULL &&
2513
2.09M
            lastnf->nfreepools == nf &&
2514
2.09M
            (lastnf->nextarena == NULL ||
2515
2.09M
             nf < lastnf->nextarena->nfreepools)));
2516
2.09M
    if (lastnf == ao) {  /* it is the rightmost */
2517
1.92M
        struct arena_object* p = ao->prevarena;
2518
1.92M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2519
1.92M
    }
2520
2.09M
    ao->nfreepools = ++nf;
2521
2522
    /* All the rest is arena management.  We just freed
2523
     * a pool, and there are 4 cases for arena mgmt:
2524
     * 1. If all the pools are free, return the arena to
2525
     *    the system free().  Except if this is the last
2526
     *    arena in the list, keep it to avoid thrashing:
2527
     *    keeping one wholly free arena in the list avoids
2528
     *    pathological cases where a simple loop would
2529
     *    otherwise provoke needing to allocate and free an
2530
     *    arena on every iteration.  See bpo-37257.
2531
     * 2. If this is the only free pool in the arena,
2532
     *    add the arena back to the `usable_arenas` list.
2533
     * 3. If the "next" arena has a smaller count of free
2534
     *    pools, we have to "slide this arena right" to
2535
     *    restore that usable_arenas is sorted in order of
2536
     *    nfreepools.
2537
     * 4. Else there's nothing more to do.
2538
     */
2539
2.09M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2540
        /* Case 1.  First unlink ao from usable_arenas.
2541
         */
2542
3.46k
        assert(ao->prevarena == NULL ||
2543
3.46k
               ao->prevarena->address != 0);
2544
3.46k
        assert(ao ->nextarena == NULL ||
2545
3.46k
               ao->nextarena->address != 0);
2546
2547
        /* Fix the pointer in the prevarena, or the
2548
         * usable_arenas pointer.
2549
         */
2550
3.46k
        if (ao->prevarena == NULL) {
2551
756
            usable_arenas = ao->nextarena;
2552
756
            assert(usable_arenas == NULL ||
2553
756
                   usable_arenas->address != 0);
2554
756
        }
2555
2.70k
        else {
2556
2.70k
            assert(ao->prevarena->nextarena == ao);
2557
2.70k
            ao->prevarena->nextarena =
2558
2.70k
                ao->nextarena;
2559
2.70k
        }
2560
        /* Fix the pointer in the nextarena. */
2561
3.46k
        if (ao->nextarena != NULL) {
2562
3.46k
            assert(ao->nextarena->prevarena == ao);
2563
3.46k
            ao->nextarena->prevarena =
2564
3.46k
                ao->prevarena;
2565
3.46k
        }
2566
        /* Record that this arena_object slot is
2567
         * available to be reused.
2568
         */
2569
3.46k
        ao->nextarena = unused_arena_objects;
2570
3.46k
        unused_arena_objects = ao;
2571
2572
3.46k
#if WITH_PYMALLOC_RADIX_TREE
2573
        /* mark arena region as not under control of obmalloc */
2574
3.46k
        arena_map_mark_used(state, ao->address, 0);
2575
3.46k
#endif
2576
2577
        /* Free the entire arena. */
2578
3.46k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2579
3.46k
                             (void *)ao->address, ARENA_SIZE);
2580
3.46k
        ao->address = 0;                        /* mark unassociated */
2581
3.46k
        --narenas_currently_allocated;
2582
2583
3.46k
        return;
2584
3.46k
    }
2585
2586
2.09M
    if (nf == 1) {
2587
        /* Case 2.  Put ao at the head of
2588
         * usable_arenas.  Note that because
2589
         * ao->nfreepools was 0 before, ao isn't
2590
         * currently on the usable_arenas list.
2591
         */
2592
124k
        ao->nextarena = usable_arenas;
2593
124k
        ao->prevarena = NULL;
2594
124k
        if (usable_arenas)
2595
123k
            usable_arenas->prevarena = ao;
2596
124k
        usable_arenas = ao;
2597
124k
        assert(usable_arenas->address != 0);
2598
124k
        if (nfp2lasta[1] == NULL) {
2599
121k
            nfp2lasta[1] = ao;
2600
121k
        }
2601
2602
124k
        return;
2603
124k
    }
2604
2605
    /* If this arena is now out of order, we need to keep
2606
     * the list sorted.  The list is kept sorted so that
2607
     * the "most full" arenas are used first, which allows
2608
     * the nearly empty arenas to be completely freed.  In
2609
     * a few un-scientific tests, it seems like this
2610
     * approach allowed a lot more memory to be freed.
2611
     */
2612
    /* If this is the only arena with nf, record that. */
2613
1.97M
    if (nfp2lasta[nf] == NULL) {
2614
1.91M
        nfp2lasta[nf] = ao;
2615
1.91M
    } /* else the rightmost with nf doesn't change */
2616
    /* If this was the rightmost of the old size, it remains in place. */
2617
1.97M
    if (ao == lastnf) {
2618
        /* Case 4.  Nothing to do. */
2619
1.92M
        return;
2620
1.92M
    }
2621
    /* If ao were the only arena in the list, the last block would have
2622
     * gotten us out.
2623
     */
2624
1.97M
    assert(ao->nextarena != NULL);
2625
2626
    /* Case 3:  We have to move the arena towards the end of the list,
2627
     * because it has more free pools than the arena to its right.  It needs
2628
     * to move to follow lastnf.
2629
     * First unlink ao from usable_arenas.
2630
     */
2631
46.4k
    if (ao->prevarena != NULL) {
2632
        /* ao isn't at the head of the list */
2633
36.8k
        assert(ao->prevarena->nextarena == ao);
2634
36.8k
        ao->prevarena->nextarena = ao->nextarena;
2635
36.8k
    }
2636
9.56k
    else {
2637
        /* ao is at the head of the list */
2638
9.56k
        assert(usable_arenas == ao);
2639
9.56k
        usable_arenas = ao->nextarena;
2640
9.56k
    }
2641
46.4k
    ao->nextarena->prevarena = ao->prevarena;
2642
    /* And insert after lastnf. */
2643
46.4k
    ao->prevarena = lastnf;
2644
46.4k
    ao->nextarena = lastnf->nextarena;
2645
46.4k
    if (ao->nextarena != NULL) {
2646
45.0k
        ao->nextarena->prevarena = ao;
2647
45.0k
    }
2648
46.4k
    lastnf->nextarena = ao;
2649
    /* Verify that the swaps worked. */
2650
46.4k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2651
46.4k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2652
46.4k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2653
46.4k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2654
46.4k
           || ao->prevarena->nextarena == ao);
2655
46.4k
}
2656
2657
/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
{
    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Under valgrind pymalloc is bypassed, so nothing here can own p. */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    /* Round p down to its pool base; only meaningful if p is ours,
     * which address_in_range() verifies next. */
    poolp pool = POOL_ADDR(p);
    if (UNLIKELY(!address_in_range(state, p, pool))) {
        return 0;
    }
    /* We allocated this address. */

    /* Link p to the start of the pool's freeblock list.  Since
     * the pool had at least the p block outstanding, the pool
     * wasn't empty (so it's already in a usedpools[] list, or
     * was full and is in no list -- it's not in the freeblocks
     * list in any case).
     */
    assert(pool->ref.count > 0);            /* else it was empty */
    pymem_block *lastfree = pool->freeblock;
    *(pymem_block **)p = lastfree;          /* freed block becomes new list head */
    pool->freeblock = (pymem_block *)p;
    pool->ref.count--;

    if (UNLIKELY(lastfree == NULL)) {
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        insert_to_usedpool(state, pool);
        return 1;
    }

    /* freeblock wasn't NULL, so the pool wasn't full,
     * and the pool is in a usedpools[] list.
     */
    if (LIKELY(pool->ref.count != 0)) {
        /* pool isn't empty:  leave it in usedpools */
        return 1;
    }

    /* Pool is now empty:  unlink from usedpools, and
     * link to the front of freepools.  This ensures that
     * previously freed pools will be allocated later
     * (being not referenced, they are perhaps paged out).
     */
    insert_to_freepool(state, pool);
    return 1;
}
2716
2717
2718
void
2719
_PyObject_Free(void *ctx, void *p)
2720
1.56G
{
2721
    /* PyObject_Free(NULL) has no effect */
2722
1.56G
    if (p == NULL) {
2723
1.63M
        return;
2724
1.63M
    }
2725
2726
1.56G
    OMState *state = get_state();
2727
1.56G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2728
        /* pymalloc didn't allocate this address */
2729
197M
        PyMem_RawFree(p);
2730
197M
        raw_allocated_blocks--;
2731
197M
    }
2732
1.56G
}
2733
2734
2735
/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocate p. */
static int
pymalloc_realloc(OMState *state, void *ctx,
                 void **newptr_p, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(state, p, pool)) {
        /* pymalloc is not managing this block.

           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
           over this block.  However, if we do, we need to copy the valid data
           from the C-managed block to one of our blocks, and there's no
           portable way to know how much of the memory space starting at p is
           valid.

           As bug 1185883 pointed out the hard way, it's possible that the
           C-managed block is "at the end" of allocated VM space, so that a
           memory fault can occur if we try to copy nbytes bytes starting at p.
           Instead we punt: let C continue to manage this block. */
        return 0;
    }

    /* pymalloc is in charge of this block: its pool's size class tells us
       the allocated capacity. */
    size = INDEX2SIZE(pool->szidx);
    if (nbytes <= size) {
        /* The block is staying the same or shrinking.

           If it's shrinking, there's a tradeoff: it costs cycles to copy the
           block to a smaller size class, but it wastes memory not to copy it.

           The compromise here is to copy on shrink only if at least 25% of
           size can be shaved off. */
        if (4 * nbytes > 3 * size) {
            /* It's the same, or shrinking and new/old > 3/4. */
            *newptr_p = p;
            return 1;
        }
        /* Shrinking enough to bother: only copy the surviving bytes. */
        size = nbytes;
    }

    bp = _PyObject_Malloc(ctx, nbytes);
    if (bp != NULL) {
        memcpy(bp, p, size);
        _PyObject_Free(ctx, p);
    }
    /* On allocation failure bp is NULL; report "handled" either way since
       p was ours. */
    *newptr_p = bp;
    return 1;
}
2804
2805
2806
void *
2807
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2808
261M
{
2809
261M
    void *ptr2;
2810
2811
261M
    if (ptr == NULL) {
2812
183M
        return _PyObject_Malloc(ctx, nbytes);
2813
183M
    }
2814
2815
77.5M
    OMState *state = get_state();
2816
77.5M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2817
70.6M
        return ptr2;
2818
70.6M
    }
2819
2820
6.82M
    return PyMem_RawRealloc(ptr, nbytes);
2821
77.5M
}
2822
2823
#else   /* ! WITH_PYMALLOC */
2824
2825
/*==========================================================================*/
2826
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2827
 * only be used by extensions that are compiled with pymalloc enabled. */
2828
2829
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc disabled: no per-interpreter block accounting exists. */
    return 0;
}
2834
2835
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    /* pymalloc disabled: the global block count is always zero. */
    return 0;
}
2840
2841
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc disabled: there is no obmalloc state to finalize. */
    return;
}
2846
2847
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
{
    /* pymalloc disabled: there is no runtime-wide obmalloc state to finalize. */
    return;
}
2852
2853
#endif /* WITH_PYMALLOC */
2854
2855
2856
/*==========================================================================*/
2857
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2858
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2859
 */
2860
2861
/* Uncomment this define to add the "serialno" field */
2862
/* #define PYMEM_DEBUG_SERIALNO */
2863
2864
#ifdef PYMEM_DEBUG_SERIALNO
2865
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2866
2867
/* serialno is always incremented via calling this routine.  The point is
2868
 * to supply a single place to set a breakpoint.
2869
 */
2870
static void
bumpserialno(void)
{
    /* Single point of mutation: set a breakpoint here to catch allocation #N. */
    ++serialno;
}
2875
#endif
2876
2877
0
#define SST SIZEOF_SIZE_T
2878
2879
#ifdef PYMEM_DEBUG_SERIALNO
2880
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2881
#else
2882
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2883
#endif
2884
2885
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2886
static size_t
2887
read_size_t(const void *p)
2888
0
{
2889
0
    const uint8_t *q = (const uint8_t *)p;
2890
0
    size_t result = *q++;
2891
0
    int i;
2892
2893
0
    for (i = SST; --i > 0; ++q)
2894
0
        result = (result << 8) | *q;
2895
0
    return result;
2896
0
}
2897
2898
/* Write n as a big-endian size_t, MSB at address p, LSB at
2899
 * p + sizeof(size_t) - 1.
2900
 */
2901
static void
2902
write_size_t(void *p, size_t n)
2903
0
{
2904
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2905
0
    int i;
2906
2907
0
    for (i = SST; --i >= 0; --q) {
2908
0
        *q = (uint8_t)(n & 0xff);
2909
0
        n >>= 8;
2910
0
    }
2911
0
}
2912
2913
/* Fill nbytes at data with byte c.  In the free-threaded build, skip the
   leading bytes of object-domain ('o') allocations that mimalloc reserves
   (see debug_offset below). */
static void
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
               bool is_alloc)
{
#ifdef Py_GIL_DISABLED
    if (api->api_id == 'o') {
        // Don't overwrite the first few bytes of a PyObject allocation in the
        // free-threaded build
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
        size_t debug_offset;
        if (is_alloc) {
            // On allocation the target heap is known from the thread state.
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
        }
        else {
            // On free, recover the offset from the mimalloc page owning the block.
            char *alloc = (char *)data - 2*SST;  // start of the allocation
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
        }
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
        if (debug_offset < nbytes) {
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
        }
        return;
    }
#endif
    memset(data, c, nbytes);
}
2939
2940
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2941
   fills them with useful stuff, here calling the underlying malloc's result p:
2942
2943
p[0: S]
2944
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2945
    to read in a memory dump).
2946
p[S]
2947
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2948
p[S+1: 2*S]
2949
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2950
p[2*S: 2*S+n]
2951
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2952
    Used to catch reference to uninitialized memory.
2953
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2954
    handled the request itself.
2955
p[2*S+n: 2*S+n+S]
2956
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
2957
p[2*S+n+S: 2*S+n+2*S]
2958
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2959
    and _PyMem_DebugRealloc.
2960
    This is a big-endian size_t.
2961
    If "bad memory" is detected later, the serial number gives an
2962
    excellent way to set a breakpoint on the next run, to capture the
2963
    instant at which this block was passed out.
2964
2965
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2966
for 3 * S extra bytes, and omits the last serialno field.
2967
*/
2968
2969
/* Common implementation for the debug malloc/calloc hooks: allocate
   nbytes + PYMEM_DEBUG_EXTRA_BYTES from the wrapped allocator and decorate
   the block per the layout described above.  Returns the data pointer
   (2*SST past the real base), or NULL on overflow/allocation failure. */
static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *p;           /* base address of malloc'ed pad block */
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */

    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                ^--- p    ^--- data   ^--- tail
       S: nbytes stored as size_t
       I: API identifier (1 byte)
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
       C: Clean bytes used later to store actual data
       N: Serial number stored as size_t

       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
       is omitted. */

    if (use_calloc) {
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
    }
    else {
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
    }
    if (p == NULL) {
        return NULL;
    }
    data = p + 2*SST;

#ifdef PYMEM_DEBUG_SERIALNO
    bumpserialno();
#endif

    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
    write_size_t(p, nbytes);
    p[SST] = (uint8_t)api->api_id;
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    /* calloc already zeroed the data area, so only malloc needs the
       CLEANBYTE fill that flags reads of uninitialized memory. */
    if (nbytes > 0 && !use_calloc) {
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
    }

    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, serialno);
#endif

    return data;
}
3028
3029
void *
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{
    /* malloc flavor: decorate but do not zero the data area. */
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
3034
3035
void *
3036
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3037
0
{
3038
0
    size_t nbytes;
3039
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3040
0
    nbytes = nelem * elsize;
3041
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3042
0
}
3043
3044
3045
/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with PYMEM_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMem_DebugRawFree(void *ctx, void *p)
{
    /* PyMem_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
    nbytes = read_size_t(q);
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;  /* user bytes + tail decoration */
    memset(q, PYMEM_DEADBYTE, 2*SST);           /* poison the header */
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);  /* poison data + tail */
    api->alloc.free(api->alloc.ctx, q);
}
3069
3070
3071
/* Debug realloc: validate p's decorations, poison the bytes the underlying
   realloc may recycle (saving a copy so they can be restored), resize via
   the wrapped allocator, then rewrite header/trailer decorations.  Returns
   the new data pointer, or NULL on overflow or allocation failure (in which
   case p's decorations are restored and p remains valid). */
void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
    if (p == NULL) {
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *head;        /* base address of malloc'ed pad block */
    uint8_t *data;        /* pointer to data bytes */
    uint8_t *r;
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
    size_t original_nbytes;
#define ERASED_SIZE 64

    _PyMem_DebugCheckAddress(__func__, api->api_id, p);

    data = (uint8_t *)p;
    head = data - 2*SST;
    original_nbytes = read_size_t(head);
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    tail = data + original_nbytes;
#ifdef PYMEM_DEBUG_SERIALNO
    size_t block_serialno = read_size_t(tail + SST);
#endif
#ifndef Py_GIL_DISABLED
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
     */
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
    if (original_nbytes <= sizeof(save)) {
        /* Small block: save and poison the whole thing. */
        memcpy(save, data, original_nbytes);
        memset(data - 2 * SST, PYMEM_DEADBYTE,
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
    }
    else {
        /* Large block: only save/poison ERASED_SIZE bytes at each end. */
        memcpy(save, data, ERASED_SIZE);
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
    }
#endif

    /* Resize and add decorations. */
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
    if (r == NULL) {
        /* if realloc() failed: rewrite header and footer which have
           just been erased */
        nbytes = original_nbytes;
    }
    else {
        head = r;
#ifdef PYMEM_DEBUG_SERIALNO
        bumpserialno();
        block_serialno = serialno;
#endif
    }
    data = head + 2*SST;

    write_size_t(head, nbytes);
    head[SST] = (uint8_t)api->api_id;
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, block_serialno);
#endif

#ifndef Py_GIL_DISABLED
    /* Restore saved bytes. */
    if (original_nbytes <= sizeof(save)) {
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
    }
    else {
        /* i is where the saved tail region begins in the old layout. */
        size_t i = original_nbytes - ERASED_SIZE;
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
        if (nbytes > i) {
            memcpy(data + i, &save[ERASED_SIZE],
                   Py_MIN(nbytes - i, ERASED_SIZE));
        }
    }
#endif

    if (r == NULL) {
        return NULL;
    }

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
               nbytes - original_nbytes);
    }

    return data;
}
3174
3175
static inline void
3176
_PyMem_DebugCheckGIL(const char *func)
3177
0
{
3178
0
    PyThreadState *tstate = _PyThreadState_GET();
3179
0
    if (tstate == NULL) {
3180
0
#ifndef Py_GIL_DISABLED
3181
0
        _Py_FatalErrorFunc(func,
3182
0
                           "Python memory allocator called "
3183
0
                           "without holding the GIL");
3184
#else
3185
        _Py_FatalErrorFunc(func,
3186
                           "Python memory allocator called "
3187
                           "without an active thread state. "
3188
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3189
#endif
3190
0
    }
3191
0
}
3192
3193
void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    /* Verify a thread state is active before delegating to the raw hook. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawMalloc(ctx, nbytes);
}
3199
3200
void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{
    /* Verify a thread state is active before delegating to the raw hook. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}
3206
3207
3208
void
_PyMem_DebugFree(void *ctx, void *ptr)
{
    /* Verify a thread state is active before delegating to the raw hook. */
    _PyMem_DebugCheckGIL(__func__);
    _PyMem_DebugRawFree(ctx, ptr);
}
3214
3215
3216
void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
    /* Verify a thread state is active before delegating to the raw hook. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
}
3222
3223
/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 * The API id is also checked.
 */
static void
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
{
    assert(p != NULL);

    const uint8_t *q = (const uint8_t *)p;
    size_t nbytes;
    const uint8_t *tail;
    int i;
    char id;

    /* Check the API id */
    id = (char)q[-SST];
    if (id != api) {
        _PyObject_DebugDumpAddress(p);
        _Py_FatalErrorFormat(func,
                             "bad ID: Allocated using API '%c', "
                             "verified using API '%c'",
                             id, api);
    }

    /* Check the stuff at the start of p first:  if there's underwrite
     * corruption, the number-of-bytes field may be nuts, and checking
     * the tail could lead to a segfault then.
     */
    for (i = SST-1; i >= 1; --i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            _PyObject_DebugDumpAddress(p);
            _Py_FatalErrorFunc(func, "bad leading pad byte");
        }
    }

    /* Leading pad intact, so the stored size can be trusted enough to
     * locate the trailing pad. */
    nbytes = read_size_t(q - 2*SST);
    tail = q + nbytes;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            _PyObject_DebugDumpAddress(p);
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
        }
    }
}
3269
3270
/* Display info to stderr about the memory block at p.  Used as a diagnostic
 * by _PyMem_DebugCheckAddress before a fatal error; tolerates a corrupted
 * block by checking leading pads before trusting the stored size. */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    const uint8_t *tail;
    size_t nbytes;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
            PYMEM_FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uint8_t byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "       may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                PYMEM_FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uint8_t byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

#ifdef PYMEM_DEBUG_SERIALNO
    size_t serial = read_size_t(tail + SST);
    fprintf(stderr,
            "    The block was made by call #%zu to debug malloc/realloc.\n",
            serial);
#endif

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
    fputc('\n', stderr);

    fflush(stderr);
    _PyMem_DumpTraceback(fileno(stderr), p);
}
3378
3379
3380
/* Print "msg" padded to column 35, '=', then value right-aligned with
 * thousands separators and a trailing newline.  Returns value unchanged
 * so callers can accumulate totals. */
static size_t
printone(FILE *out, const char* msg, size_t value)
{
    char field[100];
    const size_t saved = value;

    /* Label, space-padded out to column 35, followed by '='. */
    fputs(msg, out);
    for (int col = (int)strlen(msg); col < 35; col++) {
        fputc(' ', out);
    }
    fputc('=', out);

    /* Render the digits right-to-left with a comma every three. */
    int pos = 22;
    field[pos--] = '\0';
    field[pos--] = '\n';
    int group = 3;
    do {
        size_t rest = value / 10;
        unsigned int digit = (unsigned int)(value - rest * 10);
        value = rest;
        field[pos--] = (char)('0' + digit);
        if (--group == 0 && value && pos >= 0) {
            group = 3;
            field[pos--] = ',';
        }
    } while (value && pos >= 0);

    /* Left-pad the numeric field with blanks. */
    while (pos >= 0) {
        field[pos--] = ' ';
    }
    fputs(field, out);

    return saved;
}
3415
3416
/* Emit one printone() line describing a fixed-size block pool, e.g.
 * "13 arenas * 262144 bytes each = ...". */
void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char desc[128];
    char padded[128];
    PyOS_snprintf(desc, sizeof(desc),
                  "%d %ss * %zd bytes each",
                  num_blocks, block_name, sizeof_block);
    /* Right-align the description to the 48-column field printone expects. */
    PyOS_snprintf(padded, sizeof(padded),
                  "%48s ", desc);
    (void)printone(out, padded, num_blocks * sizeof_block);
}
3429
3430
// Return true if the obmalloc state structure is heap allocated,
3431
// by PyMem_RawCalloc().  For the main interpreter, this structure
3432
// allocated in the BSS.  Allocating that way gives some memory savings
3433
// and a small performance win (at least on a demand paged OS).  On
3434
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3435
// memory is for the arena_map_top array.  Since normally only one entry
3436
// of that array is used, only one page of resident memory is actually
3437
// used, rather than the full 256 kB.
3438
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
#if WITH_PYMALLOC
    /* Heap-allocated iff present and distinct from the static main-interpreter
       state (which lives in the BSS). */
    return interp->obmalloc != NULL && interp->obmalloc != &obmalloc_state_main;
#else
    return false;
#endif
}
3446
3447
#ifdef WITH_PYMALLOC
3448
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
    // initialize the obmalloc->pools structure.  This must be done
    // before the obmalloc alloc/free functions can be called.
    // _obmalloc_pools_INIT expands to the usedpools sentinel-header pattern;
    // build it in a stack temporary, then copy it into the interpreter state.
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
        _obmalloc_pools_INIT(interp->obmalloc->pools);
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
3457
#endif /* WITH_PYMALLOC */
3458
3459
/* Set up interp->obmalloc.  Returns 0 on success, -1 on allocation failure
   (only possible for subinterpreters with their own obmalloc state). */
int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
    /* Initialize obmalloc, but only for subinterpreters,
       since the main interpreter is initialized statically. */
    if (_Py_IsMainInterpreter(interp)
            || _PyInterpreterState_HasFeature(interp,
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
        /* Share the statically allocated main-interpreter state; the pools
           only need initializing the first time through. */
        interp->obmalloc = &obmalloc_state_main;
        if (!obmalloc_state_initialized) {
            init_obmalloc_pools(interp);
            obmalloc_state_initialized = true;
        }
    } else {
        /* Isolated subinterpreter: give it its own heap-allocated state. */
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
        if (interp->obmalloc == NULL) {
            return -1;
        }
        init_obmalloc_pools(interp);
    }
#endif /* WITH_PYMALLOC */
    return 0; // success
}
3482
3483
3484
#ifdef WITH_PYMALLOC
3485
3486
/* Release all memory still held by the interpreter's obmalloc state:
 * every arena, the arena table itself, and (when the radix tree is in
 * use) the interior nodes of the arena address map.
 *
 * NOTE: `state` looks unused below, but maxarenas/allarenas/
 * arena_map_root etc. are macros that expand to members of `state`.
 */
static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
    OMState *state = interp->obmalloc;
    for (uint i = 0; i < maxarenas; ++i) {
        // free each obmalloc memory arena
        struct arena_object *ao = &allarenas[i];
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
    }
    // free the array containing pointers to all arenas
    PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
    // Free the middle and bottom nodes of the radix tree.  These are allocated
    // by arena_map_mark_used() but not freed when arenas are freed.
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
         if (mid == NULL) {
             continue;
         }
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
            // Free all bottom nodes under this mid node before the mid
            // node itself.
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
            if (bot == NULL) {
                continue;
            }
            PyMem_RawFree(bot);
         }
         PyMem_RawFree(mid);
    }
#endif
#endif
}
3519
3520
#ifdef Py_DEBUG
3521
/* Is target in the list?  The list is traversed via the nextpool pointers.
3522
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3523
 * list, else 0.
3524
 */
3525
static int
3526
pool_is_in_list(const poolp target, poolp list)
3527
{
3528
    poolp origlist = list;
3529
    assert(target != NULL);
3530
    if (list == NULL)
3531
        return 0;
3532
    do {
3533
        if (target == list)
3534
            return 1;
3535
        list = list->nextpool;
3536
    } while (list != NULL && list != origlist);
3537
    return 0;
3538
}
3539
#endif
3540
3541
#ifdef WITH_MIMALLOC
3542
/* Accumulator filled in by _collect_alloc_stats() while visiting a
 * mimalloc heap; all counters are summed across the heap's areas. */
struct _alloc_stats {
    size_t allocated_blocks;        // total live blocks (sum of area->used)
    size_t allocated_bytes;         // live blocks * block_size per area
    size_t allocated_with_overhead; // live blocks * full_block_size per area
    size_t bytes_reserved;          // sum of area->reserved
    size_t bytes_committed;         // sum of area->committed
};
3549
3550
static bool _collect_alloc_stats(
3551
    const mi_heap_t* heap, const mi_heap_area_t* area,
3552
    void* block, size_t block_size, void* arg)
3553
0
{
3554
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3555
0
    stats->allocated_blocks += area->used;
3556
0
    stats->allocated_bytes += area->used * area->block_size;
3557
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3558
0
    stats->bytes_reserved += area->reserved;
3559
0
    stats->bytes_committed += area->committed;
3560
0
    return 1;
3561
0
}
3562
3563
static void
3564
py_mimalloc_print_stats(FILE *out)
3565
0
{
3566
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3567
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3568
0
    fprintf(out, "Medium block threshold = %zu\n",
3569
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3570
0
    fprintf(out, "Large object max size = %zu\n",
3571
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3572
3573
0
    mi_heap_t *heap = mi_heap_get_default();
3574
0
    struct _alloc_stats stats;
3575
0
    memset(&stats, 0, sizeof(stats));
3576
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3577
3578
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3579
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3580
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3581
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3582
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3583
0
}
3584
#endif
3585
3586
3587
/* Print a detailed report of pymalloc's internal state to `out`:
 * per-size-class pool/block counts, arena totals, and an accounting of
 * where every byte of every arena went.  In Py_DEBUG builds, also
 * asserts internal consistency (pool list membership, arena totals).
 *
 * NOTE: `state` is referenced implicitly -- maxarenas, allarenas,
 * usedpools, narenas_currently_allocated, etc. are macros expanding to
 * members of `state`.
 */
static void
pymalloc_print_stats(FILE *out)
{
    OMState *state = get_state();

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    /* Zero the per-class counters before the scan. */
    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uintptr_t base = allarenas[i].address;

        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += allarenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, allarenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            /* Any pool with free blocks must be on its class's used list
             * (usedpools stores two entries per class, hence sz + sz). */
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    /* Per-class table, skipping classes with no pools. */
    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        /* Space at the end of each pool too small for another block. */
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    /* Every arena byte must be accounted for by one of the categories. */
    assert(narenas * ARENA_SIZE == total);

#if WITH_PYMALLOC_RADIX_TREE
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

}
3743
3744
/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if the memory debug hooks are not installed or no statistics was
 * written into out, return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{
#ifdef WITH_MIMALLOC
    /* NOTE: when WITH_MIMALLOC is defined, the `else` below binds this
     * mimalloc branch to the pymalloc `if`; without it, only the
     * pymalloc if/else survives preprocessing. */
    if (_PyMem_MimallocEnabled()) {
        py_mimalloc_print_stats(out);
        return 1;
    }
    else
#endif
    if (_PyMem_PymallocEnabled()) {
        pymalloc_print_stats(out);
        return 1;
    }
    else {
        /* Neither allocator active (e.g. PYTHONMALLOC=malloc): nothing
         * was written. */
        return 0;
    }
}
3769
3770
#endif /* #ifdef WITH_PYMALLOC */