Coverage Report

Created: 2026-01-10 06:41

/src/cpython/Objects/obmalloc.c
Columns: Line, Count, Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#ifdef WITH_MIMALLOC
17
// Forward declarations of functions used in our mimalloc modifications
18
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
19
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
20
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
21
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
22
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
23
#  include "pycore_mimalloc.h"
24
#  include "mimalloc/static.c"
25
#  include "mimalloc/internal.h"  // for stats
26
#endif
27
28
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
29
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
30
#endif
31
32
#undef  uint
33
1.97G
#define uint pymem_uint
34
35
36
/* Defined in tracemalloc.c */
37
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
38
39
static void _PyObject_DebugDumpAddress(const void *p);
40
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
41
42
43
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
44
static void set_up_debug_hooks_unlocked(void);
45
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
46
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
47
48
49
/***************************************/
50
/* low-level allocator implementations */
51
/***************************************/
52
53
/* the default raw allocator (wraps malloc) */
54
55
void *
56
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
57
247M
{
58
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
59
       for malloc(0), which would be treated as an error. Some platforms would
60
       return a pointer with no memory behind it, which would break pymalloc.
61
       To solve these problems, allocate an extra byte. */
62
247M
    if (size == 0)
63
42.4M
        size = 1;
64
247M
    return malloc(size);
65
247M
}
66
67
void *
68
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
69
116k
{
70
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
71
       for calloc(0, 0), which would be treated as an error. Some platforms
72
       would return a pointer with no memory behind it, which would break
73
       pymalloc.  To solve these problems, allocate an extra byte. */
74
116k
    if (nelem == 0 || elsize == 0) {
75
2
        nelem = 1;
76
2
        elsize = 1;
77
2
    }
78
116k
    return calloc(nelem, elsize);
79
116k
}
80
81
void *
82
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
83
8.59M
{
84
8.59M
    if (size == 0)
85
0
        size = 1;
86
8.59M
    return realloc(ptr, size);
87
8.59M
}
88
89
void
90
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
91
247M
{
92
247M
    free(ptr);
93
247M
}
94
95
#ifdef WITH_MIMALLOC
96
97
static void
98
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
99
0
{
100
#ifdef Py_GIL_DISABLED
101
    // Clear the QSBR goal and remove the page from the QSBR linked list.
102
    page->qsbr_goal = 0;
103
    if (page->qsbr_node.next != NULL) {
104
        llist_remove(&page->qsbr_node);
105
    }
106
#endif
107
0
}
108
109
// Check if an empty, newly reclaimed page is safe to free now.
110
static bool
111
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
112
0
{
113
0
    assert(mi_page_all_free(page));
114
#ifdef Py_GIL_DISABLED
115
    assert(page->qsbr_node.next == NULL);
116
    if (page->use_qsbr && page->qsbr_goal != 0) {
117
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
118
        if (tstate == NULL) {
119
            return false;
120
        }
121
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
122
    }
123
#endif
124
0
    return true;
125
126
0
}
127
128
#ifdef Py_GIL_DISABLED
129
130
// If we are deferring collection of more than this amount of memory for
131
// mimalloc pages, advance the write sequence.  Advancing allows these
132
// pages to be re-used in a different thread or for a different size class.
133
#define QSBR_PAGE_MEM_LIMIT 4096*20
134
135
// Return true if the global write sequence should be advanced for a mimalloc
136
// page that is deferred from collection.
137
static bool
138
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
139
{
140
    size_t bsize = mi_page_block_size(page);
141
    size_t page_size = page->capacity*bsize;
142
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
143
        qsbr->deferred_page_memory = 0;
144
        return true;
145
    }
146
    qsbr->deferred_page_memory += page_size;
147
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
148
        qsbr->deferred_page_memory = 0;
149
        return true;
150
    }
151
    return false;
152
}
153
#endif
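Worked example of the limit above: QSBR_PAGE_MEM_LIMIT is 4096*20, i.e. 80 KiB. A single page whose capacity times block size exceeds 80 KiB advances the write sequence immediately; with 16 KiB pages the deferred total first exceeds the limit on the sixth page, so the sequence advances roughly once per six deferred pages.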
154
155
static bool
156
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
157
0
{
158
#ifdef Py_GIL_DISABLED
159
    assert(mi_page_all_free(page));
160
    if (page->use_qsbr) {
161
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
162
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
163
            _PyMem_mi_page_clear_qsbr(page);
164
            _mi_page_free(page, pq, force);
165
            return true;
166
        }
167
168
        _PyMem_mi_page_clear_qsbr(page);
169
        page->retire_expire = 0;
170
171
        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
172
            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
173
        }
174
        else {
175
            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
176
        }
177
178
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
179
        return false;
180
    }
181
#endif
182
0
    _mi_page_free(page, pq, force);
183
0
    return true;
184
0
}
185
186
static void
187
_PyMem_mi_page_reclaimed(mi_page_t *page)
188
0
{
189
#ifdef Py_GIL_DISABLED
190
    assert(page->qsbr_node.next == NULL);
191
    if (page->qsbr_goal != 0) {
192
        if (mi_page_all_free(page)) {
193
            assert(page->qsbr_node.next == NULL);
194
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
195
            page->retire_expire = 0;
196
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
197
        }
198
        else {
199
            page->qsbr_goal = 0;
200
        }
201
    }
202
#endif
203
0
}
204
205
static void
206
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
207
0
{
208
#ifdef Py_GIL_DISABLED
209
    if (!heap->page_use_qsbr) {
210
        return;
211
    }
212
213
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
214
    struct llist_node *head = &tstate->mimalloc.page_list;
215
    if (llist_empty(head)) {
216
        return;
217
    }
218
219
    struct llist_node *node;
220
    llist_for_each_safe(node, head) {
221
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
222
        if (!mi_page_all_free(page)) {
223
            // We allocated from this page some point after the delayed free
224
            _PyMem_mi_page_clear_qsbr(page);
225
            continue;
226
        }
227
228
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
229
            return;
230
        }
231
232
        _PyMem_mi_page_clear_qsbr(page);
233
        _mi_page_free(page, mi_page_queue_of(page), false);
234
    }
235
#endif
236
0
}
237
238
void *
239
_PyMem_MiMalloc(void *ctx, size_t size)
240
0
{
241
#ifdef Py_GIL_DISABLED
242
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
243
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
244
    return mi_heap_malloc(heap, size);
245
#else
246
0
    return mi_malloc(size);
247
0
#endif
248
0
}
249
250
void *
251
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
252
0
{
253
#ifdef Py_GIL_DISABLED
254
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
255
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
256
    return mi_heap_calloc(heap, nelem, elsize);
257
#else
258
0
    return mi_calloc(nelem, elsize);
259
0
#endif
260
0
}
261
262
void *
263
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
264
0
{
265
#ifdef Py_GIL_DISABLED
266
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
267
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
268
    return mi_heap_realloc(heap, ptr, size);
269
#else
270
0
    return mi_realloc(ptr, size);
271
0
#endif
272
0
}
273
274
void
275
_PyMem_MiFree(void *ctx, void *ptr)
276
0
{
277
0
    mi_free(ptr);
278
0
}
279
280
void *
281
_PyObject_MiMalloc(void *ctx, size_t nbytes)
282
0
{
283
#ifdef Py_GIL_DISABLED
284
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
285
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
286
    return mi_heap_malloc(heap, nbytes);
287
#else
288
0
    return mi_malloc(nbytes);
289
0
#endif
290
0
}
291
292
void *
293
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
294
0
{
295
#ifdef Py_GIL_DISABLED
296
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
297
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
298
    return mi_heap_calloc(heap, nelem, elsize);
299
#else
300
0
    return mi_calloc(nelem, elsize);
301
0
#endif
302
0
}
303
304
305
void *
306
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
307
0
{
308
#ifdef Py_GIL_DISABLED
309
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
310
    // Implement our own realloc logic so that we can copy PyObject header
311
    // in a thread-safe way.
312
    size_t size = mi_usable_size(ptr);
313
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
314
        return ptr;
315
    }
316
317
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
318
    void* newp = mi_heap_malloc(heap, nbytes);
319
    if (newp == NULL) {
320
        return NULL;
321
    }
322
323
    // Free threaded Python allows access from other threads to the PyObject reference count
324
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
325
    // These fields are typically initialized by PyObject_Init() using relaxed
326
    // atomic stores. We need to copy these fields in a thread-safe way here.
327
    // We use the "debug_offset" to determine how many bytes to copy -- it
328
    // includes the PyObject header and plus any extra pre-headers.
329
    size_t offset = heap->debug_offset;
330
    assert(offset % sizeof(void*) == 0);
331
332
    size_t copy_size = (size < nbytes ? size : nbytes);
333
    if (copy_size >= offset) {
334
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
335
            // Use memcpy to avoid strict-aliasing issues. However, we probably
336
            // still have unavoidable strict-aliasing issues with
337
            // _Py_atomic_store_ptr_relaxed here.
338
            void *word;
339
            memcpy(&word, (char*)ptr + i, sizeof(void*));
340
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
341
        }
342
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
343
    }
344
    else {
345
        _mi_memcpy(newp, ptr, copy_size);
346
    }
347
    mi_free(ptr);
348
    return newp;
349
#else
350
0
    return mi_realloc(ptr, nbytes);
351
0
#endif
352
0
}
353
354
void
355
_PyObject_MiFree(void *ctx, void *ptr)
356
0
{
357
0
    mi_free(ptr);
358
0
}
359
360
#endif // WITH_MIMALLOC
361
362
363
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
364
365
366
#ifdef WITH_MIMALLOC
367
0
#  define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
368
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
369
#endif
370
371
/* the pymalloc allocator */
372
373
// The actual implementation is further down.
374
375
#if defined(WITH_PYMALLOC)
376
void* _PyObject_Malloc(void *ctx, size_t size);
377
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
378
void _PyObject_Free(void *ctx, void *p);
379
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
380
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
381
#endif  // WITH_PYMALLOC
382
383
#if defined(Py_GIL_DISABLED)
384
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
385
#  define PYRAW_ALLOC MALLOC_ALLOC
386
#  define PYMEM_ALLOC MIMALLOC_ALLOC
387
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
388
#elif defined(WITH_PYMALLOC)
389
0
#  define PYRAW_ALLOC MALLOC_ALLOC
390
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
391
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
392
#else
393
#  define PYRAW_ALLOC MALLOC_ALLOC
394
#  define PYMEM_ALLOC MALLOC_ALLOC
395
#  define PYOBJ_ALLOC MALLOC_ALLOC
396
#endif
397
398
399
/* the default debug allocators */
400
401
// The actual implementation is further down.
402
403
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
404
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
405
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
406
void _PyMem_DebugRawFree(void *ctx, void *ptr);
407
408
void* _PyMem_DebugMalloc(void *ctx, size_t size);
409
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
410
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
411
void _PyMem_DebugFree(void *ctx, void *p);
412
413
#define PYDBGRAW_ALLOC \
414
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
415
#define PYDBGMEM_ALLOC \
416
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
417
#define PYDBGOBJ_ALLOC \
418
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
419
420
/* default raw allocator (not swappable) */
421
422
void *
423
_PyMem_DefaultRawMalloc(size_t size)
424
224
{
425
#ifdef Py_DEBUG
426
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
427
#else
428
224
    return _PyMem_RawMalloc(NULL, size);
429
224
#endif
430
224
}
431
432
void *
433
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
434
0
{
435
#ifdef Py_DEBUG
436
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
437
#else
438
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
439
0
#endif
440
0
}
441
442
void *
443
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
444
0
{
445
#ifdef Py_DEBUG
446
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
447
#else
448
0
    return _PyMem_RawRealloc(NULL, ptr, size);
449
0
#endif
450
0
}
451
452
void
453
_PyMem_DefaultRawFree(void *ptr)
454
252
{
455
#ifdef Py_DEBUG
456
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
457
#else
458
252
    _PyMem_RawFree(NULL, ptr);
459
252
#endif
460
252
}
461
462
wchar_t*
463
_PyMem_DefaultRawWcsdup(const wchar_t *str)
464
168
{
465
168
    assert(str != NULL);
466
467
168
    size_t len = wcslen(str);
468
168
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
469
0
        return NULL;
470
0
    }
471
472
168
    size_t size = (len + 1) * sizeof(wchar_t);
473
168
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
474
168
    if (str2 == NULL) {
475
0
        return NULL;
476
0
    }
477
478
168
    memcpy(str2, str, size);
479
168
    return str2;
480
168
}
481
482
/* the low-level virtual memory allocator */
483
484
#ifdef WITH_PYMALLOC
485
#  ifdef MS_WINDOWS
486
#    include <windows.h>
487
#  elif defined(HAVE_MMAP)
488
#    include <sys/mman.h>
489
#    ifdef MAP_ANONYMOUS
490
#      define ARENAS_USE_MMAP
491
#    endif
492
#  endif
493
#endif
494
495
void *
496
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
497
260k
{
498
#ifdef MS_WINDOWS
499
    return VirtualAlloc(NULL, size,
500
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
501
#elif defined(ARENAS_USE_MMAP)
502
    void *ptr;
503
260k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
504
260k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
505
260k
    if (ptr == MAP_FAILED)
506
0
        return NULL;
507
260k
    assert(ptr != NULL);
508
260k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
509
260k
    return ptr;
510
#else
511
    return malloc(size);
512
#endif
513
260k
}
514
515
void
516
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
517
#if defined(ARENAS_USE_MMAP)
518
    size_t size
519
#else
520
    size_t Py_UNUSED(size)
521
#endif
522
)
523
260k
{
524
#ifdef MS_WINDOWS
525
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
526
    if (ptr == NULL) {
527
        return;
528
    }
529
    VirtualFree(ptr, 0, MEM_RELEASE);
530
#elif defined(ARENAS_USE_MMAP)
531
    /* Unlike free(), munmap() does not special-case NULL to noop. */
532
260k
    if (ptr == NULL) {
533
0
        return;
534
0
    }
535
260k
    munmap(ptr, size);
536
#else
537
    free(ptr);
538
#endif
539
260k
}
540
541
/*******************************************/
542
/* end low-level allocator implementations */
543
/*******************************************/
544
545
546
56
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
547
1.00G
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
548
2.22G
#define _PyMem (_PyRuntime.allocators.standard.mem)
549
5.89G
#define _PyObject (_PyRuntime.allocators.standard.obj)
550
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
551
1.04M
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
552
553
554
/***************************/
555
/* managing the allocators */
556
/***************************/
557
558
static int
559
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
560
                               PyMemAllocatorEx *old_alloc)
561
0
{
562
0
    if (old_alloc != NULL) {
563
0
        get_allocator_unlocked(domain, old_alloc);
564
0
    }
565
566
567
0
    PyMemAllocatorEx new_alloc;
568
0
    switch(domain)
569
0
    {
570
0
    case PYMEM_DOMAIN_RAW:
571
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
572
0
        break;
573
0
    case PYMEM_DOMAIN_MEM:
574
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
575
0
        break;
576
0
    case PYMEM_DOMAIN_OBJ:
577
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
578
0
        break;
579
0
    default:
580
        /* unknown domain */
581
0
        return -1;
582
0
    }
583
0
    set_allocator_unlocked(domain, &new_alloc);
584
0
    if (debug) {
585
0
        set_up_debug_hooks_domain_unlocked(domain);
586
0
    }
587
0
    return 0;
588
0
}
589
590
591
#ifdef Py_DEBUG
592
static const int pydebug = 1;
593
#else
594
static const int pydebug = 0;
595
#endif
596
597
int
598
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
599
0
{
600
0
    if (name == NULL || *name == '\0') {
601
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
602
           options): use default memory allocators */
603
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
604
0
    }
605
0
    else if (strcmp(name, "default") == 0) {
606
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
607
0
    }
608
0
    else if (strcmp(name, "debug") == 0) {
609
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
610
0
    }
611
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
612
0
    else if (strcmp(name, "pymalloc") == 0) {
613
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
614
0
    }
615
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
616
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
617
0
    }
618
0
#endif
619
0
#ifdef WITH_MIMALLOC
620
0
    else if (strcmp(name, "mimalloc") == 0) {
621
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
622
0
    }
623
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
624
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
625
0
    }
626
0
#endif
627
0
#ifndef Py_GIL_DISABLED
628
0
    else if (strcmp(name, "malloc") == 0) {
629
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
630
0
    }
631
0
    else if (strcmp(name, "malloc_debug") == 0) {
632
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
633
0
    }
634
0
#endif
635
0
    else {
636
        /* unknown allocator */
637
0
        return -1;
638
0
    }
639
0
    return 0;
640
0
}
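The names parsed above map to the PYMEM_ALLOCATOR_* constants, which an embedder can also select through the documented pre-initialization API instead of the PYTHONMALLOC environment variable. A minimal sketch (not part of obmalloc.c):

    #include <Python.h>

    /* Select the debug allocator during pre-initialization, equivalent in
       effect to running with PYTHONMALLOC=debug. */
    static void
    preinit_with_debug_allocator(void)
    {
        PyPreConfig preconfig;
        PyPreConfig_InitPythonConfig(&preconfig);
        preconfig.allocator = PYMEM_ALLOCATOR_DEBUG;
        PyStatus status = Py_PreInitialize(&preconfig);
        if (PyStatus_Exception(status)) {
            Py_ExitStatusException(status);
        }
    }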
641
642
643
static int
644
set_up_allocators_unlocked(PyMemAllocatorName allocator)
645
0
{
646
0
    switch (allocator) {
647
0
    case PYMEM_ALLOCATOR_NOT_SET:
648
        /* do nothing */
649
0
        break;
650
651
0
    case PYMEM_ALLOCATOR_DEFAULT:
652
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
653
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
654
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
655
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
656
0
        break;
657
658
0
    case PYMEM_ALLOCATOR_DEBUG:
659
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
660
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
661
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
662
0
        _PyRuntime.allocators.is_debug_enabled = 1;
663
0
        break;
664
665
0
#ifdef WITH_PYMALLOC
666
0
    case PYMEM_ALLOCATOR_PYMALLOC:
667
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
668
0
    {
669
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
670
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
671
672
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
673
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
674
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
675
676
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
677
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
678
0
        if (is_debug) {
679
0
            set_up_debug_hooks_unlocked();
680
0
        }
681
0
        break;
682
0
    }
683
0
#endif
684
0
#ifdef WITH_MIMALLOC
685
0
    case PYMEM_ALLOCATOR_MIMALLOC:
686
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
687
0
    {
688
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
689
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
690
691
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
692
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
693
694
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
695
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
696
697
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
698
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
699
0
        if (is_debug) {
700
0
            set_up_debug_hooks_unlocked();
701
0
        }
702
703
0
        break;
704
0
    }
705
0
#endif
706
707
0
    case PYMEM_ALLOCATOR_MALLOC:
708
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
709
0
    {
710
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
711
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
712
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
713
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
714
715
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
716
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
717
0
        if (is_debug) {
718
0
            set_up_debug_hooks_unlocked();
719
0
        }
720
0
        break;
721
0
    }
722
723
0
    default:
724
        /* unknown allocator */
725
0
        return -1;
726
0
    }
727
728
0
    return 0;
729
0
}
730
731
int
732
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
733
0
{
734
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
735
0
    int res = set_up_allocators_unlocked(allocator);
736
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
737
0
    return res;
738
0
}
739
740
741
static int
742
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
743
0
{
744
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
745
0
}
746
747
748
static const char*
749
get_current_allocator_name_unlocked(void)
750
0
{
751
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
752
0
#ifdef WITH_PYMALLOC
753
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
754
0
#endif
755
0
#ifdef WITH_MIMALLOC
756
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
757
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
758
0
#endif
759
760
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
761
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
762
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
763
0
    {
764
0
        return "malloc";
765
0
    }
766
0
#ifdef WITH_PYMALLOC
767
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
768
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
769
0
        pymemallocator_eq(&_PyObject, &pymalloc))
770
0
    {
771
0
        return "pymalloc";
772
0
    }
773
0
#endif
774
0
#ifdef WITH_MIMALLOC
775
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
776
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
777
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
778
0
    {
779
0
        return "mimalloc";
780
0
    }
781
0
#endif
782
783
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
784
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
785
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
786
787
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
788
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
789
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
790
0
    {
791
        /* Debug hooks installed */
792
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
793
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
794
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
795
0
        {
796
0
            return "malloc_debug";
797
0
        }
798
0
#ifdef WITH_PYMALLOC
799
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
800
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
801
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
802
0
        {
803
0
            return "pymalloc_debug";
804
0
        }
805
0
#endif
806
0
#ifdef WITH_MIMALLOC
807
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
808
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
809
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
810
0
        {
811
0
            return "mimalloc_debug";
812
0
        }
813
0
#endif
814
0
    }
815
0
    return NULL;
816
0
}
817
818
const char*
819
_PyMem_GetCurrentAllocatorName(void)
820
0
{
821
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
822
0
    const char *name = get_current_allocator_name_unlocked();
823
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
824
0
    return name;
825
0
}
826
827
828
int
829
_PyMem_DebugEnabled(void)
830
0
{
831
0
    return _PyRuntime.allocators.is_debug_enabled;
832
0
}
833
834
#ifdef WITH_PYMALLOC
835
static int
836
_PyMem_PymallocEnabled(void)
837
0
{
838
0
    if (_PyMem_DebugEnabled()) {
839
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
840
0
    }
841
0
    else {
842
0
        return (_PyObject.malloc == _PyObject_Malloc);
843
0
    }
844
0
}
845
846
#ifdef WITH_MIMALLOC
847
static int
848
_PyMem_MimallocEnabled(void)
849
0
{
850
#ifdef Py_GIL_DISABLED
851
    return 1;
852
#else
853
0
    if (_PyMem_DebugEnabled()) {
854
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
855
0
    }
856
0
    else {
857
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
858
0
    }
859
0
#endif
860
0
}
861
#endif  // WITH_MIMALLOC
862
863
#endif  // WITH_PYMALLOC
864
865
866
static void
867
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
868
0
{
869
0
    PyMemAllocatorEx alloc;
870
871
0
    if (domain == PYMEM_DOMAIN_RAW) {
872
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
873
0
            return;
874
0
        }
875
876
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
877
0
        alloc.ctx = &_PyMem_Debug.raw;
878
0
        alloc.malloc = _PyMem_DebugRawMalloc;
879
0
        alloc.calloc = _PyMem_DebugRawCalloc;
880
0
        alloc.realloc = _PyMem_DebugRawRealloc;
881
0
        alloc.free = _PyMem_DebugRawFree;
882
0
        set_allocator_unlocked(domain, &alloc);
883
0
    }
884
0
    else if (domain == PYMEM_DOMAIN_MEM) {
885
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
886
0
            return;
887
0
        }
888
889
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
890
0
        alloc.ctx = &_PyMem_Debug.mem;
891
0
        alloc.malloc = _PyMem_DebugMalloc;
892
0
        alloc.calloc = _PyMem_DebugCalloc;
893
0
        alloc.realloc = _PyMem_DebugRealloc;
894
0
        alloc.free = _PyMem_DebugFree;
895
0
        set_allocator_unlocked(domain, &alloc);
896
0
    }
897
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
898
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
899
0
            return;
900
0
        }
901
902
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
903
0
        alloc.ctx = &_PyMem_Debug.obj;
904
0
        alloc.malloc = _PyMem_DebugMalloc;
905
0
        alloc.calloc = _PyMem_DebugCalloc;
906
0
        alloc.realloc = _PyMem_DebugRealloc;
907
0
        alloc.free = _PyMem_DebugFree;
908
0
        set_allocator_unlocked(domain, &alloc);
909
0
    }
910
0
}
911
912
913
static void
914
set_up_debug_hooks_unlocked(void)
915
0
{
916
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
917
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
918
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
919
0
    _PyRuntime.allocators.is_debug_enabled = 1;
920
0
}
921
922
void
923
PyMem_SetupDebugHooks(void)
924
0
{
925
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
926
0
    set_up_debug_hooks_unlocked();
927
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
928
0
}
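Because PyMem_SetupDebugHooks() is public, an embedding application can install the same debug wrappers that PYTHONMALLOC=debug selects. A minimal sketch, assuming a plain embedder; the embedded script is illustrative:

    #include <Python.h>

    int
    main(void)
    {
        /* Wrap the current allocators of all three domains with the debug
           hooks, then run the interpreter as usual. */
        PyMem_SetupDebugHooks();
        Py_Initialize();
        PyRun_SimpleString("x = bytearray(16)");
        Py_Finalize();
        return 0;
    }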
929
930
static void
931
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
932
28
{
933
28
    switch(domain)
934
28
    {
935
28
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
936
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
937
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
938
0
    default:
939
        /* unknown domain: set all attributes to NULL */
940
0
        allocator->ctx = NULL;
941
0
        allocator->malloc = NULL;
942
0
        allocator->calloc = NULL;
943
0
        allocator->realloc = NULL;
944
0
        allocator->free = NULL;
945
28
    }
946
28
}
947
948
static void
949
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
950
0
{
951
0
    switch(domain)
952
0
    {
953
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
954
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
955
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
956
    /* ignore unknown domain */
957
0
    }
958
0
}
959
960
void
961
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
962
28
{
963
28
    PyMutex_Lock(&ALLOCATORS_MUTEX);
964
28
    get_allocator_unlocked(domain, allocator);
965
28
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
966
28
}
967
968
void
969
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
970
0
{
971
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
972
0
    set_allocator_unlocked(domain, allocator);
973
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
974
0
}
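PyMem_GetAllocator() and PyMem_SetAllocator() above are the public entry points for swapping a domain's allocator. A minimal sketch, assuming an embedder that wraps the PYMEM_DOMAIN_RAW allocator with a counting hook before the interpreter starts allocating (the hook functions and counter are illustrative, not part of CPython):

    #include <Python.h>

    static PyMemAllocatorEx orig_raw;   /* saved default raw allocator */
    static size_t raw_calls;            /* illustrative counter */

    static void *
    hook_malloc(void *ctx, size_t size)
    {
        (void)ctx;
        raw_calls++;
        return orig_raw.malloc(orig_raw.ctx, size);
    }

    static void *
    hook_calloc(void *ctx, size_t nelem, size_t elsize)
    {
        (void)ctx;
        raw_calls++;
        return orig_raw.calloc(orig_raw.ctx, nelem, elsize);
    }

    static void *
    hook_realloc(void *ctx, void *ptr, size_t size)
    {
        (void)ctx;
        return orig_raw.realloc(orig_raw.ctx, ptr, size);
    }

    static void
    hook_free(void *ctx, void *ptr)
    {
        (void)ctx;
        orig_raw.free(orig_raw.ctx, ptr);
    }

    static void
    install_raw_hook(void)
    {
        /* Save the current allocator, then install the wrapper on top. */
        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &orig_raw);
        PyMemAllocatorEx hook = {NULL, hook_malloc, hook_calloc,
                                 hook_realloc, hook_free};
        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &hook);
    }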
975
976
void
977
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
978
0
{
979
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
980
0
    *allocator = _PyObject_Arena;
981
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
982
0
}
983
984
void
985
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
986
0
{
987
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
988
0
    _PyObject_Arena = *allocator;
989
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
990
0
}
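PyObject_GetArenaAllocator() and PyObject_SetArenaAllocator() let an embedder replace the arena allocator consumed by _PyObject_VirtualAlloc()/_PyObject_VirtualFree() below. A minimal sketch that logs arena traffic while delegating to the saved allocator (the logging and function names are illustrative):

    #include <Python.h>
    #include <stdio.h>

    static PyObjectArenaAllocator orig_arena;   /* saved default arena allocator */

    static void *
    arena_alloc_hook(void *ctx, size_t size)
    {
        (void)ctx;
        fprintf(stderr, "arena alloc: %zu bytes\n", size);
        return orig_arena.alloc(orig_arena.ctx, size);
    }

    static void
    arena_free_hook(void *ctx, void *ptr, size_t size)
    {
        (void)ctx;
        fprintf(stderr, "arena free: %zu bytes\n", size);
        orig_arena.free(orig_arena.ctx, ptr, size);
    }

    static void
    install_arena_hook(void)
    {
        PyObject_GetArenaAllocator(&orig_arena);
        PyObjectArenaAllocator hook = {NULL, arena_alloc_hook, arena_free_hook};
        PyObject_SetArenaAllocator(&hook);
    }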
991
992
993
/* Note that there is a possible, but very unlikely, race in any place
994
 * below where we call one of the allocator functions.  We access two
995
 * fields in each case:  "malloc", etc. and "ctx".
996
 *
997
 * It is unlikely that the allocator will be changed while one of those
998
 * calls is happening, much less in that very narrow window.
999
 * Furthermore, the likelihood of a race is drastically reduced by the
1000
 * fact that the allocator may not be changed after runtime init
1001
 * (except with a wrapper).
1002
 *
1003
 * With the above in mind, we currently don't worry about locking
1004
 * around these uses of the runtime-global allocators state. */
1005
1006
1007
/*************************/
1008
/* the "arena" allocator */
1009
/*************************/
1010
1011
void *
1012
_PyObject_VirtualAlloc(size_t size)
1013
255k
{
1014
255k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
1015
255k
}
1016
1017
void
1018
_PyObject_VirtualFree(void *obj, size_t size)
1019
255k
{
1020
255k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
1021
255k
}
1022
1023
1024
/***********************/
1025
/* the "raw" allocator */
1026
/***********************/
1027
1028
void *
1029
PyMem_RawMalloc(size_t size)
1030
247M
{
1031
    /*
1032
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1033
     * Most python internals blindly use a signed Py_ssize_t to track
1034
     * things without checking for overflows or negatives.
1035
     * As size_t is unsigned, checking for size < 0 is not required.
1036
     */
1037
247M
    if (size > (size_t)PY_SSIZE_T_MAX)
1038
0
        return NULL;
1039
247M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1040
247M
}
1041
1042
void *
1043
PyMem_RawCalloc(size_t nelem, size_t elsize)
1044
116k
{
1045
    /* see PyMem_RawMalloc() */
1046
116k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1047
0
        return NULL;
1048
116k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1049
116k
}
1050
1051
void*
1052
PyMem_RawRealloc(void *ptr, size_t new_size)
1053
8.59M
{
1054
    /* see PyMem_RawMalloc() */
1055
8.59M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1056
0
        return NULL;
1057
8.59M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1058
8.59M
}
1059
1060
void PyMem_RawFree(void *ptr)
1061
247M
{
1062
247M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1063
247M
}
1064
1065
1066
/***********************/
1067
/* the "mem" allocator */
1068
/***********************/
1069
1070
void *
1071
PyMem_Malloc(size_t size)
1072
249M
{
1073
    /* see PyMem_RawMalloc() */
1074
249M
    if (size > (size_t)PY_SSIZE_T_MAX)
1075
0
        return NULL;
1076
249M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1077
249M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1078
249M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1079
249M
    OBJECT_STAT_INC(allocations);
1080
249M
    return _PyMem.malloc(_PyMem.ctx, size);
1081
249M
}
1082
1083
void *
1084
PyMem_Calloc(size_t nelem, size_t elsize)
1085
51.0M
{
1086
    /* see PyMem_RawMalloc() */
1087
51.0M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1088
0
        return NULL;
1089
51.0M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1090
51.0M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1091
51.0M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1092
51.0M
    OBJECT_STAT_INC(allocations);
1093
51.0M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1094
51.0M
}
1095
1096
void *
1097
PyMem_Realloc(void *ptr, size_t new_size)
1098
274M
{
1099
    /* see PyMem_RawMalloc() */
1100
274M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1101
0
        return NULL;
1102
274M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1103
274M
}
1104
1105
void
1106
PyMem_Free(void *ptr)
1107
539M
{
1108
539M
    OBJECT_STAT_INC(frees);
1109
539M
    _PyMem.free(_PyMem.ctx, ptr);
1110
539M
}
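For context, a sketch of typical extension-module use of the "mem" domain wrappers above; PyMem_Malloc() must be called with the GIL (an attached thread state) held, and the helper's explicit overflow check mirrors the PY_SSIZE_T_MAX guard PyMem_Calloc() applies internally. The helper itself is illustrative:

    #include <Python.h>
    #include <string.h>

    /* Illustrative helper: allocate and zero a table of n ints. */
    static int *
    make_table(size_t n)
    {
        if (n > (size_t)PY_SSIZE_T_MAX / sizeof(int)) {
            return NULL;                       /* multiplication would overflow */
        }
        int *table = PyMem_Malloc(n * sizeof(int));
        if (table == NULL) {
            return NULL;
        }
        memset(table, 0, n * sizeof(int));
        return table;                          /* caller frees with PyMem_Free() */
    }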
1111
1112
1113
/***************************/
1114
/* pymem utility functions */
1115
/***************************/
1116
1117
wchar_t*
1118
_PyMem_RawWcsdup(const wchar_t *str)
1119
1.12k
{
1120
1.12k
    assert(str != NULL);
1121
1122
1.12k
    size_t len = wcslen(str);
1123
1.12k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1124
0
        return NULL;
1125
0
    }
1126
1127
1.12k
    size_t size = (len + 1) * sizeof(wchar_t);
1128
1.12k
    wchar_t *str2 = PyMem_RawMalloc(size);
1129
1.12k
    if (str2 == NULL) {
1130
0
        return NULL;
1131
0
    }
1132
1133
1.12k
    memcpy(str2, str, size);
1134
1.12k
    return str2;
1135
1.12k
}
1136
1137
char *
1138
_PyMem_RawStrdup(const char *str)
1139
84
{
1140
84
    assert(str != NULL);
1141
84
    size_t size = strlen(str) + 1;
1142
84
    char *copy = PyMem_RawMalloc(size);
1143
84
    if (copy == NULL) {
1144
0
        return NULL;
1145
0
    }
1146
84
    memcpy(copy, str, size);
1147
84
    return copy;
1148
84
}
1149
1150
char *
1151
_PyMem_Strdup(const char *str)
1152
0
{
1153
0
    assert(str != NULL);
1154
0
    size_t size = strlen(str) + 1;
1155
0
    char *copy = PyMem_Malloc(size);
1156
0
    if (copy == NULL) {
1157
0
        return NULL;
1158
0
    }
1159
0
    memcpy(copy, str, size);
1160
0
    return copy;
1161
0
}
1162
1163
/***********************************************/
1164
/* Delayed freeing support for Py_GIL_DISABLED */
1165
/***********************************************/
1166
1167
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1168
#define WORK_ITEMS_PER_CHUNK 254
1169
1170
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1171
struct _mem_work_item {
1172
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1173
    uint64_t qsbr_goal;
1174
};
1175
1176
// A fixed-size buffer of pointers to be freed
1177
struct _mem_work_chunk {
1178
    // Linked list node of chunks in queue
1179
    struct llist_node node;
1180
1181
    Py_ssize_t rd_idx;  // index of next item to read
1182
    Py_ssize_t wr_idx;  // index of next item to write
1183
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1184
};
1185
1186
static int
1187
work_item_should_decref(uintptr_t ptr)
1188
0
{
1189
0
    return ptr & 0x01;
1190
0
}
1191
1192
static void
1193
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1194
0
{
1195
0
    if (work_item_should_decref(ptr)) {
1196
0
        PyObject *obj = (PyObject *)(ptr - 1);
1197
#ifdef Py_GIL_DISABLED
1198
        if (cb == NULL) {
1199
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1200
            Py_DECREF(obj);
1201
            return;
1202
        }
1203
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1204
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1205
        if (refcount == 0) {
1206
            cb(obj, state);
1207
        }
1208
#else
1209
0
        Py_DECREF(obj);
1210
0
#endif
1211
0
    }
1212
0
    else {
1213
0
        PyMem_Free((void *)ptr);
1214
0
    }
1215
0
}
1216
1217
1218
#ifdef Py_GIL_DISABLED
1219
1220
// For deferred advance on free: the number of deferred items before advancing
1221
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1222
// want to process a chunk before it overflows.
1223
#define QSBR_DEFERRED_LIMIT 127
1224
1225
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1226
// helps limit memory usage due to QSBR delaying frees too long.
1227
#define QSBR_FREE_MEM_LIMIT 1024*1024
1228
1229
// Return true if the global write sequence should be advanced for a deferred
1230
// memory free.
1231
static bool
1232
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1233
{
1234
    if (size > QSBR_FREE_MEM_LIMIT) {
1235
        qsbr->deferred_count = 0;
1236
        qsbr->deferred_memory = 0;
1237
        qsbr->should_process = true;
1238
        return true;
1239
    }
1240
    qsbr->deferred_count++;
1241
    qsbr->deferred_memory += size;
1242
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1243
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1244
        qsbr->deferred_count = 0;
1245
        qsbr->deferred_memory = 0;
1246
        qsbr->should_process = true;
1247
        return true;
1248
    }
1249
    return false;
1250
}
1251
#endif
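Worked example of the two limits above: freeing many 64-byte blocks through free_delayed() trips QSBR_DEFERRED_LIMIT on the 128th deferred item (about 8 KiB of deferred memory), while a single deferred free larger than QSBR_FREE_MEM_LIMIT (1 MiB) advances the write sequence immediately and marks the queue for processing.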
1252
1253
static void
1254
free_delayed(uintptr_t ptr, size_t size)
1255
0
{
1256
0
#ifndef Py_GIL_DISABLED
1257
0
    free_work_item(ptr, NULL, NULL);
1258
#else
1259
    PyInterpreterState *interp = _PyInterpreterState_GET();
1260
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1261
        interp->stoptheworld.world_stopped)
1262
    {
1263
        // Free immediately during interpreter shutdown or if the world is
1264
        // stopped.
1265
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1266
        free_work_item(ptr, NULL, NULL);
1267
        return;
1268
    }
1269
1270
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1271
    struct llist_node *head = &tstate->mem_free_queue;
1272
1273
    struct _mem_work_chunk *buf = NULL;
1274
    if (!llist_empty(head)) {
1275
        // Try to re-use the last buffer
1276
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1277
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1278
            // already full
1279
            buf = NULL;
1280
        }
1281
    }
1282
1283
    if (buf == NULL) {
1284
        buf = PyMem_Calloc(1, sizeof(*buf));
1285
        if (buf != NULL) {
1286
            llist_insert_tail(head, &buf->node);
1287
        }
1288
    }
1289
1290
    if (buf == NULL) {
1291
        // failed to allocate a buffer, free immediately
1292
        PyObject *to_dealloc = NULL;
1293
        _PyEval_StopTheWorld(tstate->base.interp);
1294
        if (work_item_should_decref(ptr)) {
1295
            PyObject *obj = (PyObject *)(ptr - 1);
1296
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1297
            if (refcount == 0) {
1298
                to_dealloc = obj;
1299
            }
1300
        }
1301
        else {
1302
            PyMem_Free((void *)ptr);
1303
        }
1304
        _PyEval_StartTheWorld(tstate->base.interp);
1305
        if (to_dealloc != NULL) {
1306
            _Py_Dealloc(to_dealloc);
1307
        }
1308
        return;
1309
    }
1310
1311
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1312
    uint64_t seq;
1313
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1314
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1315
    }
1316
    else {
1317
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1318
    }
1319
    buf->array[buf->wr_idx].ptr = ptr;
1320
    buf->array[buf->wr_idx].qsbr_goal = seq;
1321
    buf->wr_idx++;
1322
1323
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1324
        // Normally the processing of delayed items is done from the eval
1325
        // breaker.  Processing here is a safety measure to ensure too much
1326
        // work does not accumulate.
1327
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1328
    }
1329
#endif
1330
0
}
1331
1332
void
1333
_PyMem_FreeDelayed(void *ptr, size_t size)
1334
0
{
1335
0
    assert(!((uintptr_t)ptr & 0x01));
1336
0
    if (ptr != NULL) {
1337
0
        free_delayed((uintptr_t)ptr, size);
1338
0
    }
1339
0
}
1340
1341
#ifdef Py_GIL_DISABLED
1342
void
1343
_PyObject_XDecRefDelayed(PyObject *ptr)
1344
{
1345
    assert(!((uintptr_t)ptr & 0x01));
1346
    if (ptr != NULL) {
1347
        // We use 0 as the size since we don't have an easy way to know the
1348
        // actual size.  If we are freeing many objects, the write sequence
1349
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1350
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1351
    }
1352
}
1353
#endif
1354
1355
#ifdef Py_GIL_DISABLED
1356
void
1357
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1358
{
1359
    PyObject *old = *ptr;
1360
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1361
    if (old == NULL) {
1362
        return;
1363
    }
1364
    if (!_Py_IsImmortal(old)) {
1365
         _PyObject_XDecRefDelayed(old);
1366
    }
1367
}
1368
#endif
1369
1370
static struct _mem_work_chunk *
1371
work_queue_first(struct llist_node *head)
1372
0
{
1373
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1374
0
}
1375
1376
static void
1377
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1378
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1379
0
{
1380
0
    while (!llist_empty(head)) {
1381
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1382
1383
0
        if (buf->rd_idx < buf->wr_idx) {
1384
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1385
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1386
0
                return;
1387
0
            }
1388
1389
0
            buf->rd_idx++;
1390
            // NB: free_work_item may re-enter or execute arbitrary code
1391
0
            free_work_item(item->ptr, cb, state);
1392
0
            continue;
1393
0
        }
1394
1395
0
        assert(buf->rd_idx == buf->wr_idx);
1396
0
        if (keep_empty && buf->node.next == head) {
1397
            // Keep the last buffer in the queue to reduce re-allocations
1398
0
            buf->rd_idx = buf->wr_idx = 0;
1399
0
            return;
1400
0
        }
1401
1402
0
        llist_remove(&buf->node);
1403
0
        PyMem_Free(buf);
1404
0
    }
1405
0
}
1406
1407
static void
1408
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1409
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1410
                     void *state)
1411
0
{
1412
0
    assert(PyMutex_IsLocked(&queue->mutex));
1413
0
    process_queue(&queue->head, tstate, false, cb, state);
1414
1415
0
    int more_work = !llist_empty(&queue->head);
1416
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1417
0
}
1418
1419
static void
1420
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1421
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1422
                           void *state)
1423
0
{
1424
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1425
0
        return;
1426
0
    }
1427
1428
    // Try to acquire the lock, but don't block if it's already held.
1429
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1430
0
        process_interp_queue(queue, tstate, cb, state);
1431
0
        PyMutex_Unlock(&queue->mutex);
1432
0
    }
1433
0
}
1434
1435
void
1436
_PyMem_ProcessDelayed(PyThreadState *tstate)
1437
0
{
1438
0
    PyInterpreterState *interp = tstate->interp;
1439
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1440
1441
0
    tstate_impl->qsbr->should_process = false;
1442
1443
    // Process thread-local work
1444
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1445
1446
    // Process shared interpreter work
1447
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1448
0
}
1449
1450
void
1451
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1452
0
{
1453
0
    PyInterpreterState *interp = tstate->interp;
1454
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1455
1456
    // Process thread-local work
1457
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1458
1459
    // Process shared interpreter work
1460
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1461
0
}
1462
1463
void
1464
_PyMem_AbandonDelayed(PyThreadState *tstate)
1465
0
{
1466
0
    PyInterpreterState *interp = tstate->interp;
1467
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1468
1469
0
    if (llist_empty(queue)) {
1470
0
        return;
1471
0
    }
1472
1473
    // Check if the queue contains one empty buffer
1474
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1475
0
    if (buf->rd_idx == buf->wr_idx) {
1476
0
        llist_remove(&buf->node);
1477
0
        PyMem_Free(buf);
1478
0
        assert(llist_empty(queue));
1479
0
        return;
1480
0
    }
1481
1482
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1483
1484
    // Merge the thread's work queue into the interpreter's work queue.
1485
0
    llist_concat(&interp->mem_free_queue.head, queue);
1486
1487
    // Process the merged queue now (see gh-130794).
1488
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1489
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1490
1491
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1492
1493
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1494
0
}
1495
1496
void
1497
_PyMem_FiniDelayed(PyInterpreterState *interp)
1498
0
{
1499
0
    struct llist_node *head = &interp->mem_free_queue.head;
1500
0
    while (!llist_empty(head)) {
1501
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1502
1503
0
        if (buf->rd_idx < buf->wr_idx) {
1504
            // Free the remaining items immediately. There should be no other
1505
            // threads accessing the memory at this point during shutdown.
1506
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1507
0
            buf->rd_idx++;
1508
            // NB: free_work_item may re-enter or execute arbitrary code
1509
0
            free_work_item(item->ptr, NULL, NULL);
1510
0
            continue;
1511
0
        }
1512
1513
0
        llist_remove(&buf->node);
1514
0
        PyMem_Free(buf);
1515
0
    }
1516
0
}
1517
1518
/**************************/
1519
/* the "object" allocator */
1520
/**************************/
1521
1522
void *
1523
PyObject_Malloc(size_t size)
1524
1.43G
{
1525
    /* see PyMem_RawMalloc() */
1526
1.43G
    if (size > (size_t)PY_SSIZE_T_MAX)
1527
0
        return NULL;
1528
1.43G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1529
1.43G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1530
1.43G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1531
1.43G
    OBJECT_STAT_INC(allocations);
1532
1.43G
    return _PyObject.malloc(_PyObject.ctx, size);
1533
1.43G
}
1534
1535
void *
1536
PyObject_Calloc(size_t nelem, size_t elsize)
1537
0
{
1538
    /* see PyMem_RawMalloc() */
1539
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1540
0
        return NULL;
1541
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1542
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1543
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1544
0
    OBJECT_STAT_INC(allocations);
1545
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1546
0
}
1547
1548
void *
1549
PyObject_Realloc(void *ptr, size_t new_size)
1550
71.0M
{
1551
    /* see PyMem_RawMalloc() */
1552
71.0M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1553
0
        return NULL;
1554
71.0M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1555
71.0M
}
1556
1557
void
1558
PyObject_Free(void *ptr)
1559
1.43G
{
1560
1.43G
    OBJECT_STAT_INC(frees);
1561
1.43G
    _PyObject.free(_PyObject.ctx, ptr);
1562
1.43G
}
1563
1564
1565
/* Use __builtin_expect() where available to reduce overhead of
1566
   the valgrind checks */
1567
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1568
12.1G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1569
5.50G
#  define LIKELY(value) __builtin_expect((value), 1)
1570
#else
1571
#  define UNLIKELY(value) (value)
1572
#  define LIKELY(value) (value)
1573
#endif
1574
1575
#ifdef WITH_PYMALLOC
1576
1577
#ifdef WITH_VALGRIND
1578
#include <valgrind/valgrind.h>
1579
1580
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1581
static int running_on_valgrind = -1;
1582
#endif
1583
1584
typedef struct _obmalloc_state OMState;
1585
1586
/* obmalloc state for main interpreter and shared by all interpreters without
1587
 * their own obmalloc state.  By not explicitly initializing this structure, it
1588
 * will be allocated in the BSS which is a small performance win.  The radix
1589
 * tree arrays are fairly large but are sparsely used.  */
1590
static struct _obmalloc_state obmalloc_state_main;
1591
static bool obmalloc_state_initialized;
1592
1593
static inline int
1594
has_own_state(PyInterpreterState *interp)
1595
0
{
1596
0
    return (_Py_IsMainInterpreter(interp) ||
1597
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1598
0
            _Py_IsMainInterpreterFinalizing(interp));
1599
0
}
1600
1601
static inline OMState *
1602
get_state(void)
1603
4.21G
{
1604
4.21G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1605
4.21G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1606
4.21G
    return interp->obmalloc;
1607
4.21G
}
1608
1609
// These macros all rely on a local "state" variable.
1610
1.97G
#define usedpools (state->pools.used)
1611
2.76M
#define allarenas (state->mgmt.arenas)
1612
278
#define maxarenas (state->mgmt.maxarenas)
1613
28.2k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1614
23.6M
#define usable_arenas (state->mgmt.usable_arenas)
1615
16.4M
#define nfp2lasta (state->mgmt.nfp2lasta)
1616
17.7k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1617
5.79k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1618
6.50k
#define narenas_highwater (state->mgmt.narenas_highwater)
1619
493M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1620
1621
#ifdef WITH_MIMALLOC
1622
static bool count_blocks(
1623
    const mi_heap_t* heap, const mi_heap_area_t* area,
1624
    void* block, size_t block_size, void* allocated_blocks)
1625
0
{
1626
0
    *(size_t *)allocated_blocks += area->used;
1627
0
    return 1;
1628
0
}
1629
1630
static Py_ssize_t
1631
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1632
0
{
1633
0
    size_t allocated_blocks = 0;
1634
#ifdef Py_GIL_DISABLED
1635
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1636
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1637
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1638
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1639
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1640
        }
1641
    }
1642
1643
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1644
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1645
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1646
                                        &allocated_blocks);
1647
    }
1648
#else
1649
    // TODO(sgross): this only counts the current thread's blocks.
1650
0
    mi_heap_t *heap = mi_heap_get_default();
1651
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1652
0
#endif
1653
0
    return allocated_blocks;
1654
0
}
1655
#endif
1656
1657
Py_ssize_t
1658
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1659
0
{
1660
0
#ifdef WITH_MIMALLOC
1661
0
    if (_PyMem_MimallocEnabled()) {
1662
0
        return get_mimalloc_allocated_blocks(interp);
1663
0
    }
1664
0
#endif
1665
1666
#ifdef Py_DEBUG
1667
    assert(has_own_state(interp));
1668
#else
1669
0
    if (!has_own_state(interp)) {
1670
0
        _Py_FatalErrorFunc(__func__,
1671
0
                           "the interpreter doesn't have its own allocator");
1672
0
    }
1673
0
#endif
1674
0
    OMState *state = interp->obmalloc;
1675
1676
0
    if (state == NULL) {
1677
0
        return 0;
1678
0
    }
1679
1680
0
    Py_ssize_t n = raw_allocated_blocks;
1681
    /* add up allocated blocks for used pools */
1682
0
    for (uint i = 0; i < maxarenas; ++i) {
1683
        /* Skip arenas which are not allocated. */
1684
0
        if (allarenas[i].address == 0) {
1685
0
            continue;
1686
0
        }
1687
1688
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1689
1690
        /* visit every pool in the arena */
1691
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1692
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1693
0
            poolp p = (poolp)base;
1694
0
            n += p->ref.count;
1695
0
        }
1696
0
    }
1697
0
    return n;
1698
0
}
1699
1700
static void free_obmalloc_arenas(PyInterpreterState *interp);
1701
1702
void
1703
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1704
0
{
1705
0
#ifdef WITH_MIMALLOC
1706
0
    if (_PyMem_MimallocEnabled()) {
1707
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1708
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1709
0
        return;
1710
0
    }
1711
0
#endif
1712
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1713
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1714
0
        assert(has_own_state(interp) || leaked == 0);
1715
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1716
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1717
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1718
            // then some of the memory allocated by obmalloc has not been
1719
            // freed.  It might be safe to free the arenas in that case but
1720
            // it's possible that extension modules are still using that
1721
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1722
            // should be a warning when this happens.  It should be possible to
1723
            // use a tool like "-fsanitize=address" to track down these leaks.
1724
0
            free_obmalloc_arenas(interp);
1725
0
        }
1726
0
    }
1727
0
}
1728
1729
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1730
1731
/* We preserve the number of blocks leaked during runtime finalization,
1732
   so they can be reported if the runtime is initialized again. */
1733
// XXX We don't lose any information by dropping this,
1734
// so we should consider doing so.
1735
static Py_ssize_t last_final_leaks = 0;
1736
1737
void
1738
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1739
0
{
1740
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1741
0
    runtime->obmalloc.interpreter_leaks = 0;
1742
0
}
1743
1744
static Py_ssize_t
1745
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1746
0
{
1747
0
    Py_ssize_t total = 0;
1748
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1749
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1750
0
        if (interp == NULL) {
1751
            /* We are at the very end of runtime finalization.
1752
               We can't rely on finalizing->interp since that thread
1753
               state is probably already freed, so we don't worry
1754
               about it. */
1755
0
            assert(PyInterpreterState_Head() == NULL);
1756
0
        }
1757
0
        else {
1758
0
            assert(interp != NULL);
1759
            /* It is probably the last interpreter but not necessarily. */
1760
0
            assert(PyInterpreterState_Next(interp) == NULL);
1761
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1762
0
        }
1763
0
    }
1764
0
    else {
1765
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1766
0
        HEAD_LOCK(runtime);
1767
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1768
0
        assert(interp != NULL);
1769
#ifdef Py_DEBUG
1770
        int got_main = 0;
1771
#endif
1772
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1773
#ifdef Py_DEBUG
1774
            if (_Py_IsMainInterpreter(interp)) {
1775
                assert(!got_main);
1776
                got_main = 1;
1777
                assert(has_own_state(interp));
1778
            }
1779
#endif
1780
0
            if (has_own_state(interp)) {
1781
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1782
0
            }
1783
0
        }
1784
0
        HEAD_UNLOCK(runtime);
1785
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1786
#ifdef Py_DEBUG
1787
        assert(got_main);
1788
#endif
1789
0
    }
1790
0
    total += runtime->obmalloc.interpreter_leaks;
1791
0
    total += last_final_leaks;
1792
0
    return total;
1793
0
}
1794
1795
Py_ssize_t
1796
_Py_GetGlobalAllocatedBlocks(void)
1797
0
{
1798
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1799
0
}
1800
1801
#if WITH_PYMALLOC_RADIX_TREE
1802
/*==========================================================================*/
1803
/* radix tree for tracking arena usage. */
1804
1805
6.21G
#define arena_map_root (state->usage.arena_map_root)
1806
#ifdef USE_INTERIOR_NODES
1807
28
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1808
28
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1809
#endif
1810
1811
/* Return a pointer to a bottom tree node; return NULL if it doesn't exist or
1812
 * it cannot be created */
1813
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1814
arena_map_get(OMState *state, pymem_block *p, int create)
1815
2.15G
{
1816
2.15G
#ifdef USE_INTERIOR_NODES
1817
    /* sanity check that IGNORE_BITS is correct */
1818
2.15G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1819
2.15G
    int i1 = MAP_TOP_INDEX(p);
1820
2.15G
    if (arena_map_root.ptrs[i1] == NULL) {
1821
28
        if (!create) {
1822
0
            return NULL;
1823
0
        }
1824
28
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1825
28
        if (n == NULL) {
1826
0
            return NULL;
1827
0
        }
1828
28
        arena_map_root.ptrs[i1] = n;
1829
28
        arena_map_mid_count++;
1830
28
    }
1831
2.15G
    int i2 = MAP_MID_INDEX(p);
1832
2.15G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1833
255M
        if (!create) {
1834
255M
            return NULL;
1835
255M
        }
1836
28
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1837
28
        if (n == NULL) {
1838
0
            return NULL;
1839
0
        }
1840
28
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1841
28
        arena_map_bot_count++;
1842
28
    }
1843
1.90G
    return arena_map_root.ptrs[i1]->ptrs[i2];
1844
#else
1845
    return &arena_map_root;
1846
#endif
1847
2.15G
}
1848
1849
1850
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1851
 * away 24 bits of the address.  That reduces the space requirement of
1852
 * the tree compared to similar radix tree page-map schemes.  In
1853
 * exchange for slashing the space requirement, it needs more
1854
 * computation to check an address.
1855
 *
1856
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1857
 * explain in decimal so let's say that the arena size is 100 bytes.
1858
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1859
 * pointer address is inside an actual arena, we have to check two ideal
1860
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1861
 * 300.  In the rare case that an arena is aligned in the ideal way
1862
 * (e.g. base address of arena is 200) then we only have to check one
1863
 * ideal address.
1864
 *
1865
 * The tree nodes for 200 and 300 both store the address of arena.
1866
 * There are two cases: the arena starts at a lower ideal arena and
1867
 * extends to this one, or the arena starts in this arena and extends to
1868
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1869
 * these two cases.
1870
 */
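To make the tail_hi/tail_lo test concrete, here is a small standalone sketch using the decimal analogy from the comment above (arena "size" 100, one arena covering [357, 457)). The struct and constants are illustrative stand-ins, not obmalloc's real arena_map_bot_t:

#include <stdio.h>

struct node { int tail_lo, tail_hi; };   /* stand-in for one radix-tree entry */

static int
in_arena(const struct node *n, int addr)
{
    int tail = addr % 100;               /* analogue of addr & ARENA_SIZE_MASK */
    return (tail < n->tail_lo) || (tail >= n->tail_hi && n->tail_hi != 0);
}

int
main(void)
{
    /* Marking an arena based at 357 stores the tail 57 in two nodes: */
    struct node node300 = { .tail_lo = 0,  .tail_hi = 57 };  /* arena starts in [300,400) */
    struct node node400 = { .tail_lo = 57, .tail_hi = 0  };  /* arena spills into [400,500) */
    printf("%d %d %d\n",
           in_arena(&node300, 380),      /* 1: past the arena base */
           in_arena(&node400, 420),      /* 1: before the arena end */
           in_arena(&node400, 470));     /* 0: beyond the arena end */
    return 0;
}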
1871
1872
1873
/* mark or unmark addresses covered by arena */
1874
static int
1875
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1876
11.2k
{
1877
    /* sanity check that IGNORE_BITS is correct */
1878
11.2k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1879
11.2k
    arena_map_bot_t *n_hi = arena_map_get(
1880
11.2k
            state, (pymem_block *)arena_base, is_used);
1881
11.2k
    if (n_hi == NULL) {
1882
0
        assert(is_used); /* otherwise node should already exist */
1883
0
        return 0; /* failed to allocate space for node */
1884
0
    }
1885
11.2k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1886
11.2k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1887
11.2k
    if (tail == 0) {
1888
        /* is ideal arena address */
1889
73
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1890
73
    }
1891
11.1k
    else {
1892
        /* arena_base address is not ideal (aligned to arena size) and
1893
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1894
         * for the next arena.  Note that it might be in different MAP_TOP
1895
         * and MAP_MID nodes as well so we need to call arena_map_get()
1896
         * again (do the full tree traversal).
1897
         */
1898
11.1k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1899
11.1k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1900
        /* If arena_base is a legit arena address, so is arena_base_next - 1
1901
         * (last address in arena).  If arena_base_next overflows then it
1902
         * must overflow to 0.  However, that would mean arena_base was
1903
         * "ideal" and we should not be in this case. */
1904
11.1k
        assert(arena_base < arena_base_next);
1905
11.1k
        arena_map_bot_t *n_lo = arena_map_get(
1906
11.1k
                state, (pymem_block *)arena_base_next, is_used);
1907
11.1k
        if (n_lo == NULL) {
1908
0
            assert(is_used); /* otherwise should already exist */
1909
0
            n_hi->arenas[i3].tail_hi = 0;
1910
0
            return 0; /* failed to allocate space for node */
1911
0
        }
1912
11.1k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
1913
11.1k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1914
11.1k
    }
1915
11.2k
    return 1;
1916
11.2k
}
1917
1918
/* Return true if 'p' is a pointer inside an obmalloc arena.
1919
 * _PyObject_Free() calls this so it needs to be very fast. */
1920
static int
1921
arena_map_is_used(OMState *state, pymem_block *p)
1922
2.15G
{
1923
2.15G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
1924
2.15G
    if (n == NULL) {
1925
255M
        return 0;
1926
255M
    }
1927
1.90G
    int i3 = MAP_BOT_INDEX(p);
1928
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1929
1.90G
    int32_t hi = n->arenas[i3].tail_hi;
1930
1.90G
    int32_t lo = n->arenas[i3].tail_lo;
1931
1.90G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1932
1.90G
    return (tail < lo) || (tail >= hi && hi != 0);
1933
2.15G
}
1934
1935
/* end of radix tree logic */
1936
/*==========================================================================*/
1937
#endif /* WITH_PYMALLOC_RADIX_TREE */
1938
1939
1940
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
1941
 * allocate a new arena, and return the address of an arena_object
1942
 * describing the new arena.  It's expected that the caller will set
1943
 * `usable_arenas` to the return value.
1944
 */
1945
static struct arena_object*
1946
new_arena(OMState *state)
1947
5.79k
{
1948
5.79k
    struct arena_object* arenaobj;
1949
5.79k
    uint excess;        /* number of bytes above pool alignment */
1950
5.79k
    void *address;
1951
1952
5.79k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
1953
5.79k
    if (debug_stats == -1) {
1954
28
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1955
28
        debug_stats = (opt != NULL && *opt != '\0');
1956
28
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
1957
28
    }
1958
5.79k
    if (debug_stats) {
1959
0
        _PyObject_DebugMallocStats(stderr);
1960
0
    }
1961
1962
5.79k
    if (unused_arena_objects == NULL) {
1963
51
        uint i;
1964
51
        uint numarenas;
1965
51
        size_t nbytes;
1966
1967
        /* Double the number of arena objects on each allocation.
1968
         * Note that it's possible for `numarenas` to overflow.
1969
         */
1970
51
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1971
51
        if (numarenas <= maxarenas)
1972
0
            return NULL;                /* overflow */
1973
#if SIZEOF_SIZE_T <= SIZEOF_INT
1974
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
1975
            return NULL;                /* overflow */
1976
#endif
1977
51
        nbytes = numarenas * sizeof(*allarenas);
1978
51
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
1979
51
        if (arenaobj == NULL)
1980
0
            return NULL;
1981
51
        allarenas = arenaobj;
1982
1983
        /* We might need to fix pointers that were copied.  However,
1984
         * new_arena only gets called when all the pages in the
1985
         * previous arenas are full.  Thus, there are *no* pointers
1986
         * into the old array. Thus, we don't have to worry about
1987
         * invalid pointers.  Just to be sure, some asserts:
1988
         */
1989
51
        assert(usable_arenas == NULL);
1990
51
        assert(unused_arena_objects == NULL);
1991
1992
        /* Put the new arenas on the unused_arena_objects list. */
1993
1.23k
        for (i = maxarenas; i < numarenas; ++i) {
1994
1.18k
            allarenas[i].address = 0;              /* mark as unassociated */
1995
1.18k
            allarenas[i].nextarena = i < numarenas - 1 ?
1996
1.18k
                                        &allarenas[i+1] : NULL;
1997
1.18k
        }
1998
1999
        /* Update globals. */
2000
51
        unused_arena_objects = &allarenas[maxarenas];
2001
51
        maxarenas = numarenas;
2002
51
    }
2003
2004
    /* Take the next available arena object off the head of the list. */
2005
5.79k
    assert(unused_arena_objects != NULL);
2006
5.79k
    arenaobj = unused_arena_objects;
2007
5.79k
    unused_arena_objects = arenaobj->nextarena;
2008
5.79k
    assert(arenaobj->address == 0);
2009
5.79k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2010
5.79k
#if WITH_PYMALLOC_RADIX_TREE
2011
5.79k
    if (address != NULL) {
2012
5.79k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2013
            /* marking arena in radix tree failed, abort */
2014
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2015
0
            address = NULL;
2016
0
        }
2017
5.79k
    }
2018
5.79k
#endif
2019
5.79k
    if (address == NULL) {
2020
        /* The allocation failed: return NULL after putting the
2021
         * arenaobj back.
2022
         */
2023
0
        arenaobj->nextarena = unused_arena_objects;
2024
0
        unused_arena_objects = arenaobj;
2025
0
        return NULL;
2026
0
    }
2027
5.79k
    arenaobj->address = (uintptr_t)address;
2028
2029
5.79k
    ++narenas_currently_allocated;
2030
5.79k
    ++ntimes_arena_allocated;
2031
5.79k
    if (narenas_currently_allocated > narenas_highwater)
2032
711
        narenas_highwater = narenas_currently_allocated;
2033
5.79k
    arenaobj->freepools = NULL;
2034
    /* pool_address <- first pool-aligned address in the arena
2035
       nfreepools <- number of whole pools that fit after alignment */
2036
5.79k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2037
5.79k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2038
5.79k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2039
5.79k
    if (excess != 0) {
2040
4.26k
        --arenaobj->nfreepools;
2041
4.26k
        arenaobj->pool_address += POOL_SIZE - excess;
2042
4.26k
    }
2043
5.79k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2044
2045
5.79k
    return arenaobj;
2046
5.79k
}
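A standalone sketch of the pool-alignment arithmetic at the end of new_arena() above; the 16 KiB pool size, the arena base address, and the pool count are hypothetical stand-ins (the real POOL_SIZE and MAX_POOLS_IN_ARENA are build dependent):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const uintptr_t pool_size = 16 * 1024;         /* stand-in for POOL_SIZE */
    const uintptr_t pool_mask = pool_size - 1;     /* stand-in for POOL_SIZE_MASK */
    uintptr_t base = 0x12345000;                   /* hypothetical arena base */

    uintptr_t excess = base & pool_mask;           /* bytes above pool alignment */
    uintptr_t first_pool = base;
    unsigned nfreepools = 64;                      /* pretend MAX_POOLS_IN_ARENA */
    if (excess != 0) {
        --nfreepools;                              /* the partial leading pool is lost */
        first_pool += pool_size - excess;
    }
    printf("excess=%zu first_pool=%p nfreepools=%u\n",
           (size_t)excess, (void *)first_pool, nfreepools);
    return 0;
}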
2047
2048
2049
2050
#if WITH_PYMALLOC_RADIX_TREE
2051
/* Return true if and only if P is an address that was allocated by
2052
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2053
 */
2054
static bool
2055
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2056
2.15G
{
2057
2.15G
    return arena_map_is_used(state, p);
2058
2.15G
}
2059
#else
2060
/*
2061
address_in_range(P, POOL)
2062
2063
Return true if and only if P is an address that was allocated by pymalloc.
2064
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2065
(the caller is asked to compute this because the macro expands POOL more than
2066
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2067
variable and pass the latter to the macro; because address_in_range is
2068
called on every alloc/realloc/free, micro-efficiency is important here).
2069
2070
Tricky:  Let B be the arena base address associated with the pool, B =
2071
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2072
2073
    B <= P < B + ARENA_SIZE
2074
2075
Subtracting B throughout, this is true iff
2076
2077
    0 <= P-B < ARENA_SIZE
2078
2079
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2080
2081
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2082
before the first arena has been allocated.  `arenas` is still NULL in that
2083
case.  We rely on maxarenas also being 0 in that case, so that
2084
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2085
into a NULL arenas.
2086
2087
Details:  given P and POOL, the arena_object corresponding to P is AO =
2088
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2089
stores, etc), POOL is the correct address of P's pool, AO.address is the
2090
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2091
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2092
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2093
controls P.
2094
2095
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2096
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2097
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2098
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2099
control P.
2100
2101
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2102
allocated arena, obmalloc controls all the memory in slice AO.address :
2103
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2104
so P doesn't lie in that slice, so the macro correctly reports that P is not
2105
controlled by obmalloc.
2106
2107
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2108
arena_object (one not currently associated with an allocated arena),
2109
AO.address is 0, and the second test in the macro reduces to:
2110
2111
    P < ARENA_SIZE
2112
2113
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2114
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2115
of the test still passes, and the third clause (AO.address != 0) is necessary
2116
to get the correct result:  AO.address is 0 in this case, so the macro
2117
correctly reports that P is not controlled by obmalloc (despite that P lies in
2118
slice AO.address : AO.address + ARENA_SIZE).
2119
2120
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2121
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2122
corresponded to a currently-allocated arena, so the "P is not controlled by
2123
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2124
was impossible.
2125
2126
Note that the logic is excruciating, and reading up possibly uninitialized
2127
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2128
creates problems for some memory debuggers.  The overwhelming advantage is
2129
that this test determines whether an arbitrary address is controlled by
2130
obmalloc in a small constant time, independent of the number of arenas
2131
obmalloc controls.  Since this test is needed at every entry point, it's
2132
extremely desirable that it be this fast.
2133
*/
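The "unsigned arithmetic" shortcut mentioned above can be seen in isolation in this standalone sketch (a small made-up base and size, not a real arena):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const uintptr_t arena_size = 0x100;
    uintptr_t base = 0x1000;
    uintptr_t inside = 0x10ff, below = 0x0fff, above = 0x1100;
    /* p - base underflows to a huge value when p < base, so one unsigned
     * comparison covers both "p >= base" and "p < base + arena_size". */
    printf("%d %d %d\n",
           inside - base < arena_size,   /* 1 */
           below  - base < arena_size,   /* 0: difference wraps to UINTPTR_MAX */
           above  - base < arena_size);  /* 0 */
    return 0;
}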
2134
2135
static bool _Py_NO_SANITIZE_ADDRESS
2136
            _Py_NO_SANITIZE_THREAD
2137
            _Py_NO_SANITIZE_MEMORY
2138
address_in_range(OMState *state, void *p, poolp pool)
2139
{
2140
    // Since address_in_range may be reading from memory which was not allocated
2141
    // by Python, it is important that pool->arenaindex is read only once, as
2142
    // another thread may be concurrently modifying the value without holding
2143
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2144
    // only once.
2145
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2146
    return arenaindex < maxarenas &&
2147
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2148
        allarenas[arenaindex].address != 0;
2149
}
2150
2151
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2152
2153
/*==========================================================================*/
2154
2155
// Called when freelist is exhausted.  Extend the freelist if there is
2156
// space for a block.  Otherwise, remove this pool from usedpools.
2157
static void
2158
pymalloc_pool_extend(poolp pool, uint size)
2159
377M
{
2160
377M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2161
        /* There is room for another block. */
2162
212M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2163
212M
        pool->nextoffset += INDEX2SIZE(size);
2164
212M
        *(pymem_block **)(pool->freeblock) = NULL;
2165
212M
        return;
2166
212M
    }
2167
2168
    /* Pool is full, unlink from used pools. */
2169
164M
    poolp next;
2170
164M
    next = pool->nextpool;
2171
164M
    pool = pool->prevpool;
2172
164M
    next->prevpool = pool;
2173
164M
    pool->nextpool = next;
2174
164M
}
2175
2176
/* Called when pymalloc_alloc cannot allocate a block from usedpools.
2177
 * This function takes a new pool and allocates a block from it.
2178
 */
2179
static void*
2180
allocate_from_new_pool(OMState *state, uint size)
2181
2.41M
{
2182
    /* There isn't a pool of the right size class immediately
2183
     * available:  use a free pool.
2184
     */
2185
2.41M
    if (UNLIKELY(usable_arenas == NULL)) {
2186
        /* No arena has a free pool:  allocate a new arena. */
2187
#ifdef WITH_MEMORY_LIMITS
2188
        if (narenas_currently_allocated >= MAX_ARENAS) {
2189
            return NULL;
2190
        }
2191
#endif
2192
5.79k
        usable_arenas = new_arena(state);
2193
5.79k
        if (usable_arenas == NULL) {
2194
0
            return NULL;
2195
0
        }
2196
5.79k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2197
5.79k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2198
5.79k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2199
5.79k
    }
2200
2.41M
    assert(usable_arenas->address != 0);
2201
2202
    /* This arena already had the smallest nfreepools value, so decreasing
2203
     * nfreepools doesn't change that, and we don't need to rearrange the
2204
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2205
     * we need to remove its arena_object from usable_arenas.
2206
     */
2207
2.41M
    assert(usable_arenas->nfreepools > 0);
2208
2.41M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2209
        /* It's the last of this size, so there won't be any. */
2210
2.40M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2211
2.40M
    }
2212
    /* If any free pools will remain, it will be the new smallest. */
2213
2.41M
    if (usable_arenas->nfreepools > 1) {
2214
2.27M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2215
2.27M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2216
2.27M
    }
2217
2218
    /* Try to get a cached free pool. */
2219
2.41M
    poolp pool = usable_arenas->freepools;
2220
2.41M
    if (LIKELY(pool != NULL)) {
2221
        /* Unlink from cached pools. */
2222
2.05M
        usable_arenas->freepools = pool->nextpool;
2223
2.05M
        usable_arenas->nfreepools--;
2224
2.05M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2225
            /* Wholly allocated:  remove. */
2226
132k
            assert(usable_arenas->freepools == NULL);
2227
132k
            assert(usable_arenas->nextarena == NULL ||
2228
132k
                   usable_arenas->nextarena->prevarena ==
2229
132k
                   usable_arenas);
2230
132k
            usable_arenas = usable_arenas->nextarena;
2231
132k
            if (usable_arenas != NULL) {
2232
129k
                usable_arenas->prevarena = NULL;
2233
129k
                assert(usable_arenas->address != 0);
2234
129k
            }
2235
132k
        }
2236
1.92M
        else {
2237
            /* nfreepools > 0:  it must be that freepools
2238
             * isn't NULL, or that we haven't yet carved
2239
             * off all the arena's pools for the first
2240
             * time.
2241
             */
2242
1.92M
            assert(usable_arenas->freepools != NULL ||
2243
1.92M
                   usable_arenas->pool_address <=
2244
1.92M
                   (pymem_block*)usable_arenas->address +
2245
1.92M
                       ARENA_SIZE - POOL_SIZE);
2246
1.92M
        }
2247
2.05M
    }
2248
359k
    else {
2249
        /* Carve off a new pool. */
2250
359k
        assert(usable_arenas->nfreepools > 0);
2251
359k
        assert(usable_arenas->freepools == NULL);
2252
359k
        pool = (poolp)usable_arenas->pool_address;
2253
359k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2254
359k
                                 ARENA_SIZE - POOL_SIZE);
2255
359k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2256
359k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2257
359k
        pool->szidx = DUMMY_SIZE_IDX;
2258
359k
        usable_arenas->pool_address += POOL_SIZE;
2259
359k
        --usable_arenas->nfreepools;
2260
2261
359k
        if (usable_arenas->nfreepools == 0) {
2262
5.59k
            assert(usable_arenas->nextarena == NULL ||
2263
5.59k
                   usable_arenas->nextarena->prevarena ==
2264
5.59k
                   usable_arenas);
2265
            /* Unlink the arena:  it is completely allocated. */
2266
5.59k
            usable_arenas = usable_arenas->nextarena;
2267
5.59k
            if (usable_arenas != NULL) {
2268
327
                usable_arenas->prevarena = NULL;
2269
327
                assert(usable_arenas->address != 0);
2270
327
            }
2271
5.59k
        }
2272
359k
    }
2273
2274
    /* Frontlink to used pools. */
2275
2.41M
    pymem_block *bp;
2276
2.41M
    poolp next = usedpools[size + size]; /* == prev */
2277
2.41M
    pool->nextpool = next;
2278
2.41M
    pool->prevpool = next;
2279
2.41M
    next->nextpool = pool;
2280
2.41M
    next->prevpool = pool;
2281
2.41M
    pool->ref.count = 1;
2282
2.41M
    if (pool->szidx == size) {
2283
        /* Luckily, this pool last contained blocks
2284
         * of the same size class, so its header
2285
         * and free list are already initialized.
2286
         */
2287
1.41M
        bp = pool->freeblock;
2288
1.41M
        assert(bp != NULL);
2289
1.41M
        pool->freeblock = *(pymem_block **)bp;
2290
1.41M
        return bp;
2291
1.41M
    }
2292
    /*
2293
     * Initialize the pool header, set up the free list to
2294
     * contain just the second block, and return the first
2295
     * block.
2296
     */
2297
997k
    pool->szidx = size;
2298
997k
    size = INDEX2SIZE(size);
2299
997k
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2300
997k
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2301
997k
    pool->maxnextoffset = POOL_SIZE - size;
2302
997k
    pool->freeblock = bp + size;
2303
997k
    *(pymem_block **)(pool->freeblock) = NULL;
2304
997k
    return bp;
2305
2.41M
}
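A standalone sketch of the offsets computed by the fresh-pool branch above; the pool size, header overhead, and size class are hypothetical stand-ins for POOL_SIZE, POOL_OVERHEAD, and INDEX2SIZE(size), which are build dependent:

#include <stdio.h>

int
main(void)
{
    const size_t pool_size = 16 * 1024;       /* stand-in for POOL_SIZE */
    const size_t overhead  = 48;              /* stand-in for POOL_OVERHEAD */
    const size_t size      = 32;              /* one hypothetical size class */

    size_t first_block   = overhead;          /* returned to the caller */
    size_t freeblock     = first_block + size;/* free list holds just this block */
    size_t nextoffset    = overhead + 2 * size;   /* next block to carve off later */
    size_t maxnextoffset = pool_size - size;  /* last offset that still fits */

    printf("first=%zu free=%zu next=%zu max=%zu\n",
           first_block, freeblock, nextoffset, maxnextoffset);
    return 0;
}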
2306
2307
/* pymalloc allocator
2308
2309
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2310
2311
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2312
   requests, on error in the code below (as a last chance to serve the request)
2313
   or when the max memory limit has been reached.
2314
*/
2315
static inline void*
2316
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2317
2.05G
{
2318
#ifdef WITH_VALGRIND
2319
    if (UNLIKELY(running_on_valgrind == -1)) {
2320
        running_on_valgrind = RUNNING_ON_VALGRIND;
2321
    }
2322
    if (UNLIKELY(running_on_valgrind)) {
2323
        return NULL;
2324
    }
2325
#endif
2326
2327
2.05G
    if (UNLIKELY(nbytes == 0)) {
2328
42.4M
        return NULL;
2329
42.4M
    }
2330
2.01G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2331
204M
        return NULL;
2332
204M
    }
2333
2334
1.80G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2335
1.80G
    poolp pool = usedpools[size + size];
2336
1.80G
    pymem_block *bp;
2337
2338
1.80G
    if (LIKELY(pool != pool->nextpool)) {
2339
        /*
2340
         * There is a used pool for this size class.
2341
         * Pick up the head block of its free list.
2342
         */
2343
1.80G
        ++pool->ref.count;
2344
1.80G
        bp = pool->freeblock;
2345
1.80G
        assert(bp != NULL);
2346
2347
1.80G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2348
            // Reached the end of the free list, try to extend it.
2349
377M
            pymalloc_pool_extend(pool, size);
2350
377M
        }
2351
1.80G
    }
2352
2.41M
    else {
2353
        /* There isn't a pool of the right size class immediately
2354
         * available:  use a free pool.
2355
         */
2356
2.41M
        bp = allocate_from_new_pool(state, size);
2357
2.41M
    }
2358
2359
1.80G
    return (void *)bp;
2360
2.01G
}
2361
2362
2363
void *
2364
_PyObject_Malloc(void *ctx, size_t nbytes)
2365
2.00G
{
2366
2.00G
    OMState *state = get_state();
2367
2.00G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2368
2.00G
    if (LIKELY(ptr != NULL)) {
2369
1.75G
        return ptr;
2370
1.75G
    }
2371
2372
246M
    ptr = PyMem_RawMalloc(nbytes);
2373
246M
    if (ptr != NULL) {
2374
246M
        raw_allocated_blocks++;
2375
246M
    }
2376
246M
    return ptr;
2377
2.00G
}
2378
2379
2380
void *
2381
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2382
51.0M
{
2383
51.0M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2384
51.0M
    size_t nbytes = nelem * elsize;
2385
2386
51.0M
    OMState *state = get_state();
2387
51.0M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2388
51.0M
    if (LIKELY(ptr != NULL)) {
2389
50.9M
        memset(ptr, 0, nbytes);
2390
50.9M
        return ptr;
2391
50.9M
    }
2392
2393
115k
    ptr = PyMem_RawCalloc(nelem, elsize);
2394
115k
    if (ptr != NULL) {
2395
115k
        raw_allocated_blocks++;
2396
115k
    }
2397
115k
    return ptr;
2398
51.0M
}
2399
2400
2401
static void
2402
insert_to_usedpool(OMState *state, poolp pool)
2403
164M
{
2404
164M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2405
2406
164M
    uint size = pool->szidx;
2407
164M
    poolp next = usedpools[size + size];
2408
164M
    poolp prev = next->prevpool;
2409
2410
    /* insert pool before next:   prev <-> pool <-> next */
2411
164M
    pool->nextpool = next;
2412
164M
    pool->prevpool = prev;
2413
164M
    next->prevpool = pool;
2414
164M
    prev->nextpool = pool;
2415
164M
}
2416
2417
static void
2418
insert_to_freepool(OMState *state, poolp pool)
2419
2.40M
{
2420
2.40M
    poolp next = pool->nextpool;
2421
2.40M
    poolp prev = pool->prevpool;
2422
2.40M
    next->prevpool = prev;
2423
2.40M
    prev->nextpool = next;
2424
2425
    /* Link the pool to freepools.  This is a singly-linked
2426
     * list, and pool->prevpool isn't used there.
2427
     */
2428
2.40M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2429
2.40M
    pool->nextpool = ao->freepools;
2430
2.40M
    ao->freepools = pool;
2431
2.40M
    uint nf = ao->nfreepools;
2432
    /* If this is the rightmost arena with this number of free pools,
2433
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2434
     * are no arenas in usable_arenas with that value.
2435
     */
2436
2.40M
    struct arena_object* lastnf = nfp2lasta[nf];
2437
2.40M
    assert((nf == 0 && lastnf == NULL) ||
2438
2.40M
           (nf > 0 &&
2439
2.40M
            lastnf != NULL &&
2440
2.40M
            lastnf->nfreepools == nf &&
2441
2.40M
            (lastnf->nextarena == NULL ||
2442
2.40M
             nf < lastnf->nextarena->nfreepools)));
2443
2.40M
    if (lastnf == ao) {  /* it is the rightmost */
2444
2.21M
        struct arena_object* p = ao->prevarena;
2445
2.21M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2446
2.21M
    }
2447
2.40M
    ao->nfreepools = ++nf;
2448
2449
    /* All the rest is arena management.  We just freed
2450
     * a pool, and there are 4 cases for arena mgmt:
2451
     * 1. If all the pools are free, return the arena to
2452
     *    the system free().  Except if this is the last
2453
     *    arena in the list, keep it to avoid thrashing:
2454
     *    keeping one wholly free arena in the list avoids
2455
     *    pathological cases where a simple loop would
2456
     *    otherwise provoke needing to allocate and free an
2457
     *    arena on every iteration.  See bpo-37257.
2458
     * 2. If this is the only free pool in the arena,
2459
     *    add the arena back to the `usable_arenas` list.
2460
     * 3. If the "next" arena has a smaller count of free
2461
     *    pools, we have to "slide this arena right" to
2462
     *    restore that usable_arenas is sorted in order of
2463
     *    nfreepools.
2464
     * 4. Else there's nothing more to do.
2465
     */
2466
2.40M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2467
        /* Case 1.  First unlink ao from usable_arenas.
2468
         */
2469
5.43k
        assert(ao->prevarena == NULL ||
2470
5.43k
               ao->prevarena->address != 0);
2471
5.43k
        assert(ao ->nextarena == NULL ||
2472
5.43k
               ao->nextarena->address != 0);
2473
2474
        /* Fix the pointer in the prevarena, or the
2475
         * usable_arenas pointer.
2476
         */
2477
5.43k
        if (ao->prevarena == NULL) {
2478
1.62k
            usable_arenas = ao->nextarena;
2479
1.62k
            assert(usable_arenas == NULL ||
2480
1.62k
                   usable_arenas->address != 0);
2481
1.62k
        }
2482
3.81k
        else {
2483
3.81k
            assert(ao->prevarena->nextarena == ao);
2484
3.81k
            ao->prevarena->nextarena =
2485
3.81k
                ao->nextarena;
2486
3.81k
        }
2487
        /* Fix the pointer in the nextarena. */
2488
5.43k
        if (ao->nextarena != NULL) {
2489
5.43k
            assert(ao->nextarena->prevarena == ao);
2490
5.43k
            ao->nextarena->prevarena =
2491
5.43k
                ao->prevarena;
2492
5.43k
        }
2493
        /* Record that this arena_object slot is
2494
         * available to be reused.
2495
         */
2496
5.43k
        ao->nextarena = unused_arena_objects;
2497
5.43k
        unused_arena_objects = ao;
2498
2499
5.43k
#if WITH_PYMALLOC_RADIX_TREE
2500
        /* mark arena region as not under control of obmalloc */
2501
5.43k
        arena_map_mark_used(state, ao->address, 0);
2502
5.43k
#endif
2503
2504
        /* Free the entire arena. */
2505
5.43k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2506
5.43k
                             (void *)ao->address, ARENA_SIZE);
2507
5.43k
        ao->address = 0;                        /* mark unassociated */
2508
5.43k
        --narenas_currently_allocated;
2509
2510
5.43k
        return;
2511
5.43k
    }
2512
2513
2.39M
    if (nf == 1) {
2514
        /* Case 2.  Put ao at the head of
2515
         * usable_arenas.  Note that because
2516
         * ao->nfreepools was 0 before, ao isn't
2517
         * currently on the usable_arenas list.
2518
         */
2519
137k
        ao->nextarena = usable_arenas;
2520
137k
        ao->prevarena = NULL;
2521
137k
        if (usable_arenas)
2522
135k
            usable_arenas->prevarena = ao;
2523
137k
        usable_arenas = ao;
2524
137k
        assert(usable_arenas->address != 0);
2525
137k
        if (nfp2lasta[1] == NULL) {
2526
133k
            nfp2lasta[1] = ao;
2527
133k
        }
2528
2529
137k
        return;
2530
137k
    }
2531
2532
    /* If this arena is now out of order, we need to keep
2533
     * the list sorted.  The list is kept sorted so that
2534
     * the "most full" arenas are used first, which allows
2535
     * the nearly empty arenas to be completely freed.  In
2536
     * a few un-scientific tests, it seems like this
2537
     * approach allowed a lot more memory to be freed.
2538
     */
2539
    /* If this is the only arena with nf, record that. */
2540
2.25M
    if (nfp2lasta[nf] == NULL) {
2541
2.19M
        nfp2lasta[nf] = ao;
2542
2.19M
    } /* else the rightmost with nf doesn't change */
2543
    /* If this was the rightmost of the old size, it remains in place. */
2544
2.25M
    if (ao == lastnf) {
2545
        /* Case 4.  Nothing to do. */
2546
2.21M
        return;
2547
2.21M
    }
2548
    /* If ao were the only arena in the list, the last block would have
2549
     * gotten us out.
2550
     */
2551
2.25M
    assert(ao->nextarena != NULL);
2552
2553
    /* Case 3:  We have to move the arena towards the end of the list,
2554
     * because it has more free pools than the arena to its right.  It needs
2555
     * to move to follow lastnf.
2556
     * First unlink ao from usable_arenas.
2557
     */
2558
43.2k
    if (ao->prevarena != NULL) {
2559
        /* ao isn't at the head of the list */
2560
30.5k
        assert(ao->prevarena->nextarena == ao);
2561
30.5k
        ao->prevarena->nextarena = ao->nextarena;
2562
30.5k
    }
2563
12.7k
    else {
2564
        /* ao is at the head of the list */
2565
12.7k
        assert(usable_arenas == ao);
2566
12.7k
        usable_arenas = ao->nextarena;
2567
12.7k
    }
2568
43.2k
    ao->nextarena->prevarena = ao->prevarena;
2569
    /* And insert after lastnf. */
2570
43.2k
    ao->prevarena = lastnf;
2571
43.2k
    ao->nextarena = lastnf->nextarena;
2572
43.2k
    if (ao->nextarena != NULL) {
2573
41.9k
        ao->nextarena->prevarena = ao;
2574
41.9k
    }
2575
43.2k
    lastnf->nextarena = ao;
2576
    /* Verify that the swaps worked. */
2577
43.2k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2578
43.2k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2579
43.2k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2580
43.2k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2581
43.2k
           || ao->prevarena->nextarena == ao);
2582
43.2k
}
2583
2584
/* Free a memory block allocated by pymalloc_alloc().
2585
   Return 1 if it was freed.
2586
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2587
static inline int
2588
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2589
2.05G
{
2590
2.05G
    assert(p != NULL);
2591
2592
#ifdef WITH_VALGRIND
2593
    if (UNLIKELY(running_on_valgrind > 0)) {
2594
        return 0;
2595
    }
2596
#endif
2597
2598
2.05G
    poolp pool = POOL_ADDR(p);
2599
2.05G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2600
246M
        return 0;
2601
246M
    }
2602
    /* We allocated this address. */
2603
2604
    /* Link p to the start of the pool's freeblock list.  Since
2605
     * the pool had at least the p block outstanding, the pool
2606
     * wasn't empty (so it's already in a usedpools[] list, or
2607
     * was full and is in no list -- it's not in the freeblocks
2608
     * list in any case).
2609
     */
2610
2.05G
    assert(pool->ref.count > 0);            /* else it was empty */
2611
1.80G
    pymem_block *lastfree = pool->freeblock;
2612
1.80G
    *(pymem_block **)p = lastfree;
2613
1.80G
    pool->freeblock = (pymem_block *)p;
2614
1.80G
    pool->ref.count--;
2615
2616
1.80G
    if (UNLIKELY(lastfree == NULL)) {
2617
        /* Pool was full, so doesn't currently live in any list:
2618
         * link it to the front of the appropriate usedpools[] list.
2619
         * This mimics LRU pool usage for new allocations and
2620
         * targets optimal filling when several pools contain
2621
         * blocks of the same size class.
2622
         */
2623
164M
        insert_to_usedpool(state, pool);
2624
164M
        return 1;
2625
164M
    }
2626
2627
    /* freeblock wasn't NULL, so the pool wasn't full,
2628
     * and the pool is in a usedpools[] list.
2629
     */
2630
1.63G
    if (LIKELY(pool->ref.count != 0)) {
2631
        /* pool isn't empty:  leave it in usedpools */
2632
1.63G
        return 1;
2633
1.63G
    }
2634
2635
    /* Pool is now empty:  unlink from usedpools, and
2636
     * link to the front of freepools.  This ensures that
2637
     * previously freed pools will be allocated later
2638
     * (being not referenced, they are perhaps paged out).
2639
     */
2640
2.40M
    insert_to_freepool(state, pool);
2641
2.40M
    return 1;
2642
1.63G
}
2643
2644
2645
void
2646
_PyObject_Free(void *ctx, void *p)
2647
2.05G
{
2648
    /* PyObject_Free(NULL) has no effect */
2649
2.05G
    if (p == NULL) {
2650
2.20M
        return;
2651
2.20M
    }
2652
2653
2.05G
    OMState *state = get_state();
2654
2.05G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2655
        /* pymalloc didn't allocate this address */
2656
246M
        PyMem_RawFree(p);
2657
246M
        raw_allocated_blocks--;
2658
246M
    }
2659
2.05G
}
2660
2661
2662
/* pymalloc realloc.
2663
2664
   If nbytes==0, then as the Python docs promise, we do not treat this like
2665
   free(p), and return a non-NULL result.
2666
2667
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2668
   newptr_p.
2669
2670
   Return 0 if pymalloc didn't allocate p. */
2671
static int
2672
pymalloc_realloc(OMState *state, void *ctx,
2673
                 void **newptr_p, void *p, size_t nbytes)
2674
107M
{
2675
107M
    void *bp;
2676
107M
    poolp pool;
2677
107M
    size_t size;
2678
2679
107M
    assert(p != NULL);
2680
2681
#ifdef WITH_VALGRIND
2682
    /* Treat running_on_valgrind == -1 the same as 0 */
2683
    if (UNLIKELY(running_on_valgrind > 0)) {
2684
        return 0;
2685
    }
2686
#endif
2687
2688
107M
    pool = POOL_ADDR(p);
2689
107M
    if (!address_in_range(state, p, pool)) {
2690
        /* pymalloc is not managing this block.
2691
2692
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2693
           over this block.  However, if we do, we need to copy the valid data
2694
           from the C-managed block to one of our blocks, and there's no
2695
           portable way to know how much of the memory space starting at p is
2696
           valid.
2697
2698
           As bug 1185883 pointed out the hard way, it's possible that the
2699
           C-managed block is "at the end" of allocated VM space, so that a
2700
           memory fault can occur if we try to copy nbytes bytes starting at p.
2701
           Instead we punt: let C continue to manage this block. */
2702
8.59M
        return 0;
2703
8.59M
    }
2704
2705
    /* pymalloc is in charge of this block */
2706
99.2M
    size = INDEX2SIZE(pool->szidx);
2707
99.2M
    if (nbytes <= size) {
2708
        /* The block is staying the same or shrinking.
2709
2710
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2711
           block to a smaller size class, but it wastes memory not to copy it.
2712
2713
           The compromise here is to copy on shrink only if at least 25% of
2714
           size can be shaved off. */
2715
68.8M
        if (4 * nbytes > 3 * size) {
2716
            /* It's the same, or shrinking and new/old > 3/4. */
2717
22.2M
            *newptr_p = p;
2718
22.2M
            return 1;
2719
22.2M
        }
2720
46.5M
        size = nbytes;
2721
46.5M
    }
2722
2723
77.0M
    bp = _PyObject_Malloc(ctx, nbytes);
2724
77.0M
    if (bp != NULL) {
2725
77.0M
        memcpy(bp, p, size);
2726
77.0M
        _PyObject_Free(ctx, p);
2727
77.0M
    }
2728
77.0M
    *newptr_p = bp;
2729
77.0M
    return 1;
2730
99.2M
}
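A standalone sketch of the copy-on-shrink threshold used in pymalloc_realloc() above (the block sizes are made up for the example):

#include <stdio.h>

static int
keeps_block_in_place(size_t nbytes, size_t size)
{
    /* Same test as above: keep the block if the new request still uses
     * more than 3/4 of the current size class. */
    return 4 * nbytes > 3 * size;
}

int
main(void)
{
    printf("%d\n", keeps_block_in_place(400, 512));  /* 1: 1600 > 1536, stay put */
    printf("%d\n", keeps_block_in_place(300, 512));  /* 0: copy to a smaller class */
    return 0;
}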
2731
2732
2733
void *
2734
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2735
345M
{
2736
345M
    void *ptr2;
2737
2738
345M
    if (ptr == NULL) {
2739
237M
        return _PyObject_Malloc(ctx, nbytes);
2740
237M
    }
2741
2742
107M
    OMState *state = get_state();
2743
107M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2744
99.2M
        return ptr2;
2745
99.2M
    }
2746
2747
8.59M
    return PyMem_RawRealloc(ptr, nbytes);
2748
107M
}
2749
2750
#else   /* ! WITH_PYMALLOC */
2751
2752
/*==========================================================================*/
2753
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2754
 * only be used by extensions that are compiled with pymalloc enabled. */
2755
2756
Py_ssize_t
2757
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2758
{
2759
    return 0;
2760
}
2761
2762
Py_ssize_t
2763
_Py_GetGlobalAllocatedBlocks(void)
2764
{
2765
    return 0;
2766
}
2767
2768
void
2769
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2770
{
2771
    return;
2772
}
2773
2774
void
2775
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2776
{
2777
    return;
2778
}
2779
2780
#endif /* WITH_PYMALLOC */
2781
2782
2783
/*==========================================================================*/
2784
/* A cross-platform debugging allocator.  This doesn't manage memory directly;
2785
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2786
 */
2787
2788
/* Uncomment this define to add the "serialno" field */
2789
/* #define PYMEM_DEBUG_SERIALNO */
2790
2791
#ifdef PYMEM_DEBUG_SERIALNO
2792
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2793
2794
/* serialno is always incremented via calling this routine.  The point is
2795
 * to supply a single place to set a breakpoint.
2796
 */
2797
static void
2798
bumpserialno(void)
2799
{
2800
    ++serialno;
2801
}
2802
#endif
2803
2804
0
#define SST SIZEOF_SIZE_T
2805
2806
#ifdef PYMEM_DEBUG_SERIALNO
2807
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2808
#else
2809
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2810
#endif
2811
2812
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2813
static size_t
2814
read_size_t(const void *p)
2815
0
{
2816
0
    const uint8_t *q = (const uint8_t *)p;
2817
0
    size_t result = *q++;
2818
0
    int i;
2819
2820
0
    for (i = SST; --i > 0; ++q)
2821
0
        result = (result << 8) | *q;
2822
0
    return result;
2823
0
}
2824
2825
/* Write n as a big-endian size_t, MSB at address p, LSB at
2826
 * p + sizeof(size_t) - 1.
2827
 */
2828
static void
2829
write_size_t(void *p, size_t n)
2830
0
{
2831
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2832
0
    int i;
2833
2834
0
    for (i = SST; --i >= 0; --q) {
2835
0
        *q = (uint8_t)(n & 0xff);
2836
0
        n >>= 8;
2837
0
    }
2838
0
}
2839
2840
static void
2841
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2842
               bool is_alloc)
2843
0
{
2844
#ifdef Py_GIL_DISABLED
2845
    if (api->api_id == 'o') {
2846
        // Don't overwrite the first few bytes of a PyObject allocation in the
2847
        // free-threaded build
2848
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2849
        size_t debug_offset;
2850
        if (is_alloc) {
2851
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2852
        }
2853
        else {
2854
            char *alloc = (char *)data - 2*SST;  // start of the allocation
2855
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
2856
        }
2857
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
2858
        if (debug_offset < nbytes) {
2859
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
2860
        }
2861
        return;
2862
    }
2863
#endif
2864
0
    memset(data, c, nbytes);
2865
0
}
2866
2867
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2868
   fills them with useful stuff, here calling the underlying malloc's result p:
2869
2870
p[0: S]
2871
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2872
    to read in a memory dump).
2873
p[S]
2874
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2875
p[S+1: 2*S]
2876
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and reads.
2877
p[2*S: 2*S+n]
2878
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2879
    Used to catch references to uninitialized memory.
2880
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2881
    handled the request itself.
2882
p[2*S+n: 2*S+n+S]
2883
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and reads.
2884
p[2*S+n+S: 2*S+n+2*S]
2885
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2886
    and _PyMem_DebugRealloc.
2887
    This is a big-endian size_t.
2888
    If "bad memory" is detected later, the serial number gives an
2889
    excellent way to set a breakpoint on the next run, to capture the
2890
    instant at which this block was passed out.
2891
2892
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2893
for 3 * S extra bytes, and omits the last serialno field.
2894
*/
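A standalone sketch that prints the byte ranges described above, relative to the base pointer p returned by the underlying allocator; it assumes the default build where PYMEM_DEBUG_SERIALNO is not defined, so the trailing serial-number field is absent:

#include <stdio.h>

int
main(void)
{
    const size_t S = sizeof(size_t);
    const size_t n = 24;                 /* hypothetical requested size */
    printf("requested size (big-endian) : p[0 .. %zu)\n", S);
    printf("API id byte                 : p[%zu]\n", S);
    printf("leading forbidden bytes     : p[%zu .. %zu)\n", S + 1, 2 * S);
    printf("user data (pointer returned): p[%zu .. %zu)\n", 2 * S, 2 * S + n);
    printf("trailing forbidden bytes    : p[%zu .. %zu)\n", 2 * S + n, 2 * S + n + S);
    return 0;
}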
2895
2896
static void *
2897
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2898
0
{
2899
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2900
0
    uint8_t *p;           /* base address of malloc'ed pad block */
2901
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2902
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2903
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2904
2905
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2906
        /* integer overflow: can't represent total as a Py_ssize_t */
2907
0
        return NULL;
2908
0
    }
2909
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2910
2911
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2912
                ^--- p    ^--- data   ^--- tail
2913
       S: nbytes stored as size_t
2914
       I: API identifier (1 byte)
2915
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2916
       C: Clean bytes used later to store actual data
2917
       N: Serial number stored as size_t
2918
2919
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2920
       is omitted. */
2921
2922
0
    if (use_calloc) {
2923
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2924
0
    }
2925
0
    else {
2926
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2927
0
    }
2928
0
    if (p == NULL) {
2929
0
        return NULL;
2930
0
    }
2931
0
    data = p + 2*SST;
2932
2933
#ifdef PYMEM_DEBUG_SERIALNO
2934
    bumpserialno();
2935
#endif
2936
2937
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2938
0
    write_size_t(p, nbytes);
2939
0
    p[SST] = (uint8_t)api->api_id;
2940
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2941
2942
0
    if (nbytes > 0 && !use_calloc) {
2943
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
2944
0
    }
2945
2946
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2947
0
    tail = data + nbytes;
2948
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2949
#ifdef PYMEM_DEBUG_SERIALNO
2950
    write_size_t(tail + SST, serialno);
2951
#endif
2952
2953
0
    return data;
2954
0
}
2955
2956
void *
2957
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2958
0
{
2959
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2960
0
}
2961
2962
void *
2963
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2964
0
{
2965
0
    size_t nbytes;
2966
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2967
0
    nbytes = nelem * elsize;
2968
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2969
0
}
2970
2971
2972
/* The debug free first checks the 2*SST bytes on each end for sanity (in
2973
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
2974
   Then fills the original bytes with PYMEM_DEADBYTE.
2975
   Then calls the underlying free.
2976
*/
2977
void
2978
_PyMem_DebugRawFree(void *ctx, void *p)
2979
0
{
2980
    /* PyMem_Free(NULL) has no effect */
2981
0
    if (p == NULL) {
2982
0
        return;
2983
0
    }
2984
2985
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2986
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2987
0
    size_t nbytes;
2988
2989
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2990
0
    nbytes = read_size_t(q);
2991
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
2992
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
2993
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
2994
0
    api->alloc.free(api->alloc.ctx, q);
2995
0
}
2996
2997
2998
void *
2999
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3000
0
{
3001
0
    if (p == NULL) {
3002
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3003
0
    }
3004
3005
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3006
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3007
0
    uint8_t *data;        /* pointer to data bytes */
3008
0
    uint8_t *r;
3009
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3010
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3011
0
    size_t original_nbytes;
3012
0
#define ERASED_SIZE 64
3013
3014
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3015
3016
0
    data = (uint8_t *)p;
3017
0
    head = data - 2*SST;
3018
0
    original_nbytes = read_size_t(head);
3019
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3020
        /* integer overflow: can't represent total as a Py_ssize_t */
3021
0
        return NULL;
3022
0
    }
3023
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3024
3025
0
    tail = data + original_nbytes;
3026
#ifdef PYMEM_DEBUG_SERIALNO
3027
    size_t block_serialno = read_size_t(tail + SST);
3028
#endif
3029
0
#ifndef Py_GIL_DISABLED
3030
    /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
3031
       ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
3032
     */
3033
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3034
0
    if (original_nbytes <= sizeof(save)) {
3035
0
        memcpy(save, data, original_nbytes);
3036
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3037
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3038
0
    }
3039
0
    else {
3040
0
        memcpy(save, data, ERASED_SIZE);
3041
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3042
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3043
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3044
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3045
0
    }
3046
0
#endif
3047
3048
    /* Resize and add decorations. */
3049
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3050
0
    if (r == NULL) {
3051
        /* if realloc() failed: rewrite header and footer which have
3052
           just been erased */
3053
0
        nbytes = original_nbytes;
3054
0
    }
3055
0
    else {
3056
0
        head = r;
3057
#ifdef PYMEM_DEBUG_SERIALNO
3058
        bumpserialno();
3059
        block_serialno = serialno;
3060
#endif
3061
0
    }
3062
0
    data = head + 2*SST;
3063
3064
0
    write_size_t(head, nbytes);
3065
0
    head[SST] = (uint8_t)api->api_id;
3066
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3067
3068
0
    tail = data + nbytes;
3069
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3070
#ifdef PYMEM_DEBUG_SERIALNO
3071
    write_size_t(tail + SST, block_serialno);
3072
#endif
3073
3074
0
#ifndef Py_GIL_DISABLED
3075
    /* Restore saved bytes. */
3076
0
    if (original_nbytes <= sizeof(save)) {
3077
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3078
0
    }
3079
0
    else {
3080
0
        size_t i = original_nbytes - ERASED_SIZE;
3081
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3082
0
        if (nbytes > i) {
3083
0
            memcpy(data + i, &save[ERASED_SIZE],
3084
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3085
0
        }
3086
0
    }
3087
0
#endif
3088
3089
0
    if (r == NULL) {
3090
0
        return NULL;
3091
0
    }
3092
3093
0
    if (nbytes > original_nbytes) {
3094
        /* growing: mark new extra memory clean */
3095
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3096
0
               nbytes - original_nbytes);
3097
0
    }
3098
3099
0
    return data;
3100
0
}
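One detail worth calling out: when the underlying realloc fails, the code above rewrites the just-erased header and trailer and restores the saved data bytes before returning NULL, so the caller's original block remains valid. That preserves the usual caller-side contract, sketched here with a hypothetical helper:

#include <Python.h>

/* Hypothetical helper: grow *bufp to new_size without leaking it on failure.
 * If PyMem_Realloc() fails, *bufp still points at the old, intact block. */
static int
grow_buffer(char **bufp, size_t new_size)
{
    char *tmp = PyMem_Realloc(*bufp, new_size);
    if (tmp == NULL) {
        return -1;              /* old block untouched; caller may free or retry */
    }
    *bufp = tmp;
    return 0;
}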
3101
3102
static inline void
3103
_PyMem_DebugCheckGIL(const char *func)
3104
0
{
3105
0
    PyThreadState *tstate = _PyThreadState_GET();
3106
0
    if (tstate == NULL) {
3107
0
#ifndef Py_GIL_DISABLED
3108
0
        _Py_FatalErrorFunc(func,
3109
0
                           "Python memory allocator called "
3110
0
                           "without holding the GIL");
3111
#else
3112
        _Py_FatalErrorFunc(func,
3113
                           "Python memory allocator called "
3114
                           "without an active thread state. "
3115
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3116
#endif
3117
0
    }
3118
0
}
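_PyMem_DebugCheckGIL() turns "allocator called without an attached thread state" into an immediate fatal error rather than silent corruption. A hedged sketch of the rule it enforces in extension code: keep PyMem_* calls outside Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS, or switch to the PyMem_Raw* family, which has no such requirement:

#include <Python.h>

/* Hypothetical helper illustrating the constraint checked above. */
static void *
alloc_after_gil_free_work(size_t n)
{
    void *buf;
    Py_BEGIN_ALLOW_THREADS
    /* ... long-running work without the GIL; calling PyMem_Malloc() here
     *     would fatal-error under the debug hooks (e.g. PYTHONMALLOC=debug) ... */
    Py_END_ALLOW_THREADS
    buf = PyMem_Malloc(n);      /* OK: the thread state is attached again */
    return buf;
}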
3119
3120
void *
3121
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3122
0
{
3123
0
    _PyMem_DebugCheckGIL(__func__);
3124
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3125
0
}
3126
3127
void *
3128
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3129
0
{
3130
0
    _PyMem_DebugCheckGIL(__func__);
3131
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3132
0
}
3133
3134
3135
void
3136
_PyMem_DebugFree(void *ctx, void *ptr)
3137
0
{
3138
0
    _PyMem_DebugCheckGIL(__func__);
3139
0
    _PyMem_DebugRawFree(ctx, ptr);
3140
0
}
3141
3142
3143
void *
3144
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3145
0
{
3146
0
    _PyMem_DebugCheckGIL(__func__);
3147
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3148
0
}
3149
3150
/* Check the forbidden bytes on both ends of the memory allocated for p.
3151
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3152
 * and call Py_FatalError to kill the program.
3153
 * The API id is also checked.
3154
 */
3155
static void
3156
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3157
0
{
3158
0
    assert(p != NULL);
3159
3160
0
    const uint8_t *q = (const uint8_t *)p;
3161
0
    size_t nbytes;
3162
0
    const uint8_t *tail;
3163
0
    int i;
3164
0
    char id;
3165
3166
    /* Check the API id */
3167
0
    id = (char)q[-SST];
3168
0
    if (id != api) {
3169
0
        _PyObject_DebugDumpAddress(p);
3170
0
        _Py_FatalErrorFormat(func,
3171
0
                             "bad ID: Allocated using API '%c', "
3172
0
                             "verified using API '%c'",
3173
0
                             id, api);
3174
0
    }
3175
3176
    /* Check the stuff at the start of p first:  if there's underwrite
3177
     * corruption, the number-of-bytes field may be nuts, and checking
3178
     * the tail could then lead to a segfault.
3179
     */
3180
0
    for (i = SST-1; i >= 1; --i) {
3181
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3182
0
            _PyObject_DebugDumpAddress(p);
3183
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3184
0
        }
3185
0
    }
3186
3187
0
    nbytes = read_size_t(q - 2*SST);
3188
0
    tail = q + nbytes;
3189
0
    for (i = 0; i < SST; ++i) {
3190
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3191
0
            _PyObject_DebugDumpAddress(p);
3192
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3193
0
        }
3194
0
    }
3195
0
}
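The practical effect of these pad checks: a heap overrun that clobbers a trailing FORBIDDENBYTE is reported at the next free or realloc of that block instead of silently corrupting a neighbour. A deliberately buggy, hypothetical fragment of the kind this catches (assumes a debug build or PYTHONMALLOC=debug):

#include <Python.h>

/* Hypothetical buggy helper: the off-by-one write lands in the trailing
 * pad, so the PyMem_Free() below aborts with "bad trailing pad byte" after
 * dumping the block via _PyObject_DebugDumpAddress(). */
static void
off_by_one_demo(void)
{
    char *buf = PyMem_Malloc(16);
    if (buf == NULL) {
        return;
    }
    buf[16] = 0;        /* writes 1 byte past the 16 requested bytes */
    PyMem_Free(buf);    /* debug hook detects the clobbered pad byte */
}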
3196
3197
/* Display info to stderr about the memory block at p. */
3198
static void
3199
_PyObject_DebugDumpAddress(const void *p)
3200
0
{
3201
0
    const uint8_t *q = (const uint8_t *)p;
3202
0
    const uint8_t *tail;
3203
0
    size_t nbytes;
3204
0
    int i;
3205
0
    int ok;
3206
0
    char id;
3207
3208
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3209
0
    if (p == NULL) {
3210
0
        fprintf(stderr, "\n");
3211
0
        return;
3212
0
    }
3213
0
    id = (char)q[-SST];
3214
0
    fprintf(stderr, " API '%c'\n", id);
3215
3216
0
    nbytes = read_size_t(q - 2*SST);
3217
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3218
3219
    /* In case this is nuts, check the leading pad bytes first. */
3220
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3221
0
    ok = 1;
3222
0
    for (i = 1; i <= SST-1; ++i) {
3223
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3224
0
            ok = 0;
3225
0
            break;
3226
0
        }
3227
0
    }
3228
0
    if (ok)
3229
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3230
0
    else {
3231
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3232
0
            PYMEM_FORBIDDENBYTE);
3233
0
        for (i = SST-1; i >= 1; --i) {
3234
0
            const uint8_t byte = *(q-i);
3235
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3236
0
            if (byte != PYMEM_FORBIDDENBYTE)
3237
0
                fputs(" *** OUCH", stderr);
3238
0
            fputc('\n', stderr);
3239
0
        }
3240
3241
0
        fputs("    Because memory is corrupted at the start, the "
3242
0
              "count of bytes requested\n"
3243
0
              "       may be bogus, and checking the trailing pad "
3244
0
              "bytes may segfault.\n", stderr);
3245
0
    }
3246
3247
0
    tail = q + nbytes;
3248
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3249
0
    ok = 1;
3250
0
    for (i = 0; i < SST; ++i) {
3251
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3252
0
            ok = 0;
3253
0
            break;
3254
0
        }
3255
0
    }
3256
0
    if (ok)
3257
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3258
0
    else {
3259
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3260
0
                PYMEM_FORBIDDENBYTE);
3261
0
        for (i = 0; i < SST; ++i) {
3262
0
            const uint8_t byte = tail[i];
3263
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3264
0
                    i, byte);
3265
0
            if (byte != PYMEM_FORBIDDENBYTE)
3266
0
                fputs(" *** OUCH", stderr);
3267
0
            fputc('\n', stderr);
3268
0
        }
3269
0
    }
3270
3271
#ifdef PYMEM_DEBUG_SERIALNO
3272
    size_t serial = read_size_t(tail + SST);
3273
    fprintf(stderr,
3274
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3275
            serial);
3276
#endif
3277
3278
0
    if (nbytes > 0) {
3279
0
        i = 0;
3280
0
        fputs("    Data at p:", stderr);
3281
        /* print up to 8 bytes at the start */
3282
0
        while (q < tail && i < 8) {
3283
0
            fprintf(stderr, " %02x", *q);
3284
0
            ++i;
3285
0
            ++q;
3286
0
        }
3287
        /* and up to 8 at the end */
3288
0
        if (q < tail) {
3289
0
            if (tail - q > 8) {
3290
0
                fputs(" ...", stderr);
3291
0
                q = tail - 8;
3292
0
            }
3293
0
            while (q < tail) {
3294
0
                fprintf(stderr, " %02x", *q);
3295
0
                ++q;
3296
0
            }
3297
0
        }
3298
0
        fputc('\n', stderr);
3299
0
    }
3300
0
    fputc('\n', stderr);
3301
3302
0
    fflush(stderr);
3303
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3304
0
}
3305
3306
3307
static size_t
3308
printone(FILE *out, const char* msg, size_t value)
3309
0
{
3310
0
    int i, k;
3311
0
    char buf[100];
3312
0
    size_t origvalue = value;
3313
3314
0
    fputs(msg, out);
3315
0
    for (i = (int)strlen(msg); i < 35; ++i)
3316
0
        fputc(' ', out);
3317
0
    fputc('=', out);
3318
3319
    /* Write the value with commas. */
3320
0
    i = 22;
3321
0
    buf[i--] = '\0';
3322
0
    buf[i--] = '\n';
3323
0
    k = 3;
3324
0
    do {
3325
0
        size_t nextvalue = value / 10;
3326
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3327
0
        value = nextvalue;
3328
0
        buf[i--] = (char)(digit + '0');
3329
0
        --k;
3330
0
        if (k == 0 && value && i >= 0) {
3331
0
            k = 3;
3332
0
            buf[i--] = ',';
3333
0
        }
3334
0
    } while (value && i >= 0);
3335
3336
0
    while (i >= 0)
3337
0
        buf[i--] = ' ';
3338
0
    fputs(buf, out);
3339
3340
0
    return origvalue;
3341
0
}
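For reference, printone() writes the label padded out to 35 columns, an '=', and the value right-aligned with comma grouping in a fixed-width field. An illustrative call with a made-up value (spacing approximate):

/*
 *   # bytes in allocated blocks        =             1,234,567
 */
(void)printone(stderr, "# bytes in allocated blocks", 1234567);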
3342
3343
void
3344
_PyDebugAllocatorStats(FILE *out,
3345
                       const char *block_name, int num_blocks, size_t sizeof_block)
3346
0
{
3347
0
    char buf1[128];
3348
0
    char buf2[128];
3349
0
    PyOS_snprintf(buf1, sizeof(buf1),
3350
0
                  "%d %ss * %zd bytes each",
3351
0
                  num_blocks, block_name, sizeof_block);
3352
0
    PyOS_snprintf(buf2, sizeof(buf2),
3353
0
                  "%48s ", buf1);
3354
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3355
0
}
3356
3357
// Return true if the obmalloc state structure is heap allocated,
3358
// by PyMem_RawCalloc().  For the main interpreter, this structure
3359
// is allocated in the BSS.  Allocating it that way gives some memory savings
3360
// and a small performance win (at least on a demand paged OS).  On
3361
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3362
// memory is for the arena_map_top array.  Since normally only one entry
3363
// of that array is used, only one page of resident memory is actually
3364
// used, rather than the full 256 kB.
3365
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3366
0
{
3367
0
#if WITH_PYMALLOC
3368
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3369
#else
3370
    return false;
3371
#endif
3372
0
}
3373
3374
#ifdef WITH_PYMALLOC
3375
static void
3376
init_obmalloc_pools(PyInterpreterState *interp)
3377
28
{
3378
    // initialize the obmalloc->pools structure.  This must be done
3379
    // before the obmalloc alloc/free functions can be called.
3380
28
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3381
28
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3382
28
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3383
28
}
3384
#endif /* WITH_PYMALLOC */
3385
3386
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3387
28
{
3388
28
#ifdef WITH_PYMALLOC
3389
    /* Set up the obmalloc state: the main interpreter (and any subinterpreter
3390
       sharing its obmalloc) uses the static state; others allocate their own. */
3391
28
    if (_Py_IsMainInterpreter(interp)
3392
0
            || _PyInterpreterState_HasFeature(interp,
3393
28
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3394
28
        interp->obmalloc = &obmalloc_state_main;
3395
28
        if (!obmalloc_state_initialized) {
3396
28
            init_obmalloc_pools(interp);
3397
28
            obmalloc_state_initialized = true;
3398
28
        }
3399
28
    } else {
3400
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3401
0
        if (interp->obmalloc == NULL) {
3402
0
            return -1;
3403
0
        }
3404
0
        init_obmalloc_pools(interp);
3405
0
    }
3406
28
#endif /* WITH_PYMALLOC */
3407
28
    return 0; // success
3408
28
}
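Which branch a subinterpreter takes above is determined by its Py_RTFLAGS_USE_MAIN_OBMALLOC feature, which comes from the PyInterpreterConfig.use_main_obmalloc field at interpreter creation. A hedged embedding sketch (assumes CPython 3.12+ and the _PyInterpreterConfig_INIT isolated defaults):

#include <Python.h>

/* Hypothetical helper: create an isolated subinterpreter that takes the
 * PyMem_RawCalloc() path above and gets its own obmalloc state. */
static PyStatus
spawn_isolated_subinterpreter(PyThreadState **tstate_out)
{
    PyInterpreterConfig config = _PyInterpreterConfig_INIT;
    config.use_main_obmalloc = 0;   /* don't share the main interpreter's heap */
    return Py_NewInterpreterFromConfig(tstate_out, &config);
}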
3409
3410
3411
#ifdef WITH_PYMALLOC
3412
3413
static void
3414
free_obmalloc_arenas(PyInterpreterState *interp)
3415
0
{
3416
0
    OMState *state = interp->obmalloc;
3417
0
    for (uint i = 0; i < maxarenas; ++i) {
3418
        // free each obmalloc memory arena
3419
0
        struct arena_object *ao = &allarenas[i];
3420
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3421
0
                             (void *)ao->address, ARENA_SIZE);
3422
0
    }
3423
    // free the array containing pointers to all arenas
3424
0
    PyMem_RawFree(allarenas);
3425
0
#if WITH_PYMALLOC_RADIX_TREE
3426
0
#ifdef USE_INTERIOR_NODES
3427
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3428
    // by arena_map_mark_used() but not freed when arenas are freed.
3429
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3430
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3431
0
         if (mid == NULL) {
3432
0
             continue;
3433
0
         }
3434
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3435
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3436
0
            if (bot == NULL) {
3437
0
                continue;
3438
0
            }
3439
0
            PyMem_RawFree(bot);
3440
0
         }
3441
0
         PyMem_RawFree(mid);
3442
0
    }
3443
0
#endif
3444
0
#endif
3445
0
}
3446
3447
#ifdef Py_DEBUG
3448
/* Is target in the list?  The list is traversed via the nextpool pointers.
3449
 * The list may be NULL-terminated or circular.  Return 1 if target is in the
3450
 * list, else 0.
3451
 */
3452
static int
3453
pool_is_in_list(const poolp target, poolp list)
3454
{
3455
    poolp origlist = list;
3456
    assert(target != NULL);
3457
    if (list == NULL)
3458
        return 0;
3459
    do {
3460
        if (target == list)
3461
            return 1;
3462
        list = list->nextpool;
3463
    } while (list != NULL && list != origlist);
3464
    return 0;
3465
}
3466
#endif
3467
3468
#ifdef WITH_MIMALLOC
3469
struct _alloc_stats {
3470
    size_t allocated_blocks;
3471
    size_t allocated_bytes;
3472
    size_t allocated_with_overhead;
3473
    size_t bytes_reserved;
3474
    size_t bytes_committed;
3475
};
3476
3477
static bool _collect_alloc_stats(
3478
    const mi_heap_t* heap, const mi_heap_area_t* area,
3479
    void* block, size_t block_size, void* arg)
3480
0
{
3481
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3482
0
    stats->allocated_blocks += area->used;
3483
0
    stats->allocated_bytes += area->used * area->block_size;
3484
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3485
0
    stats->bytes_reserved += area->reserved;
3486
0
    stats->bytes_committed += area->committed;
3487
0
    return 1;
3488
0
}
3489
3490
static void
3491
py_mimalloc_print_stats(FILE *out)
3492
0
{
3493
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3494
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3495
0
    fprintf(out, "Medium block threshold = %zu\n",
3496
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3497
0
    fprintf(out, "Large object max size = %zu\n",
3498
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3499
3500
0
    mi_heap_t *heap = mi_heap_get_default();
3501
0
    struct _alloc_stats stats;
3502
0
    memset(&stats, 0, sizeof(stats));
3503
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3504
3505
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3506
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3507
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3508
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3509
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3510
0
}
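The statistics above only need per-area totals, so mi_heap_visit_blocks() is called with its second argument false and the visitor runs once per heap area. A hedged sketch of the per-block variant (not used by CPython; assumes the mimalloc headers included above):

/* Hypothetical visitor: with the second argument true, mimalloc also invokes
 * the visitor for every live block (block != NULL), so blocks can be counted
 * individually.  The signature mirrors _collect_alloc_stats() above. */
static bool
sketch_count_blocks(const mi_heap_t *heap, const mi_heap_area_t *area,
                    void *block, size_t block_size, void *arg)
{
    (void)heap; (void)area; (void)block_size;
    if (block != NULL) {        /* NULL marks the per-area summary callback */
        ++*(size_t *)arg;
    }
    return true;                /* keep visiting */
}

/* usage: size_t n = 0; mi_heap_visit_blocks(heap, true, &sketch_count_blocks, &n); */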
3511
#endif
3512
3513
3514
static void
3515
pymalloc_print_stats(FILE *out)
3516
0
{
3517
0
    OMState *state = get_state();
3518
3519
0
    uint i;
3520
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3521
    /* # of pools, allocated blocks, and free blocks per class index */
3522
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3523
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3524
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3525
    /* total # of allocated bytes in used and full pools */
3526
0
    size_t allocated_bytes = 0;
3527
    /* total # of available bytes in used pools */
3528
0
    size_t available_bytes = 0;
3529
    /* # of free pools + pools not yet carved out of current arena */
3530
0
    uint numfreepools = 0;
3531
    /* # of bytes for arena alignment padding */
3532
0
    size_t arena_alignment = 0;
3533
    /* # of bytes in used and full pools used for pool_headers */
3534
0
    size_t pool_header_bytes = 0;
3535
    /* # of bytes in used and full pools wasted due to quantization,
3536
     * i.e. the necessarily leftover space at the ends of used and
3537
     * full pools.
3538
     */
3539
0
    size_t quantization = 0;
3540
    /* # of arenas actually allocated. */
3541
0
    size_t narenas = 0;
3542
    /* running total -- should equal narenas * ARENA_SIZE */
3543
0
    size_t total;
3544
0
    char buf[128];
3545
3546
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3547
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3548
3549
0
    for (i = 0; i < numclasses; ++i)
3550
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3551
3552
    /* Because full pools aren't linked to from anything, it's easiest
3553
     * to march over all the arenas.  If we're lucky, most of the memory
3554
     * will be living in full pools -- would be a shame to miss them.
3555
     */
3556
0
    for (i = 0; i < maxarenas; ++i) {
3557
0
        uintptr_t base = allarenas[i].address;
3558
3559
        /* Skip arenas which are not allocated. */
3560
0
        if (allarenas[i].address == (uintptr_t)NULL)
3561
0
            continue;
3562
0
        narenas += 1;
3563
3564
0
        numfreepools += allarenas[i].nfreepools;
3565
3566
        /* round up to pool alignment */
3567
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3568
0
            arena_alignment += POOL_SIZE;
3569
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3570
0
            base += POOL_SIZE;
3571
0
        }
3572
3573
        /* visit every pool in the arena */
3574
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3575
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3576
0
            poolp p = (poolp)base;
3577
0
            const uint sz = p->szidx;
3578
0
            uint freeblocks;
3579
3580
0
            if (p->ref.count == 0) {
3581
                /* currently unused */
3582
#ifdef Py_DEBUG
3583
                assert(pool_is_in_list(p, allarenas[i].freepools));
3584
#endif
3585
0
                continue;
3586
0
            }
3587
0
            ++numpools[sz];
3588
0
            numblocks[sz] += p->ref.count;
3589
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3590
0
            numfreeblocks[sz] += freeblocks;
3591
#ifdef Py_DEBUG
3592
            if (freeblocks > 0)
3593
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3594
#endif
3595
0
        }
3596
0
    }
3597
0
    assert(narenas == narenas_currently_allocated);
3598
3599
0
    fputc('\n', out);
3600
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3601
0
          "-----   ----   ---------   -------------  ------------\n",
3602
0
          out);
3603
3604
0
    for (i = 0; i < numclasses; ++i) {
3605
0
        size_t p = numpools[i];
3606
0
        size_t b = numblocks[i];
3607
0
        size_t f = numfreeblocks[i];
3608
0
        uint size = INDEX2SIZE(i);
3609
0
        if (p == 0) {
3610
0
            assert(b == 0 && f == 0);
3611
0
            continue;
3612
0
        }
3613
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3614
0
                i, size, p, b, f);
3615
0
        allocated_bytes += b * size;
3616
0
        available_bytes += f * size;
3617
0
        pool_header_bytes += p * POOL_OVERHEAD;
3618
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3619
0
    }
3620
0
    fputc('\n', out);
3621
#ifdef PYMEM_DEBUG_SERIALNO
3622
    if (_PyMem_DebugEnabled()) {
3623
        (void)printone(out, "# times object malloc called", serialno);
3624
    }
3625
#endif
3626
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3627
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3628
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3629
0
    (void)printone(out, "# arenas allocated current", narenas);
3630
3631
0
    PyOS_snprintf(buf, sizeof(buf),
3632
0
                  "%zu arenas * %d bytes/arena",
3633
0
                  narenas, ARENA_SIZE);
3634
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3635
3636
0
    fputc('\n', out);
3637
3638
    /* Account for what all of those arena bytes are being used for. */
3639
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3640
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3641
3642
0
    PyOS_snprintf(buf, sizeof(buf),
3643
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3644
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3645
3646
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3647
0
    total += printone(out, "# bytes lost to quantization", quantization);
3648
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3649
0
    (void)printone(out, "Total", total);
3650
0
    assert(narenas * ARENA_SIZE == total);
3651
3652
0
#if WITH_PYMALLOC_RADIX_TREE
3653
0
    fputs("\narena map counts\n", out);
3654
0
#ifdef USE_INTERIOR_NODES
3655
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3656
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3657
0
    fputc('\n', out);
3658
0
#endif
3659
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3660
0
#ifdef USE_INTERIOR_NODES
3661
0
    total += printone(out, "# bytes lost to arena map mid",
3662
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3663
0
    total += printone(out, "# bytes lost to arena map bot",
3664
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3665
0
    (void)printone(out, "Total", total);
3666
0
#endif
3667
0
#endif
3668
3669
0
}
3670
3671
/* Print summary info to "out" about the state of pymalloc's structures.
3672
 * In Py_DEBUG mode, also perform some expensive internal consistency
3673
 * checks.
3674
 *
3675
 * Return 0 if the memory debug hooks are not installed or no statistics were
3676
 * written to out; return 1 otherwise.
3677
 */
3678
int
3679
_PyObject_DebugMallocStats(FILE *out)
3680
0
{
3681
0
#ifdef WITH_MIMALLOC
3682
0
    if (_PyMem_MimallocEnabled()) {
3683
0
        py_mimalloc_print_stats(out);
3684
0
        return 1;
3685
0
    }
3686
0
    else
3687
0
#endif
3688
0
    if (_PyMem_PymallocEnabled()) {
3689
0
        pymalloc_print_stats(out);
3690
0
        return 1;
3691
0
    }
3692
0
    else {
3693
0
        return 0;
3694
0
    }
3695
0
}
3696
3697
#endif /* #ifdef WITH_PYMALLOC */
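The usual way to reach this report is sys._debugmallocstats(), which writes to stderr and ends up in _PyObject_DebugMallocStats(). A minimal embedding sketch that triggers the same output (hypothetical program; assumes a build with pymalloc or mimalloc enabled):

#include <Python.h>

int
main(void)
{
    Py_Initialize();
    /* sys._debugmallocstats() calls _PyObject_DebugMallocStats(stderr). */
    PyRun_SimpleString("import sys; sys._debugmallocstats()");
    Py_Finalize();
    return 0;
}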