Coverage Report

Created: 2025-12-14 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#ifdef WITH_MIMALLOC
17
// Forward declarations of functions used in our mimalloc modifications
18
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
19
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
20
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
21
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
22
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
23
#  include "pycore_mimalloc.h"
24
#  include "mimalloc/static.c"
25
#  include "mimalloc/internal.h"  // for stats
26
#endif
27
28
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
29
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
30
#endif
31
32
#undef  uint
33
1.83G
#define uint pymem_uint
34
35
36
/* Defined in tracemalloc.c */
37
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
38
39
static void _PyObject_DebugDumpAddress(const void *p);
40
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
41
42
43
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
44
static void set_up_debug_hooks_unlocked(void);
45
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
46
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
47
48
49
/***************************************/
50
/* low-level allocator implementations */
51
/***************************************/
52
53
/* the default raw allocator (wraps malloc) */
54
55
void *
56
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
57
231M
{
58
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
59
       for malloc(0), which would be treated as an error. Some platforms would
60
       return a pointer with no memory behind it, which would break pymalloc.
61
       To solve these problems, allocate an extra byte. */
62
231M
    if (size == 0)
63
40.0M
        size = 1;
64
231M
    return malloc(size);
65
231M
}
66
67
void *
68
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
69
115k
{
70
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
71
       for calloc(0, 0), which would be treated as an error. Some platforms
72
       would return a pointer with no memory behind it, which would break
73
       pymalloc.  To solve these problems, allocate an extra byte. */
74
115k
    if (nelem == 0 || elsize == 0) {
75
2
        nelem = 1;
76
2
        elsize = 1;
77
2
    }
78
115k
    return calloc(nelem, elsize);
79
115k
}
80
81
void *
82
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
83
8.53M
{
84
8.53M
    if (size == 0)
85
0
        size = 1;
86
8.53M
    return realloc(ptr, size);
87
8.53M
}
88
89
void
90
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
91
231M
{
92
231M
    free(ptr);
93
231M
}
94
95
#ifdef WITH_MIMALLOC
96
97
static void
98
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
99
0
{
100
#ifdef Py_GIL_DISABLED
101
    // Clear the QSBR goal and remove the page from the QSBR linked list.
102
    page->qsbr_goal = 0;
103
    if (page->qsbr_node.next != NULL) {
104
        llist_remove(&page->qsbr_node);
105
    }
106
#endif
107
0
}
108
109
// Check if an empty, newly reclaimed page is safe to free now.
110
static bool
111
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
112
0
{
113
0
    assert(mi_page_all_free(page));
114
#ifdef Py_GIL_DISABLED
115
    assert(page->qsbr_node.next == NULL);
116
    if (page->use_qsbr && page->qsbr_goal != 0) {
117
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
118
        if (tstate == NULL) {
119
            return false;
120
        }
121
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
122
    }
123
#endif
124
0
    return true;
125
126
0
}
127
128
#ifdef Py_GIL_DISABLED
129
130
// If we are deferring collection of more than this amount of memory for
131
// mimalloc pages, advance the write sequence.  Advancing allows these
132
// pages to be re-used in a different thread or for a different size class.
133
#define QSBR_PAGE_MEM_LIMIT 4096*20
134
135
// Return true if the global write sequence should be advanced for a mimalloc
136
// page that is deferred from collection.
137
static bool
138
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
139
{
140
    size_t bsize = mi_page_block_size(page);
141
    size_t page_size = page->capacity*bsize;
142
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
143
        qsbr->deferred_page_memory = 0;
144
        return true;
145
    }
146
    qsbr->deferred_page_memory += page_size;
147
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
148
        qsbr->deferred_page_memory = 0;
149
        return true;
150
    }
151
    return false;
152
}
153
#endif
154
155
static bool
156
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
157
0
{
158
#ifdef Py_GIL_DISABLED
159
    assert(mi_page_all_free(page));
160
    if (page->use_qsbr) {
161
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
162
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
163
            _PyMem_mi_page_clear_qsbr(page);
164
            _mi_page_free(page, pq, force);
165
            return true;
166
        }
167
168
        _PyMem_mi_page_clear_qsbr(page);
169
        page->retire_expire = 0;
170
171
        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
172
            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
173
        }
174
        else {
175
            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
176
        }
177
178
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
179
        return false;
180
    }
181
#endif
182
0
    _mi_page_free(page, pq, force);
183
0
    return true;
184
0
}
185
186
static void
187
_PyMem_mi_page_reclaimed(mi_page_t *page)
188
0
{
189
#ifdef Py_GIL_DISABLED
190
    assert(page->qsbr_node.next == NULL);
191
    if (page->qsbr_goal != 0) {
192
        if (mi_page_all_free(page)) {
193
            assert(page->qsbr_node.next == NULL);
194
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
195
            page->retire_expire = 0;
196
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
197
        }
198
        else {
199
            page->qsbr_goal = 0;
200
        }
201
    }
202
#endif
203
0
}
204
205
static void
206
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
207
0
{
208
#ifdef Py_GIL_DISABLED
209
    if (!heap->page_use_qsbr) {
210
        return;
211
    }
212
213
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
214
    struct llist_node *head = &tstate->mimalloc.page_list;
215
    if (llist_empty(head)) {
216
        return;
217
    }
218
219
    struct llist_node *node;
220
    llist_for_each_safe(node, head) {
221
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
222
        if (!mi_page_all_free(page)) {
223
            // We allocated from this page some point after the delayed free
224
            _PyMem_mi_page_clear_qsbr(page);
225
            continue;
226
        }
227
228
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
229
            return;
230
        }
231
232
        _PyMem_mi_page_clear_qsbr(page);
233
        _mi_page_free(page, mi_page_queue_of(page), false);
234
    }
235
#endif
236
0
}
237
238
void *
239
_PyMem_MiMalloc(void *ctx, size_t size)
240
0
{
241
#ifdef Py_GIL_DISABLED
242
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
243
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
244
    return mi_heap_malloc(heap, size);
245
#else
246
0
    return mi_malloc(size);
247
0
#endif
248
0
}
249
250
void *
251
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
252
0
{
253
#ifdef Py_GIL_DISABLED
254
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
255
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
256
    return mi_heap_calloc(heap, nelem, elsize);
257
#else
258
0
    return mi_calloc(nelem, elsize);
259
0
#endif
260
0
}
261
262
void *
263
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
264
0
{
265
#ifdef Py_GIL_DISABLED
266
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
267
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
268
    return mi_heap_realloc(heap, ptr, size);
269
#else
270
0
    return mi_realloc(ptr, size);
271
0
#endif
272
0
}
273
274
void
275
_PyMem_MiFree(void *ctx, void *ptr)
276
0
{
277
0
    mi_free(ptr);
278
0
}
279
280
void *
281
_PyObject_MiMalloc(void *ctx, size_t nbytes)
282
0
{
283
#ifdef Py_GIL_DISABLED
284
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
285
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
286
    return mi_heap_malloc(heap, nbytes);
287
#else
288
0
    return mi_malloc(nbytes);
289
0
#endif
290
0
}
291
292
void *
293
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
294
0
{
295
#ifdef Py_GIL_DISABLED
296
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
297
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
298
    return mi_heap_calloc(heap, nelem, elsize);
299
#else
300
0
    return mi_calloc(nelem, elsize);
301
0
#endif
302
0
}
303
304
305
void *
306
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
307
0
{
308
#ifdef Py_GIL_DISABLED
309
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
310
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
311
    return mi_heap_realloc(heap, ptr, nbytes);
312
#else
313
0
    return mi_realloc(ptr, nbytes);
314
0
#endif
315
0
}
316
317
void
318
_PyObject_MiFree(void *ctx, void *ptr)
319
0
{
320
0
    mi_free(ptr);
321
0
}
322
323
#endif // WITH_MIMALLOC
324
325
326
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
327
328
329
#ifdef WITH_MIMALLOC
330
0
#  define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
331
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
332
#endif
333
334
/* the pymalloc allocator */
335
336
// The actual implementation is further down.
337
338
#if defined(WITH_PYMALLOC)
339
void* _PyObject_Malloc(void *ctx, size_t size);
340
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
341
void _PyObject_Free(void *ctx, void *p);
342
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
343
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
344
#endif  // WITH_PYMALLOC
345
346
#if defined(Py_GIL_DISABLED)
347
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
348
#  define PYRAW_ALLOC MALLOC_ALLOC
349
#  define PYMEM_ALLOC MIMALLOC_ALLOC
350
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
351
#elif defined(WITH_PYMALLOC)
352
0
#  define PYRAW_ALLOC MALLOC_ALLOC
353
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
354
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
355
#else
356
#  define PYRAW_ALLOC MALLOC_ALLOC
357
#  define PYMEM_ALLOC MALLOC_ALLOC
358
#  define PYOBJ_ALLOC MALLOC_ALLOC
359
#endif
360
361
362
/* the default debug allocators */
363
364
// The actual implementation is further down.
365
366
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
367
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
368
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
369
void _PyMem_DebugRawFree(void *ctx, void *ptr);
370
371
void* _PyMem_DebugMalloc(void *ctx, size_t size);
372
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
373
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
374
void _PyMem_DebugFree(void *ctx, void *p);
375
376
#define PYDBGRAW_ALLOC \
377
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
378
#define PYDBGMEM_ALLOC \
379
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
380
#define PYDBGOBJ_ALLOC \
381
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
382
383
/* default raw allocator (not swappable) */
384
385
void *
386
_PyMem_DefaultRawMalloc(size_t size)
387
224
{
388
#ifdef Py_DEBUG
389
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
390
#else
391
224
    return _PyMem_RawMalloc(NULL, size);
392
224
#endif
393
224
}
394
395
void *
396
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
397
0
{
398
#ifdef Py_DEBUG
399
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
400
#else
401
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
402
0
#endif
403
0
}
404
405
void *
406
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
407
0
{
408
#ifdef Py_DEBUG
409
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
410
#else
411
0
    return _PyMem_RawRealloc(NULL, ptr, size);
412
0
#endif
413
0
}
414
415
void
416
_PyMem_DefaultRawFree(void *ptr)
417
252
{
418
#ifdef Py_DEBUG
419
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
420
#else
421
252
    _PyMem_RawFree(NULL, ptr);
422
252
#endif
423
252
}
424
425
wchar_t*
426
_PyMem_DefaultRawWcsdup(const wchar_t *str)
427
168
{
428
168
    assert(str != NULL);
429
430
168
    size_t len = wcslen(str);
431
168
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
432
0
        return NULL;
433
0
    }
434
435
168
    size_t size = (len + 1) * sizeof(wchar_t);
436
168
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
437
168
    if (str2 == NULL) {
438
0
        return NULL;
439
0
    }
440
441
168
    memcpy(str2, str, size);
442
168
    return str2;
443
168
}
444
445
/* the low-level virtual memory allocator */
446
447
#ifdef WITH_PYMALLOC
448
#  ifdef MS_WINDOWS
449
#    include <windows.h>
450
#  elif defined(HAVE_MMAP)
451
#    include <sys/mman.h>
452
#    ifdef MAP_ANONYMOUS
453
#      define ARENAS_USE_MMAP
454
#    endif
455
#  endif
456
#endif
457
458
void *
459
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
460
235k
{
461
#ifdef MS_WINDOWS
462
    return VirtualAlloc(NULL, size,
463
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
464
#elif defined(ARENAS_USE_MMAP)
465
    void *ptr;
466
235k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
467
235k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
468
235k
    if (ptr == MAP_FAILED)
469
0
        return NULL;
470
235k
    assert(ptr != NULL);
471
235k
    _PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
472
235k
    return ptr;
473
#else
474
    return malloc(size);
475
#endif
476
235k
}
477
478
void
479
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
480
#if defined(ARENAS_USE_MMAP)
481
    size_t size
482
#else
483
    size_t Py_UNUSED(size)
484
#endif
485
)
486
235k
{
487
#ifdef MS_WINDOWS
488
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
489
    if (ptr == NULL) {
490
        return;
491
    }
492
    VirtualFree(ptr, 0, MEM_RELEASE);
493
#elif defined(ARENAS_USE_MMAP)
494
    /* Unlike free(), munmap() does not special-case NULL to noop. */
495
235k
    if (ptr == NULL) {
496
0
        return;
497
0
    }
498
235k
    munmap(ptr, size);
499
#else
500
    free(ptr);
501
#endif
502
235k
}
503
504
/*******************************************/
505
/* end low-level allocator implementations */
506
/*******************************************/
507
508
509
56
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
510
944M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
511
2.14G
#define _PyMem (_PyRuntime.allocators.standard.mem)
512
5.44G
#define _PyObject (_PyRuntime.allocators.standard.obj)
513
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
514
941k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
515
516
517
/***************************/
518
/* managing the allocators */
519
/***************************/
520
521
static int
522
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
523
                               PyMemAllocatorEx *old_alloc)
524
0
{
525
0
    if (old_alloc != NULL) {
526
0
        get_allocator_unlocked(domain, old_alloc);
527
0
    }
528
529
530
0
    PyMemAllocatorEx new_alloc;
531
0
    switch(domain)
532
0
    {
533
0
    case PYMEM_DOMAIN_RAW:
534
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
535
0
        break;
536
0
    case PYMEM_DOMAIN_MEM:
537
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
538
0
        break;
539
0
    case PYMEM_DOMAIN_OBJ:
540
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
541
0
        break;
542
0
    default:
543
        /* unknown domain */
544
0
        return -1;
545
0
    }
546
0
    set_allocator_unlocked(domain, &new_alloc);
547
0
    if (debug) {
548
0
        set_up_debug_hooks_domain_unlocked(domain);
549
0
    }
550
0
    return 0;
551
0
}
552
553
554
#ifdef Py_DEBUG
555
static const int pydebug = 1;
556
#else
557
static const int pydebug = 0;
558
#endif
559
560
int
561
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
562
0
{
563
0
    if (name == NULL || *name == '\0') {
564
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
565
           options): use default memory allocators */
566
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
567
0
    }
568
0
    else if (strcmp(name, "default") == 0) {
569
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
570
0
    }
571
0
    else if (strcmp(name, "debug") == 0) {
572
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
573
0
    }
574
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
575
0
    else if (strcmp(name, "pymalloc") == 0) {
576
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
577
0
    }
578
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
579
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
580
0
    }
581
0
#endif
582
0
#ifdef WITH_MIMALLOC
583
0
    else if (strcmp(name, "mimalloc") == 0) {
584
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
585
0
    }
586
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
587
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
588
0
    }
589
0
#endif
590
0
#ifndef Py_GIL_DISABLED
591
0
    else if (strcmp(name, "malloc") == 0) {
592
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
593
0
    }
594
0
    else if (strcmp(name, "malloc_debug") == 0) {
595
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
596
0
    }
597
0
#endif
598
0
    else {
599
        /* unknown allocator */
600
0
        return -1;
601
0
    }
602
0
    return 0;
603
0
}
604
605
606
static int
607
set_up_allocators_unlocked(PyMemAllocatorName allocator)
608
0
{
609
0
    switch (allocator) {
610
0
    case PYMEM_ALLOCATOR_NOT_SET:
611
        /* do nothing */
612
0
        break;
613
614
0
    case PYMEM_ALLOCATOR_DEFAULT:
615
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
616
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
617
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
618
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
619
0
        break;
620
621
0
    case PYMEM_ALLOCATOR_DEBUG:
622
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
623
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
624
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
625
0
        _PyRuntime.allocators.is_debug_enabled = 1;
626
0
        break;
627
628
0
#ifdef WITH_PYMALLOC
629
0
    case PYMEM_ALLOCATOR_PYMALLOC:
630
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
631
0
    {
632
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
633
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
634
635
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
636
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
637
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
638
639
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
640
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
641
0
        if (is_debug) {
642
0
            set_up_debug_hooks_unlocked();
643
0
        }
644
0
        break;
645
0
    }
646
0
#endif
647
0
#ifdef WITH_MIMALLOC
648
0
    case PYMEM_ALLOCATOR_MIMALLOC:
649
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
650
0
    {
651
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
652
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
653
654
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
655
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
656
657
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
658
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
659
660
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
661
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
662
0
        if (is_debug) {
663
0
            set_up_debug_hooks_unlocked();
664
0
        }
665
666
0
        break;
667
0
    }
668
0
#endif
669
670
0
    case PYMEM_ALLOCATOR_MALLOC:
671
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
672
0
    {
673
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
674
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
675
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
676
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
677
678
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
679
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
680
0
        if (is_debug) {
681
0
            set_up_debug_hooks_unlocked();
682
0
        }
683
0
        break;
684
0
    }
685
686
0
    default:
687
        /* unknown allocator */
688
0
        return -1;
689
0
    }
690
691
0
    return 0;
692
0
}
693
694
int
695
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
696
0
{
697
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
698
0
    int res = set_up_allocators_unlocked(allocator);
699
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
700
0
    return res;
701
0
}
702
703
704
static int
705
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
706
0
{
707
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
708
0
}
709
710
711
static const char*
712
get_current_allocator_name_unlocked(void)
713
0
{
714
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
715
0
#ifdef WITH_PYMALLOC
716
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
717
0
#endif
718
0
#ifdef WITH_MIMALLOC
719
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
720
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
721
0
#endif
722
723
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
724
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
725
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
726
0
    {
727
0
        return "malloc";
728
0
    }
729
0
#ifdef WITH_PYMALLOC
730
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
731
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
732
0
        pymemallocator_eq(&_PyObject, &pymalloc))
733
0
    {
734
0
        return "pymalloc";
735
0
    }
736
0
#endif
737
0
#ifdef WITH_MIMALLOC
738
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
739
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
740
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
741
0
    {
742
0
        return "mimalloc";
743
0
    }
744
0
#endif
745
746
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
747
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
748
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
749
750
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
751
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
752
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
753
0
    {
754
        /* Debug hooks installed */
755
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
756
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
757
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
758
0
        {
759
0
            return "malloc_debug";
760
0
        }
761
0
#ifdef WITH_PYMALLOC
762
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
763
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
764
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
765
0
        {
766
0
            return "pymalloc_debug";
767
0
        }
768
0
#endif
769
0
#ifdef WITH_MIMALLOC
770
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
771
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
772
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
773
0
        {
774
0
            return "mimalloc_debug";
775
0
        }
776
0
#endif
777
0
    }
778
0
    return NULL;
779
0
}
780
781
const char*
782
_PyMem_GetCurrentAllocatorName(void)
783
0
{
784
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
785
0
    const char *name = get_current_allocator_name_unlocked();
786
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
787
0
    return name;
788
0
}
789
790
791
int
792
_PyMem_DebugEnabled(void)
793
0
{
794
0
    return _PyRuntime.allocators.is_debug_enabled;
795
0
}
796
797
#ifdef WITH_PYMALLOC
798
static int
799
_PyMem_PymallocEnabled(void)
800
0
{
801
0
    if (_PyMem_DebugEnabled()) {
802
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
803
0
    }
804
0
    else {
805
0
        return (_PyObject.malloc == _PyObject_Malloc);
806
0
    }
807
0
}
808
809
#ifdef WITH_MIMALLOC
810
static int
811
_PyMem_MimallocEnabled(void)
812
0
{
813
#ifdef Py_GIL_DISABLED
814
    return 1;
815
#else
816
0
    if (_PyMem_DebugEnabled()) {
817
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
818
0
    }
819
0
    else {
820
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
821
0
    }
822
0
#endif
823
0
}
824
#endif  // WITH_MIMALLOC
825
826
#endif  // WITH_PYMALLOC
827
828
829
static void
830
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
831
0
{
832
0
    PyMemAllocatorEx alloc;
833
834
0
    if (domain == PYMEM_DOMAIN_RAW) {
835
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
836
0
            return;
837
0
        }
838
839
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
840
0
        alloc.ctx = &_PyMem_Debug.raw;
841
0
        alloc.malloc = _PyMem_DebugRawMalloc;
842
0
        alloc.calloc = _PyMem_DebugRawCalloc;
843
0
        alloc.realloc = _PyMem_DebugRawRealloc;
844
0
        alloc.free = _PyMem_DebugRawFree;
845
0
        set_allocator_unlocked(domain, &alloc);
846
0
    }
847
0
    else if (domain == PYMEM_DOMAIN_MEM) {
848
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
849
0
            return;
850
0
        }
851
852
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
853
0
        alloc.ctx = &_PyMem_Debug.mem;
854
0
        alloc.malloc = _PyMem_DebugMalloc;
855
0
        alloc.calloc = _PyMem_DebugCalloc;
856
0
        alloc.realloc = _PyMem_DebugRealloc;
857
0
        alloc.free = _PyMem_DebugFree;
858
0
        set_allocator_unlocked(domain, &alloc);
859
0
    }
860
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
861
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
862
0
            return;
863
0
        }
864
865
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
866
0
        alloc.ctx = &_PyMem_Debug.obj;
867
0
        alloc.malloc = _PyMem_DebugMalloc;
868
0
        alloc.calloc = _PyMem_DebugCalloc;
869
0
        alloc.realloc = _PyMem_DebugRealloc;
870
0
        alloc.free = _PyMem_DebugFree;
871
0
        set_allocator_unlocked(domain, &alloc);
872
0
    }
873
0
}
874
875
876
static void
877
set_up_debug_hooks_unlocked(void)
878
0
{
879
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
880
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
881
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
882
0
    _PyRuntime.allocators.is_debug_enabled = 1;
883
0
}
884
885
void
886
PyMem_SetupDebugHooks(void)
887
0
{
888
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
889
0
    set_up_debug_hooks_unlocked();
890
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
891
0
}
892
893
static void
894
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
895
28
{
896
28
    switch(domain)
897
28
    {
898
28
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
899
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
900
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
901
0
    default:
902
        /* unknown domain: set all attributes to NULL */
903
0
        allocator->ctx = NULL;
904
0
        allocator->malloc = NULL;
905
0
        allocator->calloc = NULL;
906
0
        allocator->realloc = NULL;
907
0
        allocator->free = NULL;
908
28
    }
909
28
}
910
911
static void
912
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
913
0
{
914
0
    switch(domain)
915
0
    {
916
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
917
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
918
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
919
    /* ignore unknown domain */
920
0
    }
921
0
}
922
923
void
924
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
925
28
{
926
28
    PyMutex_Lock(&ALLOCATORS_MUTEX);
927
28
    get_allocator_unlocked(domain, allocator);
928
28
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
929
28
}
930
931
void
932
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
933
0
{
934
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
935
0
    set_allocator_unlocked(domain, allocator);
936
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
937
0
}
938
939
void
940
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
941
0
{
942
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
943
0
    *allocator = _PyObject_Arena;
944
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
945
0
}
946
947
void
948
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
949
0
{
950
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
951
0
    _PyObject_Arena = *allocator;
952
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
953
0
}
954
955
956
/* Note that there is a possible, but very unlikely, race in any place
957
 * below where we call one of the allocator functions.  We access two
958
 * fields in each case:  "malloc", etc. and "ctx".
959
 *
960
 * It is unlikely that the allocator will be changed while one of those
961
 * calls is happening, much less in that very narrow window.
962
 * Furthermore, the likelihood of a race is drastically reduced by the
963
 * fact that the allocator may not be changed after runtime init
964
 * (except with a wrapper).
965
 *
966
 * With the above in mind, we currently don't worry about locking
967
 * around these uses of the runtime-global allocators state. */
968
969
970
/*************************/
971
/* the "arena" allocator */
972
/*************************/
973
974
void *
_PyObject_VirtualAlloc(size_t size)
{
    /* Allocate 'size' bytes through the registered arena allocator. */
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
}
979
980
void
_PyObject_VirtualFree(void *obj, size_t size)
{
    /* Release memory obtained from _PyObject_VirtualAlloc(); the arena
     * allocator's free hook requires the original allocation size. */
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
}
985
986
987
/***********************/
988
/* the "raw" allocator */
989
/***********************/
990
991
void *
992
PyMem_RawMalloc(size_t size)
993
231M
{
994
    /*
995
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
996
     * Most python internals blindly use a signed Py_ssize_t to track
997
     * things without checking for overflows or negatives.
998
     * As size_t is unsigned, checking for size < 0 is not required.
999
     */
1000
231M
    if (size > (size_t)PY_SSIZE_T_MAX)
1001
0
        return NULL;
1002
231M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1003
231M
}
1004
1005
void *
1006
PyMem_RawCalloc(size_t nelem, size_t elsize)
1007
115k
{
1008
    /* see PyMem_RawMalloc() */
1009
115k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1010
0
        return NULL;
1011
115k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1012
115k
}
1013
1014
void*
1015
PyMem_RawRealloc(void *ptr, size_t new_size)
1016
8.53M
{
1017
    /* see PyMem_RawMalloc() */
1018
8.53M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1019
0
        return NULL;
1020
8.53M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1021
8.53M
}
1022
1023
void PyMem_RawFree(void *ptr)
1024
231M
{
1025
231M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1026
231M
}
1027
1028
1029
/***********************/
1030
/* the "mem" allocator */
1031
/***********************/
1032
1033
void *
1034
PyMem_Malloc(size_t size)
1035
243M
{
1036
    /* see PyMem_RawMalloc() */
1037
243M
    if (size > (size_t)PY_SSIZE_T_MAX)
1038
0
        return NULL;
1039
243M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1040
243M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1041
243M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1042
243M
    OBJECT_STAT_INC(allocations);
1043
243M
    return _PyMem.malloc(_PyMem.ctx, size);
1044
243M
}
1045
1046
void *
1047
PyMem_Calloc(size_t nelem, size_t elsize)
1048
50.0M
{
1049
    /* see PyMem_RawMalloc() */
1050
50.0M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1051
0
        return NULL;
1052
50.0M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1053
50.0M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1054
50.0M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1055
50.0M
    OBJECT_STAT_INC(allocations);
1056
50.0M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1057
50.0M
}
1058
1059
void *
1060
PyMem_Realloc(void *ptr, size_t new_size)
1061
260M
{
1062
    /* see PyMem_RawMalloc() */
1063
260M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1064
0
        return NULL;
1065
260M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1066
260M
}
1067
1068
void
1069
PyMem_Free(void *ptr)
1070
518M
{
1071
518M
    OBJECT_STAT_INC(frees);
1072
518M
    _PyMem.free(_PyMem.ctx, ptr);
1073
518M
}
1074
1075
1076
/***************************/
1077
/* pymem utility functions */
1078
/***************************/
1079
1080
wchar_t*
1081
_PyMem_RawWcsdup(const wchar_t *str)
1082
1.12k
{
1083
1.12k
    assert(str != NULL);
1084
1085
1.12k
    size_t len = wcslen(str);
1086
1.12k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1087
0
        return NULL;
1088
0
    }
1089
1090
1.12k
    size_t size = (len + 1) * sizeof(wchar_t);
1091
1.12k
    wchar_t *str2 = PyMem_RawMalloc(size);
1092
1.12k
    if (str2 == NULL) {
1093
0
        return NULL;
1094
0
    }
1095
1096
1.12k
    memcpy(str2, str, size);
1097
1.12k
    return str2;
1098
1.12k
}
1099
1100
char *
_PyMem_RawStrdup(const char *str)
{
    /* Duplicate a NUL-terminated string with the raw allocator.  The
     * caller owns the result and must free it with PyMem_RawFree().
     * Returns NULL on allocation failure. */
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;    /* include the terminator */
    char *dup = PyMem_RawMalloc(nbytes);
    if (dup != NULL) {
        memcpy(dup, str, nbytes);
    }
    return dup;
}
1112
1113
char *
_PyMem_Strdup(const char *str)
{
    /* Duplicate a NUL-terminated string with the "mem" domain allocator
     * (caller frees with PyMem_Free()).  Returns NULL on allocation
     * failure. */
    assert(str != NULL);
    size_t nbytes = strlen(str) + 1;    /* include the terminator */
    char *dup = PyMem_Malloc(nbytes);
    if (dup != NULL) {
        memcpy(dup, str, nbytes);
    }
    return dup;
}
1125
1126
/***********************************************/
1127
/* Delayed freeing support for Py_GIL_DISABLED */
1128
/***********************************************/
1129
1130
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1131
#define WORK_ITEMS_PER_CHUNK 254
1132
1133
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
struct _mem_work_item {
    uintptr_t ptr; // lowest bit tagged 1 for delayed decrefs queued via
                   // _PyObject_XDecRefDelayed(); untagged pointers are
                   // released with PyMem_Free()
    uint64_t qsbr_goal;  // shared QSBR sequence value to wait for
};
1138
1139
// A fixed-size buffer of pointers to be freed.  Sized so that
// sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms
// (see WORK_ITEMS_PER_CHUNK).
struct _mem_work_chunk {
    // Linked list node of chunks in queue
    struct llist_node node;

    Py_ssize_t rd_idx;  // index of next item to read
    Py_ssize_t wr_idx;  // index of next item to write
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
};
1148
1149
static int
work_item_should_decref(uintptr_t ptr)
{
    /* The low pointer bit tags work items queued as delayed decrefs
     * (as opposed to plain memory frees). */
    return (ptr & 0x01) != 0;
}
1154
1155
/* Release one queued work item.  Tagged pointers (low bit set) hold a
 * deferred reference-count decrement; untagged pointers are plain memory
 * blocks released with PyMem_Free().  'cb'/'state' are only used in
 * Py_GIL_DISABLED builds, during stop-the-world processing. */
static void
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
{
    if (work_item_should_decref(ptr)) {
        // Strip the tag bit to recover the object pointer.
        PyObject *obj = (PyObject *)(ptr - 1);
#ifdef Py_GIL_DISABLED
        if (cb == NULL) {
            // Normal (running-world) path: plain decref.
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
            Py_DECREF(obj);
            return;
        }
        // Stop-the-world path: merge the refcount explicitly and let the
        // callback handle objects that reached zero.
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
        if (refcount == 0) {
            cb(obj, state);
        }
#else
        Py_DECREF(obj);
#endif
    }
    else {
        PyMem_Free((void *)ptr);
    }
}
1179
1180
1181
#ifdef Py_GIL_DISABLED
1182
1183
// For deferred advance on free: the number of deferred items before advancing
1184
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1185
// want to process a chunk before it overflows.
1186
#define QSBR_DEFERRED_LIMIT 127
1187
1188
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1189
// helps limit memory usage due to QSBR delaying frees too long.
1190
#define QSBR_FREE_MEM_LIMIT 1024*1024
1191
1192
// Return true if the global write sequence should be advanced for a deferred
1193
// memory free.
1194
/* Decide whether the global QSBR write sequence should be advanced for a
 * deferred free of 'size' bytes.  Resets the per-thread deferral counters
 * and sets should_process whenever it returns true, so the accumulated
 * work gets processed soon. */
static bool
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
{
    // A single huge free advances immediately, without counting it
    // toward the running totals.
    if (size > QSBR_FREE_MEM_LIMIT) {
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    qsbr->deferred_count++;
    qsbr->deferred_memory += size;
    // Advance once enough items or bytes have accumulated.
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
        qsbr->deferred_count = 0;
        qsbr->deferred_memory = 0;
        qsbr->should_process = true;
        return true;
    }
    return false;
}
1214
#endif
1215
1216
/* Queue 'ptr' (possibly tagged with the decref bit) to be released once
 * all threads have passed a QSBR checkpoint.  'size' is a hint used to
 * decide when to advance the QSBR write sequence.  In builds with the
 * GIL this degenerates to an immediate free. */
static void
free_delayed(uintptr_t ptr, size_t size)
{
#ifndef Py_GIL_DISABLED
    free_work_item(ptr, NULL, NULL);
#else
    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
        interp->stoptheworld.world_stopped)
    {
        // Free immediately during interpreter shutdown or if the world is
        // stopped.
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
        free_work_item(ptr, NULL, NULL);
        return;
    }

    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    struct llist_node *head = &tstate->mem_free_queue;

    struct _mem_work_chunk *buf = NULL;
    if (!llist_empty(head)) {
        // Try to re-use the last buffer
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
            // already full
            buf = NULL;
        }
    }

    if (buf == NULL) {
        buf = PyMem_Calloc(1, sizeof(*buf));
        if (buf != NULL) {
            llist_insert_tail(head, &buf->node);
        }
    }

    if (buf == NULL) {
        // failed to allocate a buffer, free immediately.  A decref must
        // happen with the world stopped so the refcount merge is safe;
        // the actual dealloc is deferred until the world restarts.
        PyObject *to_dealloc = NULL;
        _PyEval_StopTheWorld(tstate->base.interp);
        if (work_item_should_decref(ptr)) {
            PyObject *obj = (PyObject *)(ptr - 1);
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
            if (refcount == 0) {
                to_dealloc = obj;
            }
        }
        else {
            PyMem_Free((void *)ptr);
        }
        _PyEval_StartTheWorld(tstate->base.interp);
        if (to_dealloc != NULL) {
            _Py_Dealloc(to_dealloc);
        }
        return;
    }

    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
    // Record the QSBR goal the item must wait for before being freed.
    uint64_t seq;
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
    }
    else {
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
    }
    buf->array[buf->wr_idx].ptr = ptr;
    buf->array[buf->wr_idx].qsbr_goal = seq;
    buf->wr_idx++;

    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
        // Normally the processing of delayed items is done from the eval
        // breaker.  Processing here is a safety measure to ensure too much
        // work does not accumulate.
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
    }
#endif
}
1294
1295
void
_PyMem_FreeDelayed(void *ptr, size_t size)
{
    /* Queue a QSBR-delayed free of a plain memory block.  The low pointer
     * bit must be clear: it is reserved as the "decref" tag in the work
     * queue.  NULL is a no-op. */
    assert(!((uintptr_t)ptr & 0x01));
    if (ptr == NULL) {
        return;
    }
    free_delayed((uintptr_t)ptr, size);
}
1303
1304
#ifdef Py_GIL_DISABLED
1305
void
1306
_PyObject_XDecRefDelayed(PyObject *ptr)
1307
{
1308
    assert(!((uintptr_t)ptr & 0x01));
1309
    if (ptr != NULL) {
1310
        // We use 0 as the size since we don't have an easy way to know the
1311
        // actual size.  If we are freeing many objects, the write sequence
1312
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1313
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1314
    }
1315
}
1316
#endif
1317
1318
#ifdef Py_GIL_DISABLED
1319
void
1320
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1321
{
1322
    PyObject *old = *ptr;
1323
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1324
    if (old == NULL) {
1325
        return;
1326
    }
1327
    if (!_Py_IsImmortal(old)) {
1328
         _PyObject_XDecRefDelayed(old);
1329
    }
1330
}
1331
#endif
1332
1333
/* Return the oldest chunk in the queue (the head's successor).  The
 * caller must ensure the queue is non-empty. */
static struct _mem_work_chunk *
work_queue_first(struct llist_node *head)
{
    return llist_data(head->next, struct _mem_work_chunk, node);
}
1338
1339
/* Drain as many queued work items as QSBR currently allows.  Items are
 * consumed in FIFO order; processing stops at the first item whose QSBR
 * goal has not yet been reached (later items can only have later goals).
 * If 'keep_empty' is true, the last chunk is retained (reset to empty)
 * to reduce re-allocations. */
static void
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
              bool keep_empty, delayed_dealloc_cb cb, void *state)
{
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
                return;
            }

            // Advance rd_idx BEFORE freeing: the free may re-enter this
            // queue and must not observe the item as pending.
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, cb, state);
            continue;
        }

        assert(buf->rd_idx == buf->wr_idx);
        if (keep_empty && buf->node.next == head) {
            // Keep the last buffer in the queue to reduce re-allocations
            buf->rd_idx = buf->wr_idx = 0;
            return;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1369
1370
/* Drain the interpreter-wide free queue (caller must hold queue->mutex)
 * and update the has_work flag so other threads can skip an empty queue
 * without taking the lock. */
static void
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
                     void *state)
{
    assert(PyMutex_IsLocked(&queue->mutex));
    process_queue(&queue->head, tstate, false, cb, state);

    int more_work = !llist_empty(&queue->head);
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
}
1381
1382
/* Opportunistically drain the interpreter-wide free queue: skip when the
 * has_work flag says there is nothing to do, and never block on the
 * mutex — if another thread is already processing, simply return. */
static void
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
                           void *state)
{
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
        return;
    }

    // Try to acquire the lock, but don't block if it's already held.
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
        process_interp_queue(queue, tstate, cb, state);
        PyMutex_Unlock(&queue->mutex);
    }
}
1397
1398
/* Process this thread's delayed-free queue and, opportunistically, the
 * interpreter-wide queue.  Called from the eval breaker and when a chunk
 * fills up (see free_delayed()). */
void
_PyMem_ProcessDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;

    // Clear the request flag set by should_advance_qsbr_for_free().
    tstate_impl->qsbr->should_process = false;

    // Process thread-local work
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);

    // Process shared interpreter work
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
}
1412
1413
/* Like _PyMem_ProcessDelayed(), but routes objects whose refcount reaches
 * zero to 'cb' instead of deallocating them directly (used during
 * stop-the-world processing; see free_work_item()). */
void
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
{
    PyInterpreterState *interp = tstate->interp;
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;

    // Process thread-local work
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);

    // Process shared interpreter work
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
}
1425
1426
/* Hand off a detaching thread's pending delayed frees to the interpreter:
 * an empty queue is simply discarded; otherwise it is merged into the
 * interpreter-wide queue, which is then processed immediately. */
void
_PyMem_AbandonDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;

    if (llist_empty(queue)) {
        return;
    }

    // Check if the queue contains one empty buffer
    struct _mem_work_chunk *buf = work_queue_first(queue);
    if (buf->rd_idx == buf->wr_idx) {
        llist_remove(&buf->node);
        PyMem_Free(buf);
        assert(llist_empty(queue));
        return;
    }

    PyMutex_Lock(&interp->mem_free_queue.mutex);

    // Merge the thread's work queue into the interpreter's work queue.
    llist_concat(&interp->mem_free_queue.head, queue);

    // Process the merged queue now (see gh-130794).
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);

    PyMutex_Unlock(&interp->mem_free_queue.mutex);

    assert(llist_empty(queue));  // the thread's queue is now empty
}
1458
1459
/* Release everything left in the interpreter's delayed-free queue at
 * shutdown, ignoring QSBR goals (no other threads run at this point). */
void
_PyMem_FiniDelayed(PyInterpreterState *interp)
{
    struct llist_node *head = &interp->mem_free_queue.head;
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        if (buf->rd_idx < buf->wr_idx) {
            // Free the remaining items immediately. There should be no other
            // threads accessing the memory at this point during shutdown.
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            buf->rd_idx++;
            // NB: free_work_item may re-enter or execute arbitrary code
            free_work_item(item->ptr, NULL, NULL);
            continue;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
1480
1481
/**************************/
1482
/* the "object" allocator */
1483
/**************************/
1484
1485
void *
1486
PyObject_Malloc(size_t size)
1487
1.32G
{
1488
    /* see PyMem_RawMalloc() */
1489
1.32G
    if (size > (size_t)PY_SSIZE_T_MAX)
1490
0
        return NULL;
1491
1.32G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1492
1.32G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1493
1.32G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1494
1.32G
    OBJECT_STAT_INC(allocations);
1495
1.32G
    return _PyObject.malloc(_PyObject.ctx, size);
1496
1.32G
}
1497
1498
void *
1499
PyObject_Calloc(size_t nelem, size_t elsize)
1500
0
{
1501
    /* see PyMem_RawMalloc() */
1502
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1503
0
        return NULL;
1504
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1505
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1506
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1507
0
    OBJECT_STAT_INC(allocations);
1508
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1509
0
}
1510
1511
void *
1512
PyObject_Realloc(void *ptr, size_t new_size)
1513
70.0M
{
1514
    /* see PyMem_RawMalloc() */
1515
70.0M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1516
0
        return NULL;
1517
70.0M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1518
70.0M
}
1519
1520
void
1521
PyObject_Free(void *ptr)
1522
1.32G
{
1523
1.32G
    OBJECT_STAT_INC(frees);
1524
1.32G
    _PyObject.free(_PyObject.ctx, ptr);
1525
1.32G
}
1526
1527
1528
/* Use __builtin_expect() where available to reduce overhead of
1529
   the valgrind checks */
1530
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1531
11.3G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1532
5.15G
#  define LIKELY(value) __builtin_expect((value), 1)
1533
#else
1534
#  define UNLIKELY(value) (value)
1535
#  define LIKELY(value) (value)
1536
#endif
1537
1538
#ifdef WITH_PYMALLOC
1539
1540
#ifdef WITH_VALGRIND
1541
#include <valgrind/valgrind.h>
1542
1543
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1544
static int running_on_valgrind = -1;
1545
#endif
1546
1547
typedef struct _obmalloc_state OMState;
1548
1549
/* obmalloc state for main interpreter and shared by all interpreters without
1550
 * their own obmalloc state.  By not explicitly initializing this structure, it
1551
 * will be allocated in the BSS which is a small performance win.  The radix
1552
 * tree arrays are fairly large but are sparsely used.  */
1553
static struct _obmalloc_state obmalloc_state_main;
1554
static bool obmalloc_state_initialized;
1555
1556
/* True if 'interp' manages its own obmalloc state rather than sharing the
 * main interpreter's (also true while the main interpreter finalizes). */
static inline int
has_own_state(PyInterpreterState *interp)
{
    return (_Py_IsMainInterpreter(interp) ||
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
            _Py_IsMainInterpreterFinalizing(interp));
}
1563
1564
/* Return the obmalloc state for the current interpreter (which may be
 * shared with the main interpreter; see has_own_state()). */
static inline OMState *
get_state(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
    return interp->obmalloc;
}
1571
1572
// These macros all rely on a local "state" variable.
1573
1.83G
#define usedpools (state->pools.used)
1574
2.71M
#define allarenas (state->mgmt.arenas)
1575
266
#define maxarenas (state->mgmt.maxarenas)
1576
30.3k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1577
23.0M
#define usable_arenas (state->mgmt.usable_arenas)
1578
15.9M
#define nfp2lasta (state->mgmt.nfp2lasta)
1579
18.9k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1580
6.20k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1581
6.90k
#define narenas_highwater (state->mgmt.narenas_highwater)
1582
462M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1583
1584
#ifdef WITH_MIMALLOC
1585
/* mi_heap_visit_blocks() callback: accumulate each visited area's used
 * block count into *allocated_blocks.  The heap/block/block_size
 * parameters are required by the callback signature but unused.
 * Returns 1 (true) to continue visiting. */
static bool count_blocks(
    const mi_heap_t* heap, const mi_heap_area_t* area,
    void* block, size_t block_size, void* allocated_blocks)
{
    *(size_t *)allocated_blocks += area->used;
    return 1;
}
1592
1593
/* Count live mimalloc blocks for 'interp'.  Free-threaded builds visit
 * every thread state's heaps plus the abandoned-heap pool; GIL builds
 * only see the current thread's default heap (noted TODO below). */
static Py_ssize_t
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
{
    size_t allocated_blocks = 0;
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
        }
    }

    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
                                        &allocated_blocks);
    }
#else
    // TODO(sgross): this only counts the current thread's blocks.
    mi_heap_t *heap = mi_heap_get_default();
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
#endif
    return allocated_blocks;
}
1618
#endif
1619
1620
/* Return the number of currently allocated blocks for 'interp': the raw
 * block count plus the per-pool counts of every allocated arena.  It is
 * a fatal error to call this on an interpreter that shares the main
 * interpreter's obmalloc state (see has_own_state()). */
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        return get_mimalloc_allocated_blocks(interp);
    }
#endif

#ifdef Py_DEBUG
    assert(has_own_state(interp));
#else
    if (!has_own_state(interp)) {
        _Py_FatalErrorFunc(__func__,
                           "the interpreter doesn't have its own allocator");
    }
#endif
    OMState *state = interp->obmalloc;

    // NULL means the obmalloc state was never initialized or already freed.
    if (state == NULL) {
        return 0;
    }

    Py_ssize_t n = raw_allocated_blocks;
    /* add up allocated blocks for used pools */
    for (uint i = 0; i < maxarenas; ++i) {
        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == 0) {
            continue;
        }

        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            n += p->ref.count;
        }
    }
    return n;
}
1662
1663
static void free_obmalloc_arenas(PyInterpreterState *interp);
1664
1665
/* Account for blocks still allocated when 'interp' is finalized (recorded
 * as runtime-level "interpreter leaks") and, when nothing leaked and the
 * state lives on the heap, free the obmalloc arenas and radix tree. */
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        return;
    }
#endif
    if (has_own_state(interp) && interp->obmalloc != NULL) {
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
        assert(has_own_state(interp) || leaked == 0);
        interp->runtime->obmalloc.interpreter_leaks += leaked;
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
            // then some of the memory allocated by obmalloc has not been
            // freed.  It might be safe to free the arenas in that case but
            // it's possible that extension modules are still using that
            // memory.  So, it is safer to not free and to leak.  Perhaps there
            // should be warning when this happens.  It should be possible to
            // use a tool like "-fsanitize=address" to track down these leaks.
            free_obmalloc_arenas(interp);
        }
    }
}
1691
1692
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1693
1694
/* We preserve the number of blocks leaked during runtime finalization,
1695
   so they can be reported if the runtime is initialized again. */
1696
// XXX We don't lose any information by dropping this,
1697
// so we should consider doing so.
1698
static Py_ssize_t last_final_leaks = 0;
1699
1700
/* Record the number of blocks still allocated at runtime shutdown (so it
 * can be reported if the runtime is initialized again) and reset the
 * per-runtime leak counter. */
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
{
    last_final_leaks = get_num_global_allocated_blocks(runtime);
    runtime->obmalloc.interpreter_leaks = 0;
}
1706
1707
/* Sum allocated blocks across all interpreters, plus blocks recorded as
 * leaked by finalized interpreters and by any previous runtime.  During
 * runtime finalization only the (remaining) main interpreter is counted;
 * otherwise all interpreters are walked with the world stopped. */
static Py_ssize_t
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
{
    Py_ssize_t total = 0;
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
        PyInterpreterState *interp = _PyInterpreterState_Main();
        if (interp == NULL) {
            /* We are at the very end of runtime finalization.
               We can't rely on finalizing->interp since that thread
               state is probably already freed, so we don't worry
               about it. */
            assert(PyInterpreterState_Head() == NULL);
        }
        else {
            assert(interp != NULL);
            /* It is probably the last interpreter but not necessarily. */
            assert(PyInterpreterState_Next(interp) == NULL);
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
        }
    }
    else {
        // Stop all threads so interpreter states cannot change underneath
        // the walk below.
        _PyEval_StopTheWorldAll(&_PyRuntime);
        HEAD_LOCK(runtime);
        PyInterpreterState *interp = PyInterpreterState_Head();
        assert(interp != NULL);
#ifdef Py_DEBUG
        int got_main = 0;
#endif
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
#ifdef Py_DEBUG
            if (_Py_IsMainInterpreter(interp)) {
                assert(!got_main);
                got_main = 1;
                assert(has_own_state(interp));
            }
#endif
            // Interpreters sharing the main obmalloc state are skipped to
            // avoid double-counting.
            if (has_own_state(interp)) {
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
            }
        }
        HEAD_UNLOCK(runtime);
        _PyEval_StartTheWorldAll(&_PyRuntime);
#ifdef Py_DEBUG
        assert(got_main);
#endif
    }
    total += runtime->obmalloc.interpreter_leaks;
    total += last_final_leaks;
    return total;
}
1757
1758
/* Public wrapper: global allocated-block count for the current runtime. */
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    return get_num_global_allocated_blocks(&_PyRuntime);
}
1763
1764
#if WITH_PYMALLOC_RADIX_TREE
1765
/*==========================================================================*/
1766
/* radix tree for tracking arena usage. */
1767
1768
5.83G
#define arena_map_root (state->usage.arena_map_root)
1769
#ifdef USE_INTERIOR_NODES
1770
28
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1771
28
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1772
#endif
1773
1774
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1775
 * it cannot be created */
1776
static inline Py_ALWAYS_INLINE arena_map_bot_t *
arena_map_get(OMState *state, pymem_block *p, int create)
{
#ifdef USE_INTERIOR_NODES
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
    /* Walk root -> mid -> bottom, lazily allocating interior nodes when
     * 'create' is true; without 'create', a missing node means the
     * address is not covered and NULL is returned. */
    int i1 = MAP_TOP_INDEX(p);
    if (arena_map_root.ptrs[i1] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1] = n;
        arena_map_mid_count++;
    }
    int i2 = MAP_MID_INDEX(p);
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
        arena_map_bot_count++;
    }
    return arena_map_root.ptrs[i1]->ptrs[i2];
#else
    /* Single-level tree: the root is the only (bottom) node. */
    return &arena_map_root;
#endif
}
1811
1812
1813
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1814
 * away 24 bits of the address.  That reduces the space requirement of
1815
 * the tree compared to similar radix tree page-map schemes.  In
1816
 * exchange for slashing the space requirement, it needs more
1817
 * computation to check an address.
1818
 *
1819
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1820
 * explain in decimal so let's say that the arena size is 100 bytes.
1821
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1822
 * pointer address is inside an actual arena, we have to check two ideal
1823
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1824
 * 300.  In the rare case that an arena is aligned in the ideal way
1825
 * (e.g. base address of arena is 200) then we only have to check one
1826
 * ideal address.
1827
 *
1828
 * The tree nodes for 200 and 300 both store the address of arena.
1829
 * There are two cases: the arena starts at a lower ideal arena and
1830
 * extends to this one, or the arena starts in this arena and extends to
1831
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1832
 * these two cases.
1833
 */
1834
1835
1836
/* mark or unmark addresses covered by arena */
static int
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
{
    /* Record (or erase, when is_used == 0) the arena starting at
     * `arena_base` in the radix tree.  Returns 1 on success, 0 if a
     * tree node could not be allocated (only possible when marking).
     */
    /* sanity check that IGNORE_BITS is correct */
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
    /* `is_used` doubles as the `create` flag: when unmarking, the node
     * must already exist, so no allocation is requested. */
    arena_map_bot_t *n_hi = arena_map_get(
            state, (pymem_block *)arena_base, is_used);
    if (n_hi == NULL) {
        assert(is_used); /* otherwise node should already exist */
        return 0; /* failed to allocate space for node */
    }
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
    /* tail = offset of arena_base within its ideal arena-sized region */
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
    if (tail == 0) {
        /* is ideal arena address */
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
    }
    else {
        /* arena_base address is not ideal (aligned to arena size) and
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
         * for the next arena.  Note that it might be in different MAP_TOP
         * and MAP_MID nodes as well so we need to call arena_map_get()
         * again (do the full tree traversal).
         */
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
        /* If arena_base is a legit arena address, so is arena_base_next - 1
         * (last address in arena).  If arena_base_next overflows then it
         * must overflow to 0.  However, that would mean arena_base was
         * "ideal" and we should not be in this case. */
        assert(arena_base < arena_base_next);
        arena_map_bot_t *n_lo = arena_map_get(
                state, (pymem_block *)arena_base_next, is_used);
        if (n_lo == NULL) {
            assert(is_used); /* otherwise should already exist */
            /* roll back the tail_hi write above so the tree stays
             * consistent before reporting failure */
            n_hi->arenas[i3].tail_hi = 0;
            return 0; /* failed to allocate space for node */
        }
        int i3_next = MAP_BOT_INDEX(arena_base_next);
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
    }
    return 1;
}
1880
1881
/* Return true if 'p' is a pointer inside an obmalloc arena.
1882
 * _PyObject_Free() calls this so it needs to be very fast. */
1883
static int
1884
arena_map_is_used(OMState *state, pymem_block *p)
1885
2.02G
{
1886
2.02G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
1887
2.02G
    if (n == NULL) {
1888
239M
        return 0;
1889
239M
    }
1890
1.78G
    int i3 = MAP_BOT_INDEX(p);
1891
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1892
1.78G
    int32_t hi = n->arenas[i3].tail_hi;
1893
1.78G
    int32_t lo = n->arenas[i3].tail_lo;
1894
1.78G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1895
1.78G
    return (tail < lo) || (tail >= hi && hi != 0);
1896
2.02G
}
1897
1898
/* end of radix tree logic */
1899
/*==========================================================================*/
1900
#endif /* WITH_PYMALLOC_RADIX_TREE */
1901
1902
1903
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object
 * describing the new arena.  It's expected that the caller will set
 * `usable_arenas` to the return value.
 */
static struct arena_object*
new_arena(OMState *state)
{
    struct arena_object* arenaobj;
    uint excess;        /* number of bytes above pool alignment */
    void *address;

    /* Lazily resolve PYTHONMALLOCSTATS on first call (-1 == unknown),
     * then cache the answer in the runtime state. */
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
    if (debug_stats == -1) {
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
        debug_stats = (opt != NULL && *opt != '\0');
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
    }
    if (debug_stats) {
        _PyObject_DebugMallocStats(stderr);
    }

    if (unused_arena_objects == NULL) {
        uint i;
        uint numarenas;
        size_t nbytes;

        /* Double the number of arena objects on each allocation.
         * Note that it's possible for `numarenas` to overflow.
         */
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
        if (numarenas <= maxarenas)
            return NULL;                /* overflow */
#if SIZEOF_SIZE_T <= SIZEOF_INT
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
            return NULL;                /* overflow */
#endif
        nbytes = numarenas * sizeof(*allarenas);
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
        if (arenaobj == NULL)
            return NULL;
        allarenas = arenaobj;

        /* We might need to fix pointers that were copied.  However,
         * new_arena only gets called when all the pages in the
         * previous arenas are full.  Thus, there are *no* pointers
         * into the old array. Thus, we don't have to worry about
         * invalid pointers.  Just to be sure, some asserts:
         */
        assert(usable_arenas == NULL);
        assert(unused_arena_objects == NULL);

        /* Put the new arenas on the unused_arena_objects list. */
        for (i = maxarenas; i < numarenas; ++i) {
            allarenas[i].address = 0;              /* mark as unassociated */
            allarenas[i].nextarena = i < numarenas - 1 ?
                                        &allarenas[i+1] : NULL;
        }

        /* Update globals. */
        unused_arena_objects = &allarenas[maxarenas];
        maxarenas = numarenas;
    }

    /* Take the next available arena object off the head of the list. */
    assert(unused_arena_objects != NULL);
    arenaobj = unused_arena_objects;
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
#if WITH_PYMALLOC_RADIX_TREE
    if (address != NULL) {
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
            /* marking arena in radix tree failed, abort */
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
            address = NULL;
        }
    }
#endif
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
         */
        arenaobj->nextarena = unused_arena_objects;
        unused_arena_objects = arenaobj;
        return NULL;
    }
    arenaobj->address = (uintptr_t)address;

    ++narenas_currently_allocated;
    ++ntimes_arena_allocated;
    if (narenas_currently_allocated > narenas_highwater)
        narenas_highwater = narenas_currently_allocated;
    arenaobj->freepools = NULL;
    /* pool_address <- first pool-aligned address in the arena
       nfreepools <- number of whole pools that fit after alignment */
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
    if (excess != 0) {
        /* Arena start isn't pool-aligned: the first partial pool is lost. */
        --arenaobj->nfreepools;
        arenaobj->pool_address += POOL_SIZE - excess;
    }
    arenaobj->ntotalpools = arenaobj->nfreepools;

    return arenaobj;
}
2010
2011
2012
2013
#if WITH_PYMALLOC_RADIX_TREE
2014
/* Return true if and only if P is an address that was allocated by
2015
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2016
 */
2017
static bool
2018
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2019
2.02G
{
2020
2.02G
    return arena_map_is_used(state, p);
2021
2.02G
}
2022
#else
2023
/*
2024
address_in_range(P, POOL)
2025
2026
Return true if and only if P is an address that was allocated by pymalloc.
2027
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2028
(the caller is asked to compute this because the macro expands POOL more than
2029
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2030
variable and pass the latter to the macro; because address_in_range is
2031
called on every alloc/realloc/free, micro-efficiency is important here).
2032
2033
Tricky:  Let B be the arena base address associated with the pool, B =
2034
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2035
2036
    B <= P < B + ARENA_SIZE
2037
2038
Subtracting B throughout, this is true iff
2039
2040
    0 <= P-B < ARENA_SIZE
2041
2042
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2043
2044
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2045
before the first arena has been allocated.  `arenas` is still NULL in that
2046
case.  We're relying on that maxarenas is also 0 in that case, so that
2047
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2048
into a NULL arenas.
2049
2050
Details:  given P and POOL, the arena_object corresponding to P is AO =
2051
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2052
stores, etc), POOL is the correct address of P's pool, AO.address is the
2053
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2054
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2055
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2056
controls P.
2057
2058
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2059
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2060
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2061
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2062
control P.
2063
2064
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2065
allocated arena, obmalloc controls all the memory in slice AO.address :
2066
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2067
so P doesn't lie in that slice, so the macro correctly reports that P is not
2068
controlled by obmalloc.
2069
2070
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2071
arena_object (one not currently associated with an allocated arena),
2072
AO.address is 0, and the second test in the macro reduces to:
2073
2074
    P < ARENA_SIZE
2075
2076
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2077
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2078
of the test still passes, and the third clause (AO.address != 0) is necessary
2079
to get the correct result:  AO.address is 0 in this case, so the macro
2080
correctly reports that P is not controlled by obmalloc (despite that P lies in
2081
slice AO.address : AO.address + ARENA_SIZE).
2082
2083
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2084
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2085
corresponded to a currently-allocated arena, so the "P is not controlled by
2086
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2087
was impossible.
2088
2089
Note that the logic is excruciating, and reading up possibly uninitialized
2090
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2091
creates problems for some memory debuggers.  The overwhelming advantage is
2092
that this test determines whether an arbitrary address is controlled by
2093
obmalloc in a small constant time, independent of the number of arenas
2094
obmalloc controls.  Since this test is needed at every entry point, it's
2095
extremely desirable that it be this fast.
2096
*/
2097
2098
static bool _Py_NO_SANITIZE_ADDRESS
            _Py_NO_SANITIZE_THREAD
            _Py_NO_SANITIZE_MEMORY
address_in_range(OMState *state, void *p, poolp pool)
{
    /* Return true iff P was allocated by pymalloc; see the large comment
     * above for the full correctness argument.  The sanitizer exemptions
     * are needed because this may deliberately read memory that obmalloc
     * does not own (pool->arenaindex of a foreign block). */
    // Since address_in_range may be reading from memory which was not allocated
    // by Python, it is important that pool->arenaindex is read only once, as
    // another thread may be concurrently modifying the value without holding
    // the GIL. The following dance forces the compiler to read pool->arenaindex
    // only once.
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
    /* Unsigned subtraction folds the "0 <= p - base" and
     * "p - base < ARENA_SIZE" checks into a single comparison; the
     * final clause rejects unassociated arena slots (address == 0). */
    return arenaindex < maxarenas &&
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
        allarenas[arenaindex].address != 0;
}
2113
2114
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2115
2116
/*==========================================================================*/
2117
2118
// Called when freelist is exhausted.  Extend the freelist if there is
// space for a block.  Otherwise, remove this pool from usedpools.
static void
pymalloc_pool_extend(poolp pool, uint size)
{
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
        /* There is room for another block. */
        /* Carve the next never-used block off the tail of the pool and
         * make it the (single-entry, NULL-terminated) free list. */
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
        pool->nextoffset += INDEX2SIZE(size);
        *(pymem_block **)(pool->freeblock) = NULL;
        return;
    }

    /* Pool is full, unlink from used pools. */
    /* NOTE: `pool` is reused below to hold the *previous* pool so the
     * neighbors can be stitched together; the full pool itself keeps
     * its stale links and lives in no list until a block is freed. */
    poolp next;
    next = pool->nextpool;
    pool = pool->prevpool;
    next->prevpool = pool;
    pool->nextpool = next;
}
2138
2139
/* called when pymalloc_alloc can not allocate a block from usedpool.
 * This function takes new pool and allocate a block from it.
 * Returns the first allocated block, or NULL if no arena could be
 * obtained.  `size` is the size-class index (not a byte count).
 */
static void*
allocate_from_new_pool(OMState *state, uint size)
{
    /* There isn't a pool of the right size class immediately
     * available:  use a free pool.
     */
    if (UNLIKELY(usable_arenas == NULL)) {
        /* No arena has a free pool:  allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
        if (narenas_currently_allocated >= MAX_ARENAS) {
            return NULL;
        }
#endif
        usable_arenas = new_arena(state);
        if (usable_arenas == NULL) {
            return NULL;
        }
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
    }
    assert(usable_arenas->address != 0);

    /* This arena already had the smallest nfreepools value, so decreasing
     * nfreepools doesn't change that, and we don't need to rearrange the
     * usable_arenas list.  However, if the arena becomes wholly allocated,
     * we need to remove its arena_object from usable_arenas.
     */
    assert(usable_arenas->nfreepools > 0);
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
        /* It's the last of this size, so there won't be any. */
        nfp2lasta[usable_arenas->nfreepools] = NULL;
    }
    /* If any free pools will remain, it will be the new smallest. */
    if (usable_arenas->nfreepools > 1) {
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
    }

    /* Try to get a cached free pool. */
    poolp pool = usable_arenas->freepools;
    if (LIKELY(pool != NULL)) {
        /* Unlink from cached pools. */
        usable_arenas->freepools = pool->nextpool;
        usable_arenas->nfreepools--;
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
            /* Wholly allocated:  remove. */
            assert(usable_arenas->freepools == NULL);
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
        else {
            /* nfreepools > 0:  it must be that freepools
             * isn't NULL, or that we haven't yet carved
             * off all the arena's pools for the first
             * time.
             */
            assert(usable_arenas->freepools != NULL ||
                   usable_arenas->pool_address <=
                   (pymem_block*)usable_arenas->address +
                       ARENA_SIZE - POOL_SIZE);
        }
    }
    else {
        /* Carve off a new pool. */
        assert(usable_arenas->nfreepools > 0);
        assert(usable_arenas->freepools == NULL);
        pool = (poolp)usable_arenas->pool_address;
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
                                 ARENA_SIZE - POOL_SIZE);
        pool->arenaindex = (uint)(usable_arenas - allarenas);
        assert(&allarenas[pool->arenaindex] == usable_arenas);
        /* DUMMY_SIZE_IDX forces the "initialize header" path below for
         * a never-before-used pool. */
        pool->szidx = DUMMY_SIZE_IDX;
        usable_arenas->pool_address += POOL_SIZE;
        --usable_arenas->nfreepools;

        if (usable_arenas->nfreepools == 0) {
            assert(usable_arenas->nextarena == NULL ||
                   usable_arenas->nextarena->prevarena ==
                   usable_arenas);
            /* Unlink the arena:  it is completely allocated. */
            usable_arenas = usable_arenas->nextarena;
            if (usable_arenas != NULL) {
                usable_arenas->prevarena = NULL;
                assert(usable_arenas->address != 0);
            }
        }
    }

    /* Frontlink to used pools. */
    pymem_block *bp;
    poolp next = usedpools[size + size]; /* == prev */
    pool->nextpool = next;
    pool->prevpool = next;
    next->nextpool = pool;
    next->prevpool = pool;
    pool->ref.count = 1;
    if (pool->szidx == size) {
        /* Luckily, this pool last contained blocks
         * of the same size class, so its header
         * and free list are already initialized.
         */
        bp = pool->freeblock;
        assert(bp != NULL);
        pool->freeblock = *(pymem_block **)bp;
        return bp;
    }
    /*
     * Initialize the pool header, set up the free list to
     * contain just the second block, and return the first
     * block.
     */
    pool->szidx = size;
    size = INDEX2SIZE(size);    /* from here on, `size` is in bytes */
    bp = (pymem_block *)pool + POOL_OVERHEAD;
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
    pool->maxnextoffset = POOL_SIZE - size;
    pool->freeblock = bp + size;
    *(pymem_block **)(pool->freeblock) = NULL;
    return bp;
}
2269
2270
/* pymalloc allocator

   Return a pointer to newly allocated memory if pymalloc allocated memory.

   Return NULL if pymalloc failed to allocate the memory block: on bigger
   requests, on error in the code below (as a last chance to serve the request)
   or when the max memory limit has been reached.
*/
static inline void*
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
{
#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind == -1)) {
        running_on_valgrind = RUNNING_ON_VALGRIND;
    }
    if (UNLIKELY(running_on_valgrind)) {
        /* Let valgrind see every allocation: bypass pymalloc entirely. */
        return NULL;
    }
#endif

    if (UNLIKELY(nbytes == 0)) {
        return NULL;
    }
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
        /* Too big for the small-object allocator; caller falls back. */
        return NULL;
    }

    /* Map the byte count to a size-class index. */
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
    poolp pool = usedpools[size + size];
    pymem_block *bp;

    /* A size class's usedpools entry points to itself when empty. */
    if (LIKELY(pool != pool->nextpool)) {
        /*
         * There is a used pool for this size class.
         * Pick up the head block of its free list.
         */
        ++pool->ref.count;
        bp = pool->freeblock;
        assert(bp != NULL);

        /* Pop the head; the next free block (possibly NULL) is stored
         * in the block itself. */
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
            // Reached the end of the free list, try to extend it.
            pymalloc_pool_extend(pool, size);
        }
    }
    else {
        /* There isn't a pool of the right size class immediately
         * available:  use a free pool.
         */
        bp = allocate_from_new_pool(state, size);
    }

    return (void *)bp;
}
2324
2325
2326
void *
2327
_PyObject_Malloc(void *ctx, size_t nbytes)
2328
1.87G
{
2329
1.87G
    OMState *state = get_state();
2330
1.87G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2331
1.87G
    if (LIKELY(ptr != NULL)) {
2332
1.63G
        return ptr;
2333
1.63G
    }
2334
2335
231M
    ptr = PyMem_RawMalloc(nbytes);
2336
231M
    if (ptr != NULL) {
2337
231M
        raw_allocated_blocks++;
2338
231M
    }
2339
231M
    return ptr;
2340
1.87G
}
2341
2342
2343
void *
2344
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2345
50.0M
{
2346
50.0M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2347
50.0M
    size_t nbytes = nelem * elsize;
2348
2349
50.0M
    OMState *state = get_state();
2350
50.0M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2351
50.0M
    if (LIKELY(ptr != NULL)) {
2352
49.9M
        memset(ptr, 0, nbytes);
2353
49.9M
        return ptr;
2354
49.9M
    }
2355
2356
114k
    ptr = PyMem_RawCalloc(nelem, elsize);
2357
114k
    if (ptr != NULL) {
2358
114k
        raw_allocated_blocks++;
2359
114k
    }
2360
114k
    return ptr;
2361
50.0M
}
2362
2363
2364
static void
2365
insert_to_usedpool(OMState *state, poolp pool)
2366
143M
{
2367
143M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2368
2369
143M
    uint size = pool->szidx;
2370
143M
    poolp next = usedpools[size + size];
2371
143M
    poolp prev = next->prevpool;
2372
2373
    /* insert pool before next:   prev <-> pool <-> next */
2374
143M
    pool->nextpool = next;
2375
143M
    pool->prevpool = prev;
2376
143M
    next->prevpool = pool;
2377
143M
    prev->nextpool = pool;
2378
143M
}
2379
2380
/* Move a now-empty `pool` from its usedpools[] list onto its arena's
 * freepools list, then rebalance the usable_arenas list (which is kept
 * sorted by ascending nfreepools) — possibly returning a wholly-free
 * arena to the OS. */
static void
insert_to_freepool(OMState *state, poolp pool)
{
    /* Unlink pool from its (doubly-linked) usedpools[] list. */
    poolp next = pool->nextpool;
    poolp prev = pool->prevpool;
    next->prevpool = prev;
    prev->nextpool = next;

    /* Link the pool to freepools.  This is a singly-linked
     * list, and pool->prevpool isn't used there.
     */
    struct arena_object *ao = &allarenas[pool->arenaindex];
    pool->nextpool = ao->freepools;
    ao->freepools = pool;
    uint nf = ao->nfreepools;
    /* If this is the rightmost arena with this number of free pools,
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
     * are no arenas in usable_arenas with that value.
     */
    struct arena_object* lastnf = nfp2lasta[nf];
    assert((nf == 0 && lastnf == NULL) ||
           (nf > 0 &&
            lastnf != NULL &&
            lastnf->nfreepools == nf &&
            (lastnf->nextarena == NULL ||
             nf < lastnf->nextarena->nfreepools)));
    if (lastnf == ao) {  /* it is the rightmost */
        struct arena_object* p = ao->prevarena;
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
    }
    ao->nfreepools = ++nf;

    /* All the rest is arena management.  We just freed
     * a pool, and there are 4 cases for arena mgmt:
     * 1. If all the pools are free, return the arena to
     *    the system free().  Except if this is the last
     *    arena in the list, keep it to avoid thrashing:
     *    keeping one wholly free arena in the list avoids
     *    pathological cases where a simple loop would
     *    otherwise provoke needing to allocate and free an
     *    arena on every iteration.  See bpo-37257.
     * 2. If this is the only free pool in the arena,
     *    add the arena back to the `usable_arenas` list.
     * 3. If the "next" arena has a smaller count of free
     *    pools, we have to "slide this arena right" to
     *    restore that usable_arenas is sorted in order of
     *    nfreepools.
     * 4. Else there's nothing more to do.
     */
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
        /* Case 1.  First unlink ao from usable_arenas.
         */
        assert(ao->prevarena == NULL ||
               ao->prevarena->address != 0);
        assert(ao->nextarena == NULL ||
               ao->nextarena->address != 0);

        /* Fix the pointer in the prevarena, or the
         * usable_arenas pointer.
         */
        if (ao->prevarena == NULL) {
            usable_arenas = ao->nextarena;
            assert(usable_arenas == NULL ||
                   usable_arenas->address != 0);
        }
        else {
            assert(ao->prevarena->nextarena == ao);
            ao->prevarena->nextarena =
                ao->nextarena;
        }
        /* Fix the pointer in the nextarena. */
        if (ao->nextarena != NULL) {
            assert(ao->nextarena->prevarena == ao);
            ao->nextarena->prevarena =
                ao->prevarena;
        }
        /* Record that this arena_object slot is
         * available to be reused.
         */
        ao->nextarena = unused_arena_objects;
        unused_arena_objects = ao;

#if WITH_PYMALLOC_RADIX_TREE
        /* mark arena region as not under control of obmalloc */
        arena_map_mark_used(state, ao->address, 0);
#endif

        /* Free the entire arena. */
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
        ao->address = 0;                        /* mark unassociated */
        --narenas_currently_allocated;

        return;
    }

    if (nf == 1) {
        /* Case 2.  Put ao at the head of
         * usable_arenas.  Note that because
         * ao->nfreepools was 0 before, ao isn't
         * currently on the usable_arenas list.
         */
        ao->nextarena = usable_arenas;
        ao->prevarena = NULL;
        if (usable_arenas)
            usable_arenas->prevarena = ao;
        usable_arenas = ao;
        assert(usable_arenas->address != 0);
        if (nfp2lasta[1] == NULL) {
            nfp2lasta[1] = ao;
        }

        return;
    }

    /* If this arena is now out of order, we need to keep
     * the list sorted.  The list is kept sorted so that
     * the "most full" arenas are used first, which allows
     * the nearly empty arenas to be completely freed.  In
     * a few un-scientific tests, it seems like this
     * approach allowed a lot more memory to be freed.
     */
    /* If this is the only arena with nf, record that. */
    if (nfp2lasta[nf] == NULL) {
        nfp2lasta[nf] = ao;
    } /* else the rightmost with nf doesn't change */
    /* If this was the rightmost of the old size, it remains in place. */
    if (ao == lastnf) {
        /* Case 4.  Nothing to do. */
        return;
    }
    /* If ao were the only arena in the list, the last block would have
     * gotten us out.
     */
    assert(ao->nextarena != NULL);

    /* Case 3:  We have to move the arena towards the end of the list,
     * because it has more free pools than the arena to its right.  It needs
     * to move to follow lastnf.
     * First unlink ao from usable_arenas.
     */
    if (ao->prevarena != NULL) {
        /* ao isn't at the head of the list */
        assert(ao->prevarena->nextarena == ao);
        ao->prevarena->nextarena = ao->nextarena;
    }
    else {
        /* ao is at the head of the list */
        assert(usable_arenas == ao);
        usable_arenas = ao->nextarena;
    }
    ao->nextarena->prevarena = ao->prevarena;
    /* And insert after lastnf. */
    ao->prevarena = lastnf;
    ao->nextarena = lastnf->nextarena;
    if (ao->nextarena != NULL) {
        ao->nextarena->prevarena = ao;
    }
    lastnf->nextarena = ao;
    /* Verify that the swaps worked. */
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
    assert((usable_arenas == ao && ao->prevarena == NULL)
           || ao->prevarena->nextarena == ao);
}
2546
2547
/* Free a memory block allocated by pymalloc_alloc().
   Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
{
    assert(p != NULL);

#ifdef WITH_VALGRIND
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    /* POOL_ADDR is speculative here: it is only meaningful if p turns
     * out to be inside an obmalloc arena, which address_in_range decides. */
    poolp pool = POOL_ADDR(p);
    if (UNLIKELY(!address_in_range(state, p, pool))) {
        return 0;
    }
    /* We allocated this address. */

    /* Link p to the start of the pool's freeblock list.  Since
     * the pool had at least the p block outstanding, the pool
     * wasn't empty (so it's already in a usedpools[] list, or
     * was full and is in no list -- it's not in the freeblocks
     * list in any case).
     */
    assert(pool->ref.count > 0);            /* else it was empty */
    pymem_block *lastfree = pool->freeblock;
    /* Push p onto the free list by storing the old head inside the
     * block itself. */
    *(pymem_block **)p = lastfree;
    pool->freeblock = (pymem_block *)p;
    pool->ref.count--;

    if (UNLIKELY(lastfree == NULL)) {
        /* Pool was full, so doesn't currently live in any list:
         * link it to the front of the appropriate usedpools[] list.
         * This mimics LRU pool usage for new allocations and
         * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
        insert_to_usedpool(state, pool);
        return 1;
    }

    /* freeblock wasn't NULL, so the pool wasn't full,
     * and the pool is in a usedpools[] list.
     */
    if (LIKELY(pool->ref.count != 0)) {
        /* pool isn't empty:  leave it in usedpools */
        return 1;
    }

    /* Pool is now empty:  unlink from usedpools, and
     * link to the front of freepools.  This ensures that
     * previously freed pools will be allocated later
     * (being not referenced, they are perhaps paged out).
     */
    insert_to_freepool(state, pool);
    return 1;
}
2606
2607
2608
void
2609
_PyObject_Free(void *ctx, void *p)
2610
1.92G
{
2611
    /* PyObject_Free(NULL) has no effect */
2612
1.92G
    if (p == NULL) {
2613
1.99M
        return;
2614
1.99M
    }
2615
2616
1.91G
    OMState *state = get_state();
2617
1.91G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2618
        /* pymalloc didn't allocate this address */
2619
231M
        PyMem_RawFree(p);
2620
231M
        raw_allocated_blocks--;
2621
231M
    }
2622
1.91G
}
2623
2624
2625
/* pymalloc realloc.

   If nbytes==0, then as the Python docs promise, we do not treat this like
   free(p), and return a non-NULL result.

   Return 1 if pymalloc reallocated memory and wrote the new pointer into
   newptr_p.

   Return 0 if pymalloc didn't allocated p. */
static int
pymalloc_realloc(OMState *state, void *ctx,
                 void **newptr_p, void *p, size_t nbytes)
{
    void *bp;
    poolp pool;
    size_t size;

    assert(p != NULL);

#ifdef WITH_VALGRIND
    /* Treat running_on_valgrind == -1 the same as 0 */
    if (UNLIKELY(running_on_valgrind > 0)) {
        return 0;
    }
#endif

    pool = POOL_ADDR(p);
    if (!address_in_range(state, p, pool)) {
        /* pymalloc is not managing this block.

           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
           over this block.  However, if we do, we need to copy the valid data
           from the C-managed block to one of our blocks, and there's no
           portable way to know how much of the memory space starting at p is
           valid.

           As bug 1185883 pointed out the hard way, it's possible that the
           C-managed block is "at the end" of allocated VM space, so that a
           memory fault can occur if we try to copy nbytes bytes starting at p.
           Instead we punt: let C continue to manage this block. */
        return 0;
    }

    /* pymalloc is in charge of this block */
    /* The pool header records the size-class index; INDEX2SIZE() recovers
       the capacity of every block in this pool. */
    size = INDEX2SIZE(pool->szidx);
    if (nbytes <= size) {
        /* The block is staying the same or shrinking.

           If it's shrinking, there's a tradeoff: it costs cycles to copy the
           block to a smaller size class, but it wastes memory not to copy it.

           The compromise here is to copy on shrink only if at least 25% of
           size can be shaved off. */
        if (4 * nbytes > 3 * size) {
            /* It's the same, or shrinking and new/old > 3/4. */
            *newptr_p = p;
            return 1;
        }
        /* Shrinking enough to bother: copy only the surviving nbytes. */
        size = nbytes;
    }

    /* Grow (or aggressive shrink): allocate fresh, copy, free the old block.
       On allocation failure, *newptr_p is set to NULL and p is left intact. */
    bp = _PyObject_Malloc(ctx, nbytes);
    if (bp != NULL) {
        memcpy(bp, p, size);
        _PyObject_Free(ctx, p);
    }
    *newptr_p = bp;
    return 1;
}
2694
2695
2696
void *
2697
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2698
330M
{
2699
330M
    void *ptr2;
2700
2701
330M
    if (ptr == NULL) {
2702
224M
        return _PyObject_Malloc(ctx, nbytes);
2703
224M
    }
2704
2705
106M
    OMState *state = get_state();
2706
106M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2707
98.2M
        return ptr2;
2708
98.2M
    }
2709
2710
8.52M
    return PyMem_RawRealloc(ptr, nbytes);
2711
106M
}
2712
2713
#else   /* ! WITH_PYMALLOC */
2714
2715
/*==========================================================================*/
2716
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2717
 * only be used by extensions that are compiled with pymalloc enabled. */
2718
2719
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc is disabled: no per-interpreter block accounting is kept. */
    return 0;
}
2724
2725
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    /* pymalloc is disabled: no global block accounting is kept. */
    return 0;
}
2730
2731
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* pymalloc is disabled: nothing to finalize. */
    return;
}
2736
2737
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
{
    /* pymalloc is disabled: nothing to finalize. */
    return;
}
2742
2743
#endif /* WITH_PYMALLOC */
2744
2745
2746
/*==========================================================================*/
2747
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2748
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2749
 */
2750
2751
/* Uncomment this define to add the "serialno" field */
2752
/* #define PYMEM_DEBUG_SERIALNO */
2753
2754
#ifdef PYMEM_DEBUG_SERIALNO
2755
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2756
2757
/* serialno is always incremented via calling this routine.  The point is
2758
 * to supply a single place to set a breakpoint.
2759
 */
2760
static void
2761
bumpserialno(void)
2762
{
2763
    ++serialno;
2764
}
2765
#endif
2766
2767
0
#define SST SIZEOF_SIZE_T
2768
2769
#ifdef PYMEM_DEBUG_SERIALNO
2770
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2771
#else
2772
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2773
#endif
2774
2775
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2776
static size_t
2777
read_size_t(const void *p)
2778
0
{
2779
0
    const uint8_t *q = (const uint8_t *)p;
2780
0
    size_t result = *q++;
2781
0
    int i;
2782
2783
0
    for (i = SST; --i > 0; ++q)
2784
0
        result = (result << 8) | *q;
2785
0
    return result;
2786
0
}
2787
2788
/* Write n as a big-endian size_t, MSB at address p, LSB at
2789
 * p + sizeof(size_t) - 1.
2790
 */
2791
static void
2792
write_size_t(void *p, size_t n)
2793
0
{
2794
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2795
0
    int i;
2796
2797
0
    for (i = SST; --i >= 0; --q) {
2798
0
        *q = (uint8_t)(n & 0xff);
2799
0
        n >>= 8;
2800
0
    }
2801
0
}
2802
2803
/* memset() `nbytes` of the user region at `data` with byte `c`.
 *
 * In the free-threaded build, PyObject allocations keep mimalloc metadata
 * in their first few bytes ("debug offset"); for the 'o' domain we skip
 * that prefix so overwriting a freed/new block does not corrupt it.
 * `is_alloc` distinguishes fresh allocations (offset taken from the current
 * heap) from frees (offset taken from the block's owning page). */
static void
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
               bool is_alloc)
{
#ifdef Py_GIL_DISABLED
    if (api->api_id == 'o') {
        // Don't overwrite the first few bytes of a PyObject allocation in the
        // free-threaded build
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
        size_t debug_offset;
        if (is_alloc) {
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
        }
        else {
            char *alloc = (char *)data - 2*SST;  // start of the allocation
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
        }
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
        if (debug_offset < nbytes) {
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
        }
        return;
    }
#endif
    memset(data, c, nbytes);
}
2829
2830
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2831
   fills them with useful stuff, here calling the underlying malloc's result p:
2832
2833
p[0: S]
2834
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2835
    to read in a memory dump).
2836
p[S]
2837
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2838
p[S+1: 2*S]
2839
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2840
p[2*S: 2*S+n]
2841
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2842
    Used to catch reference to uninitialized memory.
2843
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2844
    handled the request itself.
2845
p[2*S+n: 2*S+n+S]
2846
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
2847
p[2*S+n+S: 2*S+n+2*S]
2848
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2849
    and _PyMem_DebugRealloc.
2850
    This is a big-endian size_t.
2851
    If "bad memory" is detected later, the serial number gives an
2852
    excellent way to set a breakpoint on the next run, to capture the
2853
    instant at which this block was passed out.
2854
2855
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2856
for 3 * S extra bytes, and omits the last serialno field.
2857
*/
2858
2859
/* Common implementation of the debug allocator's malloc/calloc paths.
 *
 * Allocates nbytes + PYMEM_DEBUG_EXTRA_BYTES from the wrapped allocator,
 * writes the header (requested size, API id, forbidden pad) and trailer
 * (forbidden pad, optional serial number) around the user region, and
 * returns a pointer to the user region.  Returns NULL on overflow or if
 * the underlying allocator fails. */
static void *
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
{
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *p;           /* base address of malloc'ed pad block */
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */

    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
                ^--- p    ^--- data   ^--- tail
       S: nbytes stored as size_t
       I: API identifier (1 byte)
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
       C: Clean bytes used later to store actual data
       N: Serial number stored as size_t

       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
       is omitted. */

    if (use_calloc) {
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
    }
    else {
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
    }
    if (p == NULL) {
        return NULL;
    }
    data = p + 2*SST;

#ifdef PYMEM_DEBUG_SERIALNO
    bumpserialno();
#endif

    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
    write_size_t(p, nbytes);
    p[SST] = (uint8_t)api->api_id;
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    /* calloc already zeroed the region; only malloc'ed data gets the
       CLEANBYTE fill that flags reads of uninitialized memory. */
    if (nbytes > 0 && !use_calloc) {
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
    }

    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, serialno);
#endif

    return data;
}
2918
2919
void *
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
{
    /* Thin wrapper: use_calloc=0 selects the malloc path. */
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
}
2924
2925
void *
2926
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2927
0
{
2928
0
    size_t nbytes;
2929
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2930
0
    nbytes = nelem * elsize;
2931
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2932
0
}
2933
2934
2935
/* The debug free first checks the 2*SST bytes on each end for sanity (in
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
   Then fills the original bytes with PYMEM_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMem_DebugRawFree(void *ctx, void *p)
{
    /* PyMem_Free(NULL) has no effect */
    if (p == NULL) {
        return;
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
    size_t nbytes;

    /* Aborts the process if the pads or API id are corrupted. */
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
    nbytes = read_size_t(q);
    /* Poison the user region plus the trailer (everything after the
       2*SST-byte header, which is poisoned separately below). */
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
    memset(q, PYMEM_DEADBYTE, 2*SST);
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
    api->alloc.free(api->alloc.ctx, q);
}
2959
2960
2961
/* Debug realloc.
 *
 * Validates the existing block, poisons (part of) it with DEADBYTE while
 * saving up to 2*ERASED_SIZE bytes of live data, resizes through the
 * wrapped allocator, rewrites the debug header/trailer, restores the saved
 * bytes, and marks any newly grown region CLEANBYTE.  If the underlying
 * realloc fails, the original block is re-decorated and NULL is returned
 * (the caller's block stays valid, matching realloc() semantics). */
void *
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
{
    if (p == NULL) {
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
    }

    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
    uint8_t *head;        /* base address of malloc'ed pad block */
    uint8_t *data;        /* pointer to data bytes */
    uint8_t *r;
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
    size_t original_nbytes;
#define ERASED_SIZE 64

    _PyMem_DebugCheckAddress(__func__, api->api_id, p);

    data = (uint8_t *)p;
    head = data - 2*SST;
    original_nbytes = read_size_t(head);
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
        /* integer overflow: can't represent total as a Py_ssize_t */
        return NULL;
    }
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;

    tail = data + original_nbytes;
#ifdef PYMEM_DEBUG_SERIALNO
    size_t block_serialno = read_size_t(tail + SST);
#endif
#ifndef Py_GIL_DISABLED
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
     */
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
    if (original_nbytes <= sizeof(save)) {
        /* Small block: the whole user region fits in `save`. */
        memcpy(save, data, original_nbytes);
        memset(data - 2 * SST, PYMEM_DEADBYTE,
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
    }
    else {
        /* Large block: only the first and last ERASED_SIZE bytes are
           poisoned/saved; the middle is left untouched. */
        memcpy(save, data, ERASED_SIZE);
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
    }
#endif

    /* Resize and add decorations. */
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
    if (r == NULL) {
        /* if realloc() failed: rewrite header and footer which have
           just been erased */
        nbytes = original_nbytes;
    }
    else {
        head = r;
#ifdef PYMEM_DEBUG_SERIALNO
        bumpserialno();
        block_serialno = serialno;
#endif
    }
    data = head + 2*SST;

    write_size_t(head, nbytes);
    head[SST] = (uint8_t)api->api_id;
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);

    tail = data + nbytes;
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
#ifdef PYMEM_DEBUG_SERIALNO
    write_size_t(tail + SST, block_serialno);
#endif

#ifndef Py_GIL_DISABLED
    /* Restore saved bytes. */
    if (original_nbytes <= sizeof(save)) {
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
    }
    else {
        size_t i = original_nbytes - ERASED_SIZE;
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
        /* Restore the saved tail bytes only if the new size still
           reaches their original offset. */
        if (nbytes > i) {
            memcpy(data + i, &save[ERASED_SIZE],
                   Py_MIN(nbytes - i, ERASED_SIZE));
        }
    }
#endif

    if (r == NULL) {
        return NULL;
    }

    if (nbytes > original_nbytes) {
        /* growing: mark new extra memory clean */
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
               nbytes - original_nbytes);
    }

    return data;
}
3064
3065
static inline void
3066
_PyMem_DebugCheckGIL(const char *func)
3067
0
{
3068
0
    PyThreadState *tstate = _PyThreadState_GET();
3069
0
    if (tstate == NULL) {
3070
0
#ifndef Py_GIL_DISABLED
3071
0
        _Py_FatalErrorFunc(func,
3072
0
                           "Python memory allocator called "
3073
0
                           "without holding the GIL");
3074
#else
3075
        _Py_FatalErrorFunc(func,
3076
                           "Python memory allocator called "
3077
                           "without an active thread state. "
3078
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3079
#endif
3080
0
    }
3081
0
}
3082
3083
void *
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
{
    /* GIL-checked front end for the raw debug malloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawMalloc(ctx, nbytes);
}
3089
3090
void *
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
{
    /* GIL-checked front end for the raw debug calloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
}
3096
3097
3098
void
_PyMem_DebugFree(void *ctx, void *ptr)
{
    /* GIL-checked front end for the raw debug free. */
    _PyMem_DebugCheckGIL(__func__);
    _PyMem_DebugRawFree(ctx, ptr);
}
3104
3105
3106
void *
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
{
    /* GIL-checked front end for the raw debug realloc. */
    _PyMem_DebugCheckGIL(__func__);
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
}
3112
3113
/* Check the forbidden bytes on both ends of the memory allocated for p.
3114
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3115
 * and call Py_FatalError to kill the program.
3116
 * The API id, is also checked.
3117
 */
3118
static void
3119
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3120
0
{
3121
0
    assert(p != NULL);
3122
3123
0
    const uint8_t *q = (const uint8_t *)p;
3124
0
    size_t nbytes;
3125
0
    const uint8_t *tail;
3126
0
    int i;
3127
0
    char id;
3128
3129
    /* Check the API id */
3130
0
    id = (char)q[-SST];
3131
0
    if (id != api) {
3132
0
        _PyObject_DebugDumpAddress(p);
3133
0
        _Py_FatalErrorFormat(func,
3134
0
                             "bad ID: Allocated using API '%c', "
3135
0
                             "verified using API '%c'",
3136
0
                             id, api);
3137
0
    }
3138
3139
    /* Check the stuff at the start of p first:  if there's underwrite
3140
     * corruption, the number-of-bytes field may be nuts, and checking
3141
     * the tail could lead to a segfault then.
3142
     */
3143
0
    for (i = SST-1; i >= 1; --i) {
3144
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3145
0
            _PyObject_DebugDumpAddress(p);
3146
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3147
0
        }
3148
0
    }
3149
3150
0
    nbytes = read_size_t(q - 2*SST);
3151
0
    tail = q + nbytes;
3152
0
    for (i = 0; i < SST; ++i) {
3153
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3154
0
            _PyObject_DebugDumpAddress(p);
3155
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3156
0
        }
3157
0
    }
3158
0
}
3159
3160
/* Display info to stderr about the memory block at p.
 *
 * Best-effort diagnostic dump used just before a fatal error: prints the
 * API id, the recorded request size, the state of both pad regions, the
 * optional serial number, a hex sample of the data, and finally a
 * traceback of where the block was allocated (if tracemalloc has it). */
static void
_PyObject_DebugDumpAddress(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    const uint8_t *tail;
    size_t nbytes;
    int i;
    int ok;
    char id;

    fprintf(stderr, "Debug memory block at address p=%p:", p);
    if (p == NULL) {
        fprintf(stderr, "\n");
        return;
    }
    id = (char)q[-SST];
    fprintf(stderr, " API '%c'\n", id);

    /* The recorded request size lives at the very start of the header;
       it may be bogus if the header itself was overwritten. */
    nbytes = read_size_t(q - 2*SST);
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);

    /* In case this is nuts, check the leading pad bytes first. */
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
    ok = 1;
    for (i = 1; i <= SST-1; ++i) {
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
            PYMEM_FORBIDDENBYTE);
        for (i = SST-1; i >= 1; --i) {
            const uint8_t byte = *(q-i);
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }

        fputs("    Because memory is corrupted at the start, the "
              "count of bytes requested\n"
              "       may be bogus, and checking the trailing pad "
              "bytes may segfault.\n", stderr);
    }

    tail = q + nbytes;
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
    ok = 1;
    for (i = 0; i < SST; ++i) {
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
            ok = 0;
            break;
        }
    }
    if (ok)
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
    else {
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
                PYMEM_FORBIDDENBYTE);
        for (i = 0; i < SST; ++i) {
            const uint8_t byte = tail[i];
            fprintf(stderr, "        at tail+%d: 0x%02x",
                    i, byte);
            if (byte != PYMEM_FORBIDDENBYTE)
                fputs(" *** OUCH", stderr);
            fputc('\n', stderr);
        }
    }

#ifdef PYMEM_DEBUG_SERIALNO
    size_t serial = read_size_t(tail + SST);
    fprintf(stderr,
            "    The block was made by call #%zu to debug malloc/realloc.\n",
            serial);
#endif

    if (nbytes > 0) {
        i = 0;
        fputs("    Data at p:", stderr);
        /* print up to 8 bytes at the start */
        while (q < tail && i < 8) {
            fprintf(stderr, " %02x", *q);
            ++i;
            ++q;
        }
        /* and up to 8 at the end */
        if (q < tail) {
            if (tail - q > 8) {
                fputs(" ...", stderr);
                q = tail - 8;
            }
            while (q < tail) {
                fprintf(stderr, " %02x", *q);
                ++q;
            }
        }
        fputc('\n', stderr);
    }
    fputc('\n', stderr);

    fflush(stderr);
    _PyMem_DumpTraceback(fileno(stderr), p);
}
3268
3269
3270
/* Print "msg", pad so every '=' lines up in column 35, then print `value`
 * right-aligned with thousands separators.  Returns `value` unchanged so
 * callers can accumulate totals while printing. */
static size_t
printone(FILE *out, const char* msg, size_t value)
{
    size_t origvalue = value;

    /* Label, space-padded to a fixed column. */
    fputs(msg, out);
    for (int col = (int)strlen(msg); col < 35; ++col) {
        fputc(' ', out);
    }
    fputc('=', out);

    /* Render the value right-aligned in a fixed-width field, inserting a
     * comma after every third digit (building the string backwards). */
    char field[100];
    int pos = 22;
    field[pos--] = '\0';
    field[pos--] = '\n';
    int digits_left_in_group = 3;
    do {
        size_t quotient = value / 10;
        unsigned int digit = (unsigned int)(value - quotient * 10);
        value = quotient;
        field[pos--] = (char)(digit + '0');
        if (--digits_left_in_group == 0 && value && pos >= 0) {
            digits_left_in_group = 3;
            field[pos--] = ',';
        }
    } while (value && pos >= 0);

    /* Left-fill the remainder of the field with spaces. */
    while (pos >= 0) {
        field[pos--] = ' ';
    }
    fputs(field, out);

    return origvalue;
}
3305
3306
/* Print one "N <name>s * S bytes each = total" line for a fixed-size
 * allocator, reusing printone() for the aligned, comma-grouped total. */
void
_PyDebugAllocatorStats(FILE *out,
                       const char *block_name, int num_blocks, size_t sizeof_block)
{
    char description[128];
    char padded_label[128];

    PyOS_snprintf(description, sizeof(description),
                  "%d %ss * %zd bytes each",
                  num_blocks, block_name, sizeof_block);
    /* Right-align the description in a 48-column field so the totals from
     * successive calls line up. */
    PyOS_snprintf(padded_label, sizeof(padded_label),
                  "%48s ", description);
    (void)printone(out, padded_label, num_blocks * sizeof_block);
}
3319
3320
// Return true if the obmalloc state structure is heap allocated,
// by PyMem_RawCalloc().  For the main interpreter, this structure
// allocated in the BSS.  Allocating that way gives some memory savings
// and a small performance win (at least on a demand paged OS).  On
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
// memory is for the arena_map_top array.  Since normally only one entry
// of that array is used, only one page of resident memory is actually
// used, rather than the full 256 kB.
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
#if WITH_PYMALLOC
    /* Heap-allocated iff initialized and not sharing the static main state
       (see _PyMem_init_obmalloc). */
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
#else
    return false;
#endif
}
3336
3337
#ifdef WITH_PYMALLOC
3338
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
    // initialize the obmalloc->pools structure.  This must be done
    // before the obmalloc alloc/free functions can be called.
    // _obmalloc_pools_INIT expands to the initial usedpools table, which
    // is built here on the stack and then copied into the state.
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
        _obmalloc_pools_INIT(interp->obmalloc->pools);
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
3347
#endif /* WITH_PYMALLOC */
3348
3349
/* Attach an obmalloc state to `interp`.  Returns 0 on success, -1 on
 * memory error.  The main interpreter (and subinterpreters with
 * Py_RTFLAGS_USE_MAIN_OBMALLOC) share the statically allocated state;
 * other subinterpreters get a heap-allocated state of their own. */
int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
    /* Initialize obmalloc, but only for subinterpreters,
       since the main interpreter is initialized statically. */
    if (_Py_IsMainInterpreter(interp)
            || _PyInterpreterState_HasFeature(interp,
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
        interp->obmalloc = &obmalloc_state_main;
        /* The shared static state must be initialized exactly once. */
        if (!obmalloc_state_initialized) {
            init_obmalloc_pools(interp);
            obmalloc_state_initialized = true;
        }
    } else {
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
        if (interp->obmalloc == NULL) {
            return -1;
        }
        init_obmalloc_pools(interp);
    }
#endif /* WITH_PYMALLOC */
    return 0; // success
}
3372
3373
3374
#ifdef WITH_PYMALLOC
3375
3376
/* Release all memory held by an interpreter's obmalloc state: every arena,
 * the arena-pointer array, and (when the radix tree is in use) its interior
 * nodes.  Called during interpreter finalization. */
static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
    /* `state` is consumed implicitly: maxarenas/allarenas below are macros
       that read it. */
    OMState *state = interp->obmalloc;
    for (uint i = 0; i < maxarenas; ++i) {
        // free each obmalloc memory arena
        struct arena_object *ao = &allarenas[i];
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
    }
    // free the array containing pointers to all arenas
    PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
    // Free the middle and bottom nodes of the radix tree.  These are allocated
    // by arena_map_mark_used() but not freed when arenas are freed.
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
         if (mid == NULL) {
             continue;
         }
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
            /* NB: arena_map_root.ptrs[i1] is the same node as `mid`. */
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
            if (bot == NULL) {
                continue;
            }
            PyMem_RawFree(bot);
         }
         PyMem_RawFree(mid);
    }
#endif
#endif
}
3409
3410
#ifdef Py_DEBUG
3411
/* Is target in the list?  The list is traversed via the nextpool pointers.
3412
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3413
 * list, else 0.
3414
 */
3415
static int
3416
pool_is_in_list(const poolp target, poolp list)
3417
{
3418
    poolp origlist = list;
3419
    assert(target != NULL);
3420
    if (list == NULL)
3421
        return 0;
3422
    do {
3423
        if (target == list)
3424
            return 1;
3425
        list = list->nextpool;
3426
    } while (list != NULL && list != origlist);
3427
    return 0;
3428
}
3429
#endif
3430
3431
#ifdef WITH_MIMALLOC
3432
/* Accumulator filled by _collect_alloc_stats() while visiting mimalloc
 * heap areas; consumed by py_mimalloc_print_stats(). */
struct _alloc_stats {
    size_t allocated_blocks;         /* sum of area->used block counts */
    size_t allocated_bytes;          /* used blocks * block_size */
    size_t allocated_with_overhead;  /* used blocks * full_block_size */
    size_t bytes_reserved;           /* sum of area->reserved */
    size_t bytes_committed;          /* sum of area->committed */
};
3439
3440
static bool _collect_alloc_stats(
3441
    const mi_heap_t* heap, const mi_heap_area_t* area,
3442
    void* block, size_t block_size, void* arg)
3443
0
{
3444
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3445
0
    stats->allocated_blocks += area->used;
3446
0
    stats->allocated_bytes += area->used * area->block_size;
3447
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3448
0
    stats->bytes_reserved += area->reserved;
3449
0
    stats->bytes_committed += area->committed;
3450
0
    return 1;
3451
0
}
3452
3453
/* Print mimalloc configuration thresholds plus aggregate allocation
 * statistics for the current thread's default mimalloc heap. */
static void
py_mimalloc_print_stats(FILE *out)
{
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
    fprintf(out, "Medium block threshold = %zu\n",
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
    fprintf(out, "Large object max size = %zu\n",
            (size_t)MI_LARGE_OBJ_SIZE_MAX);

    /* Accumulate per-area stats across the whole default heap. */
    mi_heap_t *heap = mi_heap_get_default();
    struct _alloc_stats stats;
    memset(&stats, 0, sizeof(stats));
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);

    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
}
3474
#endif
3475
3476
3477
/* Walk every allocated arena and print a per-size-class usage table plus
 * arena/pool accounting totals to 'out'.  In Py_DEBUG builds this also
 * cross-checks pool membership against the free/used pool lists. */
static void
pymalloc_print_stats(FILE *out)
{
    /* NOTE(review): 'state' looks unused, but maxarenas/allarenas/usedpools
     * below are presumably macros that expand through it — confirm against
     * pycore_obmalloc.h before removing. */
    OMState *state = get_state();

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uintptr_t base = allarenas[i].address;

        /* Skip arenas which are not allocated. */
        if (allarenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += allarenas[i].nfreepools;

        /* round up to pool alignment */
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            /* A misaligned arena loses one pool's worth of space to
             * alignment; account for it and advance to the first pool. */
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
        assert(base <= (uintptr_t) allarenas[i].pool_address);
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, allarenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            /* capacity of this size class minus live blocks */
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
            if (freeblocks > 0)
                /* Pools with room must be on the used-pool list for
                 * their class (usedpools is indexed by 2*szidx). */
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    /* One table row per size class that has at least one pool. */
    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
        /* Space at the end of each pool too small to hold another block. */
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    /* Every arena byte must be accounted for by exactly one bucket above. */
    assert(narenas * ARENA_SIZE == total);

#if WITH_PYMALLOC_RADIX_TREE
    /* Memory consumed by the address-to-arena radix tree itself. */
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

}
3633
3634
/* Print summary info to "out" about the state of pymalloc's structures.
3635
 * In Py_DEBUG mode, also perform some expensive internal consistency
3636
 * checks.
3637
 *
3638
 * Return 0 if the memory debug hooks are not installed or no statistics was
3639
 * written into out, return 1 otherwise.
3640
 */
3641
int
_PyObject_DebugMallocStats(FILE *out)
{
    /* Dispatch to whichever allocator is active; report whether anything
     * was written.  Guard-clause form: each enabled allocator returns
     * immediately, so no 'else' chaining across the #ifdef is needed. */
#ifdef WITH_MIMALLOC
    if (_PyMem_MimallocEnabled()) {
        py_mimalloc_print_stats(out);
        return 1;
    }
#endif
    if (_PyMem_PymallocEnabled()) {
        pymalloc_print_stats(out);
        return 1;
    }
    /* Neither allocator active: no statistics were written. */
    return 0;
}
3659
3660
#endif /* #ifdef WITH_PYMALLOC */