Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Objects/obmalloc.c
Line | Count | Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
6
#include "pycore_obmalloc.h"
7
#include "pycore_obmalloc_init.h"
8
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
9
#include "pycore_pymem.h"
10
#include "pycore_pystate.h"       // _PyInterpreterState_GET
11
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
12
13
#include <stdlib.h>               // malloc()
14
#include <stdbool.h>
15
#ifdef WITH_MIMALLOC
16
// Forward declarations of functions used in our mimalloc modifications
17
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
18
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
19
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
20
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
21
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
22
#  include "pycore_mimalloc.h"
23
#  include "mimalloc/static.c"
24
#  include "mimalloc/internal.h"  // for stats
25
#endif
26
27
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
28
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
29
#endif
30
31
#undef  uint
32
1.73G
#define uint pymem_uint
33
34
35
/* Defined in tracemalloc.c */
36
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
37
38
static void _PyObject_DebugDumpAddress(const void *p);
39
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
40
41
42
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
43
static void set_up_debug_hooks_unlocked(void);
44
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
45
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
46
47
48
/***************************************/
49
/* low-level allocator implementations */
50
/***************************************/
51
52
/* the default raw allocator (wraps malloc) */
53
54
void *
55
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
56
241M
{
57
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
58
       for malloc(0), which would be treated as an error. Some platforms would
59
       return a pointer with no memory behind it, which would break pymalloc.
60
       To solve these problems, allocate an extra byte. */
61
241M
    if (size == 0)
62
30.5M
        size = 1;
63
241M
    return malloc(size);
64
241M
}
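The zero-size special case documented above guarantees callers a real, freeable pointer even for empty requests. A minimal sketch of that contract, using the public raw API that routes through these wrappers in the default configuration:

    #include <assert.h>
    #include <Python.h>

    /* Sketch: a zero-byte request behaves like a one-byte request, so the
       result is non-NULL, distinct per call, and released with PyMem_RawFree(). */
    static void demo_zero_size_raw_malloc(void)
    {
        void *a = PyMem_RawMalloc(0);
        void *b = PyMem_RawMalloc(0);
        assert(a != NULL && b != NULL && a != b);
        PyMem_RawFree(a);
        PyMem_RawFree(b);
    }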
65
66
void *
67
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
68
80.7k
{
69
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
70
       for calloc(0, 0), which would be treated as an error. Some platforms
71
       would return a pointer with no memory behind it, which would break
72
       pymalloc.  To solve these problems, allocate an extra byte. */
73
80.7k
    if (nelem == 0 || elsize == 0) {
74
2
        nelem = 1;
75
2
        elsize = 1;
76
2
    }
77
80.7k
    return calloc(nelem, elsize);
78
80.7k
}
79
80
void *
81
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
82
5.85M
{
83
5.85M
    if (size == 0)
84
0
        size = 1;
85
5.85M
    return realloc(ptr, size);
86
5.85M
}
87
88
void
89
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
90
241M
{
91
241M
    free(ptr);
92
241M
}
93
94
#ifdef WITH_MIMALLOC
95
96
static void
97
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
98
0
{
99
#ifdef Py_GIL_DISABLED
100
    // Clear the QSBR goal and remove the page from the QSBR linked list.
101
    page->qsbr_goal = 0;
102
    if (page->qsbr_node.next != NULL) {
103
        llist_remove(&page->qsbr_node);
104
    }
105
#endif
106
0
}
107
108
// Check if an empty, newly reclaimed page is safe to free now.
109
static bool
110
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
111
0
{
112
0
    assert(mi_page_all_free(page));
113
#ifdef Py_GIL_DISABLED
114
    assert(page->qsbr_node.next == NULL);
115
    if (page->use_qsbr && page->qsbr_goal != 0) {
116
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
117
        if (tstate == NULL) {
118
            return false;
119
        }
120
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
121
    }
122
#endif
123
0
    return true;
124
125
0
}
126
127
#ifdef Py_GIL_DISABLED
128
129
// If we are deferring collection of more than this amount of memory for
130
// mimalloc pages, advance the write sequence.  Advancing allows these
131
// pages to be re-used in a different thread or for a different size class.
132
#define QSBR_PAGE_MEM_LIMIT 4096*20
133
134
// Return true if the global write sequence should be advanced for a mimalloc
135
// page that is deferred from collection.
136
static bool
137
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
138
{
139
    size_t bsize = mi_page_block_size(page);
140
    size_t page_size = page->capacity*bsize;
141
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
142
        qsbr->deferred_page_memory = 0;
143
        return true;
144
    }
145
    qsbr->deferred_page_memory += page_size;
146
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
147
        qsbr->deferred_page_memory = 0;
148
        return true;
149
    }
150
    return false;
151
}
152
#endif
153
154
static bool
155
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
156
0
{
157
#ifdef Py_GIL_DISABLED
158
    assert(mi_page_all_free(page));
159
    if (page->use_qsbr) {
160
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
161
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
162
            _PyMem_mi_page_clear_qsbr(page);
163
            _mi_page_free(page, pq, force);
164
            return true;
165
        }
166
167
        _PyMem_mi_page_clear_qsbr(page);
168
        page->retire_expire = 0;
169
170
        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
171
            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
172
        }
173
        else {
174
            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
175
        }
176
177
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
178
        return false;
179
    }
180
#endif
181
0
    _mi_page_free(page, pq, force);
182
0
    return true;
183
0
}
184
185
static void
186
_PyMem_mi_page_reclaimed(mi_page_t *page)
187
0
{
188
#ifdef Py_GIL_DISABLED
189
    assert(page->qsbr_node.next == NULL);
190
    if (page->qsbr_goal != 0) {
191
        if (mi_page_all_free(page)) {
192
            assert(page->qsbr_node.next == NULL);
193
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
194
            page->retire_expire = 0;
195
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
196
        }
197
        else {
198
            page->qsbr_goal = 0;
199
        }
200
    }
201
#endif
202
0
}
203
204
static void
205
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
206
0
{
207
#ifdef Py_GIL_DISABLED
208
    if (!heap->page_use_qsbr) {
209
        return;
210
    }
211
212
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
213
    struct llist_node *head = &tstate->mimalloc.page_list;
214
    if (llist_empty(head)) {
215
        return;
216
    }
217
218
    struct llist_node *node;
219
    llist_for_each_safe(node, head) {
220
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
221
        if (!mi_page_all_free(page)) {
222
            // We allocated from this page some point after the delayed free
223
            _PyMem_mi_page_clear_qsbr(page);
224
            continue;
225
        }
226
227
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
228
            return;
229
        }
230
231
        _PyMem_mi_page_clear_qsbr(page);
232
        _mi_page_free(page, mi_page_queue_of(page), false);
233
    }
234
#endif
235
0
}
236
237
void *
238
_PyMem_MiMalloc(void *ctx, size_t size)
239
0
{
240
#ifdef Py_GIL_DISABLED
241
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
242
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
243
    return mi_heap_malloc(heap, size);
244
#else
245
0
    return mi_malloc(size);
246
0
#endif
247
0
}
248
249
void *
250
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
251
0
{
252
#ifdef Py_GIL_DISABLED
253
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
254
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
255
    return mi_heap_calloc(heap, nelem, elsize);
256
#else
257
0
    return mi_calloc(nelem, elsize);
258
0
#endif
259
0
}
260
261
void *
262
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
263
0
{
264
#ifdef Py_GIL_DISABLED
265
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
266
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
267
    return mi_heap_realloc(heap, ptr, size);
268
#else
269
0
    return mi_realloc(ptr, size);
270
0
#endif
271
0
}
272
273
void
274
_PyMem_MiFree(void *ctx, void *ptr)
275
0
{
276
0
    mi_free(ptr);
277
0
}
278
279
void *
280
_PyObject_MiMalloc(void *ctx, size_t nbytes)
281
0
{
282
#ifdef Py_GIL_DISABLED
283
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
284
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
285
    return mi_heap_malloc(heap, nbytes);
286
#else
287
0
    return mi_malloc(nbytes);
288
0
#endif
289
0
}
290
291
void *
292
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
293
0
{
294
#ifdef Py_GIL_DISABLED
295
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
296
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
297
    return mi_heap_calloc(heap, nelem, elsize);
298
#else
299
0
    return mi_calloc(nelem, elsize);
300
0
#endif
301
0
}
302
303
304
void *
305
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
306
0
{
307
#ifdef Py_GIL_DISABLED
308
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
309
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
310
    return mi_heap_realloc(heap, ptr, nbytes);
311
#else
312
0
    return mi_realloc(ptr, nbytes);
313
0
#endif
314
0
}
315
316
void
317
_PyObject_MiFree(void *ctx, void *ptr)
318
0
{
319
0
    mi_free(ptr);
320
0
}
321
322
#endif // WITH_MIMALLOC
323
324
325
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
326
327
328
#ifdef WITH_MIMALLOC
329
0
#  define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
330
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
331
#endif
332
333
/* the pymalloc allocator */
334
335
// The actual implementation is further down.
336
337
#if defined(WITH_PYMALLOC)
338
void* _PyObject_Malloc(void *ctx, size_t size);
339
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
340
void _PyObject_Free(void *ctx, void *p);
341
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
342
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
343
#endif  // WITH_PYMALLOC
344
345
#if defined(Py_GIL_DISABLED)
346
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
347
#  define PYRAW_ALLOC MALLOC_ALLOC
348
#  define PYMEM_ALLOC MIMALLOC_ALLOC
349
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
350
#elif defined(WITH_PYMALLOC)
351
0
#  define PYRAW_ALLOC MALLOC_ALLOC
352
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
353
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
354
#else
355
#  define PYRAW_ALLOC MALLOC_ALLOC
356
#  define PYMEM_ALLOC MALLOC_ALLOC
357
#  define PYOBJ_ALLOC MALLOC_ALLOC
358
#endif
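For reference, the conditional chain above resolves to the following per-domain defaults (derived directly from the #if branches; the debug hooks defined further down may still wrap any of them):

    Build configuration          | "raw" domain | "mem" domain | "obj" domain
    Py_GIL_DISABLED              | malloc       | mimalloc     | mimalloc (object heaps)
    WITH_PYMALLOC (default)      | malloc       | pymalloc     | pymalloc
    neither                      | malloc       | malloc       | malloc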
359
360
361
/* the default debug allocators */
362
363
// The actual implementation is further down.
364
365
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
366
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
367
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
368
void _PyMem_DebugRawFree(void *ctx, void *ptr);
369
370
void* _PyMem_DebugMalloc(void *ctx, size_t size);
371
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
372
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
373
void _PyMem_DebugFree(void *ctx, void *p);
374
375
#define PYDBGRAW_ALLOC \
376
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
377
#define PYDBGMEM_ALLOC \
378
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
379
#define PYDBGOBJ_ALLOC \
380
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
381
382
/* default raw allocator (not swappable) */
383
384
void *
385
_PyMem_DefaultRawMalloc(size_t size)
386
128
{
387
#ifdef Py_DEBUG
388
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
389
#else
390
128
    return _PyMem_RawMalloc(NULL, size);
391
128
#endif
392
128
}
393
394
void *
395
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
396
0
{
397
#ifdef Py_DEBUG
398
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
399
#else
400
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
401
0
#endif
402
0
}
403
404
void *
405
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
406
0
{
407
#ifdef Py_DEBUG
408
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
409
#else
410
0
    return _PyMem_RawRealloc(NULL, ptr, size);
411
0
#endif
412
0
}
413
414
void
415
_PyMem_DefaultRawFree(void *ptr)
416
144
{
417
#ifdef Py_DEBUG
418
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
419
#else
420
144
    _PyMem_RawFree(NULL, ptr);
421
144
#endif
422
144
}
423
424
wchar_t*
425
_PyMem_DefaultRawWcsdup(const wchar_t *str)
426
96
{
427
96
    assert(str != NULL);
428
429
96
    size_t len = wcslen(str);
430
96
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
431
0
        return NULL;
432
0
    }
433
434
96
    size_t size = (len + 1) * sizeof(wchar_t);
435
96
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
436
96
    if (str2 == NULL) {
437
0
        return NULL;
438
0
    }
439
440
96
    memcpy(str2, str, size);
441
96
    return str2;
442
96
}
443
444
/* the low-level virtual memory allocator */
445
446
#ifdef WITH_PYMALLOC
447
#  ifdef MS_WINDOWS
448
#    include <windows.h>
449
#  elif defined(HAVE_MMAP)
450
#    include <sys/mman.h>
451
#    ifdef MAP_ANONYMOUS
452
#      define ARENAS_USE_MMAP
453
#    endif
454
#  endif
455
#endif
456
457
void *
458
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
459
169k
{
460
#ifdef MS_WINDOWS
461
    return VirtualAlloc(NULL, size,
462
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
463
#elif defined(ARENAS_USE_MMAP)
464
    void *ptr;
465
169k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
466
169k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
467
169k
    if (ptr == MAP_FAILED)
468
0
        return NULL;
469
169k
    assert(ptr != NULL);
470
169k
    return ptr;
471
#else
472
    return malloc(size);
473
#endif
474
169k
}
475
476
void
477
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
478
#if defined(ARENAS_USE_MMAP)
479
    size_t size
480
#else
481
    size_t Py_UNUSED(size)
482
#endif
483
)
484
168k
{
485
#ifdef MS_WINDOWS
486
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
487
    if (ptr == NULL) {
488
        return;
489
    }
490
    VirtualFree(ptr, 0, MEM_RELEASE);
491
#elif defined(ARENAS_USE_MMAP)
492
    /* Unlike free(), munmap() does not special-case NULL to noop. */
493
168k
    if (ptr == NULL) {
494
0
        return;
495
0
    }
496
168k
    munmap(ptr, size);
497
#else
498
    free(ptr);
499
#endif
500
168k
}
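The VirtualAlloc/mmap pair above is only the default arena backend; it can be replaced through the public PyObjectArenaAllocator hook. A minimal sketch of a counting wrapper (the helper names are illustrative, not part of CPython):

    #include <Python.h>

    static PyObjectArenaAllocator orig_arena;
    static size_t arenas_mapped = 0;

    static void *counting_arena_alloc(void *ctx, size_t size)
    {
        arenas_mapped++;                       /* count every new arena */
        return orig_arena.alloc(orig_arena.ctx, size);
    }

    static void counting_arena_free(void *ctx, void *ptr, size_t size)
    {
        orig_arena.free(orig_arena.ctx, ptr, size);
    }

    /* Install early (before Py_Initialize()) so every arena goes through the wrapper. */
    static void install_counting_arena_allocator(void)
    {
        PyObject_GetArenaAllocator(&orig_arena);
        PyObjectArenaAllocator hook = {NULL, counting_arena_alloc, counting_arena_free};
        PyObject_SetArenaAllocator(&hook);
    }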
501
502
/*******************************************/
503
/* end low-level allocator implementations */
504
/*******************************************/
505
506
507
32
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
508
976M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
509
2.76G
#define _PyMem (_PyRuntime.allocators.standard.mem)
510
4.59G
#define _PyObject (_PyRuntime.allocators.standard.obj)
511
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
512
676k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
513
514
515
/***************************/
516
/* managing the allocators */
517
/***************************/
518
519
static int
520
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
521
                               PyMemAllocatorEx *old_alloc)
522
0
{
523
0
    if (old_alloc != NULL) {
524
0
        get_allocator_unlocked(domain, old_alloc);
525
0
    }
526
527
528
0
    PyMemAllocatorEx new_alloc;
529
0
    switch(domain)
530
0
    {
531
0
    case PYMEM_DOMAIN_RAW:
532
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
533
0
        break;
534
0
    case PYMEM_DOMAIN_MEM:
535
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
536
0
        break;
537
0
    case PYMEM_DOMAIN_OBJ:
538
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
539
0
        break;
540
0
    default:
541
        /* unknown domain */
542
0
        return -1;
543
0
    }
544
0
    set_allocator_unlocked(domain, &new_alloc);
545
0
    if (debug) {
546
0
        set_up_debug_hooks_domain_unlocked(domain);
547
0
    }
548
0
    return 0;
549
0
}
550
551
552
#ifdef Py_DEBUG
553
static const int pydebug = 1;
554
#else
555
static const int pydebug = 0;
556
#endif
557
558
int
559
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
560
0
{
561
0
    if (name == NULL || *name == '\0') {
562
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
563
           options): use default memory allocators */
564
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
565
0
    }
566
0
    else if (strcmp(name, "default") == 0) {
567
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
568
0
    }
569
0
    else if (strcmp(name, "debug") == 0) {
570
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
571
0
    }
572
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
573
0
    else if (strcmp(name, "pymalloc") == 0) {
574
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
575
0
    }
576
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
577
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
578
0
    }
579
0
#endif
580
0
#ifdef WITH_MIMALLOC
581
0
    else if (strcmp(name, "mimalloc") == 0) {
582
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
583
0
    }
584
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
585
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
586
0
    }
587
0
#endif
588
0
#ifndef Py_GIL_DISABLED
589
0
    else if (strcmp(name, "malloc") == 0) {
590
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
591
0
    }
592
0
    else if (strcmp(name, "malloc_debug") == 0) {
593
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
594
0
    }
595
0
#endif
596
0
    else {
597
        /* unknown allocator */
598
0
        return -1;
599
0
    }
600
0
    return 0;
601
0
}
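The accepted names mirror the values of the PYTHONMALLOC environment variable: "default" and "debug" always, plus "pymalloc"/"pymalloc_debug", "mimalloc"/"mimalloc_debug", and "malloc"/"malloc_debug" depending on the build options guarding the #if blocks above. A short sketch of how the parsed value feeds the setup routine defined below (internal headers assumed, since these are private _PyMem functions):

    /* Sketch: turn a PYTHONMALLOC-style string into an allocator selection. */
    PyMemAllocatorName which;
    if (_PyMem_GetAllocatorName("pymalloc_debug", &which) < 0) {
        /* unknown name, e.g. a typo in PYTHONMALLOC */
    }
    else {
        (void)_PyMem_SetupAllocators(which);   /* installs the chosen allocators */
    }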
602
603
604
static int
605
set_up_allocators_unlocked(PyMemAllocatorName allocator)
606
0
{
607
0
    switch (allocator) {
608
0
    case PYMEM_ALLOCATOR_NOT_SET:
609
        /* do nothing */
610
0
        break;
611
612
0
    case PYMEM_ALLOCATOR_DEFAULT:
613
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
614
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
615
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
616
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
617
0
        break;
618
619
0
    case PYMEM_ALLOCATOR_DEBUG:
620
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
621
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
622
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
623
0
        _PyRuntime.allocators.is_debug_enabled = 1;
624
0
        break;
625
626
0
#ifdef WITH_PYMALLOC
627
0
    case PYMEM_ALLOCATOR_PYMALLOC:
628
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
629
0
    {
630
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
631
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
632
633
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
634
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
635
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
636
637
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
638
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
639
0
        if (is_debug) {
640
0
            set_up_debug_hooks_unlocked();
641
0
        }
642
0
        break;
643
0
    }
644
0
#endif
645
0
#ifdef WITH_MIMALLOC
646
0
    case PYMEM_ALLOCATOR_MIMALLOC:
647
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
648
0
    {
649
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
650
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
651
652
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
653
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
654
655
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
656
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
657
658
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
659
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
660
0
        if (is_debug) {
661
0
            set_up_debug_hooks_unlocked();
662
0
        }
663
664
0
        break;
665
0
    }
666
0
#endif
667
668
0
    case PYMEM_ALLOCATOR_MALLOC:
669
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
670
0
    {
671
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
672
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
673
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
674
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
675
676
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
677
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
678
0
        if (is_debug) {
679
0
            set_up_debug_hooks_unlocked();
680
0
        }
681
0
        break;
682
0
    }
683
684
0
    default:
685
        /* unknown allocator */
686
0
        return -1;
687
0
    }
688
689
0
    return 0;
690
0
}
691
692
int
693
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
694
0
{
695
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
696
0
    int res = set_up_allocators_unlocked(allocator);
697
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
698
0
    return res;
699
0
}
700
701
702
static int
703
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
704
0
{
705
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
706
0
}
707
708
709
static const char*
710
get_current_allocator_name_unlocked(void)
711
0
{
712
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
713
0
#ifdef WITH_PYMALLOC
714
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
715
0
#endif
716
0
#ifdef WITH_MIMALLOC
717
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
718
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
719
0
#endif
720
721
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
722
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
723
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
724
0
    {
725
0
        return "malloc";
726
0
    }
727
0
#ifdef WITH_PYMALLOC
728
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
729
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
730
0
        pymemallocator_eq(&_PyObject, &pymalloc))
731
0
    {
732
0
        return "pymalloc";
733
0
    }
734
0
#endif
735
0
#ifdef WITH_MIMALLOC
736
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
737
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
738
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
739
0
    {
740
0
        return "mimalloc";
741
0
    }
742
0
#endif
743
744
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
745
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
746
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
747
748
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
749
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
750
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
751
0
    {
752
        /* Debug hooks installed */
753
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
754
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
755
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
756
0
        {
757
0
            return "malloc_debug";
758
0
        }
759
0
#ifdef WITH_PYMALLOC
760
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
761
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
762
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
763
0
        {
764
0
            return "pymalloc_debug";
765
0
        }
766
0
#endif
767
0
#ifdef WITH_MIMALLOC
768
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
769
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
770
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
771
0
        {
772
0
            return "mimalloc_debug";
773
0
        }
774
0
#endif
775
0
    }
776
0
    return NULL;
777
0
}
778
779
const char*
780
_PyMem_GetCurrentAllocatorName(void)
781
0
{
782
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
783
0
    const char *name = get_current_allocator_name_unlocked();
784
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
785
0
    return name;
786
0
}
787
788
789
int
790
_PyMem_DebugEnabled(void)
791
0
{
792
0
    return _PyRuntime.allocators.is_debug_enabled;
793
0
}
794
795
#ifdef WITH_PYMALLOC
796
static int
797
_PyMem_PymallocEnabled(void)
798
0
{
799
0
    if (_PyMem_DebugEnabled()) {
800
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
801
0
    }
802
0
    else {
803
0
        return (_PyObject.malloc == _PyObject_Malloc);
804
0
    }
805
0
}
806
807
#ifdef WITH_MIMALLOC
808
static int
809
_PyMem_MimallocEnabled(void)
810
0
{
811
#ifdef Py_GIL_DISABLED
812
    return 1;
813
#else
814
0
    if (_PyMem_DebugEnabled()) {
815
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
816
0
    }
817
0
    else {
818
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
819
0
    }
820
0
#endif
821
0
}
822
#endif  // WITH_MIMALLOC
823
824
#endif  // WITH_PYMALLOC
825
826
827
static void
828
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
829
0
{
830
0
    PyMemAllocatorEx alloc;
831
832
0
    if (domain == PYMEM_DOMAIN_RAW) {
833
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
834
0
            return;
835
0
        }
836
837
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
838
0
        alloc.ctx = &_PyMem_Debug.raw;
839
0
        alloc.malloc = _PyMem_DebugRawMalloc;
840
0
        alloc.calloc = _PyMem_DebugRawCalloc;
841
0
        alloc.realloc = _PyMem_DebugRawRealloc;
842
0
        alloc.free = _PyMem_DebugRawFree;
843
0
        set_allocator_unlocked(domain, &alloc);
844
0
    }
845
0
    else if (domain == PYMEM_DOMAIN_MEM) {
846
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
847
0
            return;
848
0
        }
849
850
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
851
0
        alloc.ctx = &_PyMem_Debug.mem;
852
0
        alloc.malloc = _PyMem_DebugMalloc;
853
0
        alloc.calloc = _PyMem_DebugCalloc;
854
0
        alloc.realloc = _PyMem_DebugRealloc;
855
0
        alloc.free = _PyMem_DebugFree;
856
0
        set_allocator_unlocked(domain, &alloc);
857
0
    }
858
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
859
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
860
0
            return;
861
0
        }
862
863
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
864
0
        alloc.ctx = &_PyMem_Debug.obj;
865
0
        alloc.malloc = _PyMem_DebugMalloc;
866
0
        alloc.calloc = _PyMem_DebugCalloc;
867
0
        alloc.realloc = _PyMem_DebugRealloc;
868
0
        alloc.free = _PyMem_DebugFree;
869
0
        set_allocator_unlocked(domain, &alloc);
870
0
    }
871
0
}
872
873
874
static void
875
set_up_debug_hooks_unlocked(void)
876
0
{
877
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
878
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
879
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
880
0
    _PyRuntime.allocators.is_debug_enabled = 1;
881
0
}
882
883
void
884
PyMem_SetupDebugHooks(void)
885
0
{
886
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
887
0
    set_up_debug_hooks_unlocked();
888
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
889
0
}
890
891
static void
892
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
893
16
{
894
16
    switch(domain)
895
16
    {
896
16
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
897
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
898
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
899
0
    default:
900
        /* unknown domain: set all attributes to NULL */
901
0
        allocator->ctx = NULL;
902
0
        allocator->malloc = NULL;
903
0
        allocator->calloc = NULL;
904
0
        allocator->realloc = NULL;
905
0
        allocator->free = NULL;
906
16
    }
907
16
}
908
909
static void
910
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
911
0
{
912
0
    switch(domain)
913
0
    {
914
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
915
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
916
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
917
    /* ignore unknown domain */
918
0
    }
919
0
}
920
921
void
922
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
923
16
{
924
16
    PyMutex_Lock(&ALLOCATORS_MUTEX);
925
16
    get_allocator_unlocked(domain, allocator);
926
16
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
927
16
}
928
929
void
930
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
931
0
{
932
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
933
0
    set_allocator_unlocked(domain, allocator);
934
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
935
0
}
936
937
void
938
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
939
0
{
940
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
941
0
    *allocator = _PyObject_Arena;
942
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
943
0
}
944
945
void
946
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
947
0
{
948
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
949
0
    _PyObject_Arena = *allocator;
950
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
951
0
}
952
953
954
/* Note that there is a possible, but very unlikely, race in any place
955
 * below where we call one of the allocator functions.  We access two
956
 * fields in each case:  "malloc", etc. and "ctx".
957
 *
958
 * It is unlikely that the allocator will be changed while one of those
959
 * calls is happening, much less in that very narrow window.
960
 * Furthermore, the likelihood of a race is drastically reduced by the
961
 * fact that the allocator may not be changed after runtime init
962
 * (except with a wrapper).
963
 *
964
 * With the above in mind, we currently don't worry about locking
965
 * around these uses of the runtime-global allocators state. */
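The supported way to swap or wrap a domain allocator remains PyMem_GetAllocator()/PyMem_SetAllocator(), ideally before Py_Initialize() or via a wrapper that delegates to the previously installed allocator. A minimal counting wrapper for the "mem" domain, loosely following the pattern shown in the C API documentation (the helper names are illustrative):

    #include <Python.h>

    static PyMemAllocatorEx orig_mem;
    static size_t mem_malloc_calls = 0;

    static void *counting_malloc(void *ctx, size_t size)
    {
        mem_malloc_calls++;
        return orig_mem.malloc(orig_mem.ctx, size);
    }
    static void *counting_calloc(void *ctx, size_t nelem, size_t elsize)
    {
        return orig_mem.calloc(orig_mem.ctx, nelem, elsize);
    }
    static void *counting_realloc(void *ctx, void *ptr, size_t size)
    {
        return orig_mem.realloc(orig_mem.ctx, ptr, size);
    }
    static void counting_free(void *ctx, void *ptr)
    {
        orig_mem.free(orig_mem.ctx, ptr);
    }

    static void install_counting_mem_allocator(void)
    {
        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &orig_mem);
        PyMemAllocatorEx hook = {NULL, counting_malloc, counting_calloc,
                                 counting_realloc, counting_free};
        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &hook);
    }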
966
967
968
/*************************/
969
/* the "arena" allocator */
970
/*************************/
971
972
void *
973
_PyObject_VirtualAlloc(size_t size)
974
158k
{
975
158k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
976
158k
}
977
978
void
979
_PyObject_VirtualFree(void *obj, size_t size)
980
158k
{
981
158k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
982
158k
}
983
984
985
/***********************/
986
/* the "raw" allocator */
987
/***********************/
988
989
void *
990
PyMem_RawMalloc(size_t size)
991
241M
{
992
    /*
993
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
994
     * Most python internals blindly use a signed Py_ssize_t to track
995
     * things without checking for overflows or negatives.
996
     * As size_t is unsigned, checking for size < 0 is not required.
997
     */
998
241M
    if (size > (size_t)PY_SSIZE_T_MAX)
999
0
        return NULL;
1000
241M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1001
241M
}
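The PY_SSIZE_T_MAX guard means an oversized request fails cleanly before ever reaching the underlying allocator. A tiny sketch of the resulting behaviour:

    #include <assert.h>
    #include <Python.h>

    static void demo_overflow_guard(void)
    {
        size_t too_big = (size_t)PY_SSIZE_T_MAX + 1;
        assert(PyMem_RawMalloc(too_big) == NULL);   /* rejected before malloc() */
    }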
1002
1003
void *
1004
PyMem_RawCalloc(size_t nelem, size_t elsize)
1005
80.7k
{
1006
    /* see PyMem_RawMalloc() */
1007
80.7k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1008
0
        return NULL;
1009
80.7k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1010
80.7k
}
1011
1012
void*
1013
PyMem_RawRealloc(void *ptr, size_t new_size)
1014
5.85M
{
1015
    /* see PyMem_RawMalloc() */
1016
5.85M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1017
0
        return NULL;
1018
5.85M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1019
5.85M
}
1020
1021
void PyMem_RawFree(void *ptr)
1022
241M
{
1023
241M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1024
241M
}
1025
1026
1027
/***********************/
1028
/* the "mem" allocator */
1029
/***********************/
1030
1031
void *
1032
PyMem_Malloc(size_t size)
1033
389M
{
1034
    /* see PyMem_RawMalloc() */
1035
389M
    if (size > (size_t)PY_SSIZE_T_MAX)
1036
0
        return NULL;
1037
389M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1038
389M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1039
389M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1040
389M
    OBJECT_STAT_INC(allocations);
1041
389M
    return _PyMem.malloc(_PyMem.ctx, size);
1042
389M
}
1043
1044
void *
1045
PyMem_Calloc(size_t nelem, size_t elsize)
1046
48.3M
{
1047
    /* see PyMem_RawMalloc() */
1048
48.3M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1049
0
        return NULL;
1050
48.3M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1051
48.3M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1052
48.3M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1053
48.3M
    OBJECT_STAT_INC(allocations);
1054
48.3M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1055
48.3M
}
1056
1057
void *
1058
PyMem_Realloc(void *ptr, size_t new_size)
1059
271M
{
1060
    /* see PyMem_RawMalloc() */
1061
271M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1062
0
        return NULL;
1063
271M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1064
271M
}
1065
1066
void
1067
PyMem_Free(void *ptr)
1068
673M
{
1069
673M
    OBJECT_STAT_INC(frees);
1070
673M
    _PyMem.free(_PyMem.ctx, ptr);
1071
673M
}
1072
1073
1074
/***************************/
1075
/* pymem utility functions */
1076
/***************************/
1077
1078
wchar_t*
1079
_PyMem_RawWcsdup(const wchar_t *str)
1080
624
{
1081
624
    assert(str != NULL);
1082
1083
624
    size_t len = wcslen(str);
1084
624
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1085
0
        return NULL;
1086
0
    }
1087
1088
624
    size_t size = (len + 1) * sizeof(wchar_t);
1089
624
    wchar_t *str2 = PyMem_RawMalloc(size);
1090
624
    if (str2 == NULL) {
1091
0
        return NULL;
1092
0
    }
1093
1094
624
    memcpy(str2, str, size);
1095
624
    return str2;
1096
624
}
1097
1098
char *
1099
_PyMem_RawStrdup(const char *str)
1100
48
{
1101
48
    assert(str != NULL);
1102
48
    size_t size = strlen(str) + 1;
1103
48
    char *copy = PyMem_RawMalloc(size);
1104
48
    if (copy == NULL) {
1105
0
        return NULL;
1106
0
    }
1107
48
    memcpy(copy, str, size);
1108
48
    return copy;
1109
48
}
1110
1111
char *
1112
_PyMem_Strdup(const char *str)
1113
0
{
1114
0
    assert(str != NULL);
1115
0
    size_t size = strlen(str) + 1;
1116
0
    char *copy = PyMem_Malloc(size);
1117
0
    if (copy == NULL) {
1118
0
        return NULL;
1119
0
    }
1120
0
    memcpy(copy, str, size);
1121
0
    return copy;
1122
0
}
1123
1124
/***********************************************/
1125
/* Delayed freeing support for Py_GIL_DISABLED */
1126
/***********************************************/
1127
1128
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1129
#define WORK_ITEMS_PER_CHUNK 254
1130
1131
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1132
struct _mem_work_item {
1133
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1134
    uint64_t qsbr_goal;
1135
};
1136
1137
// A fixed-size buffer of pointers to be freed
1138
struct _mem_work_chunk {
1139
    // Linked list node of chunks in queue
1140
    struct llist_node node;
1141
1142
    Py_ssize_t rd_idx;  // index of next item to read
1143
    Py_ssize_t wr_idx;  // index of next item to write
1144
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1145
};
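The chunk size works out as follows on a 64-bit platform (the assumption in the comment above): the llist_node holds two pointers (16 bytes), the two Py_ssize_t indexes add another 16 bytes, and each work item is a uintptr_t plus a uint64_t (16 bytes), so 32 + 254 * 16 = 4096. A sketch of a compile-time check for that invariant:

    /* Sketch (64-bit assumption): one chunk should fill exactly one 4 KiB page. */
    static_assert(sizeof(struct _mem_work_chunk) == 4096,
                  "work chunk should be page-sized on 64-bit platforms");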
1146
1147
static int
1148
work_item_should_decref(uintptr_t ptr)
1149
0
{
1150
0
    return ptr & 0x01;
1151
0
}
1152
1153
static void
1154
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1155
0
{
1156
0
    if (work_item_should_decref(ptr)) {
1157
0
        PyObject *obj = (PyObject *)(ptr - 1);
1158
#ifdef Py_GIL_DISABLED
1159
        if (cb == NULL) {
1160
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1161
            Py_DECREF(obj);
1162
            return;
1163
        }
1164
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1165
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1166
        if (refcount == 0) {
1167
            cb(obj, state);
1168
        }
1169
#else
1170
0
        Py_DECREF(obj);
1171
0
#endif
1172
0
    }
1173
0
    else {
1174
0
        PyMem_Free((void *)ptr);
1175
0
    }
1176
0
}
1177
1178
1179
#ifdef Py_GIL_DISABLED
1180
1181
// For deferred advance on free: the number of deferred items before advancing
1182
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1183
// want to process a chunk before it overflows.
1184
#define QSBR_DEFERRED_LIMIT 127
1185
1186
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1187
// helps limit memory usage due to QSBR delaying frees too long.
1188
#define QSBR_FREE_MEM_LIMIT 1024*1024
1189
1190
// Return true if the global write sequence should be advanced for a deferred
1191
// memory free.
1192
static bool
1193
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1194
{
1195
    if (size > QSBR_FREE_MEM_LIMIT) {
1196
        qsbr->deferred_count = 0;
1197
        qsbr->deferred_memory = 0;
1198
        qsbr->should_process = true;
1199
        return true;
1200
    }
1201
    qsbr->deferred_count++;
1202
    qsbr->deferred_memory += size;
1203
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1204
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1205
        qsbr->deferred_count = 0;
1206
        qsbr->deferred_memory = 0;
1207
        qsbr->should_process = true;
1208
        return true;
1209
    }
1210
    return false;
1211
}
1212
#endif
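In concrete terms: a single deferred free larger than 1 MiB advances the write sequence immediately, and otherwise the sequence advances once the 128th small free is queued (the count check is strictly greater than QSBR_DEFERRED_LIMIT, which is 127) or once the running total of deferred bytes exceeds 1 MiB, whichever comes first.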
1213
1214
static void
1215
free_delayed(uintptr_t ptr, size_t size)
1216
0
{
1217
0
#ifndef Py_GIL_DISABLED
1218
0
    free_work_item(ptr, NULL, NULL);
1219
#else
1220
    PyInterpreterState *interp = _PyInterpreterState_GET();
1221
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1222
        interp->stoptheworld.world_stopped)
1223
    {
1224
        // Free immediately during interpreter shutdown or if the world is
1225
        // stopped.
1226
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1227
        free_work_item(ptr, NULL, NULL);
1228
        return;
1229
    }
1230
1231
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1232
    struct llist_node *head = &tstate->mem_free_queue;
1233
1234
    struct _mem_work_chunk *buf = NULL;
1235
    if (!llist_empty(head)) {
1236
        // Try to re-use the last buffer
1237
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1238
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1239
            // already full
1240
            buf = NULL;
1241
        }
1242
    }
1243
1244
    if (buf == NULL) {
1245
        buf = PyMem_Calloc(1, sizeof(*buf));
1246
        if (buf != NULL) {
1247
            llist_insert_tail(head, &buf->node);
1248
        }
1249
    }
1250
1251
    if (buf == NULL) {
1252
        // failed to allocate a buffer, free immediately
1253
        PyObject *to_dealloc = NULL;
1254
        _PyEval_StopTheWorld(tstate->base.interp);
1255
        if (work_item_should_decref(ptr)) {
1256
            PyObject *obj = (PyObject *)(ptr - 1);
1257
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1258
            if (refcount == 0) {
1259
                to_dealloc = obj;
1260
            }
1261
        }
1262
        else {
1263
            PyMem_Free((void *)ptr);
1264
        }
1265
        _PyEval_StartTheWorld(tstate->base.interp);
1266
        if (to_dealloc != NULL) {
1267
            _Py_Dealloc(to_dealloc);
1268
        }
1269
        return;
1270
    }
1271
1272
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1273
    uint64_t seq;
1274
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1275
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1276
    }
1277
    else {
1278
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1279
    }
1280
    buf->array[buf->wr_idx].ptr = ptr;
1281
    buf->array[buf->wr_idx].qsbr_goal = seq;
1282
    buf->wr_idx++;
1283
1284
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1285
        // Normally the processing of delayed items is done from the eval
1286
        // breaker.  Processing here is a safety measure to ensure too much
1287
        // work does not accumulate.
1288
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1289
    }
1290
#endif
1291
0
}
1292
1293
void
1294
_PyMem_FreeDelayed(void *ptr, size_t size)
1295
0
{
1296
0
    assert(!((uintptr_t)ptr & 0x01));
1297
0
    if (ptr != NULL) {
1298
0
        free_delayed((uintptr_t)ptr, size);
1299
0
    }
1300
0
}
1301
1302
#ifdef Py_GIL_DISABLED
1303
void
1304
_PyObject_XDecRefDelayed(PyObject *ptr)
1305
{
1306
    assert(!((uintptr_t)ptr & 0x01));
1307
    if (ptr != NULL) {
1308
        // We use 0 as the size since we don't have an easy way to know the
1309
        // actual size.  If we are freeing many objects, the write sequence
1310
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1311
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1312
    }
1313
}
1314
#endif
1315
1316
#ifdef Py_GIL_DISABLED
1317
void
1318
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1319
{
1320
    PyObject *old = *ptr;
1321
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1322
    if (old == NULL) {
1323
        return;
1324
    }
1325
    if (!_Py_IsImmortal(old)) {
1326
         _PyObject_XDecRefDelayed(old);
1327
    }
1328
}
1329
#endif
1330
1331
static struct _mem_work_chunk *
1332
work_queue_first(struct llist_node *head)
1333
0
{
1334
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1335
0
}
1336
1337
static void
1338
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1339
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1340
0
{
1341
0
    while (!llist_empty(head)) {
1342
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1343
1344
0
        if (buf->rd_idx < buf->wr_idx) {
1345
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1346
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1347
0
                return;
1348
0
            }
1349
1350
0
            buf->rd_idx++;
1351
            // NB: free_work_item may re-enter or execute arbitrary code
1352
0
            free_work_item(item->ptr, cb, state);
1353
0
            continue;
1354
0
        }
1355
1356
0
        assert(buf->rd_idx == buf->wr_idx);
1357
0
        if (keep_empty && buf->node.next == head) {
1358
            // Keep the last buffer in the queue to reduce re-allocations
1359
0
            buf->rd_idx = buf->wr_idx = 0;
1360
0
            return;
1361
0
        }
1362
1363
0
        llist_remove(&buf->node);
1364
0
        PyMem_Free(buf);
1365
0
    }
1366
0
}
1367
1368
static void
1369
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1370
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1371
                     void *state)
1372
0
{
1373
0
    assert(PyMutex_IsLocked(&queue->mutex));
1374
0
    process_queue(&queue->head, tstate, false, cb, state);
1375
1376
0
    int more_work = !llist_empty(&queue->head);
1377
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1378
0
}
1379
1380
static void
1381
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1382
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1383
                           void *state)
1384
0
{
1385
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1386
0
        return;
1387
0
    }
1388
1389
    // Try to acquire the lock, but don't block if it's already held.
1390
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1391
0
        process_interp_queue(queue, tstate, cb, state);
1392
0
        PyMutex_Unlock(&queue->mutex);
1393
0
    }
1394
0
}
1395
1396
void
1397
_PyMem_ProcessDelayed(PyThreadState *tstate)
1398
0
{
1399
0
    PyInterpreterState *interp = tstate->interp;
1400
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1401
1402
0
    tstate_impl->qsbr->should_process = false;
1403
1404
    // Process thread-local work
1405
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1406
1407
    // Process shared interpreter work
1408
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1409
0
}
1410
1411
void
1412
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1413
0
{
1414
0
    PyInterpreterState *interp = tstate->interp;
1415
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1416
1417
    // Process thread-local work
1418
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1419
1420
    // Process shared interpreter work
1421
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1422
0
}
1423
1424
void
1425
_PyMem_AbandonDelayed(PyThreadState *tstate)
1426
0
{
1427
0
    PyInterpreterState *interp = tstate->interp;
1428
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1429
1430
0
    if (llist_empty(queue)) {
1431
0
        return;
1432
0
    }
1433
1434
    // Check if the queue contains one empty buffer
1435
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1436
0
    if (buf->rd_idx == buf->wr_idx) {
1437
0
        llist_remove(&buf->node);
1438
0
        PyMem_Free(buf);
1439
0
        assert(llist_empty(queue));
1440
0
        return;
1441
0
    }
1442
1443
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1444
1445
    // Merge the thread's work queue into the interpreter's work queue.
1446
0
    llist_concat(&interp->mem_free_queue.head, queue);
1447
1448
    // Process the merged queue now (see gh-130794).
1449
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1450
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1451
1452
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1453
1454
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1455
0
}
1456
1457
void
1458
_PyMem_FiniDelayed(PyInterpreterState *interp)
1459
0
{
1460
0
    struct llist_node *head = &interp->mem_free_queue.head;
1461
0
    while (!llist_empty(head)) {
1462
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1463
1464
0
        if (buf->rd_idx < buf->wr_idx) {
1465
            // Free the remaining items immediately. There should be no other
1466
            // threads accessing the memory at this point during shutdown.
1467
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1468
0
            buf->rd_idx++;
1469
            // NB: free_work_item may re-enter or execute arbitrary code
1470
0
            free_work_item(item->ptr, NULL, NULL);
1471
0
            continue;
1472
0
        }
1473
1474
0
        llist_remove(&buf->node);
1475
0
        PyMem_Free(buf);
1476
0
    }
1477
0
}
1478
1479
/**************************/
1480
/* the "object" allocator */
1481
/**************************/
1482
1483
void *
1484
PyObject_Malloc(size_t size)
1485
1.11G
{
1486
    /* see PyMem_RawMalloc() */
1487
1.11G
    if (size > (size_t)PY_SSIZE_T_MAX)
1488
0
        return NULL;
1489
1.11G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1490
1.11G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1491
1.11G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1492
1.11G
    OBJECT_STAT_INC(allocations);
1493
1.11G
    return _PyObject.malloc(_PyObject.ctx, size);
1494
1.11G
}
1495
1496
void *
1497
PyObject_Calloc(size_t nelem, size_t elsize)
1498
0
{
1499
    /* see PyMem_RawMalloc() */
1500
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1501
0
        return NULL;
1502
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1503
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1504
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1505
0
    OBJECT_STAT_INC(allocations);
1506
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1507
0
}
1508
1509
void *
1510
PyObject_Realloc(void *ptr, size_t new_size)
1511
64.3M
{
1512
    /* see PyMem_RawMalloc() */
1513
64.3M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1514
0
        return NULL;
1515
64.3M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1516
64.3M
}
1517
1518
void
1519
PyObject_Free(void *ptr)
1520
1.11G
{
1521
1.11G
    OBJECT_STAT_INC(frees);
1522
1.11G
    _PyObject.free(_PyObject.ctx, ptr);
1523
1.11G
}
1524
1525
1526
/* If we're using GCC, use __builtin_expect() to reduce overhead of
1527
   the valgrind checks */
1528
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
1529
11.0G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1530
5.00G
#  define LIKELY(value) __builtin_expect((value), 1)
1531
#else
1532
#  define UNLIKELY(value) (value)
1533
#  define LIKELY(value) (value)
1534
#endif
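These hints are used on the allocator fast paths that follow; a representative (illustrative) use is marking an error branch as cold:

    /* Sketch: tell GCC the NULL case is rare so the common path is laid out
       as the fall-through branch. */
    if (UNLIKELY(ptr == NULL)) {
        return NULL;
    }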
1535
1536
#ifdef WITH_PYMALLOC
1537
1538
#ifdef WITH_VALGRIND
1539
#include <valgrind/valgrind.h>
1540
1541
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1542
static int running_on_valgrind = -1;
1543
#endif
1544
1545
typedef struct _obmalloc_state OMState;
1546
1547
/* obmalloc state for main interpreter and shared by all interpreters without
1548
 * their own obmalloc state.  By not explicitly initializing this structure, it
1549
 * will be allocated in the BSS which is a small performance win.  The radix
1550
 * tree arrays are fairly large but are sparsely used.  */
1551
static struct _obmalloc_state obmalloc_state_main;
1552
static bool obmalloc_state_initialized;
1553
1554
static inline int
1555
has_own_state(PyInterpreterState *interp)
1556
0
{
1557
0
    return (_Py_IsMainInterpreter(interp) ||
1558
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1559
0
            _Py_IsMainInterpreterFinalizing(interp));
1560
0
}
1561
1562
static inline OMState *
1563
get_state(void)
1564
3.82G
{
1565
3.82G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1566
3.82G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1567
3.82G
    return interp->obmalloc;
1568
3.82G
}
1569
1570
// These macros all rely on a local "state" variable.
1571
1.73G
#define usedpools (state->pools.used)
1572
3.31M
#define allarenas (state->mgmt.arenas)
1573
152
#define maxarenas (state->mgmt.maxarenas)
1574
50.9k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1575
26.9M
#define usable_arenas (state->mgmt.usable_arenas)
1576
18.3M
#define nfp2lasta (state->mgmt.nfp2lasta)
1577
30.9k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1578
10.2k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1579
10.5k
#define narenas_highwater (state->mgmt.narenas_highwater)
1580
482M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1581
1582
#ifdef WITH_MIMALLOC
1583
static bool count_blocks(
1584
    const mi_heap_t* heap, const mi_heap_area_t* area,
1585
    void* block, size_t block_size, void* allocated_blocks)
1586
0
{
1587
0
    *(size_t *)allocated_blocks += area->used;
1588
0
    return 1;
1589
0
}
1590
1591
static Py_ssize_t
1592
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1593
0
{
1594
0
    size_t allocated_blocks = 0;
1595
#ifdef Py_GIL_DISABLED
1596
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1597
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1598
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1599
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1600
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1601
        }
1602
    }
1603
1604
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1605
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1606
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1607
                                        &allocated_blocks);
1608
    }
1609
#else
1610
    // TODO(sgross): this only counts the current thread's blocks.
1611
0
    mi_heap_t *heap = mi_heap_get_default();
1612
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1613
0
#endif
1614
0
    return allocated_blocks;
1615
0
}
1616
#endif
1617
1618
Py_ssize_t
1619
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1620
0
{
1621
0
#ifdef WITH_MIMALLOC
1622
0
    if (_PyMem_MimallocEnabled()) {
1623
0
        return get_mimalloc_allocated_blocks(interp);
1624
0
    }
1625
0
#endif
1626
1627
#ifdef Py_DEBUG
1628
    assert(has_own_state(interp));
1629
#else
1630
0
    if (!has_own_state(interp)) {
1631
0
        _Py_FatalErrorFunc(__func__,
1632
0
                           "the interpreter doesn't have its own allocator");
1633
0
    }
1634
0
#endif
1635
0
    OMState *state = interp->obmalloc;
1636
1637
0
    if (state == NULL) {
1638
0
        return 0;
1639
0
    }
1640
1641
0
    Py_ssize_t n = raw_allocated_blocks;
1642
    /* add up allocated blocks for used pools */
1643
0
    for (uint i = 0; i < maxarenas; ++i) {
1644
        /* Skip arenas which are not allocated. */
1645
0
        if (allarenas[i].address == 0) {
1646
0
            continue;
1647
0
        }
1648
1649
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1650
1651
        /* visit every pool in the arena */
1652
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1653
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1654
0
            poolp p = (poolp)base;
1655
0
            n += p->ref.count;
1656
0
        }
1657
0
    }
1658
0
    return n;
1659
0
}
1660
1661
static void free_obmalloc_arenas(PyInterpreterState *interp);
1662
1663
void
1664
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1665
0
{
1666
0
#ifdef WITH_MIMALLOC
1667
0
    if (_PyMem_MimallocEnabled()) {
1668
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1669
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1670
0
        return;
1671
0
    }
1672
0
#endif
1673
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1674
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1675
0
        assert(has_own_state(interp) || leaked == 0);
1676
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1677
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1678
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1679
            // then some of the memory allocated by obmalloc has not been
1680
            // freed.  It might be safe to free the arenas in that case but
1681
            // it's possible that extension modules are still using that
1682
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1683
            // should be a warning when this happens.  It should be possible to
1684
            // use a tool like "-fsanitize=address" to track down these leaks.
1685
0
            free_obmalloc_arenas(interp);
1686
0
        }
1687
0
    }
1688
0
}
1689
1690
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1691
1692
/* We preserve the number of blocks leaked during runtime finalization,
1693
   so they can be reported if the runtime is initialized again. */
1694
// XXX We don't lose any information by dropping this,
1695
// so we should consider doing so.
1696
static Py_ssize_t last_final_leaks = 0;
1697
1698
void
1699
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1700
0
{
1701
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1702
0
    runtime->obmalloc.interpreter_leaks = 0;
1703
0
}
1704
1705
static Py_ssize_t
1706
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1707
0
{
1708
0
    Py_ssize_t total = 0;
1709
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1710
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1711
0
        if (interp == NULL) {
1712
            /* We are at the very end of runtime finalization.
1713
               We can't rely on finalizing->interp since that thread
1714
               state is probably already freed, so we don't worry
1715
               about it. */
1716
0
            assert(PyInterpreterState_Head() == NULL);
1717
0
        }
1718
0
        else {
1719
0
            assert(interp != NULL);
1720
            /* It is probably the last interpreter but not necessarily. */
1721
0
            assert(PyInterpreterState_Next(interp) == NULL);
1722
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1723
0
        }
1724
0
    }
1725
0
    else {
1726
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1727
0
        HEAD_LOCK(runtime);
1728
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1729
0
        assert(interp != NULL);
1730
#ifdef Py_DEBUG
1731
        int got_main = 0;
1732
#endif
1733
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1734
#ifdef Py_DEBUG
1735
            if (_Py_IsMainInterpreter(interp)) {
1736
                assert(!got_main);
1737
                got_main = 1;
1738
                assert(has_own_state(interp));
1739
            }
1740
#endif
1741
0
            if (has_own_state(interp)) {
1742
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1743
0
            }
1744
0
        }
1745
0
        HEAD_UNLOCK(runtime);
1746
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1747
#ifdef Py_DEBUG
1748
        assert(got_main);
1749
#endif
1750
0
    }
1751
0
    total += runtime->obmalloc.interpreter_leaks;
1752
0
    total += last_final_leaks;
1753
0
    return total;
1754
0
}
1755
1756
Py_ssize_t
1757
_Py_GetGlobalAllocatedBlocks(void)
1758
0
{
1759
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1760
0
}
1761
1762
#if WITH_PYMALLOC_RADIX_TREE
1763
/*==========================================================================*/
1764
/* radix tree for tracking arena usage. */
1765
1766
5.64G
#define arena_map_root (state->usage.arena_map_root)
1767
#ifdef USE_INTERIOR_NODES
1768
16
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1769
16
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1770
#endif
1771
1772
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1773
 * it cannot be created */
1774
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1775
arena_map_get(OMState *state, pymem_block *p, int create)
1776
1.96G
{
1777
1.96G
#ifdef USE_INTERIOR_NODES
1778
    /* sanity check that IGNORE_BITS is correct */
1779
1.96G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1780
1.96G
    int i1 = MAP_TOP_INDEX(p);
1781
1.96G
    if (arena_map_root.ptrs[i1] == NULL) {
1782
16
        if (!create) {
1783
0
            return NULL;
1784
0
        }
1785
16
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1786
16
        if (n == NULL) {
1787
0
            return NULL;
1788
0
        }
1789
16
        arena_map_root.ptrs[i1] = n;
1790
16
        arena_map_mid_count++;
1791
16
    }
1792
1.96G
    int i2 = MAP_MID_INDEX(p);
1793
1.96G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1794
247M
        if (!create) {
1795
247M
            return NULL;
1796
247M
        }
1797
16
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1798
16
        if (n == NULL) {
1799
0
            return NULL;
1800
0
        }
1801
16
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1802
16
        arena_map_bot_count++;
1803
16
    }
1804
1.71G
    return arena_map_root.ptrs[i1]->ptrs[i2];
1805
#else
1806
    return &arena_map_root;
1807
#endif
1808
1.96G
}
1809
1810
1811
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1812
 * away 24 bits of the address.  That reduces the space requirement of
1813
 * the tree compared to similar radix tree page-map schemes.  In
1814
 * exchange for slashing the space requirement, it needs more
1815
 * computation to check an address.
1816
 *
1817
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1818
 * explain in decimal so let's say that the arena size is 100 bytes.
1819
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1820
 * pointer address is inside an actual arena, we have to check two ideal
1821
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1822
 * 300.  In the rare case that an arena is aligned in the ideal way
1823
 * (e.g. base address of arena is 200) then we only have to check one
1824
 * ideal address.
1825
 *
1826
 * The tree nodes for 200 and 300 both store the address of arena.
1827
 * There are two cases: the arena starts at a lower ideal arena and
1828
 * extends to this one, or the arena starts in this arena and extends to
1829
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1830
 * these two cases.
1831
 */
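
To make the decimal analogy above concrete, here is a self-contained toy (not the real multi-level tree, which indexes its nodes by address bits): it records one 100-byte arena at base 280 and then answers membership queries with the same tail_lo/tail_hi rule used by arena_map_is_used() below. All sizes and addresses are illustrative assumptions.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for one level of the radix tree: ten slots covering the
     * addresses 0..999 with a 100-byte "arena" size.  Illustrative only. */
    #define TOY_ARENA_SIZE 100
    #define TOY_NSLOTS     10

    typedef struct { int32_t tail_hi, tail_lo; } toy_slot;
    static toy_slot toy_map[TOY_NSLOTS];

    static void toy_mark_used(uintptr_t base)
    {
        int32_t tail = (int32_t)(base % TOY_ARENA_SIZE);
        if (tail == 0) {                      /* "ideal" arena address */
            toy_map[base / TOY_ARENA_SIZE].tail_hi = -1;
            return;
        }
        /* The arena starts in one ideal block and spills into the next. */
        toy_map[base / TOY_ARENA_SIZE].tail_hi = tail;
        toy_map[base / TOY_ARENA_SIZE + 1].tail_lo = tail;
    }

    static int toy_is_used(uintptr_t p)
    {
        toy_slot *n = &toy_map[p / TOY_ARENA_SIZE];
        int32_t tail = (int32_t)(p % TOY_ARENA_SIZE);
        return (tail < n->tail_lo) || (tail >= n->tail_hi && n->tail_hi != 0);
    }

    int main(void)
    {
        toy_mark_used(280);        /* arena covers addresses 280..379 */
        assert(toy_is_used(357));  /* tail 57 < tail_lo 80 in block 300..399 */
        assert(toy_is_used(285));  /* tail 85 >= tail_hi 80 in block 200..299 */
        assert(!toy_is_used(250)); /* before the arena starts */
        assert(!toy_is_used(390)); /* past the arena's end */
        printf("toy arena-map checks passed\n");
        return 0;
    }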
1832
1833
1834
/* mark or unmark addresses covered by arena */
1835
static int
1836
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1837
20.3k
{
1838
    /* sanity check that IGNORE_BITS is correct */
1839
20.3k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1840
20.3k
    arena_map_bot_t *n_hi = arena_map_get(
1841
20.3k
            state, (pymem_block *)arena_base, is_used);
1842
20.3k
    if (n_hi == NULL) {
1843
0
        assert(is_used); /* otherwise node should already exist */
1844
0
        return 0; /* failed to allocate space for node */
1845
0
    }
1846
20.3k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1847
20.3k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1848
20.3k
    if (tail == 0) {
1849
        /* is ideal arena address */
1850
33
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1851
33
    }
1852
20.3k
    else {
1853
        /* arena_base address is not ideal (aligned to arena size) and
1854
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1855
         * for the next arena.  Note that it might be in different MAP_TOP
1856
         * and MAP_MID nodes as well so we need to call arena_map_get()
1857
         * again (do the full tree traversal).
1858
         */
1859
20.3k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1860
20.3k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1861
        /* If arena_base is a legit arena address, so is arena_base_next - 1
1862
         * (last address in arena).  If arena_base_next overflows then it
1863
         * must overflow to 0.  However, that would mean arena_base was
1864
         * "ideal" and we should not be in this case. */
1865
20.3k
        assert(arena_base < arena_base_next);
1866
20.3k
        arena_map_bot_t *n_lo = arena_map_get(
1867
20.3k
                state, (pymem_block *)arena_base_next, is_used);
1868
20.3k
        if (n_lo == NULL) {
1869
0
            assert(is_used); /* otherwise should already exist */
1870
0
            n_hi->arenas[i3].tail_hi = 0;
1871
0
            return 0; /* failed to allocate space for node */
1872
0
        }
1873
20.3k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
1874
20.3k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1875
20.3k
    }
1876
20.3k
    return 1;
1877
20.3k
}
1878
1879
/* Return true if 'p' is a pointer inside an obmalloc arena.
1880
 * _PyObject_Free() calls this so it needs to be very fast. */
1881
static int
1882
arena_map_is_used(OMState *state, pymem_block *p)
1883
1.96G
{
1884
1.96G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
1885
1.96G
    if (n == NULL) {
1886
247M
        return 0;
1887
247M
    }
1888
1.71G
    int i3 = MAP_BOT_INDEX(p);
1889
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1890
1.71G
    int32_t hi = n->arenas[i3].tail_hi;
1891
1.71G
    int32_t lo = n->arenas[i3].tail_lo;
1892
1.71G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1893
1.71G
    return (tail < lo) || (tail >= hi && hi != 0);
1894
1.96G
}
1895
1896
/* end of radix tree logic */
1897
/*==========================================================================*/
1898
#endif /* WITH_PYMALLOC_RADIX_TREE */
1899
1900
1901
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
1902
 * allocate a new arena, and return the address of an arena_object
1903
 * describing the new arena.  It's expected that the caller will set
1904
 * `usable_arenas` to the return value.
1905
 */
1906
static struct arena_object*
1907
new_arena(OMState *state)
1908
10.2k
{
1909
10.2k
    struct arena_object* arenaobj;
1910
10.2k
    uint excess;        /* number of bytes above pool alignment */
1911
10.2k
    void *address;
1912
1913
10.2k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
1914
10.2k
    if (debug_stats == -1) {
1915
16
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1916
16
        debug_stats = (opt != NULL && *opt != '\0');
1917
16
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
1918
16
    }
1919
10.2k
    if (debug_stats) {
1920
0
        _PyObject_DebugMallocStats(stderr);
1921
0
    }
1922
1923
10.2k
    if (unused_arena_objects == NULL) {
1924
28
        uint i;
1925
28
        uint numarenas;
1926
28
        size_t nbytes;
1927
1928
        /* Double the number of arena objects on each allocation.
1929
         * Note that it's possible for `numarenas` to overflow.
1930
         */
1931
28
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1932
28
        if (numarenas <= maxarenas)
1933
0
            return NULL;                /* overflow */
1934
#if SIZEOF_SIZE_T <= SIZEOF_INT
1935
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
1936
            return NULL;                /* overflow */
1937
#endif
1938
28
        nbytes = numarenas * sizeof(*allarenas);
1939
28
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
1940
28
        if (arenaobj == NULL)
1941
0
            return NULL;
1942
28
        allarenas = arenaobj;
1943
1944
        /* We might need to fix pointers that were copied.  However,
1945
         * new_arena only gets called when all the pages in the
1946
         * previous arenas are full.  Thus, there are *no* pointers
1947
         * into the old array. Thus, we don't have to worry about
1948
         * invalid pointers.  Just to be sure, some asserts:
1949
         */
1950
28
        assert(usable_arenas == NULL);
1951
28
        assert(unused_arena_objects == NULL);
1952
1953
        /* Put the new arenas on the unused_arena_objects list. */
1954
636
        for (i = maxarenas; i < numarenas; ++i) {
1955
608
            allarenas[i].address = 0;              /* mark as unassociated */
1956
608
            allarenas[i].nextarena = i < numarenas - 1 ?
1957
580
                                        &allarenas[i+1] : NULL;
1958
608
        }
1959
1960
        /* Update globals. */
1961
28
        unused_arena_objects = &allarenas[maxarenas];
1962
28
        maxarenas = numarenas;
1963
28
    }
1964
1965
    /* Take the next available arena object off the head of the list. */
1966
10.2k
    assert(unused_arena_objects != NULL);
1967
10.2k
    arenaobj = unused_arena_objects;
1968
10.2k
    unused_arena_objects = arenaobj->nextarena;
1969
10.2k
    assert(arenaobj->address == 0);
1970
10.2k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1971
10.2k
#if WITH_PYMALLOC_RADIX_TREE
1972
10.2k
    if (address != NULL) {
1973
10.2k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
1974
            /* marking arena in radix tree failed, abort */
1975
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1976
0
            address = NULL;
1977
0
        }
1978
10.2k
    }
1979
10.2k
#endif
1980
10.2k
    if (address == NULL) {
1981
        /* The allocation failed: return NULL after putting the
1982
         * arenaobj back.
1983
         */
1984
0
        arenaobj->nextarena = unused_arena_objects;
1985
0
        unused_arena_objects = arenaobj;
1986
0
        return NULL;
1987
0
    }
1988
10.2k
    arenaobj->address = (uintptr_t)address;
1989
1990
10.2k
    ++narenas_currently_allocated;
1991
10.2k
    ++ntimes_arena_allocated;
1992
10.2k
    if (narenas_currently_allocated > narenas_highwater)
1993
346
        narenas_highwater = narenas_currently_allocated;
1994
10.2k
    arenaobj->freepools = NULL;
1995
    /* pool_address <- first pool-aligned address in the arena
1996
       nfreepools <- number of whole pools that fit after alignment */
1997
10.2k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
1998
10.2k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1999
10.2k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2000
10.2k
    if (excess != 0) {
2001
8.08k
        --arenaobj->nfreepools;
2002
8.08k
        arenaobj->pool_address += POOL_SIZE - excess;
2003
8.08k
    }
2004
10.2k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2005
2006
10.2k
    return arenaobj;
2007
10.2k
}
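
The excess/pool_address fix-up just above trims the arena down to whole, pool-aligned pools. A quick sketch of that arithmetic with an illustrative 16 KiB pool size (the real POOL_SIZE depends on the build configuration):

    #include <inttypes.h>
    #include <stdio.h>

    #define TOY_POOL_SIZE      ((uintptr_t)16 * 1024)
    #define TOY_POOL_SIZE_MASK (TOY_POOL_SIZE - 1)

    int main(void)
    {
        uintptr_t arena_base = (uintptr_t)0x20003000;       /* not pool-aligned */
        uintptr_t excess = arena_base & TOY_POOL_SIZE_MASK; /* 0x3000 */
        uintptr_t first_pool = arena_base;
        if (excess != 0) {
            /* Skip the partial pool at the front; one fewer whole pool fits. */
            first_pool += TOY_POOL_SIZE - excess;
        }
        printf("arena base       = 0x%" PRIxPTR "\n", arena_base);
        printf("excess           = 0x%" PRIxPTR "\n", excess);
        printf("first whole pool = 0x%" PRIxPTR "\n", first_pool); /* 0x20004000 */
        return 0;
    }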
2008
2009
2010
2011
#if WITH_PYMALLOC_RADIX_TREE
2012
/* Return true if and only if P is an address that was allocated by
2013
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2014
 */
2015
static bool
2016
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2017
1.96G
{
2018
1.96G
    return arena_map_is_used(state, p);
2019
1.96G
}
2020
#else
2021
/*
2022
address_in_range(P, POOL)
2023
2024
Return true if and only if P is an address that was allocated by pymalloc.
2025
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2026
(the caller is asked to compute this because the macro expands POOL more than
2027
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2028
variable and pass the latter to the macro; because address_in_range is
2029
called on every alloc/realloc/free, micro-efficiency is important here).
2030
2031
Tricky:  Let B be the arena base address associated with the pool, B =
2032
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2033
2034
    B <= P < B + ARENA_SIZE
2035
2036
Subtracting B throughout, this is true iff
2037
2038
    0 <= P-B < ARENA_SIZE
2039
2040
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2041
2042
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2043
before the first arena has been allocated.  `arenas` is still NULL in that
2044
case.  We're relying on that maxarenas is also 0 in that case, so that
2045
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2046
into a NULL arenas.
2047
2048
Details:  given P and POOL, the arena_object corresponding to P is AO =
2049
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2050
stores, etc), POOL is the correct address of P's pool, AO.address is the
2051
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2052
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2053
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2054
controls P.
2055
2056
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2057
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2058
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2059
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2060
control P.
2061
2062
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2063
allocated arena, obmalloc controls all the memory in slice AO.address :
2064
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2065
so P doesn't lie in that slice, so the macro correctly reports that P is not
2066
controlled by obmalloc.
2067
2068
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2069
arena_object (one not currently associated with an allocated arena),
2070
AO.address is 0, and the second test in the macro reduces to:
2071
2072
    P < ARENA_SIZE
2073
2074
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2075
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2076
of the test still passes, and the third clause (AO.address != 0) is necessary
2077
to get the correct result:  AO.address is 0 in this case, so the macro
2078
correctly reports that P is not controlled by obmalloc (despite that P lies in
2079
slice AO.address : AO.address + ARENA_SIZE).
2080
2081
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2082
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2083
corresponded to a currently-allocated arena, so the "P is not controlled by
2084
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2085
was impossible.
2086
2087
Note that the logic is excruciating, and reading up possibly uninitialized
2088
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2089
creates problems for some memory debuggers.  The overwhelming advantage is
2090
that this test determines whether an arbitrary address is controlled by
2091
obmalloc in a small constant time, independent of the number of arenas
2092
obmalloc controls.  Since this test is needed at every entry point, it's
2093
extremely desirable that it be this fast.
2094
*/
2095
2096
static bool _Py_NO_SANITIZE_ADDRESS
2097
            _Py_NO_SANITIZE_THREAD
2098
            _Py_NO_SANITIZE_MEMORY
2099
address_in_range(OMState *state, void *p, poolp pool)
2100
{
2101
    // Since address_in_range may be reading from memory which was not allocated
2102
    // by Python, it is important that pool->arenaindex is read only once, as
2103
    // another thread may be concurrently modifying the value without holding
2104
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2105
    // only once.
2106
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2107
    return arenaindex < maxarenas &&
2108
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2109
        allarenas[arenaindex].address != 0;
2110
}
2111
2112
#endif /* !WITH_PYMALLOC_RADIX_TREE */
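
The comment above notes that unsigned arithmetic folds the two-sided bounds check B <= P < B + ARENA_SIZE into a single comparison, because a "negative" difference wraps around to a huge unsigned value. A minimal sketch of the equivalence (the 1 MiB arena size is only an illustrative assumption, and it assumes base + ARENA_SIZE does not itself wrap):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define TOY_ARENA_SIZE ((uintptr_t)1 << 20)

    static int in_range_one_compare(uintptr_t p, uintptr_t base)
    {
        return p - base < TOY_ARENA_SIZE;      /* the trick: no "0 <=" test */
    }

    static int in_range_two_compares(uintptr_t p, uintptr_t base)
    {
        return base <= p && p < base + TOY_ARENA_SIZE;
    }

    int main(void)
    {
        uintptr_t base = (uintptr_t)0x40000000;
        uintptr_t probes[] = { base, base + 1, base + TOY_ARENA_SIZE - 1,
                               base + TOY_ARENA_SIZE, base - 1 };
        for (size_t i = 0; i < sizeof(probes) / sizeof(probes[0]); i++) {
            assert(in_range_one_compare(probes[i], base)
                   == in_range_two_compares(probes[i], base));
        }
        return 0;
    }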
2113
2114
/*==========================================================================*/
2115
2116
// Called when freelist is exhausted.  Extend the freelist if there is
2117
// space for a block.  Otherwise, remove this pool from usedpools.
2118
static void
2119
pymalloc_pool_extend(poolp pool, uint size)
2120
379M
{
2121
379M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2122
        /* There is room for another block. */
2123
270M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2124
270M
        pool->nextoffset += INDEX2SIZE(size);
2125
270M
        *(pymem_block **)(pool->freeblock) = NULL;
2126
270M
        return;
2127
270M
    }
2128
2129
    /* Pool is full, unlink from used pools. */
2130
109M
    poolp next;
2131
109M
    next = pool->nextpool;
2132
109M
    pool = pool->prevpool;
2133
109M
    next->prevpool = pool;
2134
109M
    pool->nextpool = next;
2135
109M
}
2136
2137
/* called when pymalloc_alloc can not allocate a block from usedpool.
2138
 * This function takes a new pool and allocates a block from it.
2139
 */
2140
static void*
2141
allocate_from_new_pool(OMState *state, uint size)
2142
2.68M
{
2143
    /* There isn't a pool of the right size class immediately
2144
     * available:  use a free pool.
2145
     */
2146
2.68M
    if (UNLIKELY(usable_arenas == NULL)) {
2147
        /* No arena has a free pool:  allocate a new arena. */
2148
#ifdef WITH_MEMORY_LIMITS
2149
        if (narenas_currently_allocated >= MAX_ARENAS) {
2150
            return NULL;
2151
        }
2152
#endif
2153
10.2k
        usable_arenas = new_arena(state);
2154
10.2k
        if (usable_arenas == NULL) {
2155
0
            return NULL;
2156
0
        }
2157
10.2k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2158
10.2k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2159
10.2k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2160
10.2k
    }
2161
2.68M
    assert(usable_arenas->address != 0);
2162
2163
    /* This arena already had the smallest nfreepools value, so decreasing
2164
     * nfreepools doesn't change that, and we don't need to rearrange the
2165
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2166
     * we need to remove its arena_object from usable_arenas.
2167
     */
2168
2.68M
    assert(usable_arenas->nfreepools > 0);
2169
2.68M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2170
        /* It's the last of this size, so there won't be any. */
2171
2.67M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2172
2.67M
    }
2173
    /* If any free pools will remain, it will be the new smallest. */
2174
2.68M
    if (usable_arenas->nfreepools > 1) {
2175
2.54M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2176
2.54M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2177
2.54M
    }
2178
2179
    /* Try to get a cached free pool. */
2180
2.68M
    poolp pool = usable_arenas->freepools;
2181
2.68M
    if (LIKELY(pool != NULL)) {
2182
        /* Unlink from cached pools. */
2183
2.04M
        usable_arenas->freepools = pool->nextpool;
2184
2.04M
        usable_arenas->nfreepools--;
2185
2.04M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2186
            /* Wholly allocated:  remove. */
2187
127k
            assert(usable_arenas->freepools == NULL);
2188
127k
            assert(usable_arenas->nextarena == NULL ||
2189
127k
                   usable_arenas->nextarena->prevarena ==
2190
127k
                   usable_arenas);
2191
127k
            usable_arenas = usable_arenas->nextarena;
2192
127k
            if (usable_arenas != NULL) {
2193
121k
                usable_arenas->prevarena = NULL;
2194
121k
                assert(usable_arenas->address != 0);
2195
121k
            }
2196
127k
        }
2197
1.91M
        else {
2198
            /* nfreepools > 0:  it must be that freepools
2199
             * isn't NULL, or that we haven't yet carved
2200
             * off all the arena's pools for the first
2201
             * time.
2202
             */
2203
1.91M
            assert(usable_arenas->freepools != NULL ||
2204
1.91M
                   usable_arenas->pool_address <=
2205
1.91M
                   (pymem_block*)usable_arenas->address +
2206
1.91M
                       ARENA_SIZE - POOL_SIZE);
2207
1.91M
        }
2208
2.04M
    }
2209
635k
    else {
2210
        /* Carve off a new pool. */
2211
635k
        assert(usable_arenas->nfreepools > 0);
2212
635k
        assert(usable_arenas->freepools == NULL);
2213
635k
        pool = (poolp)usable_arenas->pool_address;
2214
635k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2215
635k
                                 ARENA_SIZE - POOL_SIZE);
2216
635k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2217
635k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2218
635k
        pool->szidx = DUMMY_SIZE_IDX;
2219
635k
        usable_arenas->pool_address += POOL_SIZE;
2220
635k
        --usable_arenas->nfreepools;
2221
2222
635k
        if (usable_arenas->nfreepools == 0) {
2223
9.88k
            assert(usable_arenas->nextarena == NULL ||
2224
9.88k
                   usable_arenas->nextarena->prevarena ==
2225
9.88k
                   usable_arenas);
2226
            /* Unlink the arena:  it is completely allocated. */
2227
9.88k
            usable_arenas = usable_arenas->nextarena;
2228
9.88k
            if (usable_arenas != NULL) {
2229
445
                usable_arenas->prevarena = NULL;
2230
445
                assert(usable_arenas->address != 0);
2231
445
            }
2232
9.88k
        }
2233
635k
    }
2234
2235
    /* Frontlink to used pools. */
2236
2.68M
    pymem_block *bp;
2237
2.68M
    poolp next = usedpools[size + size]; /* == prev */
2238
2.68M
    pool->nextpool = next;
2239
2.68M
    pool->prevpool = next;
2240
2.68M
    next->nextpool = pool;
2241
2.68M
    next->prevpool = pool;
2242
2.68M
    pool->ref.count = 1;
2243
2.68M
    if (pool->szidx == size) {
2244
        /* Luckily, this pool last contained blocks
2245
         * of the same size class, so its header
2246
         * and free list are already initialized.
2247
         */
2248
1.48M
        bp = pool->freeblock;
2249
1.48M
        assert(bp != NULL);
2250
1.48M
        pool->freeblock = *(pymem_block **)bp;
2251
1.48M
        return bp;
2252
1.48M
    }
2253
    /*
2254
     * Initialize the pool header, set up the free list to
2255
     * contain just the second block, and return the first
2256
     * block.
2257
     */
2258
1.19M
    pool->szidx = size;
2259
1.19M
    size = INDEX2SIZE(size);
2260
1.19M
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2261
1.19M
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2262
1.19M
    pool->maxnextoffset = POOL_SIZE - size;
2263
1.19M
    pool->freeblock = bp + size;
2264
1.19M
    *(pymem_block **)(pool->freeblock) = NULL;
2265
1.19M
    return bp;
2266
2.68M
}
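
The pool initialization above, together with pymalloc_pool_extend() earlier, carves blocks off a pool lazily: the free list holds only blocks that have been handed back, plus one never-used block tracked through nextoffset. A toy model of that bookkeeping (the pool, header and block sizes are illustrative assumptions, not the build's real POOL_SIZE or POOL_OVERHEAD):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TOY_POOL_SIZE     4096
    #define TOY_POOL_OVERHEAD 48
    #define TOY_BLOCK_SIZE    32

    int main(void)
    {
        static _Alignas(16) uint8_t pool[TOY_POOL_SIZE];

        /* Pool initialization: hand out the first block; the free list
         * holds only the second block, the rest is still uncarved. */
        uint8_t *bp = pool + TOY_POOL_OVERHEAD;
        uint32_t nextoffset = TOY_POOL_OVERHEAD + 2 * TOY_BLOCK_SIZE;
        uint32_t maxnextoffset = TOY_POOL_SIZE - TOY_BLOCK_SIZE;
        uint8_t *freeblock = bp + TOY_BLOCK_SIZE;
        *(uint8_t **)freeblock = NULL;

        size_t nblocks = 1;                        /* bp is already handed out */
        while (freeblock != NULL) {
            uint8_t *block = freeblock;            /* pop the free-list head */
            freeblock = *(uint8_t **)block;
            nblocks++;
            if (freeblock == NULL && nextoffset <= maxnextoffset) {
                /* the "extend" step: carve one more block off the pool */
                freeblock = pool + nextoffset;
                nextoffset += TOY_BLOCK_SIZE;
                *(uint8_t **)freeblock = NULL;
            }
        }
        printf("pool of %d bytes, %d-byte header: %zu blocks of %d bytes\n",
               TOY_POOL_SIZE, TOY_POOL_OVERHEAD, nblocks, TOY_BLOCK_SIZE);
        assert(nblocks == (TOY_POOL_SIZE - TOY_POOL_OVERHEAD) / TOY_BLOCK_SIZE);
        return 0;
    }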
2267
2268
/* pymalloc allocator
2269
2270
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2271
2272
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2273
   requests, on error in the code below (as a last chance to serve the request)
2274
   or when the max memory limit has been reached.
2275
*/
2276
static inline void*
2277
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2278
1.86G
{
2279
#ifdef WITH_VALGRIND
2280
    if (UNLIKELY(running_on_valgrind == -1)) {
2281
        running_on_valgrind = RUNNING_ON_VALGRIND;
2282
    }
2283
    if (UNLIKELY(running_on_valgrind)) {
2284
        return NULL;
2285
    }
2286
#endif
2287
2288
1.86G
    if (UNLIKELY(nbytes == 0)) {
2289
30.5M
        return NULL;
2290
30.5M
    }
2291
1.83G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2292
210M
        return NULL;
2293
210M
    }
2294
2295
1.62G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2296
1.62G
    poolp pool = usedpools[size + size];
2297
1.62G
    pymem_block *bp;
2298
2299
1.62G
    if (LIKELY(pool != pool->nextpool)) {
2300
        /*
2301
         * There is a used pool for this size class.
2302
         * Pick up the head block of its free list.
2303
         */
2304
1.61G
        ++pool->ref.count;
2305
1.61G
        bp = pool->freeblock;
2306
1.61G
        assert(bp != NULL);
2307
2308
1.61G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2309
            // Reached the end of the free list, try to extend it.
2310
379M
            pymalloc_pool_extend(pool, size);
2311
379M
        }
2312
1.61G
    }
2313
2.68M
    else {
2314
        /* There isn't a pool of the right size class immediately
2315
         * available:  use a free pool.
2316
         */
2317
2.68M
        bp = allocate_from_new_pool(state, size);
2318
2.68M
    }
2319
2320
1.62G
    return (void *)bp;
2321
1.83G
}
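
The shift on nbytes - 1 above maps a request onto its size-class index. A short illustration, assuming the common 64-bit configuration of 16-byte alignment and a 512-byte small-request threshold (both values are assumptions here; the real ones come from the build):

    #include <stdio.h>
    #include <stddef.h>

    #define TOY_ALIGNMENT_SHIFT 4                      /* 16-byte alignment */
    #define TOY_ALIGNMENT       (1 << TOY_ALIGNMENT_SHIFT)

    int main(void)
    {
        size_t requests[] = {1, 16, 17, 20, 32, 33, 511, 512};
        for (size_t i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
            size_t nbytes = requests[i];
            unsigned idx = (unsigned)(nbytes - 1) >> TOY_ALIGNMENT_SHIFT;
            size_t block = (size_t)(idx + 1) * TOY_ALIGNMENT;  /* block size */
            printf("request %3zu bytes -> class %2u, block of %3zu bytes\n",
                   nbytes, idx, block);
        }
        return 0;
    }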
2322
2323
2324
void *
2325
_PyObject_Malloc(void *ctx, size_t nbytes)
2326
1.81G
{
2327
1.81G
    OMState *state = get_state();
2328
1.81G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2329
1.81G
    if (LIKELY(ptr != NULL)) {
2330
1.57G
        return ptr;
2331
1.57G
    }
2332
2333
241M
    ptr = PyMem_RawMalloc(nbytes);
2334
241M
    if (ptr != NULL) {
2335
241M
        raw_allocated_blocks++;
2336
241M
    }
2337
241M
    return ptr;
2338
1.81G
}
2339
2340
2341
void *
2342
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2343
48.3M
{
2344
48.3M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2345
48.3M
    size_t nbytes = nelem * elsize;
2346
2347
48.3M
    OMState *state = get_state();
2348
48.3M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2349
48.3M
    if (LIKELY(ptr != NULL)) {
2350
48.2M
        memset(ptr, 0, nbytes);
2351
48.2M
        return ptr;
2352
48.2M
    }
2353
2354
79.9k
    ptr = PyMem_RawCalloc(nelem, elsize);
2355
79.9k
    if (ptr != NULL) {
2356
79.9k
        raw_allocated_blocks++;
2357
79.9k
    }
2358
79.9k
    return ptr;
2359
48.3M
}
2360
2361
2362
static void
2363
insert_to_usedpool(OMState *state, poolp pool)
2364
109M
{
2365
109M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2366
2367
109M
    uint size = pool->szidx;
2368
109M
    poolp next = usedpools[size + size];
2369
109M
    poolp prev = next->prevpool;
2370
2371
    /* insert pool before next:   prev <-> pool <-> next */
2372
109M
    pool->nextpool = next;
2373
109M
    pool->prevpool = prev;
2374
109M
    next->prevpool = pool;
2375
109M
    prev->nextpool = pool;
2376
109M
}
2377
2378
static void
2379
insert_to_freepool(OMState *state, poolp pool)
2380
2.67M
{
2381
2.67M
    poolp next = pool->nextpool;
2382
2.67M
    poolp prev = pool->prevpool;
2383
2.67M
    next->prevpool = prev;
2384
2.67M
    prev->nextpool = next;
2385
2386
    /* Link the pool to freepools.  This is a singly-linked
2387
     * list, and pool->prevpool isn't used there.
2388
     */
2389
2.67M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2390
2.67M
    pool->nextpool = ao->freepools;
2391
2.67M
    ao->freepools = pool;
2392
2.67M
    uint nf = ao->nfreepools;
2393
    /* If this is the rightmost arena with this number of free pools,
2394
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2395
     * are no arenas in usable_arenas with that value.
2396
     */
2397
2.67M
    struct arena_object* lastnf = nfp2lasta[nf];
2398
2.67M
    assert((nf == 0 && lastnf == NULL) ||
2399
2.67M
           (nf > 0 &&
2400
2.67M
            lastnf != NULL &&
2401
2.67M
            lastnf->nfreepools == nf &&
2402
2.67M
            (lastnf->nextarena == NULL ||
2403
2.67M
             nf < lastnf->nextarena->nfreepools)));
2404
2.67M
    if (lastnf == ao) {  /* it is the rightmost */
2405
2.50M
        struct arena_object* p = ao->prevarena;
2406
2.50M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2407
2.50M
    }
2408
2.67M
    ao->nfreepools = ++nf;
2409
2410
    /* All the rest is arena management.  We just freed
2411
     * a pool, and there are 4 cases for arena mgmt:
2412
     * 1. If all the pools are free, return the arena to
2413
     *    the system free().  Except if this is the last
2414
     *    arena in the list, keep it to avoid thrashing:
2415
     *    keeping one wholly free arena in the list avoids
2416
     *    pathological cases where a simple loop would
2417
     *    otherwise provoke needing to allocate and free an
2418
     *    arena on every iteration.  See bpo-37257.
2419
     * 2. If this is the only free pool in the arena,
2420
     *    add the arena back to the `usable_arenas` list.
2421
     * 3. If the "next" arena has a smaller count of free
2422
     *    pools, we have to "slide this arena right" to
2423
     *    restore that usable_arenas is sorted in order of
2424
     *    nfreepools.
2425
     * 4. Else there's nothing more to do.
2426
     */
2427
2.67M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2428
        /* Case 1.  First unlink ao from usable_arenas.
2429
         */
2430
10.1k
        assert(ao->prevarena == NULL ||
2431
10.1k
               ao->prevarena->address != 0);
2432
10.1k
        assert(ao->nextarena == NULL ||
2433
10.1k
               ao->nextarena->address != 0);
2434
2435
        /* Fix the pointer in the prevarena, or the
2436
         * usable_arenas pointer.
2437
         */
2438
10.1k
        if (ao->prevarena == NULL) {
2439
1.52k
            usable_arenas = ao->nextarena;
2440
1.52k
            assert(usable_arenas == NULL ||
2441
1.52k
                   usable_arenas->address != 0);
2442
1.52k
        }
2443
8.59k
        else {
2444
8.59k
            assert(ao->prevarena->nextarena == ao);
2445
8.59k
            ao->prevarena->nextarena =
2446
8.59k
                ao->nextarena;
2447
8.59k
        }
2448
        /* Fix the pointer in the nextarena. */
2449
10.1k
        if (ao->nextarena != NULL) {
2450
10.1k
            assert(ao->nextarena->prevarena == ao);
2451
10.1k
            ao->nextarena->prevarena =
2452
10.1k
                ao->prevarena;
2453
10.1k
        }
2454
        /* Record that this arena_object slot is
2455
         * available to be reused.
2456
         */
2457
10.1k
        ao->nextarena = unused_arena_objects;
2458
10.1k
        unused_arena_objects = ao;
2459
2460
10.1k
#if WITH_PYMALLOC_RADIX_TREE
2461
        /* mark arena region as not under control of obmalloc */
2462
10.1k
        arena_map_mark_used(state, ao->address, 0);
2463
10.1k
#endif
2464
2465
        /* Free the entire arena. */
2466
10.1k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2467
10.1k
                             (void *)ao->address, ARENA_SIZE);
2468
10.1k
        ao->address = 0;                        /* mark unassociated */
2469
10.1k
        --narenas_currently_allocated;
2470
2471
10.1k
        return;
2472
10.1k
    }
2473
2474
2.66M
    if (nf == 1) {
2475
        /* Case 2.  Put ao at the head of
2476
         * usable_arenas.  Note that because
2477
         * ao->nfreepools was 0 before, ao isn't
2478
         * currently on the usable_arenas list.
2479
         */
2480
137k
        ao->nextarena = usable_arenas;
2481
137k
        ao->prevarena = NULL;
2482
137k
        if (usable_arenas)
2483
132k
            usable_arenas->prevarena = ao;
2484
137k
        usable_arenas = ao;
2485
137k
        assert(usable_arenas->address != 0);
2486
137k
        if (nfp2lasta[1] == NULL) {
2487
133k
            nfp2lasta[1] = ao;
2488
133k
        }
2489
2490
137k
        return;
2491
137k
    }
2492
2493
    /* If this arena is now out of order, we need to keep
2494
     * the list sorted.  The list is kept sorted so that
2495
     * the "most full" arenas are used first, which allows
2496
     * the nearly empty arenas to be completely freed.  In
2497
     * a few un-scientific tests, it seems like this
2498
     * approach allowed a lot more memory to be freed.
2499
     */
2500
    /* If this is the only arena with nf, record that. */
2501
2.53M
    if (nfp2lasta[nf] == NULL) {
2502
2.49M
        nfp2lasta[nf] = ao;
2503
2.49M
    } /* else the rightmost with nf doesn't change */
2504
    /* If this was the rightmost of the old size, it remains in place. */
2505
2.53M
    if (ao == lastnf) {
2506
        /* Case 4.  Nothing to do. */
2507
2.49M
        return;
2508
2.49M
    }
2509
    /* If ao were the only arena in the list, the last block would have
2510
     * gotten us out.
2511
     */
2512
31.5k
    assert(ao->nextarena != NULL);
2513
2514
    /* Case 3:  We have to move the arena towards the end of the list,
2515
     * because it has more free pools than the arena to its right.  It needs
2516
     * to move to follow lastnf.
2517
     * First unlink ao from usable_arenas.
2518
     */
2519
31.5k
    if (ao->prevarena != NULL) {
2520
        /* ao isn't at the head of the list */
2521
18.7k
        assert(ao->prevarena->nextarena == ao);
2522
18.7k
        ao->prevarena->nextarena = ao->nextarena;
2523
18.7k
    }
2524
12.7k
    else {
2525
        /* ao is at the head of the list */
2526
12.7k
        assert(usable_arenas == ao);
2527
12.7k
        usable_arenas = ao->nextarena;
2528
12.7k
    }
2529
31.5k
    ao->nextarena->prevarena = ao->prevarena;
2530
    /* And insert after lastnf. */
2531
31.5k
    ao->prevarena = lastnf;
2532
31.5k
    ao->nextarena = lastnf->nextarena;
2533
31.5k
    if (ao->nextarena != NULL) {
2534
29.8k
        ao->nextarena->prevarena = ao;
2535
29.8k
    }
2536
31.5k
    lastnf->nextarena = ao;
2537
    /* Verify that the swaps worked. */
2538
31.5k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2539
31.5k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2540
31.5k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2541
31.5k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2542
31.5k
           || ao->prevarena->nextarena == ao);
2543
31.5k
}
2544
2545
/* Free a memory block allocated by pymalloc_alloc().
2546
   Return 1 if it was freed.
2547
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2548
static inline int
2549
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2550
1.86G
{
2551
1.86G
    assert(p != NULL);
2552
2553
#ifdef WITH_VALGRIND
2554
    if (UNLIKELY(running_on_valgrind > 0)) {
2555
        return 0;
2556
    }
2557
#endif
2558
2559
1.86G
    poolp pool = POOL_ADDR(p);
2560
1.86G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2561
241M
        return 0;
2562
241M
    }
2563
    /* We allocated this address. */
2564
2565
    /* Link p to the start of the pool's freeblock list.  Since
2566
     * the pool had at least the p block outstanding, the pool
2567
     * wasn't empty (so it's already in a usedpools[] list, or
2568
     * was full and is in no list -- it's not in the freeblocks
2569
     * list in any case).
2570
     */
2571
1.62G
    assert(pool->ref.count > 0);            /* else it was empty */
2572
1.62G
    pymem_block *lastfree = pool->freeblock;
2573
1.62G
    *(pymem_block **)p = lastfree;
2574
1.62G
    pool->freeblock = (pymem_block *)p;
2575
1.62G
    pool->ref.count--;
2576
2577
1.62G
    if (UNLIKELY(lastfree == NULL)) {
2578
        /* Pool was full, so doesn't currently live in any list:
2579
         * link it to the front of the appropriate usedpools[] list.
2580
         * This mimics LRU pool usage for new allocations and
2581
         * targets optimal filling when several pools contain
2582
         * blocks of the same size class.
2583
         */
2584
109M
        insert_to_usedpool(state, pool);
2585
109M
        return 1;
2586
109M
    }
2587
2588
    /* freeblock wasn't NULL, so the pool wasn't full,
2589
     * and the pool is in a usedpools[] list.
2590
     */
2591
1.51G
    if (LIKELY(pool->ref.count != 0)) {
2592
        /* pool isn't empty:  leave it in usedpools */
2593
1.50G
        return 1;
2594
1.50G
    }
2595
2596
    /* Pool is now empty:  unlink from usedpools, and
2597
     * link to the front of freepools.  This ensures that
2598
     * previously freed pools will be allocated later
2599
     * (being not referenced, they are perhaps paged out).
2600
     */
2601
2.67M
    insert_to_freepool(state, pool);
2602
2.67M
    return 1;
2603
1.51G
}
2604
2605
2606
void
2607
_PyObject_Free(void *ctx, void *p)
2608
1.86G
{
2609
    /* PyObject_Free(NULL) has no effect */
2610
1.86G
    if (p == NULL) {
2611
507k
        return;
2612
507k
    }
2613
2614
1.86G
    OMState *state = get_state();
2615
1.86G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2616
        /* pymalloc didn't allocate this address */
2617
241M
        PyMem_RawFree(p);
2618
241M
        raw_allocated_blocks--;
2619
241M
    }
2620
1.86G
}
2621
2622
2623
/* pymalloc realloc.
2624
2625
   If nbytes==0, then as the Python docs promise, we do not treat this like
2626
   free(p), and return a non-NULL result.
2627
2628
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2629
   newptr_p.
2630
2631
   Return 0 if pymalloc didn't allocate p. */
2632
static int
2633
pymalloc_realloc(OMState *state, void *ctx,
2634
                 void **newptr_p, void *p, size_t nbytes)
2635
100M
{
2636
100M
    void *bp;
2637
100M
    poolp pool;
2638
100M
    size_t size;
2639
2640
100M
    assert(p != NULL);
2641
2642
#ifdef WITH_VALGRIND
2643
    /* Treat running_on_valgrind == -1 the same as 0 */
2644
    if (UNLIKELY(running_on_valgrind > 0)) {
2645
        return 0;
2646
    }
2647
#endif
2648
2649
100M
    pool = POOL_ADDR(p);
2650
100M
    if (!address_in_range(state, p, pool)) {
2651
        /* pymalloc is not managing this block.
2652
2653
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2654
           over this block.  However, if we do, we need to copy the valid data
2655
           from the C-managed block to one of our blocks, and there's no
2656
           portable way to know how much of the memory space starting at p is
2657
           valid.
2658
2659
           As bug 1185883 pointed out the hard way, it's possible that the
2660
           C-managed block is "at the end" of allocated VM space, so that a
2661
           memory fault can occur if we try to copy nbytes bytes starting at p.
2662
           Instead we punt: let C continue to manage this block. */
2663
5.85M
        return 0;
2664
5.85M
    }
2665
2666
    /* pymalloc is in charge of this block */
2667
94.7M
    size = INDEX2SIZE(pool->szidx);
2668
94.7M
    if (nbytes <= size) {
2669
        /* The block is staying the same or shrinking.
2670
2671
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2672
           block to a smaller size class, but it wastes memory not to copy it.
2673
2674
           The compromise here is to copy on shrink only if at least 25% of
2675
           size can be shaved off. */
2676
61.7M
        if (4 * nbytes > 3 * size) {
2677
            /* It's the same, or shrinking and new/old > 3/4. */
2678
21.0M
            *newptr_p = p;
2679
21.0M
            return 1;
2680
21.0M
        }
2681
40.6M
        size = nbytes;
2682
40.6M
    }
2683
2684
73.6M
    bp = _PyObject_Malloc(ctx, nbytes);
2685
73.6M
    if (bp != NULL) {
2686
73.6M
        memcpy(bp, p, size);
2687
73.6M
        _PyObject_Free(ctx, p);
2688
73.6M
    }
2689
73.6M
    *newptr_p = bp;
2690
73.6M
    return 1;
2691
94.7M
}
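
The 4 * nbytes > 3 * size test above implements the "copy on shrink only if at least 25% is saved" compromise described in the comment. A small table of outcomes for a 64-byte size class (the class size is just an example):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        unsigned size = 64;                         /* current size class */
        unsigned requests[] = {64, 49, 48, 32, 16};
        for (size_t i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
            unsigned nbytes = requests[i];
            const char *action = (4 * nbytes > 3 * size)
                                     ? "keep block in place"
                                     : "copy to a smaller class";
            printf("class %u, new request %2u -> %s\n", size, nbytes, action);
        }
        return 0;
    }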
2692
2693
2694
void *
2695
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2696
335M
{
2697
335M
    void *ptr2;
2698
2699
335M
    if (ptr == NULL) {
2700
235M
        return _PyObject_Malloc(ctx, nbytes);
2701
235M
    }
2702
2703
100M
    OMState *state = get_state();
2704
100M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2705
94.7M
        return ptr2;
2706
94.7M
    }
2707
2708
5.85M
    return PyMem_RawRealloc(ptr, nbytes);
2709
100M
}
2710
2711
#else   /* ! WITH_PYMALLOC */
2712
2713
/*==========================================================================*/
2714
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2715
 * only be used by extensions that are compiled with pymalloc enabled. */
2716
2717
Py_ssize_t
2718
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2719
{
2720
    return 0;
2721
}
2722
2723
Py_ssize_t
2724
_Py_GetGlobalAllocatedBlocks(void)
2725
{
2726
    return 0;
2727
}
2728
2729
void
2730
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2731
{
2732
    return;
2733
}
2734
2735
void
2736
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2737
{
2738
    return;
2739
}
2740
2741
#endif /* WITH_PYMALLOC */
2742
2743
2744
/*==========================================================================*/
2745
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2746
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2747
 */
2748
2749
/* Uncomment this define to add the "serialno" field */
2750
/* #define PYMEM_DEBUG_SERIALNO */
2751
2752
#ifdef PYMEM_DEBUG_SERIALNO
2753
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2754
2755
/* serialno is always incremented via calling this routine.  The point is
2756
 * to supply a single place to set a breakpoint.
2757
 */
2758
static void
2759
bumpserialno(void)
2760
{
2761
    ++serialno;
2762
}
2763
#endif
2764
2765
0
#define SST SIZEOF_SIZE_T
2766
2767
#ifdef PYMEM_DEBUG_SERIALNO
2768
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2769
#else
2770
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2771
#endif
2772
2773
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2774
static size_t
2775
read_size_t(const void *p)
2776
0
{
2777
0
    const uint8_t *q = (const uint8_t *)p;
2778
0
    size_t result = *q++;
2779
0
    int i;
2780
2781
0
    for (i = SST; --i > 0; ++q)
2782
0
        result = (result << 8) | *q;
2783
0
    return result;
2784
0
}
2785
2786
/* Write n as a big-endian size_t, MSB at address p, LSB at
2787
 * p + sizeof(size_t) - 1.
2788
 */
2789
static void
2790
write_size_t(void *p, size_t n)
2791
0
{
2792
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2793
0
    int i;
2794
2795
0
    for (i = SST; --i >= 0; --q) {
2796
0
        *q = (uint8_t)(n & 0xff);
2797
0
        n >>= 8;
2798
0
    }
2799
0
}
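
A quick round-trip check of the big-endian helpers above (a standalone restatement for illustration, not the functions themselves):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    static void be_write(uint8_t *p, size_t n)
    {
        for (int i = (int)sizeof(size_t) - 1; i >= 0; --i) {
            p[i] = (uint8_t)(n & 0xff);             /* LSB lands at the end */
            n >>= 8;
        }
    }

    static size_t be_read(const uint8_t *p)
    {
        size_t result = 0;
        for (size_t i = 0; i < sizeof(size_t); ++i) {
            result = (result << 8) | p[i];          /* MSB first */
        }
        return result;
    }

    int main(void)
    {
        uint8_t buf[sizeof(size_t)];
        be_write(buf, 0x1234);
        assert(be_read(buf) == 0x1234);
        assert(buf[sizeof(size_t) - 1] == 0x34);    /* LSB is the last byte */
        return 0;
    }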
2800
2801
static void
2802
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2803
               bool is_alloc)
2804
0
{
2805
#ifdef Py_GIL_DISABLED
2806
    if (api->api_id == 'o') {
2807
        // Don't overwrite the first few bytes of a PyObject allocation in the
2808
        // free-threaded build
2809
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2810
        size_t debug_offset;
2811
        if (is_alloc) {
2812
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2813
        }
2814
        else {
2815
            char *alloc = (char *)data - 2*SST;  // start of the allocation
2816
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
2817
        }
2818
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
2819
        if (debug_offset < nbytes) {
2820
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
2821
        }
2822
        return;
2823
    }
2824
#endif
2825
0
    memset(data, c, nbytes);
2826
0
}
2827
2828
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2829
   fills them with useful stuff, here calling the underlying malloc's result p:
2830
2831
p[0: S]
2832
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2833
    to read in a memory dump).
2834
p[S]
2835
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2836
p[S+1: 2*S]
2837
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch underwrites and reads.
2838
p[2*S: 2*S+n]
2839
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2840
    Used to catch reference to uninitialized memory.
2841
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2842
    handled the request itself.
2843
p[2*S+n: 2*S+n+S]
2844
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch overwrites and reads.
2845
p[2*S+n+S: 2*S+n+2*S]
2846
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2847
    and _PyMem_DebugRealloc.
2848
    This is a big-endian size_t.
2849
    If "bad memory" is detected later, the serial number gives an
2850
    excellent way to set a breakpoint on the next run, to capture the
2851
    instant at which this block was passed out.
2852
2853
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2854
for 3 * S extra bytes, and omits the last serialno field.
2855
*/
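
Putting numbers on the layout above: with S = sizeof(size_t) (8 on typical 64-bit builds) and the default configuration (no serialno field), a request of n bytes costs n + 3*S bytes from the underlying allocator, split as follows.

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t S = sizeof(size_t);
        size_t n = 10;                         /* example request size */
        size_t total = n + 3 * S;              /* PYMEM_DEBUG_EXTRA_BYTES */

        printf("request n=%zu, S=%zu, underlying allocation=%zu bytes\n",
               n, S, total);
        printf("  [%2zu..%2zu) requested size as a big-endian size_t\n",
               (size_t)0, S);
        printf("  [%2zu..%2zu) API id byte + %zu forbidden bytes\n",
               S, 2 * S, S - 1);
        printf("  [%2zu..%2zu) user data; the caller gets base + 2*S\n",
               2 * S, 2 * S + n);
        printf("  [%2zu..%2zu) trailing forbidden bytes\n",
               2 * S + n, total);
        return 0;
    }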
2856
2857
static void *
2858
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2859
0
{
2860
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2861
0
    uint8_t *p;           /* base address of malloc'ed pad block */
2862
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2863
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2864
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2865
2866
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2867
        /* integer overflow: can't represent total as a Py_ssize_t */
2868
0
        return NULL;
2869
0
    }
2870
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2871
2872
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2873
                ^--- p    ^--- data   ^--- tail
2874
       S: nbytes stored as size_t
2875
       I: API identifier (1 byte)
2876
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2877
       C: Clean bytes used later to store actual data
2878
       N: Serial number stored as size_t
2879
2880
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2881
       is omitted. */
2882
2883
0
    if (use_calloc) {
2884
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2885
0
    }
2886
0
    else {
2887
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2888
0
    }
2889
0
    if (p == NULL) {
2890
0
        return NULL;
2891
0
    }
2892
0
    data = p + 2*SST;
2893
2894
#ifdef PYMEM_DEBUG_SERIALNO
2895
    bumpserialno();
2896
#endif
2897
2898
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2899
0
    write_size_t(p, nbytes);
2900
0
    p[SST] = (uint8_t)api->api_id;
2901
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2902
2903
0
    if (nbytes > 0 && !use_calloc) {
2904
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
2905
0
    }
2906
2907
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2908
0
    tail = data + nbytes;
2909
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
2910
#ifdef PYMEM_DEBUG_SERIALNO
2911
    write_size_t(tail + SST, serialno);
2912
#endif
2913
2914
0
    return data;
2915
0
}
2916
2917
void *
2918
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2919
0
{
2920
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2921
0
}
2922
2923
void *
2924
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2925
0
{
2926
0
    size_t nbytes;
2927
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2928
0
    nbytes = nelem * elsize;
2929
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2930
0
}
2931
2932
2933
/* The debug free first checks the 2*SST bytes on each end for sanity (in
2934
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
2935
   Then fills the original bytes with PYMEM_DEADBYTE.
2936
   Then calls the underlying free.
2937
*/
2938
void
2939
_PyMem_DebugRawFree(void *ctx, void *p)
2940
0
{
2941
    /* PyMem_Free(NULL) has no effect */
2942
0
    if (p == NULL) {
2943
0
        return;
2944
0
    }
2945
2946
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2947
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2948
0
    size_t nbytes;
2949
2950
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2951
0
    nbytes = read_size_t(q);
2952
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
2953
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
2954
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
2955
0
    api->alloc.free(api->alloc.ctx, q);
2956
0
}
2957
2958
2959
void *
2960
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2961
0
{
2962
0
    if (p == NULL) {
2963
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2964
0
    }
2965
2966
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2967
0
    uint8_t *head;        /* base address of malloc'ed pad block */
2968
0
    uint8_t *data;        /* pointer to data bytes */
2969
0
    uint8_t *r;
2970
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2971
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
2972
0
    size_t original_nbytes;
2973
0
#define ERASED_SIZE 64
2974
2975
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2976
2977
0
    data = (uint8_t *)p;
2978
0
    head = data - 2*SST;
2979
0
    original_nbytes = read_size_t(head);
2980
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2981
        /* integer overflow: can't represent total as a Py_ssize_t */
2982
0
        return NULL;
2983
0
    }
2984
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2985
2986
0
    tail = data + original_nbytes;
2987
#ifdef PYMEM_DEBUG_SERIALNO
2988
    size_t block_serialno = read_size_t(tail + SST);
2989
#endif
2990
0
#ifndef Py_GIL_DISABLED
2991
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
2992
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
2993
     */
2994
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2995
0
    if (original_nbytes <= sizeof(save)) {
2996
0
        memcpy(save, data, original_nbytes);
2997
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
2998
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2999
0
    }
3000
0
    else {
3001
0
        memcpy(save, data, ERASED_SIZE);
3002
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3003
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3004
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3005
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3006
0
    }
3007
0
#endif
3008
3009
    /* Resize and add decorations. */
3010
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3011
0
    if (r == NULL) {
3012
        /* if realloc() failed: rewrite header and footer which have
3013
           just been erased */
3014
0
        nbytes = original_nbytes;
3015
0
    }
3016
0
    else {
3017
0
        head = r;
3018
#ifdef PYMEM_DEBUG_SERIALNO
3019
        bumpserialno();
3020
        block_serialno = serialno;
3021
#endif
3022
0
    }
3023
0
    data = head + 2*SST;
3024
3025
0
    write_size_t(head, nbytes);
3026
0
    head[SST] = (uint8_t)api->api_id;
3027
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3028
3029
0
    tail = data + nbytes;
3030
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3031
#ifdef PYMEM_DEBUG_SERIALNO
3032
    write_size_t(tail + SST, block_serialno);
3033
#endif
3034
3035
0
#ifndef Py_GIL_DISABLED
3036
    /* Restore saved bytes. */
3037
0
    if (original_nbytes <= sizeof(save)) {
3038
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3039
0
    }
3040
0
    else {
3041
0
        size_t i = original_nbytes - ERASED_SIZE;
3042
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3043
0
        if (nbytes > i) {
3044
0
            memcpy(data + i, &save[ERASED_SIZE],
3045
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3046
0
        }
3047
0
    }
3048
0
#endif
3049
3050
0
    if (r == NULL) {
3051
0
        return NULL;
3052
0
    }
3053
3054
0
    if (nbytes > original_nbytes) {
3055
        /* growing: mark new extra memory clean */
3056
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3057
0
               nbytes - original_nbytes);
3058
0
    }
3059
3060
0
    return data;
3061
0
}
3062
3063
static inline void
3064
_PyMem_DebugCheckGIL(const char *func)
3065
0
{
3066
0
    PyThreadState *tstate = _PyThreadState_GET();
3067
0
    if (tstate == NULL) {
3068
0
#ifndef Py_GIL_DISABLED
3069
0
        _Py_FatalErrorFunc(func,
3070
0
                           "Python memory allocator called "
3071
0
                           "without holding the GIL");
3072
#else
3073
        _Py_FatalErrorFunc(func,
3074
                           "Python memory allocator called "
3075
                           "without an active thread state. "
3076
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3077
#endif
3078
0
    }
3079
0
}
3080
3081
void *
3082
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3083
0
{
3084
0
    _PyMem_DebugCheckGIL(__func__);
3085
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3086
0
}
3087
3088
void *
3089
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3090
0
{
3091
0
    _PyMem_DebugCheckGIL(__func__);
3092
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3093
0
}
3094
3095
3096
void
3097
_PyMem_DebugFree(void *ctx, void *ptr)
3098
0
{
3099
0
    _PyMem_DebugCheckGIL(__func__);
3100
0
    _PyMem_DebugRawFree(ctx, ptr);
3101
0
}
3102
3103
3104
void *
3105
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3106
0
{
3107
0
    _PyMem_DebugCheckGIL(__func__);
3108
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3109
0
}
3110
3111
/* Check the forbidden bytes on both ends of the memory allocated for p.
3112
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3113
 * and call Py_FatalError to kill the program.
3114
 * The API id is also checked.
3115
 */
3116
static void
3117
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3118
0
{
3119
0
    assert(p != NULL);
3120
3121
0
    const uint8_t *q = (const uint8_t *)p;
3122
0
    size_t nbytes;
3123
0
    const uint8_t *tail;
3124
0
    int i;
3125
0
    char id;
3126
3127
    /* Check the API id */
3128
0
    id = (char)q[-SST];
3129
0
    if (id != api) {
3130
0
        _PyObject_DebugDumpAddress(p);
3131
0
        _Py_FatalErrorFormat(func,
3132
0
                             "bad ID: Allocated using API '%c', "
3133
0
                             "verified using API '%c'",
3134
0
                             id, api);
3135
0
    }
3136
3137
    /* Check the stuff at the start of p first:  if there's underwrite
3138
     * corruption, the number-of-bytes field may be nuts, and checking
3139
     * the tail could lead to a segfault then.
3140
     */
3141
0
    for (i = SST-1; i >= 1; --i) {
3142
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3143
0
            _PyObject_DebugDumpAddress(p);
3144
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3145
0
        }
3146
0
    }
3147
3148
0
    nbytes = read_size_t(q - 2*SST);
3149
0
    tail = q + nbytes;
3150
0
    for (i = 0; i < SST; ++i) {
3151
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3152
0
            _PyObject_DebugDumpAddress(p);
3153
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3154
0
        }
3155
0
    }
3156
0
}
3157
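The offsets used by this check come from the decorations written by the debug malloc/realloc paths shown earlier. A sketch of the resulting block layout, plus a hypothetical in-file helper built on the file's own read_size_t() and SST, follows.

/* Layout assumed by _PyMem_DebugCheckAddress (reconstructed from the
   write_size_t()/memset() calls above; SST == sizeof(size_t)):

       p - 2*SST             original requested size (size_t)
       p - SST               one API id byte
       p - SST + 1 .. p - 1  SST-1 bytes of PYMEM_FORBIDDENBYTE
       p .. p + nbytes - 1   the caller's data
       p + nbytes            SST bytes of PYMEM_FORBIDDENBYTE
       p + nbytes + SST      serial number (only with PYMEM_DEBUG_SERIALNO)

   Hypothetical helper reading the hidden size field, for illustration only: */
static size_t
sketch_debug_requested_size(const void *p)
{
    const uint8_t *q = (const uint8_t *)p;
    return read_size_t(q - 2*SST);
}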
3158
/* Display info to stderr about the memory block at p. */
3159
static void
3160
_PyObject_DebugDumpAddress(const void *p)
3161
0
{
3162
0
    const uint8_t *q = (const uint8_t *)p;
3163
0
    const uint8_t *tail;
3164
0
    size_t nbytes;
3165
0
    int i;
3166
0
    int ok;
3167
0
    char id;
3168
3169
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3170
0
    if (p == NULL) {
3171
0
        fprintf(stderr, "\n");
3172
0
        return;
3173
0
    }
3174
0
    id = (char)q[-SST];
3175
0
    fprintf(stderr, " API '%c'\n", id);
3176
3177
0
    nbytes = read_size_t(q - 2*SST);
3178
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3179
3180
    /* In case this is nuts, check the leading pad bytes first. */
3181
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3182
0
    ok = 1;
3183
0
    for (i = 1; i <= SST-1; ++i) {
3184
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3185
0
            ok = 0;
3186
0
            break;
3187
0
        }
3188
0
    }
3189
0
    if (ok)
3190
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3191
0
    else {
3192
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3193
0
            PYMEM_FORBIDDENBYTE);
3194
0
        for (i = SST-1; i >= 1; --i) {
3195
0
            const uint8_t byte = *(q-i);
3196
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3197
0
            if (byte != PYMEM_FORBIDDENBYTE)
3198
0
                fputs(" *** OUCH", stderr);
3199
0
            fputc('\n', stderr);
3200
0
        }
3201
3202
0
        fputs("    Because memory is corrupted at the start, the "
3203
0
              "count of bytes requested\n"
3204
0
              "       may be bogus, and checking the trailing pad "
3205
0
              "bytes may segfault.\n", stderr);
3206
0
    }
3207
3208
0
    tail = q + nbytes;
3209
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3210
0
    ok = 1;
3211
0
    for (i = 0; i < SST; ++i) {
3212
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3213
0
            ok = 0;
3214
0
            break;
3215
0
        }
3216
0
    }
3217
0
    if (ok)
3218
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3219
0
    else {
3220
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3221
0
                PYMEM_FORBIDDENBYTE);
3222
0
        for (i = 0; i < SST; ++i) {
3223
0
            const uint8_t byte = tail[i];
3224
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3225
0
                    i, byte);
3226
0
            if (byte != PYMEM_FORBIDDENBYTE)
3227
0
                fputs(" *** OUCH", stderr);
3228
0
            fputc('\n', stderr);
3229
0
        }
3230
0
    }
3231
3232
#ifdef PYMEM_DEBUG_SERIALNO
3233
    size_t serial = read_size_t(tail + SST);
3234
    fprintf(stderr,
3235
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3236
            serial);
3237
#endif
3238
3239
0
    if (nbytes > 0) {
3240
0
        i = 0;
3241
0
        fputs("    Data at p:", stderr);
3242
        /* print up to 8 bytes at the start */
3243
0
        while (q < tail && i < 8) {
3244
0
            fprintf(stderr, " %02x", *q);
3245
0
            ++i;
3246
0
            ++q;
3247
0
        }
3248
        /* and up to 8 at the end */
3249
0
        if (q < tail) {
3250
0
            if (tail - q > 8) {
3251
0
                fputs(" ...", stderr);
3252
0
                q = tail - 8;
3253
0
            }
3254
0
            while (q < tail) {
3255
0
                fprintf(stderr, " %02x", *q);
3256
0
                ++q;
3257
0
            }
3258
0
        }
3259
0
        fputc('\n', stderr);
3260
0
    }
3261
0
    fputc('\n', stderr);
3262
3263
0
    fflush(stderr);
3264
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3265
0
}
3266
3267
3268
static size_t
3269
printone(FILE *out, const char* msg, size_t value)
3270
0
{
3271
0
    int i, k;
3272
0
    char buf[100];
3273
0
    size_t origvalue = value;
3274
3275
0
    fputs(msg, out);
3276
0
    for (i = (int)strlen(msg); i < 35; ++i)
3277
0
        fputc(' ', out);
3278
0
    fputc('=', out);
3279
3280
    /* Write the value with commas. */
3281
0
    i = 22;
3282
0
    buf[i--] = '\0';
3283
0
    buf[i--] = '\n';
3284
0
    k = 3;
3285
0
    do {
3286
0
        size_t nextvalue = value / 10;
3287
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3288
0
        value = nextvalue;
3289
0
        buf[i--] = (char)(digit + '0');
3290
0
        --k;
3291
0
        if (k == 0 && value && i >= 0) {
3292
0
            k = 3;
3293
0
            buf[i--] = ',';
3294
0
        }
3295
0
    } while (value && i >= 0);
3296
3297
0
    while (i >= 0)
3298
0
        buf[i--] = ' ';
3299
0
    fputs(buf, out);
3300
3301
0
    return origvalue;
3302
0
}
3303
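printone() renders the value with thousands separators by peeling decimal digits off the right and dropping a comma after every third one. A standalone sketch of just that digit-grouping step (a hypothetical helper, not from obmalloc.c):

#include <stdio.h>

/* Format v with commas every three digits, e.g. 1234567 -> "1,234,567".
   buf must hold at least 27 bytes for a 64-bit size_t. */
static void
sketch_format_commas(size_t v, char *buf, size_t buflen)
{
    char tmp[32];
    int i = (int)sizeof(tmp);
    int k = 3;

    tmp[--i] = '\0';
    do {
        size_t next = v / 10;
        tmp[--i] = (char)('0' + (v - next * 10));
        v = next;
        if (--k == 0 && v != 0) {
            tmp[--i] = ',';
            k = 3;
        }
    } while (v != 0);

    snprintf(buf, buflen, "%s", &tmp[i]);
}

/* Usage: char out[32]; sketch_format_commas(1234567, out, sizeof(out));
   out now holds "1,234,567". */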
3304
void
3305
_PyDebugAllocatorStats(FILE *out,
3306
                       const char *block_name, int num_blocks, size_t sizeof_block)
3307
0
{
3308
0
    char buf1[128];
3309
0
    char buf2[128];
3310
0
    PyOS_snprintf(buf1, sizeof(buf1),
3311
0
                  "%d %ss * %zd bytes each",
3312
0
                  num_blocks, block_name, sizeof_block);
3313
0
    PyOS_snprintf(buf2, sizeof(buf2),
3314
0
                  "%48s ", buf1);
3315
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3316
0
}
3317
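_PyDebugAllocatorStats() is the helper that per-type free-list reports funnel through. Since it is an internal API, the sketch below assumes an in-tree caller, and the PyFloatObject count is made up for illustration.

#include <Python.h>

/* Hypothetical caller mirroring how per-type free-list statistics are
   reported through the helper above; num_free is illustrative. */
static void
report_float_freelist(FILE *out, int num_free)
{
    _PyDebugAllocatorStats(out, "free PyFloatObject",
                           num_free, sizeof(PyFloatObject));
    /* With num_free == 260 and a 24-byte PyFloatObject this prints,
       right-aligned, something like:
           260 free PyFloatObjects * 24 bytes each = 6,240 */
}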
3318
// Return true if the obmalloc state structure is heap allocated,
3319
// by PyMem_RawCalloc().  For the main interpreter, this structure
3320
// is allocated in the BSS.  Allocating that way gives some memory savings
3321
// and a small performance win (at least on a demand paged OS).  On
3322
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3323
// memory is for the arena_map_top array.  Since normally only one entry
3324
// of that array is used, only one page of resident memory is actually
3325
// used, rather than the full 256 kB.
3326
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3327
0
{
3328
0
#if WITH_PYMALLOC
3329
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3330
#else
3331
    return false;
3332
#endif
3333
0
}
3334
3335
#ifdef WITH_PYMALLOC
3336
static void
3337
init_obmalloc_pools(PyInterpreterState *interp)
3338
16
{
3339
    // initialize the obmalloc->pools structure.  This must be done
3340
    // before the obmalloc alloc/free functions can be called.
3341
16
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3342
16
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3343
16
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3344
16
}
3345
#endif /* WITH_PYMALLOC */
3346
3347
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3348
16
{
3349
16
#ifdef WITH_PYMALLOC
3350
    /* Initialize obmalloc, but only for subinterpreters,
3351
       since the main interpreter is initialized statically. */
3352
16
    if (_Py_IsMainInterpreter(interp)
3353
16
            || _PyInterpreterState_HasFeature(interp,
3354
16
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3355
16
        interp->obmalloc = &obmalloc_state_main;
3356
16
        if (!obmalloc_state_initialized) {
3357
16
            init_obmalloc_pools(interp);
3358
16
            obmalloc_state_initialized = true;
3359
16
        }
3360
16
    } else {
3361
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3362
0
        if (interp->obmalloc == NULL) {
3363
0
            return -1;
3364
0
        }
3365
0
        init_obmalloc_pools(interp);
3366
0
    }
3367
16
#endif /* WITH_PYMALLOC */
3368
16
    return 0; // success
3369
16
}
3370
3371
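The else branch above, which heap-allocates a fresh _obmalloc_state, is the one taken by subinterpreters created without Py_RTFLAGS_USE_MAIN_OBMALLOC. From the embedding side that choice is driven by PyInterpreterConfig.use_main_obmalloc (CPython 3.12+); the following is a hedged sketch of requesting a per-interpreter allocator state, with error handling trimmed.

/* Hedged sketch: create a subinterpreter that gets its own obmalloc state. */
#include <Python.h>

static int
spawn_isolated_interpreter(void)
{
    PyThreadState *save_tstate = PyThreadState_Get();   /* caller's tstate */
    PyInterpreterConfig config = {
        .use_main_obmalloc = 0,              /* take the heap-allocated branch above */
        .allow_fork = 0,
        .allow_exec = 0,
        .allow_threads = 1,
        .allow_daemon_threads = 0,
        .check_multi_interp_extensions = 1,  /* required when not sharing obmalloc */
        .gil = PyInterpreterConfig_OWN_GIL,
    };
    PyThreadState *tstate = NULL;
    PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config);
    if (PyStatus_Exception(status)) {
        PyThreadState_Swap(save_tstate);
        return -1;
    }
    /* ... run code in the subinterpreter via tstate ... */
    Py_EndInterpreter(tstate);
    PyThreadState_Swap(save_tstate);
    return 0;
}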
3372
#ifdef WITH_PYMALLOC
3373
3374
static void
3375
free_obmalloc_arenas(PyInterpreterState *interp)
3376
0
{
3377
0
    OMState *state = interp->obmalloc;
3378
0
    for (uint i = 0; i < maxarenas; ++i) {
3379
        // free each obmalloc memory arena
3380
0
        struct arena_object *ao = &allarenas[i];
3381
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3382
0
                             (void *)ao->address, ARENA_SIZE);
3383
0
    }
3384
    // free the array containing pointers to all arenas
3385
0
    PyMem_RawFree(allarenas);
3386
0
#if WITH_PYMALLOC_RADIX_TREE
3387
0
#ifdef USE_INTERIOR_NODES
3388
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3389
    // by arena_map_mark_used() but not freed when arenas are freed.
3390
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3391
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3392
0
         if (mid == NULL) {
3393
0
             continue;
3394
0
         }
3395
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3396
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3397
0
            if (bot == NULL) {
3398
0
                continue;
3399
0
            }
3400
0
            PyMem_RawFree(bot);
3401
0
         }
3402
0
         PyMem_RawFree(mid);
3403
0
    }
3404
0
#endif
3405
0
#endif
3406
0
}
3407
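The teardown above mirrors the lookup structure: a root of pointers to mid nodes, each mid node a table of pointers to bottom nodes, and an address resolved by slicing its high bits into one index per level. A generic sketch of that kind of 3-level lookup follows; the bit widths are placeholders, not obmalloc's real MAP_*_LENGTH layout.

#include <stdint.h>
#include <stddef.h>

/* Generic 3-level radix-map lookup sketch.  The 10/10 bit split and the
   20 low "ignored" bits are made-up values for illustration only. */
#define SK_BOT_BITS 20                    /* low bits covered by one bottom node */
#define SK_MID_BITS 10
#define SK_TOP_BITS 10

typedef struct sk_bot { int arena_index; } sk_bot_t;
typedef struct sk_mid { sk_bot_t *ptrs[1 << SK_MID_BITS]; } sk_mid_t;
typedef struct sk_top { sk_mid_t *ptrs[1 << SK_TOP_BITS]; } sk_top_t;

static sk_bot_t *
sk_lookup(const sk_top_t *root, uintptr_t addr)
{
    size_t i1 = (addr >> (SK_BOT_BITS + SK_MID_BITS)) & ((1 << SK_TOP_BITS) - 1);
    size_t i2 = (addr >> SK_BOT_BITS) & ((1 << SK_MID_BITS) - 1);
    sk_mid_t *mid = root->ptrs[i1];
    if (mid == NULL) {
        return NULL;          /* nothing has ever mapped this address range */
    }
    return mid->ptrs[i2];     /* may still be NULL */
}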
3408
#ifdef Py_DEBUG
3409
/* Is target in the list?  The list is traversed via the nextpool pointers.
3410
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3411
 * list, else 0.
3412
 */
3413
static int
3414
pool_is_in_list(const poolp target, poolp list)
3415
{
3416
    poolp origlist = list;
3417
    assert(target != NULL);
3418
    if (list == NULL)
3419
        return 0;
3420
    do {
3421
        if (target == list)
3422
            return 1;
3423
        list = list->nextpool;
3424
    } while (list != NULL && list != origlist);
3425
    return 0;
3426
}
3427
#endif
3428
3429
#ifdef WITH_MIMALLOC
3430
struct _alloc_stats {
3431
    size_t allocated_blocks;
3432
    size_t allocated_bytes;
3433
    size_t allocated_with_overhead;
3434
    size_t bytes_reserved;
3435
    size_t bytes_committed;
3436
};
3437
3438
static bool _collect_alloc_stats(
3439
    const mi_heap_t* heap, const mi_heap_area_t* area,
3440
    void* block, size_t block_size, void* arg)
3441
0
{
3442
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3443
0
    stats->allocated_blocks += area->used;
3444
0
    stats->allocated_bytes += area->used * area->block_size;
3445
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3446
0
    stats->bytes_reserved += area->reserved;
3447
0
    stats->bytes_committed += area->committed;
3448
0
    return 1;
3449
0
}
3450
3451
static void
3452
py_mimalloc_print_stats(FILE *out)
3453
0
{
3454
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3455
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3456
0
    fprintf(out, "Medium block threshold = %zu\n",
3457
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3458
0
    fprintf(out, "Large object max size = %zu\n",
3459
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3460
3461
0
    mi_heap_t *heap = mi_heap_get_default();
3462
0
    struct _alloc_stats stats;
3463
0
    memset(&stats, 0, sizeof(stats));
3464
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3465
3466
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3467
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3468
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3469
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3470
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3471
0
}
3472
#endif
3473
3474
3475
static void
3476
pymalloc_print_stats(FILE *out)
3477
0
{
3478
0
    OMState *state = get_state();
3479
3480
0
    uint i;
3481
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3482
    /* # of pools, allocated blocks, and free blocks per class index */
3483
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3484
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3485
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3486
    /* total # of allocated bytes in used and full pools */
3487
0
    size_t allocated_bytes = 0;
3488
    /* total # of available bytes in used pools */
3489
0
    size_t available_bytes = 0;
3490
    /* # of free pools + pools not yet carved out of current arena */
3491
0
    uint numfreepools = 0;
3492
    /* # of bytes for arena alignment padding */
3493
0
    size_t arena_alignment = 0;
3494
    /* # of bytes in used and full pools used for pool_headers */
3495
0
    size_t pool_header_bytes = 0;
3496
    /* # of bytes in used and full pools wasted due to quantization,
3497
     * i.e. the necessarily leftover space at the ends of used and
3498
     * full pools.
3499
     */
3500
0
    size_t quantization = 0;
3501
    /* # of arenas actually allocated. */
3502
0
    size_t narenas = 0;
3503
    /* running total -- should equal narenas * ARENA_SIZE */
3504
0
    size_t total;
3505
0
    char buf[128];
3506
3507
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3508
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3509
3510
0
    for (i = 0; i < numclasses; ++i)
3511
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3512
3513
    /* Because full pools aren't linked to from anything, it's easiest
3514
     * to march over all the arenas.  If we're lucky, most of the memory
3515
     * will be living in full pools -- would be a shame to miss them.
3516
     */
3517
0
    for (i = 0; i < maxarenas; ++i) {
3518
0
        uintptr_t base = allarenas[i].address;
3519
3520
        /* Skip arenas which are not allocated. */
3521
0
        if (allarenas[i].address == (uintptr_t)NULL)
3522
0
            continue;
3523
0
        narenas += 1;
3524
3525
0
        numfreepools += allarenas[i].nfreepools;
3526
3527
        /* round up to pool alignment */
3528
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3529
0
            arena_alignment += POOL_SIZE;
3530
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3531
0
            base += POOL_SIZE;
3532
0
        }
3533
3534
        /* visit every pool in the arena */
3535
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3536
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3537
0
            poolp p = (poolp)base;
3538
0
            const uint sz = p->szidx;
3539
0
            uint freeblocks;
3540
3541
0
            if (p->ref.count == 0) {
3542
                /* currently unused */
3543
#ifdef Py_DEBUG
3544
                assert(pool_is_in_list(p, allarenas[i].freepools));
3545
#endif
3546
0
                continue;
3547
0
            }
3548
0
            ++numpools[sz];
3549
0
            numblocks[sz] += p->ref.count;
3550
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3551
0
            numfreeblocks[sz] += freeblocks;
3552
#ifdef Py_DEBUG
3553
            if (freeblocks > 0)
3554
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3555
#endif
3556
0
        }
3557
0
    }
3558
0
    assert(narenas == narenas_currently_allocated);
3559
3560
0
    fputc('\n', out);
3561
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3562
0
          "-----   ----   ---------   -------------  ------------\n",
3563
0
          out);
3564
3565
0
    for (i = 0; i < numclasses; ++i) {
3566
0
        size_t p = numpools[i];
3567
0
        size_t b = numblocks[i];
3568
0
        size_t f = numfreeblocks[i];
3569
0
        uint size = INDEX2SIZE(i);
3570
0
        if (p == 0) {
3571
0
            assert(b == 0 && f == 0);
3572
0
            continue;
3573
0
        }
3574
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3575
0
                i, size, p, b, f);
3576
0
        allocated_bytes += b * size;
3577
0
        available_bytes += f * size;
3578
0
        pool_header_bytes += p * POOL_OVERHEAD;
3579
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3580
0
    }
3581
0
    fputc('\n', out);
3582
#ifdef PYMEM_DEBUG_SERIALNO
3583
    if (_PyMem_DebugEnabled()) {
3584
        (void)printone(out, "# times object malloc called", serialno);
3585
    }
3586
#endif
3587
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3588
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3589
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3590
0
    (void)printone(out, "# arenas allocated current", narenas);
3591
3592
0
    PyOS_snprintf(buf, sizeof(buf),
3593
0
                  "%zu arenas * %d bytes/arena",
3594
0
                  narenas, ARENA_SIZE);
3595
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3596
3597
0
    fputc('\n', out);
3598
3599
    /* Account for what all of those arena bytes are being used for. */
3600
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3601
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3602
3603
0
    PyOS_snprintf(buf, sizeof(buf),
3604
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3605
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3606
3607
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3608
0
    total += printone(out, "# bytes lost to quantization", quantization);
3609
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3610
0
    (void)printone(out, "Total", total);
3611
0
    assert(narenas * ARENA_SIZE == total);
3612
3613
0
#if WITH_PYMALLOC_RADIX_TREE
3614
0
    fputs("\narena map counts\n", out);
3615
0
#ifdef USE_INTERIOR_NODES
3616
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3617
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3618
0
    fputc('\n', out);
3619
0
#endif
3620
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3621
0
#ifdef USE_INTERIOR_NODES
3622
0
    total += printone(out, "# bytes lost to arena map mid",
3623
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3624
0
    total += printone(out, "# bytes lost to arena map bot",
3625
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3626
0
    (void)printone(out, "Total", total);
3627
0
#endif
3628
0
#endif
3629
3630
0
}
3631
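The "# bytes lost to quantization" line comes from the per-pool leftover (POOL_SIZE - POOL_OVERHEAD) % size summed over used and full pools. A small worked example, using a 4096-byte pool and a 48-byte pool header purely for illustration:

#include <stdio.h>

/* Worked example of the quantization term (illustrative constants only;
   the real POOL_SIZE / POOL_OVERHEAD come from the obmalloc headers). */
int main(void)
{
    const size_t pool_size = 4096;
    const size_t pool_overhead = 48;
    const size_t size_class = 32;                        /* block size */

    size_t usable  = pool_size - pool_overhead;          /* 4048 */
    size_t nblocks = usable / size_class;                /* 126 blocks */
    size_t wasted  = usable % size_class;                /* 16 bytes lost */

    printf("%zu blocks of %zu bytes, %zu bytes lost to quantization\n",
           nblocks, size_class, wasted);
    return 0;
}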
3632
/* Print summary info to "out" about the state of pymalloc's structures.
3633
 * In Py_DEBUG mode, also perform some expensive internal consistency
3634
 * checks.
3635
 *
3636
 * Return 0 if the memory debug hooks are not installed or no statistics were
3637
 * written into out, return 1 otherwise.
3638
 */
3639
int
3640
_PyObject_DebugMallocStats(FILE *out)
3641
0
{
3642
0
#ifdef WITH_MIMALLOC
3643
0
    if (_PyMem_MimallocEnabled()) {
3644
0
        py_mimalloc_print_stats(out);
3645
0
        return 1;
3646
0
    }
3647
0
    else
3648
0
#endif
3649
0
    if (_PyMem_PymallocEnabled()) {
3650
0
        pymalloc_print_stats(out);
3651
0
        return 1;
3652
0
    }
3653
0
    else {
3654
0
        return 0;
3655
0
    }
3656
0
}
3657
3658
#endif /* #ifdef WITH_PYMALLOC */
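For completeness: this dispatcher backs the interpreter's allocator-statistics dump. A hedged way to trigger it from an embedding application is to go through the Python-level wrapper sys._debugmallocstats(), which writes the report to stderr.

#include <Python.h>

/* Usage sketch: run the Python-level wrapper, which ends up in the
   dispatcher above (plus per-type statistics).  Returns 0 on success,
   -1 if the snippet raised an exception. */
static int
dump_allocator_stats(void)
{
    return PyRun_SimpleString("import sys; sys._debugmallocstats()");
}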