Coverage Report

Created: 2026-03-23 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#ifdef WITH_MIMALLOC
18
// Forward declarations of functions used in our mimalloc modifications
19
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
20
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
21
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
22
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
23
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
24
#  include "pycore_mimalloc.h"
25
#  include "mimalloc/static.c"
26
#  include "mimalloc/internal.h"  // for stats
27
#endif
28
29
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
30
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
31
#endif
32
33
#undef  uint
34
1.71G
#define uint pymem_uint
35
36
37
/* Defined in tracemalloc.c */
38
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
39
40
static void _PyObject_DebugDumpAddress(const void *p);
41
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
42
43
44
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
45
static void set_up_debug_hooks_unlocked(void);
46
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
47
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
49
50
/***************************************/
51
/* low-level allocator implementations */
52
/***************************************/
53
54
/* the default raw allocator (wraps malloc) */
55
56
void *
57
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
58
233M
{
59
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
60
       for malloc(0), which would be treated as an error. Some platforms would
61
       return a pointer with no memory behind it, which would break pymalloc.
62
       To solve these problems, allocate an extra byte. */
63
233M
    if (size == 0)
64
36.5M
        size = 1;
65
233M
    return malloc(size);
66
233M
}
67
68
void *
69
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
70
141k
{
71
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
72
       for calloc(0, 0), which would be treated as an error. Some platforms
73
       would return a pointer with no memory behind it, which would break
74
       pymalloc.  To solve these problems, allocate an extra byte. */
75
141k
    if (nelem == 0 || elsize == 0) {
76
399
        nelem = 1;
77
399
        elsize = 1;
78
399
    }
79
141k
    return calloc(nelem, elsize);
80
141k
}
81
82
void *
83
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
84
6.57M
{
85
6.57M
    if (size == 0)
86
0
        size = 1;
87
6.57M
    return realloc(ptr, size);
88
6.57M
}
89
90
void
91
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
92
233M
{
93
233M
    free(ptr);
94
233M
}
95
96
#ifdef WITH_MIMALLOC
97
98
static void
99
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
100
0
{
101
#ifdef Py_GIL_DISABLED
102
    // Clear the QSBR goal and remove the page from the QSBR linked list.
103
    page->qsbr_goal = 0;
104
    if (page->qsbr_node.next != NULL) {
105
        llist_remove(&page->qsbr_node);
106
    }
107
#endif
108
0
}
109
110
// Check if an empty, newly reclaimed page is safe to free now.
111
static bool
112
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
113
0
{
114
0
    assert(mi_page_all_free(page));
115
#ifdef Py_GIL_DISABLED
116
    assert(page->qsbr_node.next == NULL);
117
    if (page->use_qsbr && page->qsbr_goal != 0) {
118
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
119
        if (tstate == NULL) {
120
            return false;
121
        }
122
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
123
    }
124
#endif
125
0
    return true;
126
127
0
}
128
129
#ifdef Py_GIL_DISABLED
130
131
// If we are deferring collection of more than this amount of memory for
132
// mimalloc pages, advance the write sequence.  Advancing allows these
133
// pages to be re-used in a different thread or for a different size class.
134
#define QSBR_PAGE_MEM_LIMIT 4096*20
135
136
// Return true if the global write sequence should be advanced for a mimalloc
137
// page that is deferred from collection.
138
static bool
139
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
140
{
141
    size_t bsize = mi_page_block_size(page);
142
    size_t page_size = page->capacity*bsize;
143
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
144
        qsbr->deferred_page_memory = 0;
145
        return true;
146
    }
147
    qsbr->deferred_page_memory += page_size;
148
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
149
        qsbr->deferred_page_memory = 0;
150
        return true;
151
    }
152
    return false;
153
}
154
155
static _PyThreadStateImpl *
156
tstate_from_heap(mi_heap_t *heap)
157
{
158
    return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
159
}
160
#endif
161
162
static bool
163
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
164
0
{
165
#ifdef Py_GIL_DISABLED
166
    assert(mi_page_all_free(page));
167
    if (page->use_qsbr) {
168
        struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
169
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
170
            _PyMem_mi_page_clear_qsbr(page);
171
            _mi_page_free(page, pq, force);
172
            return true;
173
        }
174
175
        // gh-145615: since we are not freeing this page yet, we want to
176
        // make it available for allocations. Note that the QSBR goal and
177
        // linked list node remain set even if the page is later used for
178
        // an allocation. We only detect and clear the QSBR goal when the
179
        // page becomes empty again (used == 0).
180
        if (mi_page_is_in_full(page)) {
181
            _mi_page_unfull(page);
182
        }
183
184
        _PyMem_mi_page_clear_qsbr(page);
185
        page->retire_expire = 0;
186
187
        if (should_advance_qsbr_for_page(qsbr, page)) {
188
            page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
189
        }
190
        else {
191
            page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
192
        }
193
194
        // We may be freeing a page belonging to a different thread during a
195
        // stop-the-world event. Find the _PyThreadStateImpl for the page.
196
        _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
197
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
198
        return false;
199
    }
200
#endif
201
0
    _mi_page_free(page, pq, force);
202
0
    return true;
203
0
}
204
205
static void
206
_PyMem_mi_page_reclaimed(mi_page_t *page)
207
0
{
208
#ifdef Py_GIL_DISABLED
209
    assert(page->qsbr_node.next == NULL);
210
    if (page->qsbr_goal != 0) {
211
        if (mi_page_all_free(page)) {
212
            assert(page->qsbr_node.next == NULL);
213
            _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
214
            assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
215
            page->retire_expire = 0;
216
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
217
        }
218
        else {
219
            page->qsbr_goal = 0;
220
        }
221
    }
222
#endif
223
0
}
224
225
static void
226
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
227
0
{
228
#ifdef Py_GIL_DISABLED
229
    if (!heap->page_use_qsbr) {
230
        return;
231
    }
232
233
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
234
    struct llist_node *head = &tstate->mimalloc.page_list;
235
    if (llist_empty(head)) {
236
        return;
237
    }
238
239
    struct llist_node *node;
240
    llist_for_each_safe(node, head) {
241
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
242
        if (!mi_page_all_free(page)) {
243
            // We allocated from this page some point after the delayed free
244
            _PyMem_mi_page_clear_qsbr(page);
245
            continue;
246
        }
247
248
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
249
            return;
250
        }
251
252
        _PyMem_mi_page_clear_qsbr(page);
253
        _mi_page_free(page, mi_page_queue_of(page), false);
254
    }
255
#endif
256
0
}
257
258
void *
259
_PyMem_MiMalloc(void *ctx, size_t size)
260
0
{
261
#ifdef Py_GIL_DISABLED
262
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
263
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
264
    return mi_heap_malloc(heap, size);
265
#else
266
0
    return mi_malloc(size);
267
0
#endif
268
0
}
269
270
void *
271
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
272
0
{
273
#ifdef Py_GIL_DISABLED
274
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
275
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
276
    return mi_heap_calloc(heap, nelem, elsize);
277
#else
278
0
    return mi_calloc(nelem, elsize);
279
0
#endif
280
0
}
281
282
void *
283
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
284
0
{
285
#ifdef Py_GIL_DISABLED
286
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
287
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
288
    return mi_heap_realloc(heap, ptr, size);
289
#else
290
0
    return mi_realloc(ptr, size);
291
0
#endif
292
0
}
293
294
void
295
_PyMem_MiFree(void *ctx, void *ptr)
296
0
{
297
0
    mi_free(ptr);
298
0
}
299
300
void *
301
_PyObject_MiMalloc(void *ctx, size_t nbytes)
302
0
{
303
#ifdef Py_GIL_DISABLED
304
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
305
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
306
    return mi_heap_malloc(heap, nbytes);
307
#else
308
0
    return mi_malloc(nbytes);
309
0
#endif
310
0
}
311
312
void *
313
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
314
0
{
315
#ifdef Py_GIL_DISABLED
316
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
317
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
318
    return mi_heap_calloc(heap, nelem, elsize);
319
#else
320
0
    return mi_calloc(nelem, elsize);
321
0
#endif
322
0
}
323
324
325
void *
326
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
327
0
{
328
#ifdef Py_GIL_DISABLED
329
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
330
    // Implement our own realloc logic so that we can copy PyObject header
331
    // in a thread-safe way.
332
    size_t size = mi_usable_size(ptr);
333
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
334
        return ptr;
335
    }
336
337
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
338
    void* newp = mi_heap_malloc(heap, nbytes);
339
    if (newp == NULL) {
340
        return NULL;
341
    }
342
343
    // Free threaded Python allows access from other threads to the PyObject reference count
344
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
345
    // These fields are typically initialized by PyObject_Init() using relaxed
346
    // atomic stores. We need to copy these fields in a thread-safe way here.
347
    // We use the "debug_offset" to determine how many bytes to copy -- it
348
    // includes the PyObject header and plus any extra pre-headers.
349
    size_t offset = heap->debug_offset;
350
    assert(offset % sizeof(void*) == 0);
351
352
    size_t copy_size = (size < nbytes ? size : nbytes);
353
    if (copy_size >= offset) {
354
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
355
            // Use memcpy to avoid strict-aliasing issues. However, we probably
356
            // still have unavoidable strict-aliasing issues with
357
            // _Py_atomic_store_ptr_relaxed here.
358
            void *word;
359
            memcpy(&word, (char*)ptr + i, sizeof(void*));
360
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
361
        }
362
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
363
    }
364
    else {
365
        _mi_memcpy(newp, ptr, copy_size);
366
    }
367
    mi_free(ptr);
368
    return newp;
369
#else
370
0
    return mi_realloc(ptr, nbytes);
371
0
#endif
372
0
}
373
374
void
375
_PyObject_MiFree(void *ctx, void *ptr)
376
0
{
377
0
    mi_free(ptr);
378
0
}
379
380
void *
381
_PyMem_MiRawMalloc(void *ctx, size_t size)
382
0
{
383
0
    return mi_malloc(size);
384
0
}
385
386
void *
387
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
388
0
{
389
0
    return mi_calloc(nelem, elsize);
390
0
}
391
392
void *
393
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
394
0
{
395
0
    return mi_realloc(ptr, size);
396
0
}
397
398
void
399
_PyMem_MiRawFree(void *ctx, void *ptr)
400
0
{
401
0
    mi_free(ptr);
402
0
}
403
#endif // WITH_MIMALLOC
404
405
406
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
407
408
409
#ifdef WITH_MIMALLOC
410
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
411
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
412
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
413
#endif
414
415
/* the pymalloc allocator */
416
417
// The actual implementation is further down.
418
419
#if defined(WITH_PYMALLOC)
420
void* _PyObject_Malloc(void *ctx, size_t size);
421
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
422
void _PyObject_Free(void *ctx, void *p);
423
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
424
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
425
#endif  // WITH_PYMALLOC
426
427
#if defined(Py_GIL_DISABLED)
428
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
429
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
430
#  define PYMEM_ALLOC MIMALLOC_ALLOC
431
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
432
#elif defined(WITH_PYMALLOC)
433
0
#  define PYRAW_ALLOC MALLOC_ALLOC
434
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
435
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
436
#else
437
#  define PYRAW_ALLOC MALLOC_ALLOC
438
#  define PYMEM_ALLOC MALLOC_ALLOC
439
#  define PYOBJ_ALLOC MALLOC_ALLOC
440
#endif
441
442
443
/* the default debug allocators */
444
445
// The actual implementation is further down.
446
447
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
448
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
449
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
450
void _PyMem_DebugRawFree(void *ctx, void *ptr);
451
452
void* _PyMem_DebugMalloc(void *ctx, size_t size);
453
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
454
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
455
void _PyMem_DebugFree(void *ctx, void *p);
456
457
#define PYDBGRAW_ALLOC \
458
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
459
#define PYDBGMEM_ALLOC \
460
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
461
#define PYDBGOBJ_ALLOC \
462
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
463
464
/* default raw allocator (not swappable) */
465
466
void *
467
_PyMem_DefaultRawMalloc(size_t size)
468
288
{
469
#ifdef Py_DEBUG
470
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
471
#else
472
288
    return _PyMem_RawMalloc(NULL, size);
473
288
#endif
474
288
}
475
476
void *
477
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
478
0
{
479
#ifdef Py_DEBUG
480
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
481
#else
482
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
483
0
#endif
484
0
}
485
486
void *
487
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
488
0
{
489
#ifdef Py_DEBUG
490
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
491
#else
492
0
    return _PyMem_RawRealloc(NULL, ptr, size);
493
0
#endif
494
0
}
495
496
void
497
_PyMem_DefaultRawFree(void *ptr)
498
324
{
499
#ifdef Py_DEBUG
500
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
501
#else
502
324
    _PyMem_RawFree(NULL, ptr);
503
324
#endif
504
324
}
505
506
wchar_t*
507
_PyMem_DefaultRawWcsdup(const wchar_t *str)
508
216
{
509
216
    assert(str != NULL);
510
511
216
    size_t len = wcslen(str);
512
216
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
513
0
        return NULL;
514
0
    }
515
516
216
    size_t size = (len + 1) * sizeof(wchar_t);
517
216
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
518
216
    if (str2 == NULL) {
519
0
        return NULL;
520
0
    }
521
522
216
    memcpy(str2, str, size);
523
216
    return str2;
524
216
}
525
526
/* the low-level virtual memory allocator */
527
528
#ifdef WITH_PYMALLOC
529
#  ifdef MS_WINDOWS
530
#    include <windows.h>
531
#  elif defined(HAVE_MMAP)
532
#    include <sys/mman.h>
533
#    ifdef MAP_ANONYMOUS
534
#      define ARENAS_USE_MMAP
535
#    endif
536
#  endif
537
#endif
538
539
/* Return the system's default huge page size in bytes, or 0 if it
540
 * cannot be determined.  The result is cached after the first call.
541
 *
542
 * This is Linux-only (/proc/meminfo).  On other systems that define
543
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
544
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
545
static size_t
546
_pymalloc_system_hugepage_size(void)
547
{
548
    static size_t hp_size = 0;
549
    static int initialized = 0;
550
551
    if (initialized) {
552
        return hp_size;
553
    }
554
555
#ifdef __linux__
556
    FILE *f = fopen("/proc/meminfo", "r");
557
    if (f != NULL) {
558
        char line[256];
559
        while (fgets(line, sizeof(line), f)) {
560
            unsigned long size_kb;
561
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
562
                hp_size = (size_t)size_kb * 1024;
563
                break;
564
            }
565
        }
566
        fclose(f);
567
    }
568
#endif
569
570
    initialized = 1;
571
    return hp_size;
572
}
573
#endif
574
575
void *
576
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
577
47.9k
{
578
#ifdef MS_WINDOWS
579
#  ifdef PYMALLOC_USE_HUGEPAGES
580
    if (_PyRuntime.allocators.use_hugepages) {
581
        SIZE_T lp_size = GetLargePageMinimum();
582
        if (lp_size > 0 && size % lp_size == 0) {
583
            void *ptr = VirtualAlloc(NULL, size,
584
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
585
                            PAGE_READWRITE);
586
            if (ptr != NULL)
587
                return ptr;
588
        }
589
    }
590
    /* Fall back to regular pages */
591
#  endif
592
    return VirtualAlloc(NULL, size,
593
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
594
#elif defined(ARENAS_USE_MMAP)
595
    void *ptr;
596
#  ifdef PYMALLOC_USE_HUGEPAGES
597
#    ifdef MAP_HUGETLB
598
    if (_PyRuntime.allocators.use_hugepages) {
599
        size_t hp_size = _pymalloc_system_hugepage_size();
600
        /* Only use huge pages if the arena size is a multiple of the
601
         * system's default huge page size.  When the arena is smaller
602
         * than the huge page, mmap still succeeds but silently
603
         * allocates an entire huge page; the subsequent munmap with
604
         * the smaller arena size then fails with EINVAL, leaking
605
         * all of that memory. */
606
        if (hp_size > 0 && size % hp_size == 0) {
607
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
608
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
609
            if (ptr != MAP_FAILED) {
610
                assert(ptr != NULL);
611
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
612
                return ptr;
613
            }
614
        }
615
    }
616
    /* Fall back to regular pages */
617
#    endif
618
#  endif
619
47.9k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
620
47.9k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
621
47.9k
    if (ptr == MAP_FAILED)
622
0
        return NULL;
623
47.9k
    assert(ptr != NULL);
624
47.9k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
625
47.9k
    return ptr;
626
#else
627
    return malloc(size);
628
#endif
629
47.9k
}
630
631
void
632
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
633
#if defined(ARENAS_USE_MMAP)
634
    size_t size
635
#else
636
    size_t Py_UNUSED(size)
637
#endif
638
)
639
47.4k
{
640
#ifdef MS_WINDOWS
641
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
642
    if (ptr == NULL) {
643
        return;
644
    }
645
    VirtualFree(ptr, 0, MEM_RELEASE);
646
#elif defined(ARENAS_USE_MMAP)
647
    /* Unlike free(), munmap() does not special-case NULL to noop. */
648
47.4k
    if (ptr == NULL) {
649
0
        return;
650
0
    }
651
47.4k
    munmap(ptr, size);
652
#else
653
    free(ptr);
654
#endif
655
47.4k
}
656
657
/*******************************************/
658
/* end low-level allocator implementations */
659
/*******************************************/
660
661
662
72
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
663
948M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
664
2.04G
#define _PyMem (_PyRuntime.allocators.standard.mem)
665
5.04G
#define _PyObject (_PyRuntime.allocators.standard.obj)
666
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
667
190k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
668
669
670
/***************************/
671
/* managing the allocators */
672
/***************************/
673
674
static int
675
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
676
                               PyMemAllocatorEx *old_alloc)
677
0
{
678
0
    if (old_alloc != NULL) {
679
0
        get_allocator_unlocked(domain, old_alloc);
680
0
    }
681
682
683
0
    PyMemAllocatorEx new_alloc;
684
0
    switch(domain)
685
0
    {
686
0
    case PYMEM_DOMAIN_RAW:
687
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
688
0
        break;
689
0
    case PYMEM_DOMAIN_MEM:
690
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
691
0
        break;
692
0
    case PYMEM_DOMAIN_OBJ:
693
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
694
0
        break;
695
0
    default:
696
        /* unknown domain */
697
0
        return -1;
698
0
    }
699
0
    set_allocator_unlocked(domain, &new_alloc);
700
0
    if (debug) {
701
0
        set_up_debug_hooks_domain_unlocked(domain);
702
0
    }
703
0
    return 0;
704
0
}
705
706
707
#ifdef Py_DEBUG
708
static const int pydebug = 1;
709
#else
710
static const int pydebug = 0;
711
#endif
712
713
int
714
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
715
0
{
716
0
    if (name == NULL || *name == '\0') {
717
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
718
           nameions): use default memory allocators */
719
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
720
0
    }
721
0
    else if (strcmp(name, "default") == 0) {
722
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
723
0
    }
724
0
    else if (strcmp(name, "debug") == 0) {
725
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
726
0
    }
727
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
728
0
    else if (strcmp(name, "pymalloc") == 0) {
729
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
730
0
    }
731
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
732
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
733
0
    }
734
0
#endif
735
0
#ifdef WITH_MIMALLOC
736
0
    else if (strcmp(name, "mimalloc") == 0) {
737
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
738
0
    }
739
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
740
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
741
0
    }
742
0
#endif
743
0
#ifndef Py_GIL_DISABLED
744
0
    else if (strcmp(name, "malloc") == 0) {
745
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
746
0
    }
747
0
    else if (strcmp(name, "malloc_debug") == 0) {
748
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
749
0
    }
750
0
#endif
751
0
    else {
752
        /* unknown allocator */
753
0
        return -1;
754
0
    }
755
0
    return 0;
756
0
}
757
758
759
static int
760
set_up_allocators_unlocked(PyMemAllocatorName allocator)
761
0
{
762
0
    switch (allocator) {
763
0
    case PYMEM_ALLOCATOR_NOT_SET:
764
        /* do nothing */
765
0
        break;
766
767
0
    case PYMEM_ALLOCATOR_DEFAULT:
768
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
769
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
770
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
771
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
772
0
        break;
773
774
0
    case PYMEM_ALLOCATOR_DEBUG:
775
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
776
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
777
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
778
0
        _PyRuntime.allocators.is_debug_enabled = 1;
779
0
        break;
780
781
0
#ifdef WITH_PYMALLOC
782
0
    case PYMEM_ALLOCATOR_PYMALLOC:
783
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
784
0
    {
785
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
786
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
787
788
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
789
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
790
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
791
792
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
793
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
794
0
        if (is_debug) {
795
0
            set_up_debug_hooks_unlocked();
796
0
        }
797
0
        break;
798
0
    }
799
0
#endif
800
0
#ifdef WITH_MIMALLOC
801
0
    case PYMEM_ALLOCATOR_MIMALLOC:
802
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
803
0
    {
804
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
805
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
806
807
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
808
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
809
810
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
811
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
812
813
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
814
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
815
0
        if (is_debug) {
816
0
            set_up_debug_hooks_unlocked();
817
0
        }
818
819
0
        break;
820
0
    }
821
0
#endif
822
823
0
    case PYMEM_ALLOCATOR_MALLOC:
824
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
825
0
    {
826
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
827
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
828
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
829
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
830
831
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
832
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
833
0
        if (is_debug) {
834
0
            set_up_debug_hooks_unlocked();
835
0
        }
836
0
        break;
837
0
    }
838
839
0
    default:
840
        /* unknown allocator */
841
0
        return -1;
842
0
    }
843
844
0
    return 0;
845
0
}
846
847
int
848
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
849
0
{
850
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
851
0
    int res = set_up_allocators_unlocked(allocator);
852
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
853
0
    return res;
854
0
}
855
856
857
static int
858
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
859
0
{
860
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
861
0
}
862
863
864
static const char*
865
get_current_allocator_name_unlocked(void)
866
0
{
867
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
868
0
#ifdef WITH_PYMALLOC
869
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
870
0
#endif
871
0
#ifdef WITH_MIMALLOC
872
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
873
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
874
0
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
875
0
#endif
876
877
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
878
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
879
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
880
0
    {
881
0
        return "malloc";
882
0
    }
883
0
#ifdef WITH_PYMALLOC
884
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
885
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
886
0
        pymemallocator_eq(&_PyObject, &pymalloc))
887
0
    {
888
0
        return "pymalloc";
889
0
    }
890
0
#endif
891
0
#ifdef WITH_MIMALLOC
892
0
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
893
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
894
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
895
0
    {
896
0
        return "mimalloc";
897
0
    }
898
0
#endif
899
900
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
901
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
902
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
903
904
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
905
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
906
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
907
0
    {
908
        /* Debug hooks installed */
909
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
910
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
911
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
912
0
        {
913
0
            return "malloc_debug";
914
0
        }
915
0
#ifdef WITH_PYMALLOC
916
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
917
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
918
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
919
0
        {
920
0
            return "pymalloc_debug";
921
0
        }
922
0
#endif
923
0
#ifdef WITH_MIMALLOC
924
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
925
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
926
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
927
0
        {
928
0
            return "mimalloc_debug";
929
0
        }
930
0
#endif
931
0
    }
932
0
    return NULL;
933
0
}
934
935
const char*
936
_PyMem_GetCurrentAllocatorName(void)
937
0
{
938
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
939
0
    const char *name = get_current_allocator_name_unlocked();
940
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
941
0
    return name;
942
0
}
943
944
945
int
946
_PyMem_DebugEnabled(void)
947
0
{
948
0
    return _PyRuntime.allocators.is_debug_enabled;
949
0
}
950
951
#ifdef WITH_PYMALLOC
952
static int
953
_PyMem_PymallocEnabled(void)
954
0
{
955
0
    if (_PyMem_DebugEnabled()) {
956
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
957
0
    }
958
0
    else {
959
0
        return (_PyObject.malloc == _PyObject_Malloc);
960
0
    }
961
0
}
962
963
#ifdef WITH_MIMALLOC
964
static int
965
_PyMem_MimallocEnabled(void)
966
0
{
967
#ifdef Py_GIL_DISABLED
968
    return 1;
969
#else
970
0
    if (_PyMem_DebugEnabled()) {
971
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
972
0
    }
973
0
    else {
974
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
975
0
    }
976
0
#endif
977
0
}
978
#endif  // WITH_MIMALLOC
979
980
#endif  // WITH_PYMALLOC
981
982
983
static void
984
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
985
0
{
986
0
    PyMemAllocatorEx alloc;
987
988
0
    if (domain == PYMEM_DOMAIN_RAW) {
989
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
990
0
            return;
991
0
        }
992
993
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
994
0
        alloc.ctx = &_PyMem_Debug.raw;
995
0
        alloc.malloc = _PyMem_DebugRawMalloc;
996
0
        alloc.calloc = _PyMem_DebugRawCalloc;
997
0
        alloc.realloc = _PyMem_DebugRawRealloc;
998
0
        alloc.free = _PyMem_DebugRawFree;
999
0
        set_allocator_unlocked(domain, &alloc);
1000
0
    }
1001
0
    else if (domain == PYMEM_DOMAIN_MEM) {
1002
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
1003
0
            return;
1004
0
        }
1005
1006
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
1007
0
        alloc.ctx = &_PyMem_Debug.mem;
1008
0
        alloc.malloc = _PyMem_DebugMalloc;
1009
0
        alloc.calloc = _PyMem_DebugCalloc;
1010
0
        alloc.realloc = _PyMem_DebugRealloc;
1011
0
        alloc.free = _PyMem_DebugFree;
1012
0
        set_allocator_unlocked(domain, &alloc);
1013
0
    }
1014
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
1015
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
1016
0
            return;
1017
0
        }
1018
1019
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1020
0
        alloc.ctx = &_PyMem_Debug.obj;
1021
0
        alloc.malloc = _PyMem_DebugMalloc;
1022
0
        alloc.calloc = _PyMem_DebugCalloc;
1023
0
        alloc.realloc = _PyMem_DebugRealloc;
1024
0
        alloc.free = _PyMem_DebugFree;
1025
0
        set_allocator_unlocked(domain, &alloc);
1026
0
    }
1027
0
}
1028
1029
1030
static void
1031
set_up_debug_hooks_unlocked(void)
1032
0
{
1033
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1034
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1035
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1036
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1037
0
}
1038
1039
void
1040
PyMem_SetupDebugHooks(void)
1041
0
{
1042
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1043
0
    set_up_debug_hooks_unlocked();
1044
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1045
0
}
1046
1047
static void
1048
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1049
36
{
1050
36
    switch(domain)
1051
36
    {
1052
36
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1053
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1054
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1055
0
    default:
1056
        /* unknown domain: set all attributes to NULL */
1057
0
        allocator->ctx = NULL;
1058
0
        allocator->malloc = NULL;
1059
0
        allocator->calloc = NULL;
1060
0
        allocator->realloc = NULL;
1061
0
        allocator->free = NULL;
1062
36
    }
1063
36
}
1064
1065
static void
1066
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1067
0
{
1068
0
    switch(domain)
1069
0
    {
1070
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1071
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1072
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1073
    /* ignore unknown domain */
1074
0
    }
1075
0
}
1076
1077
void
1078
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1079
36
{
1080
36
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1081
36
    get_allocator_unlocked(domain, allocator);
1082
36
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1083
36
}
1084
1085
void
1086
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1087
0
{
1088
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1089
0
    set_allocator_unlocked(domain, allocator);
1090
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1091
0
}
1092
1093
void
1094
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1095
0
{
1096
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1097
0
    *allocator = _PyObject_Arena;
1098
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1099
0
}
1100
1101
void
1102
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1103
0
{
1104
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1105
0
    _PyObject_Arena = *allocator;
1106
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1107
0
}
1108
1109
1110
/* Note that there is a possible, but very unlikely, race in any place
1111
 * below where we call one of the allocator functions.  We access two
1112
 * fields in each case:  "malloc", etc. and "ctx".
1113
 *
1114
 * It is unlikely that the allocator will be changed while one of those
1115
 * calls is happening, much less in that very narrow window.
1116
 * Furthermore, the likelihood of a race is drastically reduced by the
1117
 * fact that the allocator may not be changed after runtime init
1118
 * (except with a wrapper).
1119
 *
1120
 * With the above in mind, we currently don't worry about locking
1121
 * around these uses of the runtime-global allocators state. */
1122
1123
1124
/*************************/
1125
/* the "arena" allocator */
1126
/*************************/
1127
1128
void *
1129
_PyObject_VirtualAlloc(size_t size)
1130
43.0k
{
1131
43.0k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
1132
43.0k
}
1133
1134
void
1135
_PyObject_VirtualFree(void *obj, size_t size)
1136
43.0k
{
1137
43.0k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
1138
43.0k
}
1139
1140
1141
/***********************/
1142
/* the "raw" allocator */
1143
/***********************/
1144
1145
void *
1146
PyMem_RawMalloc(size_t size)
1147
233M
{
1148
    /*
1149
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1150
     * Most python internals blindly use a signed Py_ssize_t to track
1151
     * things without checking for overflows or negatives.
1152
     * As size_t is unsigned, checking for size < 0 is not required.
1153
     */
1154
233M
    if (size > (size_t)PY_SSIZE_T_MAX)
1155
0
        return NULL;
1156
233M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1157
233M
}
1158
1159
void *
1160
PyMem_RawCalloc(size_t nelem, size_t elsize)
1161
141k
{
1162
    /* see PyMem_RawMalloc() */
1163
141k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1164
0
        return NULL;
1165
141k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1166
141k
}
1167
1168
void*
1169
PyMem_RawRealloc(void *ptr, size_t new_size)
1170
6.57M
{
1171
    /* see PyMem_RawMalloc() */
1172
6.57M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1173
0
        return NULL;
1174
6.57M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1175
6.57M
}
1176
1177
void PyMem_RawFree(void *ptr)
1178
233M
{
1179
233M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1180
233M
}
1181
1182
1183
/***********************/
1184
/* the "mem" allocator */
1185
/***********************/
1186
1187
void *
1188
PyMem_Malloc(size_t size)
1189
235M
{
1190
    /* see PyMem_RawMalloc() */
1191
235M
    if (size > (size_t)PY_SSIZE_T_MAX)
1192
0
        return NULL;
1193
235M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1194
235M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1195
235M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1196
235M
    OBJECT_STAT_INC(allocations);
1197
235M
    return _PyMem.malloc(_PyMem.ctx, size);
1198
235M
}
1199
1200
void *
1201
PyMem_Calloc(size_t nelem, size_t elsize)
1202
41.1M
{
1203
    /* see PyMem_RawMalloc() */
1204
41.1M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1205
0
        return NULL;
1206
41.1M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1207
41.1M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1208
41.1M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1209
41.1M
    OBJECT_STAT_INC(allocations);
1210
41.1M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1211
41.1M
}
1212
1213
void *
1214
PyMem_Realloc(void *ptr, size_t new_size)
1215
248M
{
1216
    /* see PyMem_RawMalloc() */
1217
248M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1218
0
        return NULL;
1219
248M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1220
248M
}
1221
1222
void
1223
PyMem_Free(void *ptr)
1224
498M
{
1225
498M
    OBJECT_STAT_INC(frees);
1226
498M
    _PyMem.free(_PyMem.ctx, ptr);
1227
498M
}
1228
1229
1230
/***************************/
1231
/* pymem utility functions */
1232
/***************************/
1233
1234
wchar_t*
1235
_PyMem_RawWcsdup(const wchar_t *str)
1236
1.44k
{
1237
1.44k
    assert(str != NULL);
1238
1239
1.44k
    size_t len = wcslen(str);
1240
1.44k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1241
0
        return NULL;
1242
0
    }
1243
1244
1.44k
    size_t size = (len + 1) * sizeof(wchar_t);
1245
1.44k
    wchar_t *str2 = PyMem_RawMalloc(size);
1246
1.44k
    if (str2 == NULL) {
1247
0
        return NULL;
1248
0
    }
1249
1250
1.44k
    memcpy(str2, str, size);
1251
1.44k
    return str2;
1252
1.44k
}
1253
1254
char *
1255
_PyMem_RawStrdup(const char *str)
1256
108
{
1257
108
    assert(str != NULL);
1258
108
    size_t size = strlen(str) + 1;
1259
108
    char *copy = PyMem_RawMalloc(size);
1260
108
    if (copy == NULL) {
1261
0
        return NULL;
1262
0
    }
1263
108
    memcpy(copy, str, size);
1264
108
    return copy;
1265
108
}
1266
1267
char *
1268
_PyMem_Strdup(const char *str)
1269
0
{
1270
0
    assert(str != NULL);
1271
0
    size_t size = strlen(str) + 1;
1272
0
    char *copy = PyMem_Malloc(size);
1273
0
    if (copy == NULL) {
1274
0
        return NULL;
1275
0
    }
1276
0
    memcpy(copy, str, size);
1277
0
    return copy;
1278
0
}
1279
1280
/***********************************************/
1281
/* Delayed freeing support for Py_GIL_DISABLED */
1282
/***********************************************/
1283
1284
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1285
#define WORK_ITEMS_PER_CHUNK 254
1286
1287
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1288
struct _mem_work_item {
1289
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1290
    uint64_t qsbr_goal;
1291
};
1292
1293
// A fixed-size buffer of pointers to be freed
1294
struct _mem_work_chunk {
1295
    // Linked list node of chunks in queue
1296
    struct llist_node node;
1297
1298
    Py_ssize_t rd_idx;  // index of next item to read
1299
    Py_ssize_t wr_idx;  // index of next item to write
1300
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1301
};
1302
1303
static int
1304
work_item_should_decref(uintptr_t ptr)
1305
0
{
1306
0
    return ptr & 0x01;
1307
0
}
1308
1309
static void
1310
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1311
0
{
1312
0
    if (work_item_should_decref(ptr)) {
1313
0
        PyObject *obj = (PyObject *)(ptr - 1);
1314
#ifdef Py_GIL_DISABLED
1315
        if (cb == NULL) {
1316
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1317
            Py_DECREF(obj);
1318
            return;
1319
        }
1320
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1321
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1322
        if (refcount == 0) {
1323
            cb(obj, state);
1324
        }
1325
#else
1326
0
        Py_DECREF(obj);
1327
0
#endif
1328
0
    }
1329
0
    else {
1330
0
        PyMem_Free((void *)ptr);
1331
0
    }
1332
0
}
1333
1334
1335
#ifdef Py_GIL_DISABLED
1336
1337
// For deferred advance on free: the number of deferred items before advancing
1338
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1339
// want to process a chunk before it overflows.
1340
#define QSBR_DEFERRED_LIMIT 127
1341
1342
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1343
// helps limit memory usage due to QSBR delaying frees too long.
1344
#define QSBR_FREE_MEM_LIMIT 1024*1024
1345
1346
// Return true if the global write sequence should be advanced for a deferred
1347
// memory free.
1348
static bool
1349
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1350
{
1351
    if (size > QSBR_FREE_MEM_LIMIT) {
1352
        qsbr->deferred_count = 0;
1353
        qsbr->deferred_memory = 0;
1354
        qsbr->should_process = true;
1355
        return true;
1356
    }
1357
    qsbr->deferred_count++;
1358
    qsbr->deferred_memory += size;
1359
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1360
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1361
        qsbr->deferred_count = 0;
1362
        qsbr->deferred_memory = 0;
1363
        qsbr->should_process = true;
1364
        return true;
1365
    }
1366
    return false;
1367
}
1368
#endif
1369
1370
static void
1371
free_delayed(uintptr_t ptr, size_t size)
1372
0
{
1373
0
#ifndef Py_GIL_DISABLED
1374
0
    free_work_item(ptr, NULL, NULL);
1375
#else
1376
    PyInterpreterState *interp = _PyInterpreterState_GET();
1377
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1378
        interp->stoptheworld.world_stopped)
1379
    {
1380
        // Free immediately during interpreter shutdown or if the world is
1381
        // stopped.
1382
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1383
        free_work_item(ptr, NULL, NULL);
1384
        return;
1385
    }
1386
1387
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1388
    struct llist_node *head = &tstate->mem_free_queue;
1389
1390
    struct _mem_work_chunk *buf = NULL;
1391
    if (!llist_empty(head)) {
1392
        // Try to re-use the last buffer
1393
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1394
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1395
            // already full
1396
            buf = NULL;
1397
        }
1398
    }
1399
1400
    if (buf == NULL) {
1401
        buf = PyMem_Calloc(1, sizeof(*buf));
1402
        if (buf != NULL) {
1403
            llist_insert_tail(head, &buf->node);
1404
        }
1405
    }
1406
1407
    if (buf == NULL) {
1408
        // failed to allocate a buffer, free immediately
1409
        PyObject *to_dealloc = NULL;
1410
        _PyEval_StopTheWorld(tstate->base.interp);
1411
        if (work_item_should_decref(ptr)) {
1412
            PyObject *obj = (PyObject *)(ptr - 1);
1413
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1414
            if (refcount == 0) {
1415
                to_dealloc = obj;
1416
            }
1417
        }
1418
        else {
1419
            PyMem_Free((void *)ptr);
1420
        }
1421
        _PyEval_StartTheWorld(tstate->base.interp);
1422
        if (to_dealloc != NULL) {
1423
            _Py_Dealloc(to_dealloc);
1424
        }
1425
        return;
1426
    }
1427
1428
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1429
    uint64_t seq;
1430
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1431
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1432
    }
1433
    else {
1434
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1435
    }
1436
    buf->array[buf->wr_idx].ptr = ptr;
1437
    buf->array[buf->wr_idx].qsbr_goal = seq;
1438
    buf->wr_idx++;
1439
1440
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1441
        // Normally the processing of delayed items is done from the eval
1442
        // breaker.  Processing here is a safety measure to ensure too much
1443
        // work does not accumulate.
1444
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1445
    }
1446
#endif
1447
0
}
1448
1449
void
1450
_PyMem_FreeDelayed(void *ptr, size_t size)
1451
0
{
1452
0
    assert(!((uintptr_t)ptr & 0x01));
1453
0
    if (ptr != NULL) {
1454
0
        free_delayed((uintptr_t)ptr, size);
1455
0
    }
1456
0
}
1457
1458
#ifdef Py_GIL_DISABLED
1459
void
1460
_PyObject_XDecRefDelayed(PyObject *ptr)
1461
{
1462
    assert(!((uintptr_t)ptr & 0x01));
1463
    if (ptr != NULL) {
1464
        // We use 0 as the size since we don't have an easy way to know the
1465
        // actual size.  If we are freeing many objects, the write sequence
1466
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1467
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1468
    }
1469
}
1470
#endif
1471
1472
#ifdef Py_GIL_DISABLED
1473
void
1474
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1475
{
1476
    PyObject *old = *ptr;
1477
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1478
    if (old == NULL) {
1479
        return;
1480
    }
1481
    if (!_Py_IsImmortal(old)) {
1482
         _PyObject_XDecRefDelayed(old);
1483
    }
1484
}
1485
#endif
1486
1487
static struct _mem_work_chunk *
1488
work_queue_first(struct llist_node *head)
1489
0
{
1490
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1491
0
}
1492
1493
static void
1494
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1495
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1496
0
{
1497
0
    while (!llist_empty(head)) {
1498
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1499
1500
0
        if (buf->rd_idx < buf->wr_idx) {
1501
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1502
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1503
0
                return;
1504
0
            }
1505
1506
0
            buf->rd_idx++;
1507
            // NB: free_work_item may re-enter or execute arbitrary code
1508
0
            free_work_item(item->ptr, cb, state);
1509
0
            continue;
1510
0
        }
1511
1512
0
        assert(buf->rd_idx == buf->wr_idx);
1513
0
        if (keep_empty && buf->node.next == head) {
1514
            // Keep the last buffer in the queue to reduce re-allocations
1515
0
            buf->rd_idx = buf->wr_idx = 0;
1516
0
            return;
1517
0
        }
1518
1519
0
        llist_remove(&buf->node);
1520
0
        PyMem_Free(buf);
1521
0
    }
1522
0
}
1523
1524
static void
1525
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1526
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1527
                     void *state)
1528
0
{
1529
0
    assert(PyMutex_IsLocked(&queue->mutex));
1530
0
    process_queue(&queue->head, tstate, false, cb, state);
1531
1532
0
    int more_work = !llist_empty(&queue->head);
1533
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1534
0
}
1535
1536
static void
1537
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1538
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1539
                           void *state)
1540
0
{
1541
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1542
0
        return;
1543
0
    }
1544
1545
    // Try to acquire the lock, but don't block if it's already held.
1546
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1547
0
        process_interp_queue(queue, tstate, cb, state);
1548
0
        PyMutex_Unlock(&queue->mutex);
1549
0
    }
1550
0
}
1551
1552
void
1553
_PyMem_ProcessDelayed(PyThreadState *tstate)
1554
0
{
1555
0
    PyInterpreterState *interp = tstate->interp;
1556
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1557
1558
0
    tstate_impl->qsbr->should_process = false;
1559
1560
    // Process thread-local work
1561
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1562
1563
    // Process shared interpreter work
1564
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1565
0
}
1566
1567
void
1568
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1569
0
{
1570
0
    PyInterpreterState *interp = tstate->interp;
1571
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1572
1573
    // Process thread-local work
1574
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1575
1576
    // Process shared interpreter work
1577
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1578
0
}
1579
1580
void
1581
_PyMem_AbandonDelayed(PyThreadState *tstate)
1582
0
{
1583
0
    PyInterpreterState *interp = tstate->interp;
1584
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1585
1586
0
    if (llist_empty(queue)) {
1587
0
        return;
1588
0
    }
1589
1590
    // Check if the queue contains one empty buffer
1591
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1592
0
    if (buf->rd_idx == buf->wr_idx) {
1593
0
        llist_remove(&buf->node);
1594
0
        PyMem_Free(buf);
1595
0
        assert(llist_empty(queue));
1596
0
        return;
1597
0
    }
1598
1599
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1600
1601
    // Merge the thread's work queue into the interpreter's work queue.
1602
0
    llist_concat(&interp->mem_free_queue.head, queue);
1603
1604
    // Process the merged queue now (see gh-130794).
1605
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1606
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1607
1608
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1609
1610
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1611
0
}
1612
1613
void
1614
_PyMem_FiniDelayed(PyInterpreterState *interp)
1615
0
{
1616
0
    struct llist_node *head = &interp->mem_free_queue.head;
1617
0
    while (!llist_empty(head)) {
1618
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1619
1620
0
        if (buf->rd_idx < buf->wr_idx) {
1621
            // Free the remaining items immediately. There should be no other
1622
            // threads accessing the memory at this point during shutdown.
1623
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1624
0
            buf->rd_idx++;
1625
            // NB: free_work_item may re-enter or execute arbitrary code
1626
0
            free_work_item(item->ptr, NULL, NULL);
1627
0
            continue;
1628
0
        }
1629
1630
0
        llist_remove(&buf->node);
1631
0
        PyMem_Free(buf);
1632
0
    }
1633
0
}
1634
1635
/**************************/
1636
/* the "object" allocator */
1637
/**************************/
1638
1639
void *
1640
PyObject_Malloc(size_t size)
1641
1.23G
{
1642
    /* see PyMem_RawMalloc() */
1643
1.23G
    if (size > (size_t)PY_SSIZE_T_MAX)
1644
0
        return NULL;
1645
1.23G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1646
1.23G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1647
1.23G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1648
1.23G
    OBJECT_STAT_INC(allocations);
1649
1.23G
    return _PyObject.malloc(_PyObject.ctx, size);
1650
1.23G
}
1651
1652
void *
1653
PyObject_Calloc(size_t nelem, size_t elsize)
1654
0
{
1655
    /* see PyMem_RawMalloc() */
1656
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1657
0
        return NULL;
1658
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1659
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1660
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1661
0
    OBJECT_STAT_INC(allocations);
1662
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1663
0
}
1664
1665
void *
1666
PyObject_Realloc(void *ptr, size_t new_size)
1667
53.1M
{
1668
    /* see PyMem_RawMalloc() */
1669
53.1M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1670
0
        return NULL;
1671
53.1M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1672
53.1M
}
1673
1674
void
1675
PyObject_Free(void *ptr)
1676
1.23G
{
1677
1.23G
    OBJECT_STAT_INC(frees);
1678
1.23G
    _PyObject.free(_PyObject.ctx, ptr);
1679
1.23G
}
1680
1681
1682
/* Use __builtin_expect() where available to reduce overhead of
1683
   the valgrind checks */
1684
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1685
10.5G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1686
4.73G
#  define LIKELY(value) __builtin_expect((value), 1)
1687
#else
1688
#  define UNLIKELY(value) (value)
1689
#  define LIKELY(value) (value)
1690
#endif
1691
1692
#ifdef WITH_PYMALLOC
1693
1694
#ifdef WITH_VALGRIND
1695
#include <valgrind/valgrind.h>
1696
1697
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1698
static int running_on_valgrind = -1;
1699
#endif
1700
1701
typedef struct _obmalloc_state OMState;
1702
1703
/* obmalloc state for main interpreter and shared by all interpreters without
1704
 * their own obmalloc state.  By not explicitly initializing this structure, it
1705
 * will be allocated in the BSS which is a small performance win.  The radix
1706
 * tree arrays are fairly large but are sparsely used.  */
1707
static struct _obmalloc_state obmalloc_state_main;
1708
static bool obmalloc_state_initialized;
1709
1710
static inline int
1711
has_own_state(PyInterpreterState *interp)
1712
0
{
1713
0
    return (_Py_IsMainInterpreter(interp) ||
1714
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1715
0
            _Py_IsMainInterpreterFinalizing(interp));
1716
0
}
1717
1718
static inline OMState *
1719
get_state(void)
1720
3.65G
{
1721
3.65G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1722
3.65G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1723
3.65G
    return interp->obmalloc;
1724
3.65G
}
1725
1726
// These macros all rely on a local "state" variable.
1727
1.71G
#define usedpools (state->pools.used)
1728
2.33M
#define allarenas (state->mgmt.arenas)
1729
360
#define maxarenas (state->mgmt.maxarenas)
1730
23.5k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1731
20.0M
#define usable_arenas (state->mgmt.usable_arenas)
1732
13.8M
#define nfp2lasta (state->mgmt.nfp2lasta)
1733
15.1k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1734
4.91k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1735
5.86k
#define narenas_highwater (state->mgmt.narenas_highwater)
1736
466M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1737
1738
#ifdef WITH_MIMALLOC
1739
static bool count_blocks(
1740
    const mi_heap_t* heap, const mi_heap_area_t* area,
1741
    void* block, size_t block_size, void* allocated_blocks)
1742
0
{
1743
0
    *(size_t *)allocated_blocks += area->used;
1744
0
    return 1;
1745
0
}
1746
1747
static Py_ssize_t
1748
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1749
0
{
1750
0
    size_t allocated_blocks = 0;
1751
#ifdef Py_GIL_DISABLED
1752
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1753
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1754
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1755
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1756
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1757
        }
1758
    }
1759
1760
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1761
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1762
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1763
                                        &allocated_blocks);
1764
    }
1765
#else
1766
    // TODO(sgross): this only counts the current thread's blocks.
1767
0
    mi_heap_t *heap = mi_heap_get_default();
1768
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1769
0
#endif
1770
0
    return allocated_blocks;
1771
0
}
1772
#endif
1773
1774
Py_ssize_t
1775
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1776
0
{
1777
0
#ifdef WITH_MIMALLOC
1778
0
    if (_PyMem_MimallocEnabled()) {
1779
0
        return get_mimalloc_allocated_blocks(interp);
1780
0
    }
1781
0
#endif
1782
1783
#ifdef Py_DEBUG
1784
    assert(has_own_state(interp));
1785
#else
1786
0
    if (!has_own_state(interp)) {
1787
0
        _Py_FatalErrorFunc(__func__,
1788
0
                           "the interpreter doesn't have its own allocator");
1789
0
    }
1790
0
#endif
1791
0
    OMState *state = interp->obmalloc;
1792
1793
0
    if (state == NULL) {
1794
0
        return 0;
1795
0
    }
1796
1797
0
    Py_ssize_t n = raw_allocated_blocks;
1798
    /* add up allocated blocks for used pools */
1799
0
    for (uint i = 0; i < maxarenas; ++i) {
1800
        /* Skip arenas which are not allocated. */
1801
0
        if (allarenas[i].address == 0) {
1802
0
            continue;
1803
0
        }
1804
1805
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1806
1807
        /* visit every pool in the arena */
1808
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1809
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1810
0
            poolp p = (poolp)base;
1811
0
            n += p->ref.count;
1812
0
        }
1813
0
    }
1814
0
    return n;
1815
0
}
1816
1817
static void free_obmalloc_arenas(PyInterpreterState *interp);
1818
1819
void
1820
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1821
0
{
1822
0
#ifdef WITH_MIMALLOC
1823
0
    if (_PyMem_MimallocEnabled()) {
1824
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1825
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1826
0
        return;
1827
0
    }
1828
0
#endif
1829
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1830
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1831
0
        assert(has_own_state(interp) || leaked == 0);
1832
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1833
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1834
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1835
            // then some of the memory allocated by obmalloc has not been
1836
            // freed.  It might be safe to free the arenas in that case but
1837
            // it's possible that extension modules are still using that
1838
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1839
            // should be warning when this happens.  It should be possible to
1840
            // use a tool like "-fsanitize=address" to track down these leaks.
1841
0
            free_obmalloc_arenas(interp);
1842
0
        }
1843
0
    }
1844
0
}
1845
1846
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1847
1848
/* We preserve the number of blocks leaked during runtime finalization,
1849
   so they can be reported if the runtime is initialized again. */
1850
// XXX We don't lose any information by dropping this,
1851
// so we should consider doing so.
1852
static Py_ssize_t last_final_leaks = 0;
1853
1854
void
1855
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1856
0
{
1857
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1858
0
    runtime->obmalloc.interpreter_leaks = 0;
1859
0
}
1860
1861
static Py_ssize_t
1862
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1863
0
{
1864
0
    Py_ssize_t total = 0;
1865
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1866
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1867
0
        if (interp == NULL) {
1868
            /* We are at the very end of runtime finalization.
1869
               We can't rely on finalizing->interp since that thread
1870
               state is probably already freed, so we don't worry
1871
               about it. */
1872
0
            assert(PyInterpreterState_Head() == NULL);
1873
0
        }
1874
0
        else {
1875
0
            assert(interp != NULL);
1876
            /* It is probably the last interpreter but not necessarily. */
1877
0
            assert(PyInterpreterState_Next(interp) == NULL);
1878
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1879
0
        }
1880
0
    }
1881
0
    else {
1882
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1883
0
        HEAD_LOCK(runtime);
1884
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1885
0
        assert(interp != NULL);
1886
#ifdef Py_DEBUG
1887
        int got_main = 0;
1888
#endif
1889
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1890
#ifdef Py_DEBUG
1891
            if (_Py_IsMainInterpreter(interp)) {
1892
                assert(!got_main);
1893
                got_main = 1;
1894
                assert(has_own_state(interp));
1895
            }
1896
#endif
1897
0
            if (has_own_state(interp)) {
1898
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1899
0
            }
1900
0
        }
1901
0
        HEAD_UNLOCK(runtime);
1902
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1903
#ifdef Py_DEBUG
1904
        assert(got_main);
1905
#endif
1906
0
    }
1907
0
    total += runtime->obmalloc.interpreter_leaks;
1908
0
    total += last_final_leaks;
1909
0
    return total;
1910
0
}
1911
1912
Py_ssize_t
1913
_Py_GetGlobalAllocatedBlocks(void)
1914
0
{
1915
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1916
0
}
1917
1918
#if WITH_PYMALLOC_RADIX_TREE
1919
/*==========================================================================*/
1920
/* radix tree for tracking arena usage. */
1921
1922
5.35G
#define arena_map_root (state->usage.arena_map_root)
1923
#ifdef USE_INTERIOR_NODES
1924
36
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1925
36
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1926
#endif
1927
1928
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1929
 * it cannot be created */
1930
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1931
arena_map_get(OMState *state, pymem_block *p, int create)
1932
1.86G
{
1933
1.86G
#ifdef USE_INTERIOR_NODES
1934
    /* sanity check that IGNORE_BITS is correct */
1935
1.86G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1936
1.86G
    int i1 = MAP_TOP_INDEX(p);
1937
1.86G
    if (arena_map_root.ptrs[i1] == NULL) {
1938
36
        if (!create) {
1939
0
            return NULL;
1940
0
        }
1941
36
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1942
36
        if (n == NULL) {
1943
0
            return NULL;
1944
0
        }
1945
36
        arena_map_root.ptrs[i1] = n;
1946
36
        arena_map_mid_count++;
1947
36
    }
1948
1.86G
    int i2 = MAP_MID_INDEX(p);
1949
1.86G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1950
239M
        if (!create) {
1951
239M
            return NULL;
1952
239M
        }
1953
36
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1954
36
        if (n == NULL) {
1955
0
            return NULL;
1956
0
        }
1957
36
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
1958
36
        arena_map_bot_count++;
1959
36
    }
1960
1.62G
    return arena_map_root.ptrs[i1]->ptrs[i2];
1961
#else
1962
    return &arena_map_root;
1963
#endif
1964
1.86G
}
1965
1966
1967
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1968
 * away 24 bits of the address.  That reduces the space requirement of
1969
 * the tree compared to similar radix tree page-map schemes.  In
1970
 * exchange for slashing the space requirement, it needs more
1971
 * computation to check an address.
1972
 *
1973
 * Tracking coverage is done by "ideal" arena address.  It is easier to
1974
 * explain in decimal so let's say that the arena size is 100 bytes.
1975
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1976
 * pointer address is inside an actual arena, we have to check two ideal
1977
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1978
 * 300.  In the rare case that an arena is aligned in the ideal way
1979
 * (e.g. base address of arena is 200) then we only have to check one
1980
 * ideal address.
1981
 *
1982
 * The tree nodes for 200 and 300 both store the address of arena.
1983
 * There are two cases: the arena starts at a lower ideal arena and
1984
 * extends to this one, or the arena starts in this arena and extends to
1985
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
1986
 * these two cases.
1987
 */
1988
1989
1990
/* mark or unmark addresses covered by arena */
1991
static int
1992
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1993
9.30k
{
1994
    /* sanity check that IGNORE_BITS is correct */
1995
9.30k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1996
9.30k
    arena_map_bot_t *n_hi = arena_map_get(
1997
9.30k
            state, (pymem_block *)arena_base, is_used);
1998
9.30k
    if (n_hi == NULL) {
1999
0
        assert(is_used); /* otherwise node should already exist */
2000
0
        return 0; /* failed to allocate space for node */
2001
0
    }
2002
9.30k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
2003
9.30k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
2004
9.30k
    if (tail == 0) {
2005
        /* is ideal arena address */
2006
80
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
2007
80
    }
2008
9.22k
    else {
2009
        /* arena_base address is not ideal (aligned to arena size) and
2010
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
2011
         * for the next arena.  Note that it might be in different MAP_TOP
2012
         * and MAP_MID nodes as well so we need to call arena_map_get()
2013
         * again (do the full tree traversal).
2014
         */
2015
9.22k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
2016
9.22k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
2017
        /* If arena_base is a legit arena address, so is arena_base_next - 1
2018
         * (last address in arena).  If arena_base_next overflows then it
2019
         * must overflow to 0.  However, that would mean arena_base was
2020
         * "ideal" and we should not be in this case. */
2021
9.22k
        assert(arena_base < arena_base_next);
2022
9.22k
        arena_map_bot_t *n_lo = arena_map_get(
2023
9.22k
                state, (pymem_block *)arena_base_next, is_used);
2024
9.22k
        if (n_lo == NULL) {
2025
0
            assert(is_used); /* otherwise should already exist */
2026
0
            n_hi->arenas[i3].tail_hi = 0;
2027
0
            return 0; /* failed to allocate space for node */
2028
0
        }
2029
9.22k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
2030
9.22k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2031
9.22k
    }
2032
9.30k
    return 1;
2033
9.30k
}
2034
2035
/* Return true if 'p' is a pointer inside an obmalloc arena.
2036
 * _PyObject_Free() calls this so it needs to be very fast. */
2037
static int
2038
arena_map_is_used(OMState *state, pymem_block *p)
2039
1.86G
{
2040
1.86G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
2041
1.86G
    if (n == NULL) {
2042
239M
        return 0;
2043
239M
    }
2044
1.62G
    int i3 = MAP_BOT_INDEX(p);
2045
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2046
1.62G
    int32_t hi = n->arenas[i3].tail_hi;
2047
1.62G
    int32_t lo = n->arenas[i3].tail_lo;
2048
1.62G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2049
1.62G
    return (tail < lo) || (tail >= hi && hi != 0);
2050
1.86G
}
2051
2052
/* end of radix tree logic */
2053
/*==========================================================================*/
2054
#endif /* WITH_PYMALLOC_RADIX_TREE */
2055
2056
2057
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
2058
 * allocate a new arena, and return the address of an arena_object
2059
 * describing the new arena.  It's expected that the caller will set
2060
 * `usable_arenas` to the return value.
2061
 */
2062
static struct arena_object*
2063
new_arena(OMState *state)
2064
4.91k
{
2065
4.91k
    struct arena_object* arenaobj;
2066
4.91k
    uint excess;        /* number of bytes above pool alignment */
2067
4.91k
    void *address;
2068
2069
4.91k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
2070
4.91k
    if (debug_stats == -1) {
2071
36
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
2072
36
        debug_stats = (opt != NULL && *opt != '\0');
2073
36
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
2074
36
    }
2075
4.91k
    if (debug_stats) {
2076
0
        _PyObject_DebugMallocStats(stderr);
2077
0
    }
2078
2079
4.91k
    if (unused_arena_objects == NULL) {
2080
66
        uint i;
2081
66
        uint numarenas;
2082
66
        size_t nbytes;
2083
2084
        /* Double the number of arena objects on each allocation.
2085
         * Note that it's possible for `numarenas` to overflow.
2086
         */
2087
66
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
2088
66
        if (numarenas <= maxarenas)
2089
0
            return NULL;                /* overflow */
2090
#if SIZEOF_SIZE_T <= SIZEOF_INT
2091
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
2092
            return NULL;                /* overflow */
2093
#endif
2094
66
        nbytes = numarenas * sizeof(*allarenas);
2095
66
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
2096
66
        if (arenaobj == NULL)
2097
0
            return NULL;
2098
66
        allarenas = arenaobj;
2099
2100
        /* We might need to fix pointers that were copied.  However,
2101
         * new_arena only gets called when all the pages in the
2102
         * previous arenas are full.  Thus, there are *no* pointers
2103
         * into the old array. Thus, we don't have to worry about
2104
         * invalid pointers.  Just to be sure, some asserts:
2105
         */
2106
66
        assert(usable_arenas == NULL);
2107
66
        assert(unused_arena_objects == NULL);
2108
2109
        /* Put the new arenas on the unused_arena_objects list. */
2110
1.69k
        for (i = maxarenas; i < numarenas; ++i) {
2111
1.63k
            allarenas[i].address = 0;              /* mark as unassociated */
2112
1.63k
            allarenas[i].nextarena = i < numarenas - 1 ?
2113
1.63k
                                        &allarenas[i+1] : NULL;
2114
1.63k
        }
2115
2116
        /* Update globals. */
2117
66
        unused_arena_objects = &allarenas[maxarenas];
2118
66
        maxarenas = numarenas;
2119
66
    }
2120
2121
    /* Take the next available arena object off the head of the list. */
2122
4.91k
    assert(unused_arena_objects != NULL);
2123
4.91k
    arenaobj = unused_arena_objects;
2124
4.91k
    unused_arena_objects = arenaobj->nextarena;
2125
4.91k
    assert(arenaobj->address == 0);
2126
4.91k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2127
4.91k
#if WITH_PYMALLOC_RADIX_TREE
2128
4.91k
    if (address != NULL) {
2129
4.91k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2130
            /* marking arena in radix tree failed, abort */
2131
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2132
0
            address = NULL;
2133
0
        }
2134
4.91k
    }
2135
4.91k
#endif
2136
4.91k
    if (address == NULL) {
2137
        /* The allocation failed: return NULL after putting the
2138
         * arenaobj back.
2139
         */
2140
0
        arenaobj->nextarena = unused_arena_objects;
2141
0
        unused_arena_objects = arenaobj;
2142
0
        return NULL;
2143
0
    }
2144
4.91k
    arenaobj->address = (uintptr_t)address;
2145
2146
4.91k
    ++narenas_currently_allocated;
2147
4.91k
    ++ntimes_arena_allocated;
2148
4.91k
    if (narenas_currently_allocated > narenas_highwater)
2149
952
        narenas_highwater = narenas_currently_allocated;
2150
4.91k
    arenaobj->freepools = NULL;
2151
    /* pool_address <- first pool-aligned address in the arena
2152
       nfreepools <- number of whole pools that fit after alignment */
2153
4.91k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2154
4.91k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2155
4.91k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2156
4.91k
    if (excess != 0) {
2157
2.93k
        --arenaobj->nfreepools;
2158
2.93k
        arenaobj->pool_address += POOL_SIZE - excess;
2159
2.93k
    }
2160
4.91k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2161
2162
4.91k
    return arenaobj;
2163
4.91k
}
2164
2165
2166
2167
#if WITH_PYMALLOC_RADIX_TREE
2168
/* Return true if and only if P is an address that was allocated by
2169
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2170
 */
2171
static bool
2172
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2173
1.86G
{
2174
1.86G
    return arena_map_is_used(state, p);
2175
1.86G
}
2176
#else
2177
/*
2178
address_in_range(P, POOL)
2179
2180
Return true if and only if P is an address that was allocated by pymalloc.
2181
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2182
(the caller is asked to compute this because the macro expands POOL more than
2183
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2184
variable and pass the latter to the macro; because address_in_range is
2185
called on every alloc/realloc/free, micro-efficiency is important here).
2186
2187
Tricky:  Let B be the arena base address associated with the pool, B =
2188
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2189
2190
    B <= P < B + ARENA_SIZE
2191
2192
Subtracting B throughout, this is true iff
2193
2194
    0 <= P-B < ARENA_SIZE
2195
2196
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2197
2198
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2199
before the first arena has been allocated.  `arenas` is still NULL in that
2200
case.  We're relying on that maxarenas is also 0 in that case, so that
2201
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2202
into a NULL arenas.
2203
2204
Details:  given P and POOL, the arena_object corresponding to P is AO =
2205
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2206
stores, etc), POOL is the correct address of P's pool, AO.address is the
2207
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2208
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2209
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2210
controls P.
2211
2212
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2213
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2214
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2215
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2216
control P.
2217
2218
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2219
allocated arena, obmalloc controls all the memory in slice AO.address :
2220
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2221
so P doesn't lie in that slice, so the macro correctly reports that P is not
2222
controlled by obmalloc.
2223
2224
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2225
arena_object (one not currently associated with an allocated arena),
2226
AO.address is 0, and the second test in the macro reduces to:
2227
2228
    P < ARENA_SIZE
2229
2230
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2231
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2232
of the test still passes, and the third clause (AO.address != 0) is necessary
2233
to get the correct result:  AO.address is 0 in this case, so the macro
2234
correctly reports that P is not controlled by obmalloc (despite that P lies in
2235
slice AO.address : AO.address + ARENA_SIZE).
2236
2237
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2238
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2239
corresponded to a currently-allocated arena, so the "P is not controlled by
2240
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2241
was impossible.
2242
2243
Note that the logic is excruciating, and reading up possibly uninitialized
2244
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2245
creates problems for some memory debuggers.  The overwhelming advantage is
2246
that this test determines whether an arbitrary address is controlled by
2247
obmalloc in a small constant time, independent of the number of arenas
2248
obmalloc controls.  Since this test is needed at every entry point, it's
2249
extremely desirable that it be this fast.
2250
*/
2251
2252
static bool _Py_NO_SANITIZE_ADDRESS
2253
            _Py_NO_SANITIZE_THREAD
2254
            _Py_NO_SANITIZE_MEMORY
2255
address_in_range(OMState *state, void *p, poolp pool)
2256
{
2257
    // Since address_in_range may be reading from memory which was not allocated
2258
    // by Python, it is important that pool->arenaindex is read only once, as
2259
    // another thread may be concurrently modifying the value without holding
2260
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2261
    // only once.
2262
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2263
    return arenaindex < maxarenas &&
2264
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2265
        allarenas[arenaindex].address != 0;
2266
}
2267
2268
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2269
2270
/*==========================================================================*/
2271
2272
// Called when freelist is exhausted.  Extend the freelist if there is
2273
// space for a block.  Otherwise, remove this pool from usedpools.
2274
static void
2275
pymalloc_pool_extend(poolp pool, uint size)
2276
332M
{
2277
332M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2278
        /* There is room for another block. */
2279
173M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2280
173M
        pool->nextoffset += INDEX2SIZE(size);
2281
173M
        *(pymem_block **)(pool->freeblock) = NULL;
2282
173M
        return;
2283
173M
    }
2284
2285
    /* Pool is full, unlink from used pools. */
2286
158M
    poolp next;
2287
158M
    next = pool->nextpool;
2288
158M
    pool = pool->prevpool;
2289
158M
    next->prevpool = pool;
2290
158M
    pool->nextpool = next;
2291
158M
}
2292
2293
/* called when pymalloc_alloc can not allocate a block from usedpool.
2294
 * This function takes new pool and allocate a block from it.
2295
 */
2296
static void*
2297
allocate_from_new_pool(OMState *state, uint size)
2298
2.04M
{
2299
    /* There isn't a pool of the right size class immediately
2300
     * available:  use a free pool.
2301
     */
2302
2.04M
    if (UNLIKELY(usable_arenas == NULL)) {
2303
        /* No arena has a free pool:  allocate a new arena. */
2304
#ifdef WITH_MEMORY_LIMITS
2305
        if (narenas_currently_allocated >= MAX_ARENAS) {
2306
            return NULL;
2307
        }
2308
#endif
2309
4.91k
        usable_arenas = new_arena(state);
2310
4.91k
        if (usable_arenas == NULL) {
2311
0
            return NULL;
2312
0
        }
2313
4.91k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2314
4.91k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2315
4.91k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2316
4.91k
    }
2317
2.04M
    assert(usable_arenas->address != 0);
2318
2319
    /* This arena already had the smallest nfreepools value, so decreasing
2320
     * nfreepools doesn't change that, and we don't need to rearrange the
2321
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2322
     * we need to remove its arena_object from usable_arenas.
2323
     */
2324
2.04M
    assert(usable_arenas->nfreepools > 0);
2325
2.04M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2326
        /* It's the last of this size, so there won't be any. */
2327
2.03M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2328
2.03M
    }
2329
    /* If any free pools will remain, it will be the new smallest. */
2330
2.04M
    if (usable_arenas->nfreepools > 1) {
2331
1.93M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2332
1.93M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2333
1.93M
    }
2334
2335
    /* Try to get a cached free pool. */
2336
2.04M
    poolp pool = usable_arenas->freepools;
2337
2.04M
    if (LIKELY(pool != NULL)) {
2338
        /* Unlink from cached pools. */
2339
1.74M
        usable_arenas->freepools = pool->nextpool;
2340
1.74M
        usable_arenas->nfreepools--;
2341
1.74M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2342
            /* Wholly allocated:  remove. */
2343
108k
            assert(usable_arenas->freepools == NULL);
2344
108k
            assert(usable_arenas->nextarena == NULL ||
2345
108k
                   usable_arenas->nextarena->prevarena ==
2346
108k
                   usable_arenas);
2347
108k
            usable_arenas = usable_arenas->nextarena;
2348
108k
            if (usable_arenas != NULL) {
2349
105k
                usable_arenas->prevarena = NULL;
2350
105k
                assert(usable_arenas->address != 0);
2351
105k
            }
2352
108k
        }
2353
1.63M
        else {
2354
            /* nfreepools > 0:  it must be that freepools
2355
             * isn't NULL, or that we haven't yet carved
2356
             * off all the arena's pools for the first
2357
             * time.
2358
             */
2359
1.63M
            assert(usable_arenas->freepools != NULL ||
2360
1.63M
                   usable_arenas->pool_address <=
2361
1.63M
                   (pymem_block*)usable_arenas->address +
2362
1.63M
                       ARENA_SIZE - POOL_SIZE);
2363
1.63M
        }
2364
1.74M
    }
2365
305k
    else {
2366
        /* Carve off a new pool. */
2367
305k
        assert(usable_arenas->nfreepools > 0);
2368
305k
        assert(usable_arenas->freepools == NULL);
2369
305k
        pool = (poolp)usable_arenas->pool_address;
2370
305k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2371
305k
                                 ARENA_SIZE - POOL_SIZE);
2372
305k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2373
305k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2374
305k
        pool->szidx = DUMMY_SIZE_IDX;
2375
305k
        usable_arenas->pool_address += POOL_SIZE;
2376
305k
        --usable_arenas->nfreepools;
2377
2378
305k
        if (usable_arenas->nfreepools == 0) {
2379
4.72k
            assert(usable_arenas->nextarena == NULL ||
2380
4.72k
                   usable_arenas->nextarena->prevarena ==
2381
4.72k
                   usable_arenas);
2382
            /* Unlink the arena:  it is completely allocated. */
2383
4.72k
            usable_arenas = usable_arenas->nextarena;
2384
4.72k
            if (usable_arenas != NULL) {
2385
312
                usable_arenas->prevarena = NULL;
2386
312
                assert(usable_arenas->address != 0);
2387
312
            }
2388
4.72k
        }
2389
305k
    }
2390
2391
    /* Frontlink to used pools. */
2392
2.04M
    pymem_block *bp;
2393
2.04M
    poolp next = usedpools[size + size]; /* == prev */
2394
2.04M
    pool->nextpool = next;
2395
2.04M
    pool->prevpool = next;
2396
2.04M
    next->nextpool = pool;
2397
2.04M
    next->prevpool = pool;
2398
2.04M
    pool->ref.count = 1;
2399
2.04M
    if (pool->szidx == size) {
2400
        /* Luckily, this pool last contained blocks
2401
         * of the same size class, so its header
2402
         * and free list are already initialized.
2403
         */
2404
1.22M
        bp = pool->freeblock;
2405
1.22M
        assert(bp != NULL);
2406
1.22M
        pool->freeblock = *(pymem_block **)bp;
2407
1.22M
        return bp;
2408
1.22M
    }
2409
    /*
2410
     * Initialize the pool header, set up the free list to
2411
     * contain just the second block, and return the first
2412
     * block.
2413
     */
2414
819k
    pool->szidx = size;
2415
819k
    size = INDEX2SIZE(size);
2416
819k
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2417
819k
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2418
819k
    pool->maxnextoffset = POOL_SIZE - size;
2419
819k
    pool->freeblock = bp + size;
2420
819k
    *(pymem_block **)(pool->freeblock) = NULL;
2421
819k
    return bp;
2422
2.04M
}
2423
2424
/* pymalloc allocator
2425
2426
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2427
2428
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2429
   requests, on error in the code below (as a last chance to serve the request)
2430
   or when the max memory limit has been reached.
2431
*/
2432
static inline void*
2433
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2434
1.78G
{
2435
#ifdef WITH_VALGRIND
2436
    if (UNLIKELY(running_on_valgrind == -1)) {
2437
        running_on_valgrind = RUNNING_ON_VALGRIND;
2438
    }
2439
    if (UNLIKELY(running_on_valgrind)) {
2440
        return NULL;
2441
    }
2442
#endif
2443
2444
1.78G
    if (UNLIKELY(nbytes == 0)) {
2445
36.5M
        return NULL;
2446
36.5M
    }
2447
1.75G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2448
196M
        return NULL;
2449
196M
    }
2450
2451
1.55G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2452
1.55G
    poolp pool = usedpools[size + size];
2453
1.55G
    pymem_block *bp;
2454
2455
1.55G
    if (LIKELY(pool != pool->nextpool)) {
2456
        /*
2457
         * There is a used pool for this size class.
2458
         * Pick up the head block of its free list.
2459
         */
2460
1.55G
        ++pool->ref.count;
2461
1.55G
        bp = pool->freeblock;
2462
1.55G
        assert(bp != NULL);
2463
2464
1.55G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2465
            // Reached the end of the free list, try to extend it.
2466
332M
            pymalloc_pool_extend(pool, size);
2467
332M
        }
2468
1.55G
    }
2469
2.04M
    else {
2470
        /* There isn't a pool of the right size class immediately
2471
         * available:  use a free pool.
2472
         */
2473
2.04M
        bp = allocate_from_new_pool(state, size);
2474
2.04M
    }
2475
2476
1.55G
    return (void *)bp;
2477
1.75G
}
2478
2479
2480
void *
2481
_PyObject_Malloc(void *ctx, size_t nbytes)
2482
1.74G
{
2483
1.74G
    OMState *state = get_state();
2484
1.74G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2485
1.74G
    if (LIKELY(ptr != NULL)) {
2486
1.51G
        return ptr;
2487
1.51G
    }
2488
2489
233M
    ptr = PyMem_RawMalloc(nbytes);
2490
233M
    if (ptr != NULL) {
2491
233M
        raw_allocated_blocks++;
2492
233M
    }
2493
233M
    return ptr;
2494
1.74G
}
2495
2496
2497
void *
2498
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2499
41.1M
{
2500
41.1M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2501
41.1M
    size_t nbytes = nelem * elsize;
2502
2503
41.1M
    OMState *state = get_state();
2504
41.1M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2505
41.1M
    if (LIKELY(ptr != NULL)) {
2506
41.0M
        memset(ptr, 0, nbytes);
2507
41.0M
        return ptr;
2508
41.0M
    }
2509
2510
140k
    ptr = PyMem_RawCalloc(nelem, elsize);
2511
140k
    if (ptr != NULL) {
2512
140k
        raw_allocated_blocks++;
2513
140k
    }
2514
140k
    return ptr;
2515
41.1M
}
2516
2517
2518
static void
2519
insert_to_usedpool(OMState *state, poolp pool)
2520
158M
{
2521
158M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2522
2523
158M
    uint size = pool->szidx;
2524
158M
    poolp next = usedpools[size + size];
2525
158M
    poolp prev = next->prevpool;
2526
2527
    /* insert pool before next:   prev <-> pool <-> next */
2528
158M
    pool->nextpool = next;
2529
158M
    pool->prevpool = prev;
2530
158M
    next->prevpool = pool;
2531
158M
    prev->nextpool = pool;
2532
158M
}
2533
2534
static void
2535
insert_to_freepool(OMState *state, poolp pool)
2536
2.02M
{
2537
2.02M
    poolp next = pool->nextpool;
2538
2.02M
    poolp prev = pool->prevpool;
2539
2.02M
    next->prevpool = prev;
2540
2.02M
    prev->nextpool = next;
2541
2542
    /* Link the pool to freepools.  This is a singly-linked
2543
     * list, and pool->prevpool isn't used there.
2544
     */
2545
2.02M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2546
2.02M
    pool->nextpool = ao->freepools;
2547
2.02M
    ao->freepools = pool;
2548
2.02M
    uint nf = ao->nfreepools;
2549
    /* If this is the rightmost arena with this number of free pools,
2550
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2551
     * are no arenas in usable_arenas with that value.
2552
     */
2553
2.02M
    struct arena_object* lastnf = nfp2lasta[nf];
2554
2.02M
    assert((nf == 0 && lastnf == NULL) ||
2555
2.02M
           (nf > 0 &&
2556
2.02M
            lastnf != NULL &&
2557
2.02M
            lastnf->nfreepools == nf &&
2558
2.02M
            (lastnf->nextarena == NULL ||
2559
2.02M
             nf < lastnf->nextarena->nfreepools)));
2560
2.02M
    if (lastnf == ao) {  /* it is the rightmost */
2561
1.85M
        struct arena_object* p = ao->prevarena;
2562
1.85M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2563
1.85M
    }
2564
2.02M
    ao->nfreepools = ++nf;
2565
2566
    /* All the rest is arena management.  We just freed
2567
     * a pool, and there are 4 cases for arena mgmt:
2568
     * 1. If all the pools are free, return the arena to
2569
     *    the system free().  Except if this is the last
2570
     *    arena in the list, keep it to avoid thrashing:
2571
     *    keeping one wholly free arena in the list avoids
2572
     *    pathological cases where a simple loop would
2573
     *    otherwise provoke needing to allocate and free an
2574
     *    arena on every iteration.  See bpo-37257.
2575
     * 2. If this is the only free pool in the arena,
2576
     *    add the arena back to the `usable_arenas` list.
2577
     * 3. If the "next" arena has a smaller count of free
2578
     *    pools, we have to "slide this arena right" to
2579
     *    restore that usable_arenas is sorted in order of
2580
     *    nfreepools.
2581
     * 4. Else there's nothing more to do.
2582
     */
2583
2.02M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2584
        /* Case 1.  First unlink ao from usable_arenas.
2585
         */
2586
4.39k
        assert(ao->prevarena == NULL ||
2587
4.39k
               ao->prevarena->address != 0);
2588
4.39k
        assert(ao ->nextarena == NULL ||
2589
4.39k
               ao->nextarena->address != 0);
2590
2591
        /* Fix the pointer in the prevarena, or the
2592
         * usable_arenas pointer.
2593
         */
2594
4.39k
        if (ao->prevarena == NULL) {
2595
564
            usable_arenas = ao->nextarena;
2596
564
            assert(usable_arenas == NULL ||
2597
564
                   usable_arenas->address != 0);
2598
564
        }
2599
3.83k
        else {
2600
3.83k
            assert(ao->prevarena->nextarena == ao);
2601
3.83k
            ao->prevarena->nextarena =
2602
3.83k
                ao->nextarena;
2603
3.83k
        }
2604
        /* Fix the pointer in the nextarena. */
2605
4.39k
        if (ao->nextarena != NULL) {
2606
4.39k
            assert(ao->nextarena->prevarena == ao);
2607
4.39k
            ao->nextarena->prevarena =
2608
4.39k
                ao->prevarena;
2609
4.39k
        }
2610
        /* Record that this arena_object slot is
2611
         * available to be reused.
2612
         */
2613
4.39k
        ao->nextarena = unused_arena_objects;
2614
4.39k
        unused_arena_objects = ao;
2615
2616
4.39k
#if WITH_PYMALLOC_RADIX_TREE
2617
        /* mark arena region as not under control of obmalloc */
2618
4.39k
        arena_map_mark_used(state, ao->address, 0);
2619
4.39k
#endif
2620
2621
        /* Free the entire arena. */
2622
4.39k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2623
4.39k
                             (void *)ao->address, ARENA_SIZE);
2624
4.39k
        ao->address = 0;                        /* mark unassociated */
2625
4.39k
        --narenas_currently_allocated;
2626
2627
4.39k
        return;
2628
4.39k
    }
2629
2630
2.01M
    if (nf == 1) {
2631
        /* Case 2.  Put ao at the head of
2632
         * usable_arenas.  Note that because
2633
         * ao->nfreepools was 0 before, ao isn't
2634
         * currently on the usable_arenas list.
2635
         */
2636
113k
        ao->nextarena = usable_arenas;
2637
113k
        ao->prevarena = NULL;
2638
113k
        if (usable_arenas)
2639
110k
            usable_arenas->prevarena = ao;
2640
113k
        usable_arenas = ao;
2641
113k
        assert(usable_arenas->address != 0);
2642
113k
        if (nfp2lasta[1] == NULL) {
2643
105k
            nfp2lasta[1] = ao;
2644
105k
        }
2645
2646
113k
        return;
2647
113k
    }
2648
2649
    /* If this arena is now out of order, we need to keep
2650
     * the list sorted.  The list is kept sorted so that
2651
     * the "most full" arenas are used first, which allows
2652
     * the nearly empty arenas to be completely freed.  In
2653
     * a few un-scientific tests, it seems like this
2654
     * approach allowed a lot more memory to be freed.
2655
     */
2656
    /* If this is the only arena with nf, record that. */
2657
1.90M
    if (nfp2lasta[nf] == NULL) {
2658
1.83M
        nfp2lasta[nf] = ao;
2659
1.83M
    } /* else the rightmost with nf doesn't change */
2660
    /* If this was the rightmost of the old size, it remains in place. */
2661
1.90M
    if (ao == lastnf) {
2662
        /* Case 4.  Nothing to do. */
2663
1.84M
        return;
2664
1.84M
    }
2665
    /* If ao were the only arena in the list, the last block would have
2666
     * gotten us out.
2667
     */
2668
1.90M
    assert(ao->nextarena != NULL);
2669
2670
    /* Case 3:  We have to move the arena towards the end of the list,
2671
     * because it has more free pools than the arena to its right.  It needs
2672
     * to move to follow lastnf.
2673
     * First unlink ao from usable_arenas.
2674
     */
2675
55.7k
    if (ao->prevarena != NULL) {
2676
        /* ao isn't at the head of the list */
2677
42.9k
        assert(ao->prevarena->nextarena == ao);
2678
42.9k
        ao->prevarena->nextarena = ao->nextarena;
2679
42.9k
    }
2680
12.8k
    else {
2681
        /* ao is at the head of the list */
2682
12.8k
        assert(usable_arenas == ao);
2683
12.8k
        usable_arenas = ao->nextarena;
2684
12.8k
    }
2685
55.7k
    ao->nextarena->prevarena = ao->prevarena;
2686
    /* And insert after lastnf. */
2687
55.7k
    ao->prevarena = lastnf;
2688
55.7k
    ao->nextarena = lastnf->nextarena;
2689
55.7k
    if (ao->nextarena != NULL) {
2690
54.1k
        ao->nextarena->prevarena = ao;
2691
54.1k
    }
2692
55.7k
    lastnf->nextarena = ao;
2693
    /* Verify that the swaps worked. */
2694
55.7k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2695
55.7k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2696
55.7k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2697
55.7k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2698
55.7k
           || ao->prevarena->nextarena == ao);
2699
55.7k
}
2700
2701
/* Free a memory block allocated by pymalloc_alloc().
2702
   Return 1 if it was freed.
2703
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2704
static inline int
2705
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2706
1.78G
{
2707
1.78G
    assert(p != NULL);
2708
2709
#ifdef WITH_VALGRIND
2710
    if (UNLIKELY(running_on_valgrind > 0)) {
2711
        return 0;
2712
    }
2713
#endif
2714
2715
1.78G
    poolp pool = POOL_ADDR(p);
2716
1.78G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2717
233M
        return 0;
2718
233M
    }
2719
    /* We allocated this address. */
2720
2721
    /* Link p to the start of the pool's freeblock list.  Since
2722
     * the pool had at least the p block outstanding, the pool
2723
     * wasn't empty (so it's already in a usedpools[] list, or
2724
     * was full and is in no list -- it's not in the freeblocks
2725
     * list in any case).
2726
     */
2727
1.78G
    assert(pool->ref.count > 0);            /* else it was empty */
2728
1.55G
    pymem_block *lastfree = pool->freeblock;
2729
1.55G
    *(pymem_block **)p = lastfree;
2730
1.55G
    pool->freeblock = (pymem_block *)p;
2731
1.55G
    pool->ref.count--;
2732
2733
1.55G
    if (UNLIKELY(lastfree == NULL)) {
2734
        /* Pool was full, so doesn't currently live in any list:
2735
         * link it to the front of the appropriate usedpools[] list.
2736
         * This mimics LRU pool usage for new allocations and
2737
         * targets optimal filling when several pools contain
2738
         * blocks of the same size class.
2739
         */
2740
158M
        insert_to_usedpool(state, pool);
2741
158M
        return 1;
2742
158M
    }
2743
2744
    /* freeblock wasn't NULL, so the pool wasn't full,
2745
     * and the pool is in a usedpools[] list.
2746
     */
2747
1.39G
    if (LIKELY(pool->ref.count != 0)) {
2748
        /* pool isn't empty:  leave it in usedpools */
2749
1.38G
        return 1;
2750
1.38G
    }
2751
2752
    /* Pool is now empty:  unlink from usedpools, and
2753
     * link to the front of freepools.  This ensures that
2754
     * previously freed pools will be allocated later
2755
     * (being not referenced, they are perhaps paged out).
2756
     */
2757
2.02M
    insert_to_freepool(state, pool);
2758
2.02M
    return 1;
2759
1.39G
}
2760
2761
2762
void
2763
_PyObject_Free(void *ctx, void *p)
2764
1.78G
{
2765
    /* PyObject_Free(NULL) has no effect */
2766
1.78G
    if (p == NULL) {
2767
1.70M
        return;
2768
1.70M
    }
2769
2770
1.78G
    OMState *state = get_state();
2771
1.78G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2772
        /* pymalloc didn't allocate this address */
2773
233M
        PyMem_RawFree(p);
2774
233M
        raw_allocated_blocks--;
2775
233M
    }
2776
1.78G
}
2777
2778
2779
/* pymalloc realloc.
2780
2781
   If nbytes==0, then as the Python docs promise, we do not treat this like
2782
   free(p), and return a non-NULL result.
2783
2784
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2785
   newptr_p.
2786
2787
   Return 0 if pymalloc didn't allocated p. */
2788
static int
2789
pymalloc_realloc(OMState *state, void *ctx,
2790
                 void **newptr_p, void *p, size_t nbytes)
2791
80.4M
{
2792
80.4M
    void *bp;
2793
80.4M
    poolp pool;
2794
80.4M
    size_t size;
2795
2796
80.4M
    assert(p != NULL);
2797
2798
#ifdef WITH_VALGRIND
2799
    /* Treat running_on_valgrind == -1 the same as 0 */
2800
    if (UNLIKELY(running_on_valgrind > 0)) {
2801
        return 0;
2802
    }
2803
#endif
2804
2805
80.4M
    pool = POOL_ADDR(p);
2806
80.4M
    if (!address_in_range(state, p, pool)) {
2807
        /* pymalloc is not managing this block.
2808
2809
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2810
           over this block.  However, if we do, we need to copy the valid data
2811
           from the C-managed block to one of our blocks, and there's no
2812
           portable way to know how much of the memory space starting at p is
2813
           valid.
2814
2815
           As bug 1185883 pointed out the hard way, it's possible that the
2816
           C-managed block is "at the end" of allocated VM space, so that a
2817
           memory fault can occur if we try to copy nbytes bytes starting at p.
2818
           Instead we punt: let C continue to manage this block. */
2819
6.57M
        return 0;
2820
6.57M
    }
2821
2822
    /* pymalloc is in charge of this block */
2823
73.8M
    size = INDEX2SIZE(pool->szidx);
2824
73.8M
    if (nbytes <= size) {
2825
        /* The block is staying the same or shrinking.
2826
2827
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2828
           block to a smaller size class, but it wastes memory not to copy it.
2829
2830
           The compromise here is to copy on shrink only if at least 25% of
2831
           size can be shaved off. */
2832
55.9M
        if (4 * nbytes > 3 * size) {
2833
            /* It's the same, or shrinking and new/old > 3/4. */
2834
21.3M
            *newptr_p = p;
2835
21.3M
            return 1;
2836
21.3M
        }
2837
34.6M
        size = nbytes;
2838
34.6M
    }
2839
2840
52.4M
    bp = _PyObject_Malloc(ctx, nbytes);
2841
52.4M
    if (bp != NULL) {
2842
52.4M
        memcpy(bp, p, size);
2843
52.4M
        _PyObject_Free(ctx, p);
2844
52.4M
    }
2845
52.4M
    *newptr_p = bp;
2846
52.4M
    return 1;
2847
73.8M
}
2848
2849
2850
void *
2851
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2852
301M
{
2853
301M
    void *ptr2;
2854
2855
301M
    if (ptr == NULL) {
2856
221M
        return _PyObject_Malloc(ctx, nbytes);
2857
221M
    }
2858
2859
80.4M
    OMState *state = get_state();
2860
80.4M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2861
73.8M
        return ptr2;
2862
73.8M
    }
2863
2864
6.57M
    return PyMem_RawRealloc(ptr, nbytes);
2865
80.4M
}
2866
2867
#else   /* ! WITH_PYMALLOC */
2868
2869
/*==========================================================================*/
2870
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2871
 * only be used by extensions that are compiled with pymalloc enabled. */
2872
2873
Py_ssize_t
2874
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2875
{
2876
    return 0;
2877
}
2878
2879
Py_ssize_t
2880
_Py_GetGlobalAllocatedBlocks(void)
2881
{
2882
    return 0;
2883
}
2884
2885
void
2886
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2887
{
2888
    return;
2889
}
2890
2891
void
2892
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2893
{
2894
    return;
2895
}
2896
2897
#endif /* WITH_PYMALLOC */
2898
2899
2900
/*==========================================================================*/
2901
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2902
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2903
 */
2904
2905
/* Uncomment this define to add the "serialno" field */
2906
/* #define PYMEM_DEBUG_SERIALNO */
2907
2908
#ifdef PYMEM_DEBUG_SERIALNO
2909
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2910
2911
/* serialno is always incremented via calling this routine.  The point is
2912
 * to supply a single place to set a breakpoint.
2913
 */
2914
static void
2915
bumpserialno(void)
2916
{
2917
    ++serialno;
2918
}
2919
#endif
2920
2921
0
#define SST SIZEOF_SIZE_T
2922
2923
#ifdef PYMEM_DEBUG_SERIALNO
2924
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2925
#else
2926
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2927
#endif
2928
2929
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2930
static size_t
2931
read_size_t(const void *p)
2932
0
{
2933
0
    const uint8_t *q = (const uint8_t *)p;
2934
0
    size_t result = *q++;
2935
0
    int i;
2936
2937
0
    for (i = SST; --i > 0; ++q)
2938
0
        result = (result << 8) | *q;
2939
0
    return result;
2940
0
}
2941
2942
/* Write n as a big-endian size_t, MSB at address p, LSB at
2943
 * p + sizeof(size_t) - 1.
2944
 */
2945
static void
2946
write_size_t(void *p, size_t n)
2947
0
{
2948
0
    uint8_t *q = (uint8_t *)p + SST - 1;
2949
0
    int i;
2950
2951
0
    for (i = SST; --i >= 0; --q) {
2952
0
        *q = (uint8_t)(n & 0xff);
2953
0
        n >>= 8;
2954
0
    }
2955
0
}
2956
2957
static void
2958
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
2959
               bool is_alloc)
2960
0
{
2961
#ifdef Py_GIL_DISABLED
2962
    if (api->api_id == 'o') {
2963
        // Don't overwrite the first few bytes of a PyObject allocation in the
2964
        // free-threaded build
2965
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
2966
        size_t debug_offset;
2967
        if (is_alloc) {
2968
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
2969
        }
2970
        else {
2971
            char *alloc = (char *)data - 2*SST;  // start of the allocation
2972
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
2973
        }
2974
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
2975
        if (debug_offset < nbytes) {
2976
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
2977
        }
2978
        return;
2979
    }
2980
#endif
2981
0
    memset(data, c, nbytes);
2982
0
}
2983
2984
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2985
   fills them with useful stuff, here calling the underlying malloc's result p:
2986
2987
p[0: S]
2988
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
2989
    to read in a memory dump).
2990
p[S]
2991
    API ID.  See PEP 445.  This is a character, but seems undocumented.
2992
p[S+1: 2*S]
2993
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
2994
p[2*S: 2*S+n]
2995
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
2996
    Used to catch reference to uninitialized memory.
2997
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2998
    handled the request itself.
2999
p[2*S+n: 2*S+n+S]
3000
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
3001
p[2*S+n+S: 2*S+n+2*S]
3002
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
3003
    and _PyMem_DebugRealloc.
3004
    This is a big-endian size_t.
3005
    If "bad memory" is detected later, the serial number gives an
3006
    excellent way to set a breakpoint on the next run, to capture the
3007
    instant at which this block was passed out.
3008
3009
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
3010
for 3 * S extra bytes, and omits the last serialno field.
3011
*/
3012
3013
static void *
3014
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
3015
0
{
3016
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3017
0
    uint8_t *p;           /* base address of malloc'ed pad block */
3018
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
3019
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3020
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
3021
3022
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3023
        /* integer overflow: can't represent total as a Py_ssize_t */
3024
0
        return NULL;
3025
0
    }
3026
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3027
3028
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
3029
                ^--- p    ^--- data   ^--- tail
3030
       S: nbytes stored as size_t
3031
       I: API identifier (1 byte)
3032
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
3033
       C: Clean bytes used later to store actual data
3034
       N: Serial number stored as size_t
3035
3036
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
3037
       is omitted. */
3038
3039
0
    if (use_calloc) {
3040
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
3041
0
    }
3042
0
    else {
3043
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
3044
0
    }
3045
0
    if (p == NULL) {
3046
0
        return NULL;
3047
0
    }
3048
0
    data = p + 2*SST;
3049
3050
#ifdef PYMEM_DEBUG_SERIALNO
3051
    bumpserialno();
3052
#endif
3053
3054
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
3055
0
    write_size_t(p, nbytes);
3056
0
    p[SST] = (uint8_t)api->api_id;
3057
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3058
3059
0
    if (nbytes > 0 && !use_calloc) {
3060
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
3061
0
    }
3062
3063
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
3064
0
    tail = data + nbytes;
3065
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3066
#ifdef PYMEM_DEBUG_SERIALNO
3067
    write_size_t(tail + SST, serialno);
3068
#endif
3069
3070
0
    return data;
3071
0
}
3072
3073
void *
3074
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
3075
0
{
3076
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3077
0
}
3078
3079
void *
3080
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3081
0
{
3082
0
    size_t nbytes;
3083
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3084
0
    nbytes = nelem * elsize;
3085
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3086
0
}
3087
3088
3089
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3090
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3091
   Then fills the original bytes with PYMEM_DEADBYTE.
3092
   Then calls the underlying free.
3093
*/
3094
void
3095
_PyMem_DebugRawFree(void *ctx, void *p)
3096
0
{
3097
    /* PyMem_Free(NULL) has no effect */
3098
0
    if (p == NULL) {
3099
0
        return;
3100
0
    }
3101
3102
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3103
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
3104
0
    size_t nbytes;
3105
3106
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3107
0
    nbytes = read_size_t(q);
3108
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
3109
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
3110
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
3111
0
    api->alloc.free(api->alloc.ctx, q);
3112
0
}
3113
3114
3115
void *
3116
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3117
0
{
3118
0
    if (p == NULL) {
3119
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3120
0
    }
3121
3122
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3123
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3124
0
    uint8_t *data;        /* pointer to data bytes */
3125
0
    uint8_t *r;
3126
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3127
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3128
0
    size_t original_nbytes;
3129
0
#define ERASED_SIZE 64
3130
3131
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3132
3133
0
    data = (uint8_t *)p;
3134
0
    head = data - 2*SST;
3135
0
    original_nbytes = read_size_t(head);
3136
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3137
        /* integer overflow: can't represent total as a Py_ssize_t */
3138
0
        return NULL;
3139
0
    }
3140
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3141
3142
0
    tail = data + original_nbytes;
3143
#ifdef PYMEM_DEBUG_SERIALNO
3144
    size_t block_serialno = read_size_t(tail + SST);
3145
#endif
3146
0
#ifndef Py_GIL_DISABLED
3147
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
3148
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
3149
     */
3150
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3151
0
    if (original_nbytes <= sizeof(save)) {
3152
0
        memcpy(save, data, original_nbytes);
3153
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3154
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3155
0
    }
3156
0
    else {
3157
0
        memcpy(save, data, ERASED_SIZE);
3158
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3159
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3160
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3161
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3162
0
    }
3163
0
#endif
3164
3165
    /* Resize and add decorations. */
3166
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3167
0
    if (r == NULL) {
3168
        /* if realloc() failed: rewrite header and footer which have
3169
           just been erased */
3170
0
        nbytes = original_nbytes;
3171
0
    }
3172
0
    else {
3173
0
        head = r;
3174
#ifdef PYMEM_DEBUG_SERIALNO
3175
        bumpserialno();
3176
        block_serialno = serialno;
3177
#endif
3178
0
    }
3179
0
    data = head + 2*SST;
3180
3181
0
    write_size_t(head, nbytes);
3182
0
    head[SST] = (uint8_t)api->api_id;
3183
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3184
3185
0
    tail = data + nbytes;
3186
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3187
#ifdef PYMEM_DEBUG_SERIALNO
3188
    write_size_t(tail + SST, block_serialno);
3189
#endif
3190
3191
0
#ifndef Py_GIL_DISABLED
3192
    /* Restore saved bytes. */
3193
0
    if (original_nbytes <= sizeof(save)) {
3194
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3195
0
    }
3196
0
    else {
3197
0
        size_t i = original_nbytes - ERASED_SIZE;
3198
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3199
0
        if (nbytes > i) {
3200
0
            memcpy(data + i, &save[ERASED_SIZE],
3201
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3202
0
        }
3203
0
    }
3204
0
#endif
3205
3206
0
    if (r == NULL) {
3207
0
        return NULL;
3208
0
    }
3209
3210
0
    if (nbytes > original_nbytes) {
3211
        /* growing: mark new extra memory clean */
3212
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3213
0
               nbytes - original_nbytes);
3214
0
    }
3215
3216
0
    return data;
3217
0
}
3218
3219
static inline void
3220
_PyMem_DebugCheckGIL(const char *func)
3221
0
{
3222
0
    PyThreadState *tstate = _PyThreadState_GET();
3223
0
    if (tstate == NULL) {
3224
0
#ifndef Py_GIL_DISABLED
3225
0
        _Py_FatalErrorFunc(func,
3226
0
                           "Python memory allocator called "
3227
0
                           "without holding the GIL");
3228
#else
3229
        _Py_FatalErrorFunc(func,
3230
                           "Python memory allocator called "
3231
                           "without an active thread state. "
3232
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3233
#endif
3234
0
    }
3235
0
}
3236
3237
void *
3238
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3239
0
{
3240
0
    _PyMem_DebugCheckGIL(__func__);
3241
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3242
0
}
3243
3244
void *
3245
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3246
0
{
3247
0
    _PyMem_DebugCheckGIL(__func__);
3248
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3249
0
}
3250
3251
3252
void
3253
_PyMem_DebugFree(void *ctx, void *ptr)
3254
0
{
3255
0
    _PyMem_DebugCheckGIL(__func__);
3256
0
    _PyMem_DebugRawFree(ctx, ptr);
3257
0
}
3258
3259
3260
void *
3261
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3262
0
{
3263
0
    _PyMem_DebugCheckGIL(__func__);
3264
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3265
0
}
3266
3267
/* Check the forbidden bytes on both ends of the memory allocated for p.
3268
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3269
 * and call Py_FatalError to kill the program.
3270
 * The API id, is also checked.
3271
 */
3272
static void
3273
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3274
0
{
3275
0
    assert(p != NULL);
3276
3277
0
    const uint8_t *q = (const uint8_t *)p;
3278
0
    size_t nbytes;
3279
0
    const uint8_t *tail;
3280
0
    int i;
3281
0
    char id;
3282
3283
    /* Check the API id */
3284
0
    id = (char)q[-SST];
3285
0
    if (id != api) {
3286
0
        _PyObject_DebugDumpAddress(p);
3287
0
        _Py_FatalErrorFormat(func,
3288
0
                             "bad ID: Allocated using API '%c', "
3289
0
                             "verified using API '%c'",
3290
0
                             id, api);
3291
0
    }
3292
3293
    /* Check the stuff at the start of p first:  if there's underwrite
3294
     * corruption, the number-of-bytes field may be nuts, and checking
3295
     * the tail could lead to a segfault then.
3296
     */
3297
0
    for (i = SST-1; i >= 1; --i) {
3298
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3299
0
            _PyObject_DebugDumpAddress(p);
3300
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3301
0
        }
3302
0
    }
3303
3304
0
    nbytes = read_size_t(q - 2*SST);
3305
0
    tail = q + nbytes;
3306
0
    for (i = 0; i < SST; ++i) {
3307
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3308
0
            _PyObject_DebugDumpAddress(p);
3309
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3310
0
        }
3311
0
    }
3312
0
}
3313
3314
/* Display info to stderr about the memory block at p. */
3315
static void
3316
_PyObject_DebugDumpAddress(const void *p)
3317
0
{
3318
0
    const uint8_t *q = (const uint8_t *)p;
3319
0
    const uint8_t *tail;
3320
0
    size_t nbytes;
3321
0
    int i;
3322
0
    int ok;
3323
0
    char id;
3324
3325
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3326
0
    if (p == NULL) {
3327
0
        fprintf(stderr, "\n");
3328
0
        return;
3329
0
    }
3330
0
    id = (char)q[-SST];
3331
0
    fprintf(stderr, " API '%c'\n", id);
3332
3333
0
    nbytes = read_size_t(q - 2*SST);
3334
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3335
3336
    /* In case this is nuts, check the leading pad bytes first. */
3337
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3338
0
    ok = 1;
3339
0
    for (i = 1; i <= SST-1; ++i) {
3340
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3341
0
            ok = 0;
3342
0
            break;
3343
0
        }
3344
0
    }
3345
0
    if (ok)
3346
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3347
0
    else {
3348
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3349
0
            PYMEM_FORBIDDENBYTE);
3350
0
        for (i = SST-1; i >= 1; --i) {
3351
0
            const uint8_t byte = *(q-i);
3352
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3353
0
            if (byte != PYMEM_FORBIDDENBYTE)
3354
0
                fputs(" *** OUCH", stderr);
3355
0
            fputc('\n', stderr);
3356
0
        }
3357
3358
0
        fputs("    Because memory is corrupted at the start, the "
3359
0
              "count of bytes requested\n"
3360
0
              "       may be bogus, and checking the trailing pad "
3361
0
              "bytes may segfault.\n", stderr);
3362
0
    }
3363
3364
0
    tail = q + nbytes;
3365
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3366
0
    ok = 1;
3367
0
    for (i = 0; i < SST; ++i) {
3368
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3369
0
            ok = 0;
3370
0
            break;
3371
0
        }
3372
0
    }
3373
0
    if (ok)
3374
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3375
0
    else {
3376
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3377
0
                PYMEM_FORBIDDENBYTE);
3378
0
        for (i = 0; i < SST; ++i) {
3379
0
            const uint8_t byte = tail[i];
3380
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3381
0
                    i, byte);
3382
0
            if (byte != PYMEM_FORBIDDENBYTE)
3383
0
                fputs(" *** OUCH", stderr);
3384
0
            fputc('\n', stderr);
3385
0
        }
3386
0
    }
3387
3388
#ifdef PYMEM_DEBUG_SERIALNO
3389
    size_t serial = read_size_t(tail + SST);
3390
    fprintf(stderr,
3391
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3392
            serial);
3393
#endif
3394
3395
0
    if (nbytes > 0) {
3396
0
        i = 0;
3397
0
        fputs("    Data at p:", stderr);
3398
        /* print up to 8 bytes at the start */
3399
0
        while (q < tail && i < 8) {
3400
0
            fprintf(stderr, " %02x", *q);
3401
0
            ++i;
3402
0
            ++q;
3403
0
        }
3404
        /* and up to 8 at the end */
3405
0
        if (q < tail) {
3406
0
            if (tail - q > 8) {
3407
0
                fputs(" ...", stderr);
3408
0
                q = tail - 8;
3409
0
            }
3410
0
            while (q < tail) {
3411
0
                fprintf(stderr, " %02x", *q);
3412
0
                ++q;
3413
0
            }
3414
0
        }
3415
0
        fputc('\n', stderr);
3416
0
    }
3417
0
    fputc('\n', stderr);
3418
3419
0
    fflush(stderr);
3420
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3421
0
}
3422
3423
3424
static size_t
3425
printone(FILE *out, const char* msg, size_t value)
3426
0
{
3427
0
    int i, k;
3428
0
    char buf[100];
3429
0
    size_t origvalue = value;
3430
3431
0
    fputs(msg, out);
3432
0
    for (i = (int)strlen(msg); i < 35; ++i)
3433
0
        fputc(' ', out);
3434
0
    fputc('=', out);
3435
3436
    /* Write the value with commas. */
3437
0
    i = 22;
3438
0
    buf[i--] = '\0';
3439
0
    buf[i--] = '\n';
3440
0
    k = 3;
3441
0
    do {
3442
0
        size_t nextvalue = value / 10;
3443
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3444
0
        value = nextvalue;
3445
0
        buf[i--] = (char)(digit + '0');
3446
0
        --k;
3447
0
        if (k == 0 && value && i >= 0) {
3448
0
            k = 3;
3449
0
            buf[i--] = ',';
3450
0
        }
3451
0
    } while (value && i >= 0);
3452
3453
0
    while (i >= 0)
3454
0
        buf[i--] = ' ';
3455
0
    fputs(buf, out);
3456
3457
0
    return origvalue;
3458
0
}
3459
3460
void
3461
_PyDebugAllocatorStats(FILE *out,
3462
                       const char *block_name, int num_blocks, size_t sizeof_block)
3463
0
{
3464
0
    char buf1[128];
3465
0
    char buf2[128];
3466
0
    PyOS_snprintf(buf1, sizeof(buf1),
3467
0
                  "%d %ss * %zd bytes each",
3468
0
                  num_blocks, block_name, sizeof_block);
3469
0
    PyOS_snprintf(buf2, sizeof(buf2),
3470
0
                  "%48s ", buf1);
3471
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3472
0
}
3473
3474
// Return true if the obmalloc state structure is heap allocated,
3475
// by PyMem_RawCalloc().  For the main interpreter, this structure
3476
// allocated in the BSS.  Allocating that way gives some memory savings
3477
// and a small performance win (at least on a demand paged OS).  On
3478
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3479
// memory is for the arena_map_top array.  Since normally only one entry
3480
// of that array is used, only one page of resident memory is actually
3481
// used, rather than the full 256 kB.
3482
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3483
0
{
3484
0
#if WITH_PYMALLOC
3485
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3486
#else
3487
    return false;
3488
#endif
3489
0
}
3490
3491
#ifdef WITH_PYMALLOC
3492
static void
3493
init_obmalloc_pools(PyInterpreterState *interp)
3494
36
{
3495
    // initialize the obmalloc->pools structure.  This must be done
3496
    // before the obmalloc alloc/free functions can be called.
3497
36
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3498
36
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3499
36
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3500
36
}
3501
#endif /* WITH_PYMALLOC */
3502
3503
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3504
36
{
3505
36
#ifdef WITH_PYMALLOC
3506
    /* Initialize obmalloc, but only for subinterpreters,
3507
       since the main interpreter is initialized statically. */
3508
36
    if (_Py_IsMainInterpreter(interp)
3509
0
            || _PyInterpreterState_HasFeature(interp,
3510
36
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3511
36
        interp->obmalloc = &obmalloc_state_main;
3512
36
        if (!obmalloc_state_initialized) {
3513
36
            init_obmalloc_pools(interp);
3514
36
            obmalloc_state_initialized = true;
3515
36
        }
3516
36
    } else {
3517
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3518
0
        if (interp->obmalloc == NULL) {
3519
0
            return -1;
3520
0
        }
3521
0
        init_obmalloc_pools(interp);
3522
0
    }
3523
36
#endif /* WITH_PYMALLOC */
3524
36
    return 0; // success
3525
36
}
3526
3527
3528
#ifdef WITH_PYMALLOC
3529
3530
static void
3531
free_obmalloc_arenas(PyInterpreterState *interp)
3532
0
{
3533
0
    OMState *state = interp->obmalloc;
3534
0
    for (uint i = 0; i < maxarenas; ++i) {
3535
        // free each obmalloc memory arena
3536
0
        struct arena_object *ao = &allarenas[i];
3537
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3538
0
                             (void *)ao->address, ARENA_SIZE);
3539
0
    }
3540
    // free the array containing pointers to all arenas
3541
0
    PyMem_RawFree(allarenas);
3542
0
#if WITH_PYMALLOC_RADIX_TREE
3543
0
#ifdef USE_INTERIOR_NODES
3544
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3545
    // by arena_map_mark_used() but not freed when arenas are freed.
3546
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3547
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3548
0
         if (mid == NULL) {
3549
0
             continue;
3550
0
         }
3551
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3552
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3553
0
            if (bot == NULL) {
3554
0
                continue;
3555
0
            }
3556
0
            PyMem_RawFree(bot);
3557
0
         }
3558
0
         PyMem_RawFree(mid);
3559
0
    }
3560
0
#endif
3561
0
#endif
3562
0
}
3563
3564
#ifdef Py_DEBUG
3565
/* Is target in the list?  The list is traversed via the nextpool pointers.
3566
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3567
 * list, else 0.
3568
 */
3569
static int
3570
pool_is_in_list(const poolp target, poolp list)
3571
{
3572
    poolp origlist = list;
3573
    assert(target != NULL);
3574
    if (list == NULL)
3575
        return 0;
3576
    do {
3577
        if (target == list)
3578
            return 1;
3579
        list = list->nextpool;
3580
    } while (list != NULL && list != origlist);
3581
    return 0;
3582
}
3583
#endif
3584
3585
#ifdef WITH_MIMALLOC
3586
struct _alloc_stats {
3587
    size_t allocated_blocks;
3588
    size_t allocated_bytes;
3589
    size_t allocated_with_overhead;
3590
    size_t bytes_reserved;
3591
    size_t bytes_committed;
3592
};
3593
3594
static bool _collect_alloc_stats(
3595
    const mi_heap_t* heap, const mi_heap_area_t* area,
3596
    void* block, size_t block_size, void* arg)
3597
0
{
3598
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3599
0
    stats->allocated_blocks += area->used;
3600
0
    stats->allocated_bytes += area->used * area->block_size;
3601
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3602
0
    stats->bytes_reserved += area->reserved;
3603
0
    stats->bytes_committed += area->committed;
3604
0
    return 1;
3605
0
}
3606
3607
static void
3608
py_mimalloc_print_stats(FILE *out)
3609
0
{
3610
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3611
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3612
0
    fprintf(out, "Medium block threshold = %zu\n",
3613
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3614
0
    fprintf(out, "Large object max size = %zu\n",
3615
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3616
3617
0
    mi_heap_t *heap = mi_heap_get_default();
3618
0
    struct _alloc_stats stats;
3619
0
    memset(&stats, 0, sizeof(stats));
3620
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3621
3622
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3623
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3624
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3625
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3626
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3627
0
}
3628
#endif
3629
3630
3631
static void
3632
pymalloc_print_stats(FILE *out)
3633
0
{
3634
0
    OMState *state = get_state();
3635
3636
0
    uint i;
3637
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3638
    /* # of pools, allocated blocks, and free blocks per class index */
3639
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3640
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3641
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3642
    /* total # of allocated bytes in used and full pools */
3643
0
    size_t allocated_bytes = 0;
3644
    /* total # of available bytes in used pools */
3645
0
    size_t available_bytes = 0;
3646
    /* # of free pools + pools not yet carved out of current arena */
3647
0
    uint numfreepools = 0;
3648
    /* # of bytes for arena alignment padding */
3649
0
    size_t arena_alignment = 0;
3650
    /* # of bytes in used and full pools used for pool_headers */
3651
0
    size_t pool_header_bytes = 0;
3652
    /* # of bytes in used and full pools wasted due to quantization,
3653
     * i.e. the necessarily leftover space at the ends of used and
3654
     * full pools.
3655
     */
3656
0
    size_t quantization = 0;
3657
    /* # of arenas actually allocated. */
3658
0
    size_t narenas = 0;
3659
    /* running total -- should equal narenas * ARENA_SIZE */
3660
0
    size_t total;
3661
0
    char buf[128];
3662
3663
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3664
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3665
3666
0
    for (i = 0; i < numclasses; ++i)
3667
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3668
3669
    /* Because full pools aren't linked to from anything, it's easiest
3670
     * to march over all the arenas.  If we're lucky, most of the memory
3671
     * will be living in full pools -- would be a shame to miss them.
3672
     */
3673
0
    for (i = 0; i < maxarenas; ++i) {
3674
0
        uintptr_t base = allarenas[i].address;
3675
3676
        /* Skip arenas which are not allocated. */
3677
0
        if (allarenas[i].address == (uintptr_t)NULL)
3678
0
            continue;
3679
0
        narenas += 1;
3680
3681
0
        numfreepools += allarenas[i].nfreepools;
3682
3683
        /* round up to pool alignment */
3684
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3685
0
            arena_alignment += POOL_SIZE;
3686
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3687
0
            base += POOL_SIZE;
3688
0
        }
3689
3690
        /* visit every pool in the arena */
3691
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3692
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3693
0
            poolp p = (poolp)base;
3694
0
            const uint sz = p->szidx;
3695
0
            uint freeblocks;
3696
3697
0
            if (p->ref.count == 0) {
3698
                /* currently unused */
3699
#ifdef Py_DEBUG
3700
                assert(pool_is_in_list(p, allarenas[i].freepools));
3701
#endif
3702
0
                continue;
3703
0
            }
3704
0
            ++numpools[sz];
3705
0
            numblocks[sz] += p->ref.count;
3706
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3707
0
            numfreeblocks[sz] += freeblocks;
3708
#ifdef Py_DEBUG
3709
            if (freeblocks > 0)
3710
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3711
#endif
3712
0
        }
3713
0
    }
3714
0
    assert(narenas == narenas_currently_allocated);
3715
3716
0
    fputc('\n', out);
3717
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3718
0
          "-----   ----   ---------   -------------  ------------\n",
3719
0
          out);
3720
3721
0
    for (i = 0; i < numclasses; ++i) {
3722
0
        size_t p = numpools[i];
3723
0
        size_t b = numblocks[i];
3724
0
        size_t f = numfreeblocks[i];
3725
0
        uint size = INDEX2SIZE(i);
3726
0
        if (p == 0) {
3727
0
            assert(b == 0 && f == 0);
3728
0
            continue;
3729
0
        }
3730
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3731
0
                i, size, p, b, f);
3732
0
        allocated_bytes += b * size;
3733
0
        available_bytes += f * size;
3734
0
        pool_header_bytes += p * POOL_OVERHEAD;
3735
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3736
0
    }
3737
0
    fputc('\n', out);
3738
#ifdef PYMEM_DEBUG_SERIALNO
3739
    if (_PyMem_DebugEnabled()) {
3740
        (void)printone(out, "# times object malloc called", serialno);
3741
    }
3742
#endif
3743
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3744
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3745
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3746
0
    (void)printone(out, "# arenas allocated current", narenas);
3747
3748
0
    PyOS_snprintf(buf, sizeof(buf),
3749
0
                  "%zu arenas * %d bytes/arena",
3750
0
                  narenas, ARENA_SIZE);
3751
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3752
3753
0
    fputc('\n', out);
3754
3755
    /* Account for what all of those arena bytes are being used for. */
3756
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3757
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3758
3759
0
    PyOS_snprintf(buf, sizeof(buf),
3760
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3761
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3762
3763
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3764
0
    total += printone(out, "# bytes lost to quantization", quantization);
3765
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3766
0
    (void)printone(out, "Total", total);
3767
0
    assert(narenas * ARENA_SIZE == total);
3768
3769
0
#if WITH_PYMALLOC_RADIX_TREE
3770
0
    fputs("\narena map counts\n", out);
3771
0
#ifdef USE_INTERIOR_NODES
3772
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3773
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3774
0
    fputc('\n', out);
3775
0
#endif
3776
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3777
0
#ifdef USE_INTERIOR_NODES
3778
0
    total += printone(out, "# bytes lost to arena map mid",
3779
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3780
0
    total += printone(out, "# bytes lost to arena map bot",
3781
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3782
0
    (void)printone(out, "Total", total);
3783
0
#endif
3784
0
#endif
3785
3786
0
}
3787
3788
/* Print summary info to "out" about the state of pymalloc's structures.
3789
 * In Py_DEBUG mode, also perform some expensive internal consistency
3790
 * checks.
3791
 *
3792
 * Return 0 if the memory debug hooks are not installed or no statistics was
3793
 * written into out, return 1 otherwise.
3794
 */
3795
int
3796
_PyObject_DebugMallocStats(FILE *out)
3797
0
{
3798
0
#ifdef WITH_MIMALLOC
3799
0
    if (_PyMem_MimallocEnabled()) {
3800
0
        py_mimalloc_print_stats(out);
3801
0
        return 1;
3802
0
    }
3803
0
    else
3804
0
#endif
3805
0
    if (_PyMem_PymallocEnabled()) {
3806
0
        pymalloc_print_stats(out);
3807
0
        return 1;
3808
0
    }
3809
0
    else {
3810
0
        return 0;
3811
0
    }
3812
0
}
3813
3814
#endif /* #ifdef WITH_PYMALLOC */