Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#include <errno.h>                // errno
18
#ifdef WITH_MIMALLOC
19
// Forward declarations of functions used in our mimalloc modifications
20
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
21
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
22
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
23
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
24
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
25
#  include "pycore_mimalloc.h"
26
#  include "mimalloc/static.c"
27
#  include "mimalloc/internal.h"  // for stats
28
#endif
29
30
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
31
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
32
#endif
33
34
#undef  uint
35
2.00G
#define uint pymem_uint
36
37
38
/* Defined in tracemalloc.c */
39
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
40
41
static void _PyObject_DebugDumpAddress(const void *p);
42
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
43
44
45
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
46
static void set_up_debug_hooks_unlocked(void);
47
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
49
50
51
/***************************************/
52
/* low-level allocator implementations */
53
/***************************************/
54
55
/* the default raw allocator (wraps malloc) */
56
57
void *
58
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
59
194M
{
60
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
61
       for malloc(0), which would be treated as an error. Some platforms would
62
       return a pointer with no memory behind it, which would break pymalloc.
63
       To solve these problems, allocate an extra byte. */
64
194M
    if (size == 0)
65
70.1k
        size = 1;
66
194M
    return malloc(size);
67
194M
}
68
69
void *
70
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
71
141k
{
72
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
73
       for calloc(0, 0), which would be treated as an error. Some platforms
74
       would return a pointer with no memory behind it, which would break
75
       pymalloc.  To solve these problems, allocate an extra byte. */
76
141k
    if (nelem == 0 || elsize == 0) {
77
671
        nelem = 1;
78
671
        elsize = 1;
79
671
    }
80
141k
    return calloc(nelem, elsize);
81
141k
}
82
83
void *
84
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
85
6.85M
{
86
6.85M
    if (size == 0)
87
0
        size = 1;
88
6.85M
    return realloc(ptr, size);
89
6.85M
}
90
91
void
92
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
93
194M
{
94
194M
    free(ptr);
95
194M
}
96
97
#ifdef WITH_MIMALLOC
98
99
static void
100
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
101
0
{
102
#ifdef Py_GIL_DISABLED
103
    // Clear the QSBR goal and remove the page from the QSBR linked list.
104
    page->qsbr_goal = 0;
105
    if (page->qsbr_node.next != NULL) {
106
        llist_remove(&page->qsbr_node);
107
    }
108
#endif
109
0
}
110
111
// Check if an empty, newly reclaimed page is safe to free now.
112
static bool
113
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
114
0
{
115
0
    assert(mi_page_all_free(page));
116
#ifdef Py_GIL_DISABLED
117
    assert(page->qsbr_node.next == NULL);
118
    if (page->use_qsbr && page->qsbr_goal != 0) {
119
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
120
        if (tstate == NULL) {
121
            return false;
122
        }
123
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
124
    }
125
#endif
126
0
    return true;
127
128
0
}
129
130
#ifdef Py_GIL_DISABLED
131
132
// If we are deferring collection of more than this amount of memory for
133
// mimalloc pages, advance the write sequence.  Advancing allows these
134
// pages to be re-used in a different thread or for a different size class.
135
#define QSBR_PAGE_MEM_LIMIT 4096*20
136
137
// Return true if the global write sequence should be advanced for a mimalloc
138
// page that is deferred from collection.
139
static bool
140
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
141
{
142
    size_t bsize = mi_page_block_size(page);
143
    size_t page_size = page->capacity*bsize;
144
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
145
        qsbr->deferred_page_memory = 0;
146
        return true;
147
    }
148
    qsbr->deferred_page_memory += page_size;
149
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
150
        qsbr->deferred_page_memory = 0;
151
        return true;
152
    }
153
    return false;
154
}
155
156
static _PyThreadStateImpl *
157
tstate_from_heap(mi_heap_t *heap)
158
{
159
    return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
160
}
161
#endif
162
163
static bool
164
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
165
0
{
166
#ifdef Py_GIL_DISABLED
167
    assert(mi_page_all_free(page));
168
    if (page->use_qsbr) {
169
        struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
170
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
171
            _PyMem_mi_page_clear_qsbr(page);
172
            _mi_page_free(page, pq, force);
173
            return true;
174
        }
175
176
        // gh-145615: since we are not freeing this page yet, we want to
177
        // make it available for allocations. Note that the QSBR goal and
178
        // linked list node remain set even if the page is later used for
179
        // an allocation. We only detect and clear the QSBR goal when the
180
        // page becomes empty again (used == 0).
181
        if (mi_page_is_in_full(page)) {
182
            _mi_page_unfull(page);
183
        }
184
185
        _PyMem_mi_page_clear_qsbr(page);
186
        page->retire_expire = 0;
187
188
        if (should_advance_qsbr_for_page(qsbr, page)) {
189
            page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
190
        }
191
        else {
192
            page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
193
        }
194
195
        // We may be freeing a page belonging to a different thread during a
196
        // stop-the-world event. Find the _PyThreadStateImpl for the page.
197
        _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
198
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
199
        return false;
200
    }
201
#endif
202
0
    _mi_page_free(page, pq, force);
203
0
    return true;
204
0
}
205
206
static void
207
_PyMem_mi_page_reclaimed(mi_page_t *page)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    assert(page->qsbr_node.next == NULL);
211
    if (page->qsbr_goal != 0) {
212
        if (mi_page_all_free(page)) {
213
            assert(page->qsbr_node.next == NULL);
214
            _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
215
            assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
216
            page->retire_expire = 0;
217
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
218
        }
219
        else {
220
            page->qsbr_goal = 0;
221
        }
222
    }
223
#endif
224
0
}
225
226
static void
227
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
228
0
{
229
#ifdef Py_GIL_DISABLED
230
    if (!heap->page_use_qsbr) {
231
        return;
232
    }
233
234
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
235
    struct llist_node *head = &tstate->mimalloc.page_list;
236
    if (llist_empty(head)) {
237
        return;
238
    }
239
240
    struct llist_node *node;
241
    llist_for_each_safe(node, head) {
242
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
243
        if (!mi_page_all_free(page)) {
244
            // We allocated from this page some point after the delayed free
245
            _PyMem_mi_page_clear_qsbr(page);
246
            continue;
247
        }
248
249
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
250
            return;
251
        }
252
253
        _PyMem_mi_page_clear_qsbr(page);
254
        _mi_page_free(page, mi_page_queue_of(page), false);
255
    }
256
#endif
257
0
}
258
259
void *
260
_PyMem_MiMalloc(void *ctx, size_t size)
261
0
{
262
#ifdef Py_GIL_DISABLED
263
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
264
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
265
    return mi_heap_malloc(heap, size);
266
#else
267
0
    return mi_malloc(size);
268
0
#endif
269
0
}
270
271
void *
272
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
273
0
{
274
#ifdef Py_GIL_DISABLED
275
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
276
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
277
    return mi_heap_calloc(heap, nelem, elsize);
278
#else
279
0
    return mi_calloc(nelem, elsize);
280
0
#endif
281
0
}
282
283
void *
284
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
285
0
{
286
#ifdef Py_GIL_DISABLED
287
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
288
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
289
    return mi_heap_realloc(heap, ptr, size);
290
#else
291
0
    return mi_realloc(ptr, size);
292
0
#endif
293
0
}
294
295
void
296
_PyMem_MiFree(void *ctx, void *ptr)
297
0
{
298
0
    mi_free(ptr);
299
0
}
300
301
void *
302
_PyObject_MiMalloc(void *ctx, size_t nbytes)
303
0
{
304
#ifdef Py_GIL_DISABLED
305
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
306
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
307
    return mi_heap_malloc(heap, nbytes);
308
#else
309
0
    return mi_malloc(nbytes);
310
0
#endif
311
0
}
312
313
void *
314
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
315
0
{
316
#ifdef Py_GIL_DISABLED
317
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    return mi_heap_calloc(heap, nelem, elsize);
320
#else
321
0
    return mi_calloc(nelem, elsize);
322
0
#endif
323
0
}
324
325
326
void *
327
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
328
0
{
329
#ifdef Py_GIL_DISABLED
330
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
331
    // Implement our own realloc logic so that we can copy PyObject header
332
    // in a thread-safe way.
333
    size_t size = mi_usable_size(ptr);
334
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
335
        return ptr;
336
    }
337
338
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
339
    void* newp = mi_heap_malloc(heap, nbytes);
340
    if (newp == NULL) {
341
        return NULL;
342
    }
343
344
    // Free threaded Python allows access from other threads to the PyObject reference count
345
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
346
    // These fields are typically initialized by PyObject_Init() using relaxed
347
    // atomic stores. We need to copy these fields in a thread-safe way here.
348
    // We use the "debug_offset" to determine how many bytes to copy -- it
349
    // includes the PyObject header and plus any extra pre-headers.
350
    size_t offset = heap->debug_offset;
351
    assert(offset % sizeof(void*) == 0);
352
353
    size_t copy_size = (size < nbytes ? size : nbytes);
354
    if (copy_size >= offset) {
355
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
356
            // Use memcpy to avoid strict-aliasing issues. However, we probably
357
            // still have unavoidable strict-aliasing issues with
358
            // _Py_atomic_store_ptr_relaxed here.
359
            void *word;
360
            memcpy(&word, (char*)ptr + i, sizeof(void*));
361
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
362
        }
363
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
364
    }
365
    else {
366
        // memcpy(dst, NULL, 0) is undefined behavior. See gh-151297.
367
        if mi_likely(ptr) {
368
            _mi_memcpy(newp, ptr, copy_size);
369
        }
370
    }
371
    mi_free(ptr);
372
    return newp;
373
#else
374
0
    return mi_realloc(ptr, nbytes);
375
0
#endif
376
0
}
377
378
void
379
_PyObject_MiFree(void *ctx, void *ptr)
380
0
{
381
0
    mi_free(ptr);
382
0
}
383
384
void *
385
_PyMem_MiRawMalloc(void *ctx, size_t size)
386
0
{
387
0
    return mi_malloc(size);
388
0
}
389
390
void *
391
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
392
0
{
393
0
    return mi_calloc(nelem, elsize);
394
0
}
395
396
void *
397
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
398
0
{
399
0
    return mi_realloc(ptr, size);
400
0
}
401
402
void
403
_PyMem_MiRawFree(void *ctx, void *ptr)
404
0
{
405
0
    mi_free(ptr);
406
0
}
407
#endif // WITH_MIMALLOC
408
409
410
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
411
412
413
#ifdef WITH_MIMALLOC
414
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
415
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
416
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
417
#endif
418
419
/* the pymalloc allocator */
420
421
// The actual implementation is further down.
422
423
#if defined(WITH_PYMALLOC)
424
void* _PyObject_Malloc(void *ctx, size_t size);
425
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
426
void _PyObject_Free(void *ctx, void *p);
427
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
428
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
429
#endif  // WITH_PYMALLOC
430
431
#if defined(Py_GIL_DISABLED)
432
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
433
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
434
#  define PYMEM_ALLOC MIMALLOC_ALLOC
435
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
436
#elif defined(WITH_PYMALLOC)
437
0
#  define PYRAW_ALLOC MALLOC_ALLOC
438
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
439
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
440
#else
441
#  define PYRAW_ALLOC MALLOC_ALLOC
442
#  define PYMEM_ALLOC MALLOC_ALLOC
443
#  define PYOBJ_ALLOC MALLOC_ALLOC
444
#endif
445
446
447
/* the default debug allocators */
448
449
// The actual implementation is further down.
450
451
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
452
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
453
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
454
void _PyMem_DebugRawFree(void *ctx, void *ptr);
455
456
void* _PyMem_DebugMalloc(void *ctx, size_t size);
457
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
458
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
459
void _PyMem_DebugFree(void *ctx, void *p);
460
461
#define PYDBGRAW_ALLOC \
462
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
463
#define PYDBGMEM_ALLOC \
464
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
465
#define PYDBGOBJ_ALLOC \
466
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
467
468
/* default raw allocator (not swappable) */
469
470
void *
471
_PyMem_DefaultRawMalloc(size_t size)
472
288
{
473
#ifdef Py_DEBUG
474
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
475
#else
476
288
    return _PyMem_RawMalloc(NULL, size);
477
288
#endif
478
288
}
479
480
void *
481
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
482
0
{
483
#ifdef Py_DEBUG
484
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
485
#else
486
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
487
0
#endif
488
0
}
489
490
void *
491
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
492
0
{
493
#ifdef Py_DEBUG
494
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
495
#else
496
0
    return _PyMem_RawRealloc(NULL, ptr, size);
497
0
#endif
498
0
}
499
500
void
501
_PyMem_DefaultRawFree(void *ptr)
502
324
{
503
#ifdef Py_DEBUG
504
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
505
#else
506
324
    _PyMem_RawFree(NULL, ptr);
507
324
#endif
508
324
}
509
510
wchar_t*
511
_PyMem_DefaultRawWcsdup(const wchar_t *str)
512
216
{
513
216
    assert(str != NULL);
514
515
216
    size_t len = wcslen(str);
516
216
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
517
0
        return NULL;
518
0
    }
519
520
216
    size_t size = (len + 1) * sizeof(wchar_t);
521
216
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
522
216
    if (str2 == NULL) {
523
0
        return NULL;
524
0
    }
525
526
216
    memcpy(str2, str, size);
527
216
    return str2;
528
216
}
529
530
/* the low-level virtual memory allocator */
531
532
#ifdef WITH_PYMALLOC
533
#  ifdef MS_WINDOWS
534
#    include <windows.h>
535
#  elif defined(HAVE_MMAP)
536
#    include <sys/mman.h>
537
#    ifdef MAP_ANONYMOUS
538
#      define ARENAS_USE_MMAP
539
#    endif
540
#  endif
541
#endif
542
543
/* Return the system's default huge page size in bytes, or 0 if it
544
 * cannot be determined.  The result is cached after the first call.
545
 *
546
 * This is Linux-only (/proc/meminfo).  On other systems that define
547
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
548
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
549
static size_t
550
_pymalloc_system_hugepage_size(void)
551
{
552
    static size_t hp_size = 0;
553
    static int initialized = 0;
554
555
    if (initialized) {
556
        return hp_size;
557
    }
558
559
#ifdef __linux__
560
    FILE *f = fopen("/proc/meminfo", "r");
561
    if (f != NULL) {
562
        char line[256];
563
        while (fgets(line, sizeof(line), f)) {
564
            unsigned long size_kb;
565
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
566
                hp_size = (size_t)size_kb * 1024;
567
                break;
568
            }
569
        }
570
        fclose(f);
571
    }
572
#endif
573
574
    initialized = 1;
575
    return hp_size;
576
}
577
#endif
578
579
#if (defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)) || \
580
    (defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB))
581
static size_t
582
_pymalloc_round_up_to_multiple(size_t size, size_t multiple)
583
{
584
    if (multiple == 0 || size == 0) {
585
        return size;
586
    }
587
588
    size_t remainder = size % multiple;
589
    if (remainder == 0) {
590
        return size;
591
    }
592
593
    size_t padding = multiple - remainder;
594
    if (size > SIZE_MAX - padding) {
595
        return 0;
596
    }
597
    return size + padding;
598
}
599
#endif
600
601
static size_t
602
_pymalloc_virtual_alloc_size(size_t size)
603
53.7k
{
604
#if defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)
605
    if (_PyRuntime.allocators.use_hugepages) {
606
        SIZE_T large_page_size = GetLargePageMinimum();
607
        if (large_page_size > 0) {
608
            return _pymalloc_round_up_to_multiple(size, (size_t)large_page_size);
609
        }
610
    }
611
#elif defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
612
    if (_PyRuntime.allocators.use_hugepages) {
613
        size_t hp_size = _pymalloc_system_hugepage_size();
614
        if (hp_size > 0) {
615
            return _pymalloc_round_up_to_multiple(size, hp_size);
616
        }
617
    }
618
#endif
619
53.7k
    return size;
620
53.7k
}
621
622
void *
623
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
624
33.8k
{
625
#ifdef MS_WINDOWS
626
#  ifdef PYMALLOC_USE_HUGEPAGES
627
    if (_PyRuntime.allocators.use_hugepages) {
628
        SIZE_T lp_size = GetLargePageMinimum();
629
        if (lp_size > 0 && size % lp_size == 0) {
630
            void *ptr = VirtualAlloc(NULL, size,
631
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
632
                            PAGE_READWRITE);
633
            if (ptr != NULL)
634
                return ptr;
635
        }
636
    }
637
    /* Fall back to regular pages */
638
#  endif
639
    return VirtualAlloc(NULL, size,
640
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
641
#elif defined(ARENAS_USE_MMAP)
642
    void *ptr;
643
#  ifdef PYMALLOC_USE_HUGEPAGES
644
#    ifdef MAP_HUGETLB
645
    if (_PyRuntime.allocators.use_hugepages) {
646
        size_t hp_size = _pymalloc_system_hugepage_size();
647
        /* Only use huge pages if the arena size is a multiple of the
648
         * system's default huge page size.  When the arena is smaller
649
         * than the huge page, mmap still succeeds but silently
650
         * allocates an entire huge page; the subsequent munmap with
651
         * the smaller arena size then fails with EINVAL, leaking
652
         * all of that memory. */
653
        if (hp_size > 0 && size % hp_size == 0) {
654
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
655
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
656
            if (ptr != MAP_FAILED) {
657
                assert(ptr != NULL);
658
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
659
                return ptr;
660
            }
661
        }
662
    }
663
    /* Fall back to regular pages */
664
#    endif
665
#  endif
666
33.8k
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
667
33.8k
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
668
33.8k
    if (ptr == MAP_FAILED)
669
0
        return NULL;
670
33.8k
    assert(ptr != NULL);
671
33.8k
#ifdef MADV_HUGEPAGE
672
33.8k
    (void)madvise(ptr, size, MADV_HUGEPAGE);
673
33.8k
#endif
674
33.8k
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
675
33.8k
    return ptr;
676
#else
677
    return malloc(size);
678
#endif
679
33.8k
}
680
681
void
682
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
683
#if defined(ARENAS_USE_MMAP)
684
    size_t size
685
#else
686
    size_t Py_UNUSED(size)
687
#endif
688
)
689
33.4k
{
690
#ifdef MS_WINDOWS
691
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
692
    if (ptr == NULL) {
693
        return;
694
    }
695
    VirtualFree(ptr, 0, MEM_RELEASE);
696
#elif defined(ARENAS_USE_MMAP)
697
    /* Unlike free(), munmap() does not special-case NULL to noop. */
698
33.4k
    if (ptr == NULL) {
699
0
        return;
700
0
    }
701
33.4k
    if (munmap(ptr, size) < 0) {
702
0
        _Py_FatalErrorFormat(__func__,
703
0
                             "munmap(%p, %zu) failed with errno %d",
704
0
                             ptr, size, errno);
705
0
    }
706
#else
707
    free(ptr);
708
#endif
709
33.4k
}
710
711
/*******************************************/
712
/* end low-level allocator implementations */
713
/*******************************************/
714
715
716
72
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
717
791M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
718
2.02G
#define _PyMem (_PyRuntime.allocators.standard.mem)
719
5.89G
#define _PyObject (_PyRuntime.allocators.standard.obj)
720
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
721
134k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
722
723
724
/***************************/
725
/* managing the allocators */
726
/***************************/
727
728
static int
729
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
730
                               PyMemAllocatorEx *old_alloc)
731
0
{
732
0
    if (old_alloc != NULL) {
733
0
        get_allocator_unlocked(domain, old_alloc);
734
0
    }
735
736
737
0
    PyMemAllocatorEx new_alloc;
738
0
    switch(domain)
739
0
    {
740
0
    case PYMEM_DOMAIN_RAW:
741
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
742
0
        break;
743
0
    case PYMEM_DOMAIN_MEM:
744
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
745
0
        break;
746
0
    case PYMEM_DOMAIN_OBJ:
747
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
748
0
        break;
749
0
    default:
750
        /* unknown domain */
751
0
        return -1;
752
0
    }
753
0
    set_allocator_unlocked(domain, &new_alloc);
754
0
    if (debug) {
755
0
        set_up_debug_hooks_domain_unlocked(domain);
756
0
    }
757
0
    return 0;
758
0
}
759
760
761
#ifdef Py_DEBUG
762
static const int pydebug = 1;
763
#else
764
static const int pydebug = 0;
765
#endif
766
767
int
768
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
769
0
{
770
0
    if (name == NULL || *name == '\0') {
771
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
772
           nameions): use default memory allocators */
773
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
774
0
    }
775
0
    else if (strcmp(name, "default") == 0) {
776
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
777
0
    }
778
0
    else if (strcmp(name, "debug") == 0) {
779
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
780
0
    }
781
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
782
0
    else if (strcmp(name, "pymalloc") == 0) {
783
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
784
0
    }
785
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
786
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
787
0
    }
788
0
#endif
789
0
#ifdef WITH_MIMALLOC
790
0
    else if (strcmp(name, "mimalloc") == 0) {
791
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
792
0
    }
793
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
794
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
795
0
    }
796
0
#endif
797
0
#ifndef Py_GIL_DISABLED
798
0
    else if (strcmp(name, "malloc") == 0) {
799
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
800
0
    }
801
0
    else if (strcmp(name, "malloc_debug") == 0) {
802
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
803
0
    }
804
0
#endif
805
0
    else {
806
        /* unknown allocator */
807
0
        return -1;
808
0
    }
809
0
    return 0;
810
0
}
811
812
813
static int
814
set_up_allocators_unlocked(PyMemAllocatorName allocator)
815
0
{
816
0
    switch (allocator) {
817
0
    case PYMEM_ALLOCATOR_NOT_SET:
818
        /* do nothing */
819
0
        break;
820
821
0
    case PYMEM_ALLOCATOR_DEFAULT:
822
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
823
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
824
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
825
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
826
0
        break;
827
828
0
    case PYMEM_ALLOCATOR_DEBUG:
829
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
830
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
831
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
832
0
        _PyRuntime.allocators.is_debug_enabled = 1;
833
0
        break;
834
835
0
#ifdef WITH_PYMALLOC
836
0
    case PYMEM_ALLOCATOR_PYMALLOC:
837
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
838
0
    {
839
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
840
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
841
842
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
843
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
844
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
845
846
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
847
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
848
0
        if (is_debug) {
849
0
            set_up_debug_hooks_unlocked();
850
0
        }
851
0
        break;
852
0
    }
853
0
#endif
854
0
#ifdef WITH_MIMALLOC
855
0
    case PYMEM_ALLOCATOR_MIMALLOC:
856
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
857
0
    {
858
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
859
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
860
861
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
862
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
863
864
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
865
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
866
867
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
868
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
869
0
        if (is_debug) {
870
0
            set_up_debug_hooks_unlocked();
871
0
        }
872
873
0
        break;
874
0
    }
875
0
#endif
876
877
0
    case PYMEM_ALLOCATOR_MALLOC:
878
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
879
0
    {
880
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
881
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
882
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
883
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
884
885
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
886
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
887
0
        if (is_debug) {
888
0
            set_up_debug_hooks_unlocked();
889
0
        }
890
0
        break;
891
0
    }
892
893
0
    default:
894
        /* unknown allocator */
895
0
        return -1;
896
0
    }
897
898
0
    return 0;
899
0
}
900
901
int
902
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
903
0
{
904
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
905
0
    int res = set_up_allocators_unlocked(allocator);
906
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
907
0
    return res;
908
0
}
909
910
911
static int
912
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
913
0
{
914
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
915
0
}
916
917
918
static const char*
919
get_current_allocator_name_unlocked(void)
920
0
{
921
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
922
0
#ifdef WITH_PYMALLOC
923
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
924
0
#endif
925
0
#ifdef WITH_MIMALLOC
926
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
927
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
928
0
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
929
0
#endif
930
931
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
932
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
933
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
934
0
    {
935
0
        return "malloc";
936
0
    }
937
0
#ifdef WITH_PYMALLOC
938
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
939
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
940
0
        pymemallocator_eq(&_PyObject, &pymalloc))
941
0
    {
942
0
        return "pymalloc";
943
0
    }
944
0
#endif
945
0
#ifdef WITH_MIMALLOC
946
0
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
947
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
948
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
949
0
    {
950
0
        return "mimalloc";
951
0
    }
952
0
#endif
953
954
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
955
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
956
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
957
958
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
959
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
960
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
961
0
    {
962
        /* Debug hooks installed */
963
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
964
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
965
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
966
0
        {
967
0
            return "malloc_debug";
968
0
        }
969
0
#ifdef WITH_PYMALLOC
970
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
971
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
972
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
973
0
        {
974
0
            return "pymalloc_debug";
975
0
        }
976
0
#endif
977
0
#ifdef WITH_MIMALLOC
978
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
979
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
980
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
981
0
        {
982
0
            return "mimalloc_debug";
983
0
        }
984
0
#endif
985
0
    }
986
0
    return NULL;
987
0
}
988
989
const char*
990
_PyMem_GetCurrentAllocatorName(void)
991
0
{
992
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
993
0
    const char *name = get_current_allocator_name_unlocked();
994
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
995
0
    return name;
996
0
}
997
998
999
int
1000
_PyMem_DebugEnabled(void)
1001
0
{
1002
0
    return _PyRuntime.allocators.is_debug_enabled;
1003
0
}
1004
1005
#ifdef WITH_PYMALLOC
1006
static int
1007
_PyMem_PymallocEnabled(void)
1008
0
{
1009
0
    if (_PyMem_DebugEnabled()) {
1010
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
1011
0
    }
1012
0
    else {
1013
0
        return (_PyObject.malloc == _PyObject_Malloc);
1014
0
    }
1015
0
}
1016
1017
#ifdef WITH_MIMALLOC
1018
static int
1019
_PyMem_MimallocEnabled(void)
1020
0
{
1021
#ifdef Py_GIL_DISABLED
1022
    return 1;
1023
#else
1024
0
    if (_PyMem_DebugEnabled()) {
1025
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
1026
0
    }
1027
0
    else {
1028
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
1029
0
    }
1030
0
#endif
1031
0
}
1032
#endif  // WITH_MIMALLOC
1033
1034
#endif  // WITH_PYMALLOC
1035
1036
1037
static void
1038
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
1039
0
{
1040
0
    PyMemAllocatorEx alloc;
1041
1042
0
    if (domain == PYMEM_DOMAIN_RAW) {
1043
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
1044
0
            return;
1045
0
        }
1046
1047
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
1048
0
        alloc.ctx = &_PyMem_Debug.raw;
1049
0
        alloc.malloc = _PyMem_DebugRawMalloc;
1050
0
        alloc.calloc = _PyMem_DebugRawCalloc;
1051
0
        alloc.realloc = _PyMem_DebugRawRealloc;
1052
0
        alloc.free = _PyMem_DebugRawFree;
1053
0
        set_allocator_unlocked(domain, &alloc);
1054
0
    }
1055
0
    else if (domain == PYMEM_DOMAIN_MEM) {
1056
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
1057
0
            return;
1058
0
        }
1059
1060
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
1061
0
        alloc.ctx = &_PyMem_Debug.mem;
1062
0
        alloc.malloc = _PyMem_DebugMalloc;
1063
0
        alloc.calloc = _PyMem_DebugCalloc;
1064
0
        alloc.realloc = _PyMem_DebugRealloc;
1065
0
        alloc.free = _PyMem_DebugFree;
1066
0
        set_allocator_unlocked(domain, &alloc);
1067
0
    }
1068
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
1069
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
1070
0
            return;
1071
0
        }
1072
1073
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1074
0
        alloc.ctx = &_PyMem_Debug.obj;
1075
0
        alloc.malloc = _PyMem_DebugMalloc;
1076
0
        alloc.calloc = _PyMem_DebugCalloc;
1077
0
        alloc.realloc = _PyMem_DebugRealloc;
1078
0
        alloc.free = _PyMem_DebugFree;
1079
0
        set_allocator_unlocked(domain, &alloc);
1080
0
    }
1081
0
}
1082
1083
1084
static void
1085
set_up_debug_hooks_unlocked(void)
1086
0
{
1087
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1088
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1089
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1090
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1091
0
}
1092
1093
void
1094
PyMem_SetupDebugHooks(void)
1095
0
{
1096
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1097
0
    set_up_debug_hooks_unlocked();
1098
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1099
0
}
1100
1101
static void
1102
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1103
36
{
1104
36
    switch(domain)
1105
36
    {
1106
36
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1107
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1108
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1109
0
    default:
1110
        /* unknown domain: set all attributes to NULL */
1111
0
        allocator->ctx = NULL;
1112
0
        allocator->malloc = NULL;
1113
0
        allocator->calloc = NULL;
1114
0
        allocator->realloc = NULL;
1115
0
        allocator->free = NULL;
1116
36
    }
1117
36
}
1118
1119
static void
1120
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1121
0
{
1122
0
    switch(domain)
1123
0
    {
1124
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1125
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1126
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1127
    /* ignore unknown domain */
1128
0
    }
1129
0
}
1130
1131
void
1132
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1133
36
{
1134
36
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1135
36
    get_allocator_unlocked(domain, allocator);
1136
36
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1137
36
}
1138
1139
void
1140
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1141
0
{
1142
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1143
0
    set_allocator_unlocked(domain, allocator);
1144
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1145
0
}
1146
1147
void
1148
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1149
0
{
1150
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1151
0
    *allocator = _PyObject_Arena;
1152
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1153
0
}
1154
1155
void
1156
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1157
0
{
1158
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1159
0
    _PyObject_Arena = *allocator;
1160
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1161
0
}
1162
1163
1164
/* Note that there is a possible, but very unlikely, race in any place
1165
 * below where we call one of the allocator functions.  We access two
1166
 * fields in each case:  "malloc", etc. and "ctx".
1167
 *
1168
 * It is unlikely that the allocator will be changed while one of those
1169
 * calls is happening, much less in that very narrow window.
1170
 * Furthermore, the likelihood of a race is drastically reduced by the
1171
 * fact that the allocator may not be changed after runtime init
1172
 * (except with a wrapper).
1173
 *
1174
 * With the above in mind, we currently don't worry about locking
1175
 * around these uses of the runtime-global allocators state. */
1176
1177
1178
/*************************/
1179
/* the "arena" allocator */
1180
/*************************/
1181
1182
void *
1183
_PyObject_VirtualAlloc(size_t size)
1184
26.8k
{
1185
26.8k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1186
26.8k
    if (alloc_size == 0 && size != 0) {
1187
0
        return NULL;
1188
0
    }
1189
26.8k
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, alloc_size);
1190
26.8k
}
1191
1192
void
1193
_PyObject_VirtualFree(void *obj, size_t size)
1194
26.8k
{
1195
26.8k
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1196
26.8k
    assert(alloc_size != 0 || size == 0);
1197
26.8k
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, alloc_size);
1198
26.8k
}
1199
1200
1201
/***********************/
1202
/* the "raw" allocator */
1203
/***********************/
1204
1205
void *
1206
PyMem_RawMalloc(size_t size)
1207
194M
{
1208
    /*
1209
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1210
     * Most python internals blindly use a signed Py_ssize_t to track
1211
     * things without checking for overflows or negatives.
1212
     * As size_t is unsigned, checking for size < 0 is not required.
1213
     */
1214
194M
    if (size > (size_t)PY_SSIZE_T_MAX)
1215
0
        return NULL;
1216
194M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1217
194M
}
1218
1219
void *
1220
PyMem_RawCalloc(size_t nelem, size_t elsize)
1221
141k
{
1222
    /* see PyMem_RawMalloc() */
1223
141k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1224
0
        return NULL;
1225
141k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1226
141k
}
1227
1228
void*
1229
PyMem_RawRealloc(void *ptr, size_t new_size)
1230
6.85M
{
1231
    /* see PyMem_RawMalloc() */
1232
6.85M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1233
0
        return NULL;
1234
6.85M
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1235
6.85M
}
1236
1237
void PyMem_RawFree(void *ptr)
1238
194M
{
1239
194M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1240
194M
}
1241
1242
1243
/***********************/
1244
/* the "mem" allocator */
1245
/***********************/
1246
1247
void *
1248
PyMem_Malloc(size_t size)
1249
204M
{
1250
    /* see PyMem_RawMalloc() */
1251
204M
    if (size > (size_t)PY_SSIZE_T_MAX)
1252
0
        return NULL;
1253
204M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1254
204M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1255
204M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1256
204M
    OBJECT_STAT_INC(allocations);
1257
204M
    return _PyMem.malloc(_PyMem.ctx, size);
1258
204M
}
1259
1260
void *
1261
PyMem_Calloc(size_t nelem, size_t elsize)
1262
42.7M
{
1263
    /* see PyMem_RawMalloc() */
1264
42.7M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1265
0
        return NULL;
1266
42.7M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1267
42.7M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1268
42.7M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1269
42.7M
    OBJECT_STAT_INC(allocations);
1270
42.7M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1271
42.7M
}
1272
1273
void *
1274
PyMem_Realloc(void *ptr, size_t new_size)
1275
256M
{
1276
    /* see PyMem_RawMalloc() */
1277
256M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1278
0
        return NULL;
1279
256M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1280
256M
}
1281
1282
void
1283
PyMem_Free(void *ptr)
1284
509M
{
1285
509M
    OBJECT_STAT_INC(frees);
1286
509M
    _PyMem.free(_PyMem.ctx, ptr);
1287
509M
}
1288
1289
1290
/***************************/
1291
/* pymem utility functions */
1292
/***************************/
1293
1294
wchar_t*
1295
_PyMem_RawWcsdup(const wchar_t *str)
1296
1.44k
{
1297
1.44k
    assert(str != NULL);
1298
1299
1.44k
    size_t len = wcslen(str);
1300
1.44k
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1301
0
        return NULL;
1302
0
    }
1303
1304
1.44k
    size_t size = (len + 1) * sizeof(wchar_t);
1305
1.44k
    wchar_t *str2 = PyMem_RawMalloc(size);
1306
1.44k
    if (str2 == NULL) {
1307
0
        return NULL;
1308
0
    }
1309
1310
1.44k
    memcpy(str2, str, size);
1311
1.44k
    return str2;
1312
1.44k
}
1313
1314
char *
1315
_PyMem_RawStrdup(const char *str)
1316
108
{
1317
108
    assert(str != NULL);
1318
108
    size_t size = strlen(str) + 1;
1319
108
    char *copy = PyMem_RawMalloc(size);
1320
108
    if (copy == NULL) {
1321
0
        return NULL;
1322
0
    }
1323
108
    memcpy(copy, str, size);
1324
108
    return copy;
1325
108
}
1326
1327
char *
1328
_PyMem_Strdup(const char *str)
1329
0
{
1330
0
    assert(str != NULL);
1331
0
    size_t size = strlen(str) + 1;
1332
0
    char *copy = PyMem_Malloc(size);
1333
0
    if (copy == NULL) {
1334
0
        return NULL;
1335
0
    }
1336
0
    memcpy(copy, str, size);
1337
0
    return copy;
1338
0
}
1339
1340
/***********************************************/
1341
/* Delayed freeing support for Py_GIL_DISABLED */
1342
/***********************************************/
1343
1344
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1345
#define WORK_ITEMS_PER_CHUNK 254
1346
1347
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1348
struct _mem_work_item {
1349
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1350
    uint64_t qsbr_goal;
1351
};
1352
1353
// A fixed-size buffer of pointers to be freed
1354
struct _mem_work_chunk {
1355
    // Linked list node of chunks in queue
1356
    struct llist_node node;
1357
1358
    Py_ssize_t rd_idx;  // index of next item to read
1359
    Py_ssize_t wr_idx;  // index of next item to write
1360
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1361
};
1362
1363
static int
1364
work_item_should_decref(uintptr_t ptr)
1365
0
{
1366
0
    return ptr & 0x01;
1367
0
}
1368
1369
static void
1370
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1371
0
{
1372
0
    if (work_item_should_decref(ptr)) {
1373
0
        PyObject *obj = (PyObject *)(ptr - 1);
1374
#ifdef Py_GIL_DISABLED
1375
        if (cb == NULL) {
1376
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1377
            Py_DECREF(obj);
1378
            return;
1379
        }
1380
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1381
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1382
        if (refcount == 0) {
1383
            cb(obj, state);
1384
        }
1385
#else
1386
0
        Py_DECREF(obj);
1387
0
#endif
1388
0
    }
1389
0
    else {
1390
0
        PyMem_Free((void *)ptr);
1391
0
    }
1392
0
}
1393
1394
1395
#ifdef Py_GIL_DISABLED
1396
1397
// For deferred advance on free: the number of deferred items before advancing
1398
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1399
// want to process a chunk before it overflows.
1400
#define QSBR_DEFERRED_LIMIT 127
1401
1402
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1403
// helps limit memory usage due to QSBR delaying frees too long.
1404
#define QSBR_FREE_MEM_LIMIT 1024*1024
1405
1406
// Return true if the global write sequence should be advanced for a deferred
1407
// memory free.
1408
static bool
1409
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1410
{
1411
    if (size > QSBR_FREE_MEM_LIMIT) {
1412
        qsbr->deferred_count = 0;
1413
        qsbr->deferred_memory = 0;
1414
        qsbr->should_process = true;
1415
        return true;
1416
    }
1417
    qsbr->deferred_count++;
1418
    qsbr->deferred_memory += size;
1419
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1420
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1421
        qsbr->deferred_count = 0;
1422
        qsbr->deferred_memory = 0;
1423
        qsbr->should_process = true;
1424
        return true;
1425
    }
1426
    return false;
1427
}
1428
#endif
1429
1430
static void
1431
free_delayed(uintptr_t ptr, size_t size)
1432
0
{
1433
0
#ifndef Py_GIL_DISABLED
1434
0
    free_work_item(ptr, NULL, NULL);
1435
#else
1436
    PyInterpreterState *interp = _PyInterpreterState_GET();
1437
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1438
        interp->stoptheworld.world_stopped)
1439
    {
1440
        // Free immediately during interpreter shutdown or if the world is
1441
        // stopped.
1442
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1443
        free_work_item(ptr, NULL, NULL);
1444
        return;
1445
    }
1446
1447
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1448
    struct llist_node *head = &tstate->mem_free_queue;
1449
1450
    struct _mem_work_chunk *buf = NULL;
1451
    if (!llist_empty(head)) {
1452
        // Try to re-use the last buffer
1453
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1454
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1455
            // already full
1456
            buf = NULL;
1457
        }
1458
    }
1459
1460
    if (buf == NULL) {
1461
        buf = PyMem_Calloc(1, sizeof(*buf));
1462
        if (buf != NULL) {
1463
            llist_insert_tail(head, &buf->node);
1464
        }
1465
    }
1466
1467
    if (buf == NULL) {
1468
        // failed to allocate a buffer, free immediately
1469
        PyObject *to_dealloc = NULL;
1470
        _PyEval_StopTheWorld(tstate->base.interp);
1471
        if (work_item_should_decref(ptr)) {
1472
            PyObject *obj = (PyObject *)(ptr - 1);
1473
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1474
            if (refcount == 0) {
1475
                to_dealloc = obj;
1476
            }
1477
        }
1478
        else {
1479
            PyMem_Free((void *)ptr);
1480
        }
1481
        _PyEval_StartTheWorld(tstate->base.interp);
1482
        if (to_dealloc != NULL) {
1483
            _Py_Dealloc(to_dealloc);
1484
        }
1485
        return;
1486
    }
1487
1488
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1489
    uint64_t seq;
1490
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1491
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1492
    }
1493
    else {
1494
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1495
    }
1496
    buf->array[buf->wr_idx].ptr = ptr;
1497
    buf->array[buf->wr_idx].qsbr_goal = seq;
1498
    buf->wr_idx++;
1499
1500
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1501
        // Normally the processing of delayed items is done from the eval
1502
        // breaker.  Processing here is a safety measure to ensure too much
1503
        // work does not accumulate.
1504
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1505
    }
1506
#endif
1507
0
}
1508
1509
void
1510
_PyMem_FreeDelayed(void *ptr, size_t size)
1511
0
{
1512
0
    assert(!((uintptr_t)ptr & 0x01));
1513
0
    if (ptr != NULL) {
1514
0
        free_delayed((uintptr_t)ptr, size);
1515
0
    }
1516
0
}
1517
1518
#ifdef Py_GIL_DISABLED
1519
void
1520
_PyObject_XDecRefDelayed(PyObject *ptr)
1521
{
1522
    assert(!((uintptr_t)ptr & 0x01));
1523
    if (ptr != NULL) {
1524
        // We use 0 as the size since we don't have an easy way to know the
1525
        // actual size.  If we are freeing many objects, the write sequence
1526
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1527
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1528
    }
1529
}
1530
#endif
1531
1532
#ifdef Py_GIL_DISABLED
1533
void
1534
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1535
{
1536
    PyObject *old = *ptr;
1537
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1538
    if (old == NULL) {
1539
        return;
1540
    }
1541
    if (!_Py_IsImmortal(old)) {
1542
         _PyObject_XDecRefDelayed(old);
1543
    }
1544
}
1545
#endif
1546
1547
static struct _mem_work_chunk *
1548
work_queue_first(struct llist_node *head)
1549
0
{
1550
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1551
0
}
1552
1553
static void
1554
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1555
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1556
0
{
1557
0
    while (!llist_empty(head)) {
1558
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1559
1560
0
        if (buf->rd_idx < buf->wr_idx) {
1561
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1562
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1563
0
                return;
1564
0
            }
1565
1566
0
            buf->rd_idx++;
1567
            // NB: free_work_item may re-enter or execute arbitrary code
1568
0
            free_work_item(item->ptr, cb, state);
1569
0
            continue;
1570
0
        }
1571
1572
0
        assert(buf->rd_idx == buf->wr_idx);
1573
0
        if (keep_empty && buf->node.next == head) {
1574
            // Keep the last buffer in the queue to reduce re-allocations
1575
0
            buf->rd_idx = buf->wr_idx = 0;
1576
0
            return;
1577
0
        }
1578
1579
0
        llist_remove(&buf->node);
1580
0
        PyMem_Free(buf);
1581
0
    }
1582
0
}
1583
1584
static void
1585
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1586
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1587
                     void *state)
1588
0
{
1589
0
    assert(PyMutex_IsLocked(&queue->mutex));
1590
0
    process_queue(&queue->head, tstate, false, cb, state);
1591
1592
0
    int more_work = !llist_empty(&queue->head);
1593
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1594
0
}
1595
1596
static void
1597
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1598
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1599
                           void *state)
1600
0
{
1601
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1602
0
        return;
1603
0
    }
1604
1605
    // Try to acquire the lock, but don't block if it's already held.
1606
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1607
0
        process_interp_queue(queue, tstate, cb, state);
1608
0
        PyMutex_Unlock(&queue->mutex);
1609
0
    }
1610
0
}
1611
1612
void
1613
_PyMem_ProcessDelayed(PyThreadState *tstate)
1614
0
{
1615
0
    PyInterpreterState *interp = tstate->interp;
1616
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1617
1618
0
    tstate_impl->qsbr->should_process = false;
1619
1620
    // Process thread-local work
1621
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1622
1623
    // Process shared interpreter work
1624
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1625
0
}
1626
1627
void
1628
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1629
0
{
1630
0
    PyInterpreterState *interp = tstate->interp;
1631
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1632
1633
    // Process thread-local work
1634
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1635
1636
    // Process shared interpreter work
1637
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1638
0
}
1639
1640
void
1641
_PyMem_AbandonDelayed(PyThreadState *tstate)
1642
0
{
1643
0
    PyInterpreterState *interp = tstate->interp;
1644
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1645
1646
0
    if (llist_empty(queue)) {
1647
0
        return;
1648
0
    }
1649
1650
    // Check if the queue contains one empty buffer
1651
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1652
0
    if (buf->rd_idx == buf->wr_idx) {
1653
0
        llist_remove(&buf->node);
1654
0
        PyMem_Free(buf);
1655
0
        assert(llist_empty(queue));
1656
0
        return;
1657
0
    }
1658
1659
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1660
1661
    // Merge the thread's work queue into the interpreter's work queue.
1662
0
    llist_concat(&interp->mem_free_queue.head, queue);
1663
1664
    // Process the merged queue now (see gh-130794).
1665
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1666
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1667
1668
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1669
1670
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1671
0
}
1672
1673
void
1674
_PyMem_FiniDelayed(PyInterpreterState *interp)
1675
0
{
1676
0
    struct llist_node *head = &interp->mem_free_queue.head;
1677
0
    while (!llist_empty(head)) {
1678
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1679
1680
0
        if (buf->rd_idx < buf->wr_idx) {
1681
            // Free the remaining items immediately. There should be no other
1682
            // threads accessing the memory at this point during shutdown.
1683
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1684
0
            buf->rd_idx++;
1685
            // NB: free_work_item may re-enter or execute arbitrary code
1686
0
            free_work_item(item->ptr, NULL, NULL);
1687
0
            continue;
1688
0
        }
1689
1690
0
        llist_remove(&buf->node);
1691
0
        PyMem_Free(buf);
1692
0
    }
1693
0
}
1694
1695
/**************************/
1696
/* the "object" allocator */
1697
/**************************/
1698
1699
void *
1700
PyObject_Malloc(size_t size)
1701
1.44G
{
1702
    /* see PyMem_RawMalloc() */
1703
1.44G
    if (size > (size_t)PY_SSIZE_T_MAX)
1704
0
        return NULL;
1705
1.44G
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1706
1.44G
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1707
1.44G
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1708
1.44G
    OBJECT_STAT_INC(allocations);
1709
1.44G
    return _PyObject.malloc(_PyObject.ctx, size);
1710
1.44G
}
1711
1712
void *
1713
PyObject_Calloc(size_t nelem, size_t elsize)
1714
0
{
1715
    /* see PyMem_RawMalloc() */
1716
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1717
0
        return NULL;
1718
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1719
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1720
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1721
0
    OBJECT_STAT_INC(allocations);
1722
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1723
0
}
1724
1725
void *
1726
PyObject_Realloc(void *ptr, size_t new_size)
1727
63.7M
{
1728
    /* see PyMem_RawMalloc() */
1729
63.7M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1730
0
        return NULL;
1731
63.7M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1732
63.7M
}
1733
1734
void
1735
PyObject_Free(void *ptr)
1736
1.44G
{
1737
1.44G
    OBJECT_STAT_INC(frees);
1738
1.44G
    _PyObject.free(_PyObject.ctx, ptr);
1739
1.44G
}
1740
1741
1742
/* Use __builtin_expect() where available to reduce overhead of
1743
   the valgrind checks */
1744
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1745
11.8G
#  define UNLIKELY(value) __builtin_expect((value), 0)
1746
5.31G
#  define LIKELY(value) __builtin_expect((value), 1)
1747
#else
1748
#  define UNLIKELY(value) (value)
1749
#  define LIKELY(value) (value)
1750
#endif
1751
1752
#ifdef WITH_PYMALLOC
1753
1754
#ifdef WITH_VALGRIND
1755
#include <valgrind/valgrind.h>
1756
1757
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1758
static int running_on_valgrind = -1;
1759
#endif
1760
1761
typedef struct _obmalloc_state OMState;
1762
1763
/* obmalloc state for main interpreter and shared by all interpreters without
1764
 * their own obmalloc state.  By not explicitly initializing this structure, it
1765
 * will be allocated in the BSS which is a small performance win.  The radix
1766
 * tree arrays are fairly large but are sparsely used.  */
1767
static struct _obmalloc_state obmalloc_state_main;
1768
static bool obmalloc_state_initialized;
1769
1770
static inline int
1771
has_own_state(PyInterpreterState *interp)
1772
0
{
1773
0
    return (_Py_IsMainInterpreter(interp) ||
1774
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1775
0
            _Py_IsMainInterpreterFinalizing(interp));
1776
0
}
1777
1778
static inline OMState *
1779
get_state(void)
1780
4.04G
{
1781
4.04G
    PyInterpreterState *interp = _PyInterpreterState_GET();
1782
4.04G
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1783
4.04G
    return interp->obmalloc;
1784
4.04G
}
1785
1786
// These macros all rely on a local "state" variable.
1787
2.00G
#define usedpools (state->pools.used)
1788
2.83M
#define allarenas (state->mgmt.arenas)
1789
342
#define maxarenas (state->mgmt.maxarenas)
1790
34.1k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1791
23.4M
#define usable_arenas (state->mgmt.usable_arenas)
1792
16.5M
#define nfp2lasta (state->mgmt.nfp2lasta)
1793
21.5k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1794
7.00k
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1795
7.96k
#define narenas_highwater (state->mgmt.narenas_highwater)
1796
387M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1797
1798
#ifdef WITH_MIMALLOC
1799
static bool count_blocks(
1800
    const mi_heap_t* heap, const mi_heap_area_t* area,
1801
    void* block, size_t block_size, void* allocated_blocks)
1802
0
{
1803
0
    *(size_t *)allocated_blocks += area->used;
1804
0
    return 1;
1805
0
}
1806
1807
static Py_ssize_t
1808
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1809
0
{
1810
0
    size_t allocated_blocks = 0;
1811
#ifdef Py_GIL_DISABLED
1812
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1813
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1814
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1815
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1816
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1817
        }
1818
    }
1819
1820
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1821
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1822
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1823
                                        &allocated_blocks);
1824
    }
1825
#else
1826
    // TODO(sgross): this only counts the current thread's blocks.
1827
0
    mi_heap_t *heap = mi_heap_get_default();
1828
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1829
0
#endif
1830
0
    return allocated_blocks;
1831
0
}
1832
#endif
1833
1834
Py_ssize_t
1835
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1836
0
{
1837
0
#ifdef WITH_MIMALLOC
1838
0
    if (_PyMem_MimallocEnabled()) {
1839
0
        return get_mimalloc_allocated_blocks(interp);
1840
0
    }
1841
0
#endif
1842
1843
#ifdef Py_DEBUG
1844
    assert(has_own_state(interp));
1845
#else
1846
0
    if (!has_own_state(interp)) {
1847
0
        _Py_FatalErrorFunc(__func__,
1848
0
                           "the interpreter doesn't have its own allocator");
1849
0
    }
1850
0
#endif
1851
0
    OMState *state = interp->obmalloc;
1852
1853
0
    if (state == NULL) {
1854
0
        return 0;
1855
0
    }
1856
1857
0
    Py_ssize_t n = raw_allocated_blocks;
1858
    /* add up allocated blocks for used pools */
1859
0
    for (uint i = 0; i < maxarenas; ++i) {
1860
        /* Skip arenas which are not allocated. */
1861
0
        if (allarenas[i].address == 0) {
1862
0
            continue;
1863
0
        }
1864
1865
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1866
1867
        /* visit every pool in the arena */
1868
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1869
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1870
0
            poolp p = (poolp)base;
1871
0
            n += p->ref.count;
1872
0
        }
1873
0
    }
1874
0
    return n;
1875
0
}
1876
1877
static void free_obmalloc_arenas(PyInterpreterState *interp);
1878
1879
void
1880
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1881
0
{
1882
0
#ifdef WITH_MIMALLOC
1883
0
    if (_PyMem_MimallocEnabled()) {
1884
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1885
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1886
0
        return;
1887
0
    }
1888
0
#endif
1889
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1890
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1891
0
        assert(has_own_state(interp) || leaked == 0);
1892
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1893
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1894
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1895
            // then some of the memory allocated by obmalloc has not been
1896
            // freed.  It might be safe to free the arenas in that case but
1897
            // it's possible that extension modules are still using that
1898
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1899
            // should be warning when this happens.  It should be possible to
1900
            // use a tool like "-fsanitize=address" to track down these leaks.
1901
0
            free_obmalloc_arenas(interp);
1902
0
        }
1903
0
    }
1904
0
}
1905
1906
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1907
1908
/* We preserve the number of blocks leaked during runtime finalization,
1909
   so they can be reported if the runtime is initialized again. */
1910
// XXX We don't lose any information by dropping this,
1911
// so we should consider doing so.
1912
static Py_ssize_t last_final_leaks = 0;
1913
1914
void
1915
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1916
0
{
1917
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1918
0
    runtime->obmalloc.interpreter_leaks = 0;
1919
0
}
1920
1921
static Py_ssize_t
1922
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1923
0
{
1924
0
    Py_ssize_t total = 0;
1925
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1926
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1927
0
        if (interp == NULL) {
1928
            /* We are at the very end of runtime finalization.
1929
               We can't rely on finalizing->interp since that thread
1930
               state is probably already freed, so we don't worry
1931
               about it. */
1932
0
            assert(PyInterpreterState_Head() == NULL);
1933
0
        }
1934
0
        else {
1935
0
            assert(interp != NULL);
1936
            /* It is probably the last interpreter but not necessarily. */
1937
0
            assert(PyInterpreterState_Next(interp) == NULL);
1938
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1939
0
        }
1940
0
    }
1941
0
    else {
1942
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1943
0
        HEAD_LOCK(runtime);
1944
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1945
0
        assert(interp != NULL);
1946
#ifdef Py_DEBUG
1947
        int got_main = 0;
1948
#endif
1949
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1950
#ifdef Py_DEBUG
1951
            if (_Py_IsMainInterpreter(interp)) {
1952
                assert(!got_main);
1953
                got_main = 1;
1954
                assert(has_own_state(interp));
1955
            }
1956
#endif
1957
0
            if (has_own_state(interp)) {
1958
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1959
0
            }
1960
0
        }
1961
0
        HEAD_UNLOCK(runtime);
1962
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1963
#ifdef Py_DEBUG
1964
        assert(got_main);
1965
#endif
1966
0
    }
1967
0
    total += runtime->obmalloc.interpreter_leaks;
1968
0
    total += last_final_leaks;
1969
0
    return total;
1970
0
}
1971
1972
Py_ssize_t
1973
_Py_GetGlobalAllocatedBlocks(void)
1974
0
{
1975
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1976
0
}
1977
1978
#if WITH_PYMALLOC_RADIX_TREE
1979
/*==========================================================================*/
1980
/* radix tree for tracking arena usage. */
1981
1982
6.01G
#define arena_map_root (state->usage.arena_map_root)
1983
#ifdef USE_INTERIOR_NODES
1984
36
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1985
36
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1986
#endif
1987
1988
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1989
 * it cannot be created */
1990
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1991
arena_map_get(OMState *state, pymem_block *p, int create)
1992
2.07G
{
1993
2.07G
#ifdef USE_INTERIOR_NODES
1994
    /* sanity check that IGNORE_BITS is correct */
1995
2.07G
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1996
2.07G
    int i1 = MAP_TOP_INDEX(p);
1997
2.07G
    if (arena_map_root.ptrs[i1] == NULL) {
1998
36
        if (!create) {
1999
0
            return NULL;
2000
0
        }
2001
36
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
2002
36
        if (n == NULL) {
2003
0
            return NULL;
2004
0
        }
2005
36
        arena_map_root.ptrs[i1] = n;
2006
36
        arena_map_mid_count++;
2007
36
    }
2008
2.07G
    int i2 = MAP_MID_INDEX(p);
2009
2.07G
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
2010
200M
        if (!create) {
2011
200M
            return NULL;
2012
200M
        }
2013
36
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
2014
36
        if (n == NULL) {
2015
0
            return NULL;
2016
0
        }
2017
36
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
2018
36
        arena_map_bot_count++;
2019
36
    }
2020
1.87G
    return arena_map_root.ptrs[i1]->ptrs[i2];
2021
#else
2022
    return &arena_map_root;
2023
#endif
2024
2.07G
}
2025
2026
2027
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
2028
 * away 24 bits of the address.  That reduces the space requirement of
2029
 * the tree compared to similar radix tree page-map schemes.  In
2030
 * exchange for slashing the space requirement, it needs more
2031
 * computation to check an address.
2032
 *
2033
 * Tracking coverage is done by "ideal" arena address.  It is easier to
2034
 * explain in decimal so let's say that the arena size is 100 bytes.
2035
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
2036
 * pointer address is inside an actual arena, we have to check two ideal
2037
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
2038
 * 300.  In the rare case that an arena is aligned in the ideal way
2039
 * (e.g. base address of arena is 200) then we only have to check one
2040
 * ideal address.
2041
 *
2042
 * The tree nodes for 200 and 300 both store the address of arena.
2043
 * There are two cases: the arena starts at a lower ideal arena and
2044
 * extends to this one, or the arena starts in this arena and extends to
2045
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
2046
 * these two cases.
2047
 */
2048
2049
2050
/* mark or unmark addresses covered by arena */
2051
static int
2052
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
2053
13.5k
{
2054
    /* sanity check that IGNORE_BITS is correct */
2055
13.5k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
2056
13.5k
    arena_map_bot_t *n_hi = arena_map_get(
2057
13.5k
            state, (pymem_block *)arena_base, is_used);
2058
13.5k
    if (n_hi == NULL) {
2059
0
        assert(is_used); /* otherwise node should already exist */
2060
0
        return 0; /* failed to allocate space for node */
2061
0
    }
2062
13.5k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
2063
13.5k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
2064
13.5k
    if (tail == 0) {
2065
        /* is ideal arena address */
2066
68
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
2067
68
    }
2068
13.4k
    else {
2069
        /* arena_base address is not ideal (aligned to arena size) and
2070
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
2071
         * for the next arena.  Note that it might be in different MAP_TOP
2072
         * and MAP_MID nodes as well so we need to call arena_map_get()
2073
         * again (do the full tree traversal).
2074
         */
2075
13.4k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
2076
13.4k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
2077
        /* If arena_base is a legit arena address, so is arena_base_next - 1
2078
         * (last address in arena).  If arena_base_next overflows then it
2079
         * must overflow to 0.  However, that would mean arena_base was
2080
         * "ideal" and we should not be in this case. */
2081
13.4k
        assert(arena_base < arena_base_next);
2082
13.4k
        arena_map_bot_t *n_lo = arena_map_get(
2083
13.4k
                state, (pymem_block *)arena_base_next, is_used);
2084
13.4k
        if (n_lo == NULL) {
2085
0
            assert(is_used); /* otherwise should already exist */
2086
0
            n_hi->arenas[i3].tail_hi = 0;
2087
0
            return 0; /* failed to allocate space for node */
2088
0
        }
2089
13.4k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
2090
13.4k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2091
13.4k
    }
2092
13.5k
    return 1;
2093
13.5k
}
2094
2095
/* Return true if 'p' is a pointer inside an obmalloc arena.
2096
 * _PyObject_Free() calls this so it needs to be very fast. */
2097
static int
2098
arena_map_is_used(OMState *state, pymem_block *p)
2099
2.07G
{
2100
2.07G
    arena_map_bot_t *n = arena_map_get(state, p, 0);
2101
2.07G
    if (n == NULL) {
2102
200M
        return 0;
2103
200M
    }
2104
1.87G
    int i3 = MAP_BOT_INDEX(p);
2105
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2106
1.87G
    int32_t hi = n->arenas[i3].tail_hi;
2107
1.87G
    int32_t lo = n->arenas[i3].tail_lo;
2108
1.87G
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2109
1.87G
    return (tail < lo) || (tail >= hi && hi != 0);
2110
2.07G
}
2111
2112
/* end of radix tree logic */
2113
/*==========================================================================*/
2114
#endif /* WITH_PYMALLOC_RADIX_TREE */
2115
2116
2117
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
2118
 * allocate a new arena, and return the address of an arena_object
2119
 * describing the new arena.  It's expected that the caller will set
2120
 * `usable_arenas` to the return value.
2121
 */
2122
static struct arena_object*
2123
new_arena(OMState *state)
2124
7.00k
{
2125
7.00k
    struct arena_object* arenaobj;
2126
7.00k
    uint excess;        /* number of bytes above pool alignment */
2127
7.00k
    void *address;
2128
2129
7.00k
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
2130
7.00k
    if (debug_stats == -1) {
2131
36
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
2132
36
        debug_stats = (opt != NULL && *opt != '\0');
2133
36
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
2134
36
    }
2135
7.00k
    if (debug_stats) {
2136
0
        _PyObject_DebugMallocStats(stderr);
2137
0
    }
2138
2139
7.00k
    if (unused_arena_objects == NULL) {
2140
63
        uint i;
2141
63
        uint numarenas;
2142
63
        size_t nbytes;
2143
2144
        /* Double the number of arena objects on each allocation.
2145
         * Note that it's possible for `numarenas` to overflow.
2146
         */
2147
63
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
2148
63
        if (numarenas <= maxarenas)
2149
0
            return NULL;                /* overflow */
2150
#if SIZEOF_SIZE_T <= SIZEOF_INT
2151
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
2152
            return NULL;                /* overflow */
2153
#endif
2154
63
        nbytes = numarenas * sizeof(*allarenas);
2155
63
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
2156
63
        if (arenaobj == NULL)
2157
0
            return NULL;
2158
63
        allarenas = arenaobj;
2159
2160
        /* We might need to fix pointers that were copied.  However,
2161
         * new_arena only gets called when all the pages in the
2162
         * previous arenas are full.  Thus, there are *no* pointers
2163
         * into the old array. Thus, we don't have to worry about
2164
         * invalid pointers.  Just to be sure, some asserts:
2165
         */
2166
63
        assert(usable_arenas == NULL);
2167
63
        assert(unused_arena_objects == NULL);
2168
2169
        /* Put the new arenas on the unused_arena_objects list. */
2170
1.69k
        for (i = maxarenas; i < numarenas; ++i) {
2171
1.63k
            allarenas[i].address = 0;              /* mark as unassociated */
2172
1.63k
            allarenas[i].nextarena = i < numarenas - 1 ?
2173
1.63k
                                        &allarenas[i+1] : NULL;
2174
1.63k
        }
2175
2176
        /* Update globals. */
2177
63
        unused_arena_objects = &allarenas[maxarenas];
2178
63
        maxarenas = numarenas;
2179
63
    }
2180
2181
    /* Take the next available arena object off the head of the list. */
2182
7.00k
    assert(unused_arena_objects != NULL);
2183
7.00k
    arenaobj = unused_arena_objects;
2184
7.00k
    unused_arena_objects = arenaobj->nextarena;
2185
7.00k
    assert(arenaobj->address == 0);
2186
7.00k
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2187
7.00k
#if WITH_PYMALLOC_RADIX_TREE
2188
7.00k
    if (address != NULL) {
2189
7.00k
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2190
            /* marking arena in radix tree failed, abort */
2191
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2192
0
            address = NULL;
2193
0
        }
2194
7.00k
    }
2195
7.00k
#endif
2196
7.00k
    if (address == NULL) {
2197
        /* The allocation failed: return NULL after putting the
2198
         * arenaobj back.
2199
         */
2200
0
        arenaobj->nextarena = unused_arena_objects;
2201
0
        unused_arena_objects = arenaobj;
2202
0
        return NULL;
2203
0
    }
2204
7.00k
    arenaobj->address = (uintptr_t)address;
2205
2206
7.00k
    ++narenas_currently_allocated;
2207
7.00k
    ++ntimes_arena_allocated;
2208
7.00k
    if (narenas_currently_allocated > narenas_highwater)
2209
963
        narenas_highwater = narenas_currently_allocated;
2210
7.00k
    arenaobj->freepools = NULL;
2211
    /* pool_address <- first pool-aligned address in the arena
2212
       nfreepools <- number of whole pools that fit after alignment */
2213
7.00k
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2214
7.00k
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2215
7.00k
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2216
7.00k
    if (excess != 0) {
2217
5.92k
        --arenaobj->nfreepools;
2218
5.92k
        arenaobj->pool_address += POOL_SIZE - excess;
2219
5.92k
    }
2220
7.00k
    arenaobj->ntotalpools = arenaobj->nfreepools;
2221
2222
7.00k
    return arenaobj;
2223
7.00k
}
2224
2225
2226
2227
#if WITH_PYMALLOC_RADIX_TREE
2228
/* Return true if and only if P is an address that was allocated by
2229
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2230
 */
2231
static bool
2232
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2233
2.07G
{
2234
2.07G
    return arena_map_is_used(state, p);
2235
2.07G
}
2236
#else
2237
/*
2238
address_in_range(P, POOL)
2239
2240
Return true if and only if P is an address that was allocated by pymalloc.
2241
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2242
(the caller is asked to compute this because the macro expands POOL more than
2243
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2244
variable and pass the latter to the macro; because address_in_range is
2245
called on every alloc/realloc/free, micro-efficiency is important here).
2246
2247
Tricky:  Let B be the arena base address associated with the pool, B =
2248
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2249
2250
    B <= P < B + ARENA_SIZE
2251
2252
Subtracting B throughout, this is true iff
2253
2254
    0 <= P-B < ARENA_SIZE
2255
2256
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2257
2258
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2259
before the first arena has been allocated.  `arenas` is still NULL in that
2260
case.  We're relying on that maxarenas is also 0 in that case, so that
2261
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2262
into a NULL arenas.
2263
2264
Details:  given P and POOL, the arena_object corresponding to P is AO =
2265
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2266
stores, etc), POOL is the correct address of P's pool, AO.address is the
2267
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2268
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2269
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2270
controls P.
2271
2272
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2273
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2274
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2275
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2276
control P.
2277
2278
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2279
allocated arena, obmalloc controls all the memory in slice AO.address :
2280
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2281
so P doesn't lie in that slice, so the macro correctly reports that P is not
2282
controlled by obmalloc.
2283
2284
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2285
arena_object (one not currently associated with an allocated arena),
2286
AO.address is 0, and the second test in the macro reduces to:
2287
2288
    P < ARENA_SIZE
2289
2290
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2291
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2292
of the test still passes, and the third clause (AO.address != 0) is necessary
2293
to get the correct result:  AO.address is 0 in this case, so the macro
2294
correctly reports that P is not controlled by obmalloc (despite that P lies in
2295
slice AO.address : AO.address + ARENA_SIZE).
2296
2297
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2298
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2299
corresponded to a currently-allocated arena, so the "P is not controlled by
2300
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2301
was impossible.
2302
2303
Note that the logic is excruciating, and reading up possibly uninitialized
2304
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2305
creates problems for some memory debuggers.  The overwhelming advantage is
2306
that this test determines whether an arbitrary address is controlled by
2307
obmalloc in a small constant time, independent of the number of arenas
2308
obmalloc controls.  Since this test is needed at every entry point, it's
2309
extremely desirable that it be this fast.
2310
*/
2311
2312
static bool _Py_NO_SANITIZE_ADDRESS
2313
            _Py_NO_SANITIZE_THREAD
2314
            _Py_NO_SANITIZE_MEMORY
2315
address_in_range(OMState *state, void *p, poolp pool)
2316
{
2317
    // Since address_in_range may be reading from memory which was not allocated
2318
    // by Python, it is important that pool->arenaindex is read only once, as
2319
    // another thread may be concurrently modifying the value without holding
2320
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2321
    // only once.
2322
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2323
    return arenaindex < maxarenas &&
2324
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2325
        allarenas[arenaindex].address != 0;
2326
}
2327
2328
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2329
2330
/*==========================================================================*/
2331
2332
// Called when freelist is exhausted.  Extend the freelist if there is
2333
// space for a block.  Otherwise, remove this pool from usedpools.
2334
static void
2335
pymalloc_pool_extend(poolp pool, uint size)
2336
440M
{
2337
440M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2338
        /* There is room for another block. */
2339
217M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2340
217M
        pool->nextoffset += INDEX2SIZE(size);
2341
217M
        *(pymem_block **)(pool->freeblock) = NULL;
2342
217M
        return;
2343
217M
    }
2344
2345
    /* Pool is full, unlink from used pools. */
2346
222M
    poolp next;
2347
222M
    next = pool->nextpool;
2348
222M
    pool = pool->prevpool;
2349
222M
    next->prevpool = pool;
2350
222M
    pool->nextpool = next;
2351
222M
}
2352
2353
/* called when pymalloc_alloc can not allocate a block from usedpool.
2354
 * This function takes new pool and allocate a block from it.
2355
 */
2356
static void*
2357
allocate_from_new_pool(OMState *state, uint size)
2358
2.41M
{
2359
    /* There isn't a pool of the right size class immediately
2360
     * available:  use a free pool.
2361
     */
2362
2.41M
    if (UNLIKELY(usable_arenas == NULL)) {
2363
        /* No arena has a free pool:  allocate a new arena. */
2364
#ifdef WITH_MEMORY_LIMITS
2365
        if (narenas_currently_allocated >= MAX_ARENAS) {
2366
            return NULL;
2367
        }
2368
#endif
2369
7.00k
        usable_arenas = new_arena(state);
2370
7.00k
        if (usable_arenas == NULL) {
2371
0
            return NULL;
2372
0
        }
2373
7.00k
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2374
7.00k
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2375
7.00k
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2376
7.00k
    }
2377
2.41M
    assert(usable_arenas->address != 0);
2378
2379
    /* This arena already had the smallest nfreepools value, so decreasing
2380
     * nfreepools doesn't change that, and we don't need to rearrange the
2381
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2382
     * we need to remove its arena_object from usable_arenas.
2383
     */
2384
2.41M
    assert(usable_arenas->nfreepools > 0);
2385
2.41M
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2386
        /* It's the last of this size, so there won't be any. */
2387
2.40M
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2388
2.40M
    }
2389
    /* If any free pools will remain, it will be the new smallest. */
2390
2.41M
    if (usable_arenas->nfreepools > 1) {
2391
2.36M
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2392
2.36M
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2393
2.36M
    }
2394
2395
    /* Try to get a cached free pool. */
2396
2.41M
    poolp pool = usable_arenas->freepools;
2397
2.41M
    if (LIKELY(pool != NULL)) {
2398
        /* Unlink from cached pools. */
2399
1.98M
        usable_arenas->freepools = pool->nextpool;
2400
1.98M
        usable_arenas->nfreepools--;
2401
1.98M
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2402
            /* Wholly allocated:  remove. */
2403
47.8k
            assert(usable_arenas->freepools == NULL);
2404
47.8k
            assert(usable_arenas->nextarena == NULL ||
2405
47.8k
                   usable_arenas->nextarena->prevarena ==
2406
47.8k
                   usable_arenas);
2407
47.8k
            usable_arenas = usable_arenas->nextarena;
2408
47.8k
            if (usable_arenas != NULL) {
2409
44.1k
                usable_arenas->prevarena = NULL;
2410
44.1k
                assert(usable_arenas->address != 0);
2411
44.1k
            }
2412
47.8k
        }
2413
1.93M
        else {
2414
            /* nfreepools > 0:  it must be that freepools
2415
             * isn't NULL, or that we haven't yet carved
2416
             * off all the arena's pools for the first
2417
             * time.
2418
             */
2419
1.93M
            assert(usable_arenas->freepools != NULL ||
2420
1.93M
                   usable_arenas->pool_address <=
2421
1.93M
                   (pymem_block*)usable_arenas->address +
2422
1.93M
                       ARENA_SIZE - POOL_SIZE);
2423
1.93M
        }
2424
1.98M
    }
2425
435k
    else {
2426
        /* Carve off a new pool. */
2427
435k
        assert(usable_arenas->nfreepools > 0);
2428
435k
        assert(usable_arenas->freepools == NULL);
2429
435k
        pool = (poolp)usable_arenas->pool_address;
2430
435k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2431
435k
                                 ARENA_SIZE - POOL_SIZE);
2432
435k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2433
435k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2434
435k
        pool->szidx = DUMMY_SIZE_IDX;
2435
435k
        usable_arenas->pool_address += POOL_SIZE;
2436
435k
        --usable_arenas->nfreepools;
2437
2438
435k
        if (usable_arenas->nfreepools == 0) {
2439
6.79k
            assert(usable_arenas->nextarena == NULL ||
2440
6.79k
                   usable_arenas->nextarena->prevarena ==
2441
6.79k
                   usable_arenas);
2442
            /* Unlink the arena:  it is completely allocated. */
2443
6.79k
            usable_arenas = usable_arenas->nextarena;
2444
6.79k
            if (usable_arenas != NULL) {
2445
369
                usable_arenas->prevarena = NULL;
2446
369
                assert(usable_arenas->address != 0);
2447
369
            }
2448
6.79k
        }
2449
435k
    }
2450
2451
    /* Frontlink to used pools. */
2452
2.41M
    pymem_block *bp;
2453
2.41M
    poolp next = usedpools[size + size]; /* == prev */
2454
2.41M
    pool->nextpool = next;
2455
2.41M
    pool->prevpool = next;
2456
2.41M
    next->nextpool = pool;
2457
2.41M
    next->prevpool = pool;
2458
2.41M
    pool->ref.count = 1;
2459
2.41M
    if (pool->szidx == size) {
2460
        /* Luckily, this pool last contained blocks
2461
         * of the same size class, so its header
2462
         * and free list are already initialized.
2463
         */
2464
1.42M
        bp = pool->freeblock;
2465
1.42M
        assert(bp != NULL);
2466
1.42M
        pool->freeblock = *(pymem_block **)bp;
2467
1.42M
        return bp;
2468
1.42M
    }
2469
    /*
2470
     * Initialize the pool header, set up the free list to
2471
     * contain just the second block, and return the first
2472
     * block.
2473
     */
2474
994k
    pool->szidx = size;
2475
994k
    size = INDEX2SIZE(size);
2476
994k
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2477
994k
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2478
994k
    pool->maxnextoffset = POOL_SIZE - size;
2479
994k
    pool->freeblock = bp + size;
2480
994k
    *(pymem_block **)(pool->freeblock) = NULL;
2481
994k
    return bp;
2482
2.41M
}
2483
2484
/* pymalloc allocator
2485
2486
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2487
2488
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2489
   requests, on error in the code below (as a last chance to serve the request)
2490
   or when the max memory limit has been reached.
2491
*/
2492
static inline void*
2493
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2494
1.97G
{
2495
#ifdef WITH_VALGRIND
2496
    if (UNLIKELY(running_on_valgrind == -1)) {
2497
        running_on_valgrind = RUNNING_ON_VALGRIND;
2498
    }
2499
    if (UNLIKELY(running_on_valgrind)) {
2500
        return NULL;
2501
    }
2502
#endif
2503
2504
1.97G
    if (UNLIKELY(nbytes == 0)) {
2505
70.8k
        return NULL;
2506
70.8k
    }
2507
1.97G
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2508
193M
        return NULL;
2509
193M
    }
2510
2511
1.78G
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2512
1.78G
    poolp pool = usedpools[size + size];
2513
1.78G
    pymem_block *bp;
2514
2515
1.78G
    if (LIKELY(pool != pool->nextpool)) {
2516
        /*
2517
         * There is a used pool for this size class.
2518
         * Pick up the head block of its free list.
2519
         */
2520
1.77G
        ++pool->ref.count;
2521
1.77G
        bp = pool->freeblock;
2522
1.77G
        assert(bp != NULL);
2523
2524
1.77G
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2525
            // Reached the end of the free list, try to extend it.
2526
440M
            pymalloc_pool_extend(pool, size);
2527
440M
        }
2528
1.77G
    }
2529
2.41M
    else {
2530
        /* There isn't a pool of the right size class immediately
2531
         * available:  use a free pool.
2532
         */
2533
2.41M
        bp = allocate_from_new_pool(state, size);
2534
2.41M
    }
2535
2536
1.78G
    return (void *)bp;
2537
1.97G
}
2538
2539
2540
void *
2541
_PyObject_Malloc(void *ctx, size_t nbytes)
2542
1.93G
{
2543
1.93G
    OMState *state = get_state();
2544
1.93G
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2545
1.93G
    if (LIKELY(ptr != NULL)) {
2546
1.73G
        return ptr;
2547
1.73G
    }
2548
2549
193M
    ptr = PyMem_RawMalloc(nbytes);
2550
193M
    if (ptr != NULL) {
2551
193M
        raw_allocated_blocks++;
2552
193M
    }
2553
193M
    return ptr;
2554
1.93G
}
2555
2556
2557
void *
2558
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2559
42.7M
{
2560
42.7M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2561
42.7M
    size_t nbytes = nelem * elsize;
2562
2563
42.7M
    OMState *state = get_state();
2564
42.7M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2565
42.7M
    if (LIKELY(ptr != NULL)) {
2566
42.6M
        memset(ptr, 0, nbytes);
2567
42.6M
        return ptr;
2568
42.6M
    }
2569
2570
140k
    ptr = PyMem_RawCalloc(nelem, elsize);
2571
140k
    if (ptr != NULL) {
2572
140k
        raw_allocated_blocks++;
2573
140k
    }
2574
140k
    return ptr;
2575
42.7M
}
2576
2577
2578
static void
2579
insert_to_usedpool(OMState *state, poolp pool)
2580
222M
{
2581
222M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2582
2583
222M
    uint size = pool->szidx;
2584
222M
    poolp next = usedpools[size + size];
2585
222M
    poolp prev = next->prevpool;
2586
2587
    /* insert pool before next:   prev <-> pool <-> next */
2588
222M
    pool->nextpool = next;
2589
222M
    pool->prevpool = prev;
2590
222M
    next->prevpool = pool;
2591
222M
    prev->nextpool = pool;
2592
222M
}
2593
2594
static void
2595
insert_to_freepool(OMState *state, poolp pool)
2596
2.39M
{
2597
2.39M
    poolp next = pool->nextpool;
2598
2.39M
    poolp prev = pool->prevpool;
2599
2.39M
    next->prevpool = prev;
2600
2.39M
    prev->nextpool = next;
2601
2602
    /* Link the pool to freepools.  This is a singly-linked
2603
     * list, and pool->prevpool isn't used there.
2604
     */
2605
2.39M
    struct arena_object *ao = &allarenas[pool->arenaindex];
2606
2.39M
    pool->nextpool = ao->freepools;
2607
2.39M
    ao->freepools = pool;
2608
2.39M
    uint nf = ao->nfreepools;
2609
    /* If this is the rightmost arena with this number of free pools,
2610
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2611
     * are no arenas in usable_arenas with that value.
2612
     */
2613
2.39M
    struct arena_object* lastnf = nfp2lasta[nf];
2614
2.39M
    assert((nf == 0 && lastnf == NULL) ||
2615
2.39M
           (nf > 0 &&
2616
2.39M
            lastnf != NULL &&
2617
2.39M
            lastnf->nfreepools == nf &&
2618
2.39M
            (lastnf->nextarena == NULL ||
2619
2.39M
             nf < lastnf->nextarena->nfreepools)));
2620
2.39M
    if (lastnf == ao) {  /* it is the rightmost */
2621
2.27M
        struct arena_object* p = ao->prevarena;
2622
2.27M
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2623
2.27M
    }
2624
2.39M
    ao->nfreepools = ++nf;
2625
2626
    /* All the rest is arena management.  We just freed
2627
     * a pool, and there are 4 cases for arena mgmt:
2628
     * 1. If all the pools are free, return the arena to
2629
     *    the system free().  Except if this is the last
2630
     *    arena in the list, keep it to avoid thrashing:
2631
     *    keeping one wholly free arena in the list avoids
2632
     *    pathological cases where a simple loop would
2633
     *    otherwise provoke needing to allocate and free an
2634
     *    arena on every iteration.  See bpo-37257.
2635
     * 2. If this is the only free pool in the arena,
2636
     *    add the arena back to the `usable_arenas` list.
2637
     * 3. If the "next" arena has a smaller count of free
2638
     *    pools, we have to "slide this arena right" to
2639
     *    restore that usable_arenas is sorted in order of
2640
     *    nfreepools.
2641
     * 4. Else there's nothing more to do.
2642
     */
2643
2.39M
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2644
        /* Case 1.  First unlink ao from usable_arenas.
2645
         */
2646
6.55k
        assert(ao->prevarena == NULL ||
2647
6.55k
               ao->prevarena->address != 0);
2648
6.55k
        assert(ao ->nextarena == NULL ||
2649
6.55k
               ao->nextarena->address != 0);
2650
2651
        /* Fix the pointer in the prevarena, or the
2652
         * usable_arenas pointer.
2653
         */
2654
6.55k
        if (ao->prevarena == NULL) {
2655
1.09k
            usable_arenas = ao->nextarena;
2656
1.09k
            assert(usable_arenas == NULL ||
2657
1.09k
                   usable_arenas->address != 0);
2658
1.09k
        }
2659
5.46k
        else {
2660
5.46k
            assert(ao->prevarena->nextarena == ao);
2661
5.46k
            ao->prevarena->nextarena =
2662
5.46k
                ao->nextarena;
2663
5.46k
        }
2664
        /* Fix the pointer in the nextarena. */
2665
6.55k
        if (ao->nextarena != NULL) {
2666
6.55k
            assert(ao->nextarena->prevarena == ao);
2667
6.55k
            ao->nextarena->prevarena =
2668
6.55k
                ao->prevarena;
2669
6.55k
        }
2670
        /* Record that this arena_object slot is
2671
         * available to be reused.
2672
         */
2673
6.55k
        ao->nextarena = unused_arena_objects;
2674
6.55k
        unused_arena_objects = ao;
2675
2676
6.55k
#if WITH_PYMALLOC_RADIX_TREE
2677
        /* mark arena region as not under control of obmalloc */
2678
6.55k
        arena_map_mark_used(state, ao->address, 0);
2679
6.55k
#endif
2680
2681
        /* Free the entire arena. */
2682
6.55k
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2683
6.55k
                             (void *)ao->address, ARENA_SIZE);
2684
6.55k
        ao->address = 0;                        /* mark unassociated */
2685
6.55k
        --narenas_currently_allocated;
2686
2687
6.55k
        return;
2688
6.55k
    }
2689
2690
2.39M
    if (nf == 1) {
2691
        /* Case 2.  Put ao at the head of
2692
         * usable_arenas.  Note that because
2693
         * ao->nfreepools was 0 before, ao isn't
2694
         * currently on the usable_arenas list.
2695
         */
2696
54.4k
        ao->nextarena = usable_arenas;
2697
54.4k
        ao->prevarena = NULL;
2698
54.4k
        if (usable_arenas)
2699
51.2k
            usable_arenas->prevarena = ao;
2700
54.4k
        usable_arenas = ao;
2701
54.4k
        assert(usable_arenas->address != 0);
2702
54.4k
        if (nfp2lasta[1] == NULL) {
2703
47.7k
            nfp2lasta[1] = ao;
2704
47.7k
        }
2705
2706
54.4k
        return;
2707
54.4k
    }
2708
2709
    /* If this arena is now out of order, we need to keep
2710
     * the list sorted.  The list is kept sorted so that
2711
     * the "most full" arenas are used first, which allows
2712
     * the nearly empty arenas to be completely freed.  In
2713
     * a few un-scientific tests, it seems like this
2714
     * approach allowed a lot more memory to be freed.
2715
     */
2716
    /* If this is the only arena with nf, record that. */
2717
2.33M
    if (nfp2lasta[nf] == NULL) {
2718
2.25M
        nfp2lasta[nf] = ao;
2719
2.25M
    } /* else the rightmost with nf doesn't change */
2720
    /* If this was the rightmost of the old size, it remains in place. */
2721
2.33M
    if (ao == lastnf) {
2722
        /* Case 4.  Nothing to do. */
2723
2.27M
        return;
2724
2.27M
    }
2725
    /* If ao were the only arena in the list, the last block would have
2726
     * gotten us out.
2727
     */
2728
2.33M
    assert(ao->nextarena != NULL);
2729
2730
    /* Case 3:  We have to move the arena towards the end of the list,
2731
     * because it has more free pools than the arena to its right.  It needs
2732
     * to move to follow lastnf.
2733
     * First unlink ao from usable_arenas.
2734
     */
2735
64.7k
    if (ao->prevarena != NULL) {
2736
        /* ao isn't at the head of the list */
2737
49.8k
        assert(ao->prevarena->nextarena == ao);
2738
49.8k
        ao->prevarena->nextarena = ao->nextarena;
2739
49.8k
    }
2740
14.8k
    else {
2741
        /* ao is at the head of the list */
2742
14.8k
        assert(usable_arenas == ao);
2743
14.8k
        usable_arenas = ao->nextarena;
2744
14.8k
    }
2745
64.7k
    ao->nextarena->prevarena = ao->prevarena;
2746
    /* And insert after lastnf. */
2747
64.7k
    ao->prevarena = lastnf;
2748
64.7k
    ao->nextarena = lastnf->nextarena;
2749
64.7k
    if (ao->nextarena != NULL) {
2750
63.2k
        ao->nextarena->prevarena = ao;
2751
63.2k
    }
2752
64.7k
    lastnf->nextarena = ao;
2753
    /* Verify that the swaps worked. */
2754
64.7k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2755
64.7k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2756
64.7k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2757
64.7k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2758
64.7k
           || ao->prevarena->nextarena == ao);
2759
64.7k
}
2760
2761
/* Free a memory block allocated by pymalloc_alloc().
2762
   Return 1 if it was freed.
2763
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2764
static inline int
2765
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2766
1.97G
{
2767
1.97G
    assert(p != NULL);
2768
2769
#ifdef WITH_VALGRIND
2770
    if (UNLIKELY(running_on_valgrind > 0)) {
2771
        return 0;
2772
    }
2773
#endif
2774
2775
1.97G
    poolp pool = POOL_ADDR(p);
2776
1.97G
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2777
193M
        return 0;
2778
193M
    }
2779
    /* We allocated this address. */
2780
2781
    /* Link p to the start of the pool's freeblock list.  Since
2782
     * the pool had at least the p block outstanding, the pool
2783
     * wasn't empty (so it's already in a usedpools[] list, or
2784
     * was full and is in no list -- it's not in the freeblocks
2785
     * list in any case).
2786
     */
2787
1.97G
    assert(pool->ref.count > 0);            /* else it was empty */
2788
1.77G
    pymem_block *lastfree = pool->freeblock;
2789
1.77G
    *(pymem_block **)p = lastfree;
2790
1.77G
    pool->freeblock = (pymem_block *)p;
2791
1.77G
    pool->ref.count--;
2792
2793
1.77G
    if (UNLIKELY(lastfree == NULL)) {
2794
        /* Pool was full, so doesn't currently live in any list:
2795
         * link it to the front of the appropriate usedpools[] list.
2796
         * This mimics LRU pool usage for new allocations and
2797
         * targets optimal filling when several pools contain
2798
         * blocks of the same size class.
2799
         */
2800
222M
        insert_to_usedpool(state, pool);
2801
222M
        return 1;
2802
222M
    }
2803
2804
    /* freeblock wasn't NULL, so the pool wasn't full,
2805
     * and the pool is in a usedpools[] list.
2806
     */
2807
1.55G
    if (LIKELY(pool->ref.count != 0)) {
2808
        /* pool isn't empty:  leave it in usedpools */
2809
1.55G
        return 1;
2810
1.55G
    }
2811
2812
    /* Pool is now empty:  unlink from usedpools, and
2813
     * link to the front of freepools.  This ensures that
2814
     * previously freed pools will be allocated later
2815
     * (being not referenced, they are perhaps paged out).
2816
     */
2817
2.39M
    insert_to_freepool(state, pool);
2818
2.39M
    return 1;
2819
1.55G
}
2820
2821
2822
void
2823
_PyObject_Free(void *ctx, void *p)
2824
2.01G
{
2825
    /* PyObject_Free(NULL) has no effect */
2826
2.01G
    if (p == NULL) {
2827
42.4M
        return;
2828
42.4M
    }
2829
2830
1.97G
    OMState *state = get_state();
2831
1.97G
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2832
        /* pymalloc didn't allocate this address */
2833
193M
        PyMem_RawFree(p);
2834
193M
        raw_allocated_blocks--;
2835
193M
    }
2836
1.97G
}
2837
2838
2839
/* pymalloc realloc.
2840
2841
   If nbytes==0, then as the Python docs promise, we do not treat this like
2842
   free(p), and return a non-NULL result.
2843
2844
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2845
   newptr_p.
2846
2847
   Return 0 if pymalloc didn't allocated p. */
2848
static int
2849
pymalloc_realloc(OMState *state, void *ctx,
2850
                 void **newptr_p, void *p, size_t nbytes)
2851
100M
{
2852
100M
    void *bp;
2853
100M
    poolp pool;
2854
100M
    size_t size;
2855
2856
100M
    assert(p != NULL);
2857
2858
#ifdef WITH_VALGRIND
2859
    /* Treat running_on_valgrind == -1 the same as 0 */
2860
    if (UNLIKELY(running_on_valgrind > 0)) {
2861
        return 0;
2862
    }
2863
#endif
2864
2865
100M
    pool = POOL_ADDR(p);
2866
100M
    if (!address_in_range(state, p, pool)) {
2867
        /* pymalloc is not managing this block.
2868
2869
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2870
           over this block.  However, if we do, we need to copy the valid data
2871
           from the C-managed block to one of our blocks, and there's no
2872
           portable way to know how much of the memory space starting at p is
2873
           valid.
2874
2875
           As bug 1185883 pointed out the hard way, it's possible that the
2876
           C-managed block is "at the end" of allocated VM space, so that a
2877
           memory fault can occur if we try to copy nbytes bytes starting at p.
2878
           Instead we punt: let C continue to manage this block. */
2879
6.85M
        return 0;
2880
6.85M
    }
2881
2882
    /* pymalloc is in charge of this block */
2883
93.3M
    size = INDEX2SIZE(pool->szidx);
2884
93.3M
    if (nbytes <= size) {
2885
        /* The block is staying the same or shrinking.
2886
2887
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2888
           block to a smaller size class, but it wastes memory not to copy it.
2889
2890
           The compromise here is to copy on shrink only if at least 25% of
2891
           size can be shaved off. */
2892
66.6M
        if (4 * nbytes > 3 * size) {
2893
            /* It's the same, or shrinking and new/old > 3/4. */
2894
29.6M
            *newptr_p = p;
2895
29.6M
            return 1;
2896
29.6M
        }
2897
36.9M
        size = nbytes;
2898
36.9M
    }
2899
2900
63.7M
    bp = _PyObject_Malloc(ctx, nbytes);
2901
63.7M
    if (bp != NULL) {
2902
63.7M
        memcpy(bp, p, size);
2903
63.7M
        _PyObject_Free(ctx, p);
2904
63.7M
    }
2905
63.7M
    *newptr_p = bp;
2906
63.7M
    return 1;
2907
93.3M
}
2908
2909
2910
void *
2911
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2912
320M
{
2913
320M
    void *ptr2;
2914
2915
320M
    if (ptr == NULL) {
2916
220M
        return _PyObject_Malloc(ctx, nbytes);
2917
220M
    }
2918
2919
100M
    OMState *state = get_state();
2920
100M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2921
93.3M
        return ptr2;
2922
93.3M
    }
2923
2924
6.85M
    return PyMem_RawRealloc(ptr, nbytes);
2925
100M
}
2926
2927
#else   /* ! WITH_PYMALLOC */
2928
2929
/*==========================================================================*/
2930
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2931
 * only be used by extensions that are compiled with pymalloc enabled. */
2932
2933
Py_ssize_t
2934
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2935
{
2936
    return 0;
2937
}
2938
2939
Py_ssize_t
2940
_Py_GetGlobalAllocatedBlocks(void)
2941
{
2942
    return 0;
2943
}
2944
2945
void
2946
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2947
{
2948
    return;
2949
}
2950
2951
void
2952
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2953
{
2954
    return;
2955
}
2956
2957
#endif /* WITH_PYMALLOC */
2958
2959
2960
/*==========================================================================*/
2961
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2962
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2963
 */
2964
2965
/* Uncomment this define to add the "serialno" field */
2966
/* #define PYMEM_DEBUG_SERIALNO */
2967
2968
#ifdef PYMEM_DEBUG_SERIALNO
2969
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2970
2971
/* serialno is always incremented via calling this routine.  The point is
2972
 * to supply a single place to set a breakpoint.
2973
 */
2974
static void
2975
bumpserialno(void)
2976
{
2977
    ++serialno;
2978
}
2979
#endif
2980
2981
0
#define SST SIZEOF_SIZE_T
2982
2983
#ifdef PYMEM_DEBUG_SERIALNO
2984
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2985
#else
2986
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2987
#endif
2988
2989
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2990
static size_t
2991
read_size_t(const void *p)
2992
0
{
2993
0
    const uint8_t *q = (const uint8_t *)p;
2994
0
    size_t result = *q++;
2995
0
    int i;
2996
2997
0
    for (i = SST; --i > 0; ++q)
2998
0
        result = (result << 8) | *q;
2999
0
    return result;
3000
0
}
3001
3002
/* Write n as a big-endian size_t, MSB at address p, LSB at
3003
 * p + sizeof(size_t) - 1.
3004
 */
3005
static void
3006
write_size_t(void *p, size_t n)
3007
0
{
3008
0
    uint8_t *q = (uint8_t *)p + SST - 1;
3009
0
    int i;
3010
3011
0
    for (i = SST; --i >= 0; --q) {
3012
0
        *q = (uint8_t)(n & 0xff);
3013
0
        n >>= 8;
3014
0
    }
3015
0
}
3016
3017
static void
3018
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
3019
               bool is_alloc)
3020
0
{
3021
#ifdef Py_GIL_DISABLED
3022
    if (api->api_id == 'o') {
3023
        // Don't overwrite the first few bytes of a PyObject allocation in the
3024
        // free-threaded build
3025
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
3026
        size_t debug_offset;
3027
        if (is_alloc) {
3028
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
3029
        }
3030
        else {
3031
            char *alloc = (char *)data - 2*SST;  // start of the allocation
3032
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
3033
        }
3034
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
3035
        if (debug_offset < nbytes) {
3036
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
3037
        }
3038
        return;
3039
    }
3040
#endif
3041
0
    memset(data, c, nbytes);
3042
0
}
3043
3044
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
3045
   fills them with useful stuff, here calling the underlying malloc's result p:
3046
3047
p[0: S]
3048
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
3049
    to read in a memory dump).
3050
p[S]
3051
    API ID.  See PEP 445.  This is a character, but seems undocumented.
3052
p[S+1: 2*S]
3053
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
3054
p[2*S: 2*S+n]
3055
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
3056
    Used to catch reference to uninitialized memory.
3057
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
3058
    handled the request itself.
3059
p[2*S+n: 2*S+n+S]
3060
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
3061
p[2*S+n+S: 2*S+n+2*S]
3062
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
3063
    and _PyMem_DebugRealloc.
3064
    This is a big-endian size_t.
3065
    If "bad memory" is detected later, the serial number gives an
3066
    excellent way to set a breakpoint on the next run, to capture the
3067
    instant at which this block was passed out.
3068
3069
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
3070
for 3 * S extra bytes, and omits the last serialno field.
3071
*/
3072
3073
static void *
3074
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
3075
0
{
3076
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3077
0
    uint8_t *p;           /* base address of malloc'ed pad block */
3078
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
3079
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3080
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
3081
3082
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3083
        /* integer overflow: can't represent total as a Py_ssize_t */
3084
0
        return NULL;
3085
0
    }
3086
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3087
3088
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
3089
                ^--- p    ^--- data   ^--- tail
3090
       S: nbytes stored as size_t
3091
       I: API identifier (1 byte)
3092
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
3093
       C: Clean bytes used later to store actual data
3094
       N: Serial number stored as size_t
3095
3096
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
3097
       is omitted. */
3098
3099
0
    if (use_calloc) {
3100
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
3101
0
    }
3102
0
    else {
3103
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
3104
0
    }
3105
0
    if (p == NULL) {
3106
0
        return NULL;
3107
0
    }
3108
0
    data = p + 2*SST;
3109
3110
#ifdef PYMEM_DEBUG_SERIALNO
3111
    bumpserialno();
3112
#endif
3113
3114
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
3115
0
    write_size_t(p, nbytes);
3116
0
    p[SST] = (uint8_t)api->api_id;
3117
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3118
3119
0
    if (nbytes > 0 && !use_calloc) {
3120
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
3121
0
    }
3122
3123
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
3124
0
    tail = data + nbytes;
3125
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3126
#ifdef PYMEM_DEBUG_SERIALNO
3127
    write_size_t(tail + SST, serialno);
3128
#endif
3129
3130
0
    return data;
3131
0
}
3132
3133
void *
3134
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
3135
0
{
3136
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3137
0
}
3138
3139
void *
3140
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3141
0
{
3142
0
    size_t nbytes;
3143
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3144
0
    nbytes = nelem * elsize;
3145
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3146
0
}
3147
3148
3149
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3150
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3151
   Then fills the original bytes with PYMEM_DEADBYTE.
3152
   Then calls the underlying free.
3153
*/
3154
void
3155
_PyMem_DebugRawFree(void *ctx, void *p)
3156
0
{
3157
    /* PyMem_Free(NULL) has no effect */
3158
0
    if (p == NULL) {
3159
0
        return;
3160
0
    }
3161
3162
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3163
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
3164
0
    size_t nbytes;
3165
3166
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3167
0
    nbytes = read_size_t(q);
3168
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
3169
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
3170
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
3171
0
    api->alloc.free(api->alloc.ctx, q);
3172
0
}
3173
3174
3175
void *
3176
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3177
0
{
3178
0
    if (p == NULL) {
3179
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3180
0
    }
3181
3182
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3183
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3184
0
    uint8_t *data;        /* pointer to data bytes */
3185
0
    uint8_t *r;
3186
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3187
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3188
0
    size_t original_nbytes;
3189
0
#define ERASED_SIZE 64
3190
3191
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3192
3193
0
    data = (uint8_t *)p;
3194
0
    head = data - 2*SST;
3195
0
    original_nbytes = read_size_t(head);
3196
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3197
        /* integer overflow: can't represent total as a Py_ssize_t */
3198
0
        return NULL;
3199
0
    }
3200
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3201
3202
0
    tail = data + original_nbytes;
3203
#ifdef PYMEM_DEBUG_SERIALNO
3204
    size_t block_serialno = read_size_t(tail + SST);
3205
#endif
3206
0
#ifndef Py_GIL_DISABLED
3207
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
3208
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
3209
     */
3210
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3211
0
    if (original_nbytes <= sizeof(save)) {
3212
0
        memcpy(save, data, original_nbytes);
3213
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3214
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3215
0
    }
3216
0
    else {
3217
0
        memcpy(save, data, ERASED_SIZE);
3218
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3219
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3220
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3221
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3222
0
    }
3223
0
#endif
3224
3225
    /* Resize and add decorations. */
3226
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3227
0
    if (r == NULL) {
3228
        /* if realloc() failed: rewrite header and footer which have
3229
           just been erased */
3230
0
        nbytes = original_nbytes;
3231
0
    }
3232
0
    else {
3233
0
        head = r;
3234
#ifdef PYMEM_DEBUG_SERIALNO
3235
        bumpserialno();
3236
        block_serialno = serialno;
3237
#endif
3238
0
    }
3239
0
    data = head + 2*SST;
3240
3241
0
    write_size_t(head, nbytes);
3242
0
    head[SST] = (uint8_t)api->api_id;
3243
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3244
3245
0
    tail = data + nbytes;
3246
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3247
#ifdef PYMEM_DEBUG_SERIALNO
3248
    write_size_t(tail + SST, block_serialno);
3249
#endif
3250
3251
0
#ifndef Py_GIL_DISABLED
3252
    /* Restore saved bytes. */
3253
0
    if (original_nbytes <= sizeof(save)) {
3254
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3255
0
    }
3256
0
    else {
3257
0
        size_t i = original_nbytes - ERASED_SIZE;
3258
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3259
0
        if (nbytes > i) {
3260
0
            memcpy(data + i, &save[ERASED_SIZE],
3261
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3262
0
        }
3263
0
    }
3264
0
#endif
3265
3266
0
    if (r == NULL) {
3267
0
        return NULL;
3268
0
    }
3269
3270
0
    if (nbytes > original_nbytes) {
3271
        /* growing: mark new extra memory clean */
3272
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3273
0
               nbytes - original_nbytes);
3274
0
    }
3275
3276
0
    return data;
3277
0
}
3278
3279
static inline void
3280
_PyMem_DebugCheckGIL(const char *func)
3281
0
{
3282
0
    PyThreadState *tstate = _PyThreadState_GET();
3283
0
    if (tstate == NULL) {
3284
0
#ifndef Py_GIL_DISABLED
3285
0
        _Py_FatalErrorFunc(func,
3286
0
                           "Python memory allocator called "
3287
0
                           "without holding the GIL");
3288
#else
3289
        _Py_FatalErrorFunc(func,
3290
                           "Python memory allocator called "
3291
                           "without an active thread state. "
3292
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3293
#endif
3294
0
    }
3295
0
}
3296
3297
void *
3298
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3299
0
{
3300
0
    _PyMem_DebugCheckGIL(__func__);
3301
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3302
0
}
3303
3304
void *
3305
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3306
0
{
3307
0
    _PyMem_DebugCheckGIL(__func__);
3308
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3309
0
}
3310
3311
3312
void
3313
_PyMem_DebugFree(void *ctx, void *ptr)
3314
0
{
3315
0
    _PyMem_DebugCheckGIL(__func__);
3316
0
    _PyMem_DebugRawFree(ctx, ptr);
3317
0
}
3318
3319
3320
void *
3321
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3322
0
{
3323
0
    _PyMem_DebugCheckGIL(__func__);
3324
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3325
0
}
3326
3327
/* Check the forbidden bytes on both ends of the memory allocated for p.
3328
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3329
 * and call Py_FatalError to kill the program.
3330
 * The API id, is also checked.
3331
 */
3332
static void
3333
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3334
0
{
3335
0
    assert(p != NULL);
3336
3337
0
    const uint8_t *q = (const uint8_t *)p;
3338
0
    size_t nbytes;
3339
0
    const uint8_t *tail;
3340
0
    int i;
3341
0
    char id;
3342
3343
    /* Check the API id */
3344
0
    id = (char)q[-SST];
3345
0
    if (id != api) {
3346
0
        _PyObject_DebugDumpAddress(p);
3347
0
        _Py_FatalErrorFormat(func,
3348
0
                             "bad ID: Allocated using API '%c', "
3349
0
                             "verified using API '%c'",
3350
0
                             id, api);
3351
0
    }
3352
3353
    /* Check the stuff at the start of p first:  if there's underwrite
3354
     * corruption, the number-of-bytes field may be nuts, and checking
3355
     * the tail could lead to a segfault then.
3356
     */
3357
0
    for (i = SST-1; i >= 1; --i) {
3358
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3359
0
            _PyObject_DebugDumpAddress(p);
3360
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3361
0
        }
3362
0
    }
3363
3364
0
    nbytes = read_size_t(q - 2*SST);
3365
0
    tail = q + nbytes;
3366
0
    for (i = 0; i < SST; ++i) {
3367
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3368
0
            _PyObject_DebugDumpAddress(p);
3369
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3370
0
        }
3371
0
    }
3372
0
}
3373
3374
/* Display info to stderr about the memory block at p. */
3375
static void
3376
_PyObject_DebugDumpAddress(const void *p)
3377
0
{
3378
0
    const uint8_t *q = (const uint8_t *)p;
3379
0
    const uint8_t *tail;
3380
0
    size_t nbytes;
3381
0
    int i;
3382
0
    int ok;
3383
0
    char id;
3384
3385
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3386
0
    if (p == NULL) {
3387
0
        fprintf(stderr, "\n");
3388
0
        return;
3389
0
    }
3390
0
    id = (char)q[-SST];
3391
0
    fprintf(stderr, " API '%c'\n", id);
3392
3393
0
    nbytes = read_size_t(q - 2*SST);
3394
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3395
3396
    /* In case this is nuts, check the leading pad bytes first. */
3397
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3398
0
    ok = 1;
3399
0
    for (i = 1; i <= SST-1; ++i) {
3400
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3401
0
            ok = 0;
3402
0
            break;
3403
0
        }
3404
0
    }
3405
0
    if (ok)
3406
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3407
0
    else {
3408
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3409
0
            PYMEM_FORBIDDENBYTE);
3410
0
        for (i = SST-1; i >= 1; --i) {
3411
0
            const uint8_t byte = *(q-i);
3412
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3413
0
            if (byte != PYMEM_FORBIDDENBYTE)
3414
0
                fputs(" *** OUCH", stderr);
3415
0
            fputc('\n', stderr);
3416
0
        }
3417
3418
0
        fputs("    Because memory is corrupted at the start, the "
3419
0
              "count of bytes requested\n"
3420
0
              "       may be bogus, and checking the trailing pad "
3421
0
              "bytes may segfault.\n", stderr);
3422
0
    }
3423
3424
0
    tail = q + nbytes;
3425
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3426
0
    ok = 1;
3427
0
    for (i = 0; i < SST; ++i) {
3428
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3429
0
            ok = 0;
3430
0
            break;
3431
0
        }
3432
0
    }
3433
0
    if (ok)
3434
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3435
0
    else {
3436
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3437
0
                PYMEM_FORBIDDENBYTE);
3438
0
        for (i = 0; i < SST; ++i) {
3439
0
            const uint8_t byte = tail[i];
3440
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3441
0
                    i, byte);
3442
0
            if (byte != PYMEM_FORBIDDENBYTE)
3443
0
                fputs(" *** OUCH", stderr);
3444
0
            fputc('\n', stderr);
3445
0
        }
3446
0
    }
3447
3448
#ifdef PYMEM_DEBUG_SERIALNO
3449
    size_t serial = read_size_t(tail + SST);
3450
    fprintf(stderr,
3451
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3452
            serial);
3453
#endif
3454
3455
0
    if (nbytes > 0) {
3456
0
        i = 0;
3457
0
        fputs("    Data at p:", stderr);
3458
        /* print up to 8 bytes at the start */
3459
0
        while (q < tail && i < 8) {
3460
0
            fprintf(stderr, " %02x", *q);
3461
0
            ++i;
3462
0
            ++q;
3463
0
        }
3464
        /* and up to 8 at the end */
3465
0
        if (q < tail) {
3466
0
            if (tail - q > 8) {
3467
0
                fputs(" ...", stderr);
3468
0
                q = tail - 8;
3469
0
            }
3470
0
            while (q < tail) {
3471
0
                fprintf(stderr, " %02x", *q);
3472
0
                ++q;
3473
0
            }
3474
0
        }
3475
0
        fputc('\n', stderr);
3476
0
    }
3477
0
    fputc('\n', stderr);
3478
3479
0
    fflush(stderr);
3480
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3481
0
}
3482
3483
3484
static size_t
3485
printone(FILE *out, const char* msg, size_t value)
3486
0
{
3487
0
    int i, k;
3488
0
    char buf[100];
3489
0
    size_t origvalue = value;
3490
3491
0
    fputs(msg, out);
3492
0
    for (i = (int)strlen(msg); i < 35; ++i)
3493
0
        fputc(' ', out);
3494
0
    fputc('=', out);
3495
3496
    /* Write the value with commas. */
3497
0
    i = 22;
3498
0
    buf[i--] = '\0';
3499
0
    buf[i--] = '\n';
3500
0
    k = 3;
3501
0
    do {
3502
0
        size_t nextvalue = value / 10;
3503
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3504
0
        value = nextvalue;
3505
0
        buf[i--] = (char)(digit + '0');
3506
0
        --k;
3507
0
        if (k == 0 && value && i >= 0) {
3508
0
            k = 3;
3509
0
            buf[i--] = ',';
3510
0
        }
3511
0
    } while (value && i >= 0);
3512
3513
0
    while (i >= 0)
3514
0
        buf[i--] = ' ';
3515
0
    fputs(buf, out);
3516
3517
0
    return origvalue;
3518
0
}
3519
3520
void
3521
_PyDebugAllocatorStats(FILE *out,
3522
                       const char *block_name, int num_blocks, size_t sizeof_block)
3523
0
{
3524
0
    char buf1[128];
3525
0
    char buf2[128];
3526
0
    PyOS_snprintf(buf1, sizeof(buf1),
3527
0
                  "%d %ss * %zd bytes each",
3528
0
                  num_blocks, block_name, sizeof_block);
3529
0
    PyOS_snprintf(buf2, sizeof(buf2),
3530
0
                  "%48s ", buf1);
3531
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3532
0
}
3533
3534
// Return true if the obmalloc state structure is heap allocated,
3535
// by PyMem_RawCalloc().  For the main interpreter, this structure
3536
// allocated in the BSS.  Allocating that way gives some memory savings
3537
// and a small performance win (at least on a demand paged OS).  On
3538
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3539
// memory is for the arena_map_top array.  Since normally only one entry
3540
// of that array is used, only one page of resident memory is actually
3541
// used, rather than the full 256 kB.
3542
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3543
0
{
3544
0
#if WITH_PYMALLOC
3545
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3546
#else
3547
    return false;
3548
#endif
3549
0
}
3550
3551
#ifdef WITH_PYMALLOC
3552
static void
3553
init_obmalloc_pools(PyInterpreterState *interp)
3554
36
{
3555
    // initialize the obmalloc->pools structure.  This must be done
3556
    // before the obmalloc alloc/free functions can be called.
3557
36
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3558
36
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3559
36
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3560
36
}
3561
#endif /* WITH_PYMALLOC */
3562
3563
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3564
36
{
3565
36
#ifdef WITH_PYMALLOC
3566
    /* Initialize obmalloc, but only for subinterpreters,
3567
       since the main interpreter is initialized statically. */
3568
36
    if (_Py_IsMainInterpreter(interp)
3569
0
            || _PyInterpreterState_HasFeature(interp,
3570
36
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3571
36
        interp->obmalloc = &obmalloc_state_main;
3572
36
        if (!obmalloc_state_initialized) {
3573
36
            init_obmalloc_pools(interp);
3574
36
            obmalloc_state_initialized = true;
3575
36
        }
3576
36
    } else {
3577
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3578
0
        if (interp->obmalloc == NULL) {
3579
0
            return -1;
3580
0
        }
3581
0
        init_obmalloc_pools(interp);
3582
0
    }
3583
36
#endif /* WITH_PYMALLOC */
3584
36
    return 0; // success
3585
36
}
3586
3587
3588
#ifdef WITH_PYMALLOC
3589
3590
static void
3591
free_obmalloc_arenas(PyInterpreterState *interp)
3592
0
{
3593
0
    OMState *state = interp->obmalloc;
3594
0
    for (uint i = 0; i < maxarenas; ++i) {
3595
        // free each obmalloc memory arena
3596
0
        struct arena_object *ao = &allarenas[i];
3597
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3598
0
                             (void *)ao->address, ARENA_SIZE);
3599
0
    }
3600
    // free the array containing pointers to all arenas
3601
0
    PyMem_RawFree(allarenas);
3602
0
#if WITH_PYMALLOC_RADIX_TREE
3603
0
#ifdef USE_INTERIOR_NODES
3604
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3605
    // by arena_map_mark_used() but not freed when arenas are freed.
3606
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3607
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3608
0
         if (mid == NULL) {
3609
0
             continue;
3610
0
         }
3611
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3612
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3613
0
            if (bot == NULL) {
3614
0
                continue;
3615
0
            }
3616
0
            PyMem_RawFree(bot);
3617
0
         }
3618
0
         PyMem_RawFree(mid);
3619
0
    }
3620
0
#endif
3621
0
#endif
3622
0
}
3623
3624
#ifdef Py_DEBUG
3625
/* Is target in the list?  The list is traversed via the nextpool pointers.
3626
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3627
 * list, else 0.
3628
 */
3629
static int
3630
pool_is_in_list(const poolp target, poolp list)
3631
{
3632
    poolp origlist = list;
3633
    assert(target != NULL);
3634
    if (list == NULL)
3635
        return 0;
3636
    do {
3637
        if (target == list)
3638
            return 1;
3639
        list = list->nextpool;
3640
    } while (list != NULL && list != origlist);
3641
    return 0;
3642
}
3643
#endif
3644
3645
#ifdef WITH_MIMALLOC
3646
struct _alloc_stats {
3647
    size_t allocated_blocks;
3648
    size_t allocated_bytes;
3649
    size_t allocated_with_overhead;
3650
    size_t bytes_reserved;
3651
    size_t bytes_committed;
3652
};
3653
3654
static bool _collect_alloc_stats(
3655
    const mi_heap_t* heap, const mi_heap_area_t* area,
3656
    void* block, size_t block_size, void* arg)
3657
0
{
3658
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3659
0
    stats->allocated_blocks += area->used;
3660
0
    stats->allocated_bytes += area->used * area->block_size;
3661
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3662
0
    stats->bytes_reserved += area->reserved;
3663
0
    stats->bytes_committed += area->committed;
3664
0
    return 1;
3665
0
}
3666
3667
static void
3668
py_mimalloc_print_stats(FILE *out)
3669
0
{
3670
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3671
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3672
0
    fprintf(out, "Medium block threshold = %zu\n",
3673
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3674
0
    fprintf(out, "Large object max size = %zu\n",
3675
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3676
3677
0
    mi_heap_t *heap = mi_heap_get_default();
3678
0
    struct _alloc_stats stats;
3679
0
    memset(&stats, 0, sizeof(stats));
3680
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3681
3682
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3683
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3684
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3685
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3686
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3687
0
}
3688
#endif
3689
3690
3691
static void
3692
pymalloc_print_stats(FILE *out)
3693
0
{
3694
0
    OMState *state = get_state();
3695
3696
0
    uint i;
3697
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3698
    /* # of pools, allocated blocks, and free blocks per class index */
3699
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3700
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3701
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3702
    /* total # of allocated bytes in used and full pools */
3703
0
    size_t allocated_bytes = 0;
3704
    /* total # of available bytes in used pools */
3705
0
    size_t available_bytes = 0;
3706
    /* # of free pools + pools not yet carved out of current arena */
3707
0
    uint numfreepools = 0;
3708
    /* # of bytes for arena alignment padding */
3709
0
    size_t arena_alignment = 0;
3710
    /* # of bytes in used and full pools used for pool_headers */
3711
0
    size_t pool_header_bytes = 0;
3712
    /* # of bytes in used and full pools wasted due to quantization,
3713
     * i.e. the necessarily leftover space at the ends of used and
3714
     * full pools.
3715
     */
3716
0
    size_t quantization = 0;
3717
    /* # of arenas actually allocated. */
3718
0
    size_t narenas = 0;
3719
    /* running total -- should equal narenas * ARENA_SIZE */
3720
0
    size_t total;
3721
0
    char buf[128];
3722
3723
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3724
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3725
3726
0
    for (i = 0; i < numclasses; ++i)
3727
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3728
3729
    /* Because full pools aren't linked to from anything, it's easiest
3730
     * to march over all the arenas.  If we're lucky, most of the memory
3731
     * will be living in full pools -- would be a shame to miss them.
3732
     */
3733
0
    for (i = 0; i < maxarenas; ++i) {
3734
0
        uintptr_t base = allarenas[i].address;
3735
3736
        /* Skip arenas which are not allocated. */
3737
0
        if (allarenas[i].address == (uintptr_t)NULL)
3738
0
            continue;
3739
0
        narenas += 1;
3740
3741
0
        numfreepools += allarenas[i].nfreepools;
3742
3743
        /* round up to pool alignment */
3744
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3745
0
            arena_alignment += POOL_SIZE;
3746
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3747
0
            base += POOL_SIZE;
3748
0
        }
3749
3750
        /* visit every pool in the arena */
3751
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3752
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3753
0
            poolp p = (poolp)base;
3754
0
            const uint sz = p->szidx;
3755
0
            uint freeblocks;
3756
3757
0
            if (p->ref.count == 0) {
3758
                /* currently unused */
3759
#ifdef Py_DEBUG
3760
                assert(pool_is_in_list(p, allarenas[i].freepools));
3761
#endif
3762
0
                continue;
3763
0
            }
3764
0
            ++numpools[sz];
3765
0
            numblocks[sz] += p->ref.count;
3766
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3767
0
            numfreeblocks[sz] += freeblocks;
3768
#ifdef Py_DEBUG
3769
            if (freeblocks > 0)
3770
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3771
#endif
3772
0
        }
3773
0
    }
3774
0
    assert(narenas == narenas_currently_allocated);
3775
3776
0
    fputc('\n', out);
3777
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3778
0
          "-----   ----   ---------   -------------  ------------\n",
3779
0
          out);
3780
3781
0
    for (i = 0; i < numclasses; ++i) {
3782
0
        size_t p = numpools[i];
3783
0
        size_t b = numblocks[i];
3784
0
        size_t f = numfreeblocks[i];
3785
0
        uint size = INDEX2SIZE(i);
3786
0
        if (p == 0) {
3787
0
            assert(b == 0 && f == 0);
3788
0
            continue;
3789
0
        }
3790
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3791
0
                i, size, p, b, f);
3792
0
        allocated_bytes += b * size;
3793
0
        available_bytes += f * size;
3794
0
        pool_header_bytes += p * POOL_OVERHEAD;
3795
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3796
0
    }
3797
0
    fputc('\n', out);
3798
#ifdef PYMEM_DEBUG_SERIALNO
3799
    if (_PyMem_DebugEnabled()) {
3800
        (void)printone(out, "# times object malloc called", serialno);
3801
    }
3802
#endif
3803
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3804
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3805
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3806
0
    (void)printone(out, "# arenas allocated current", narenas);
3807
3808
0
    PyOS_snprintf(buf, sizeof(buf),
3809
0
                  "%zu arenas * %d bytes/arena",
3810
0
                  narenas, ARENA_SIZE);
3811
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3812
3813
0
    fputc('\n', out);
3814
3815
    /* Account for what all of those arena bytes are being used for. */
3816
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3817
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3818
3819
0
    PyOS_snprintf(buf, sizeof(buf),
3820
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3821
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3822
3823
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3824
0
    total += printone(out, "# bytes lost to quantization", quantization);
3825
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3826
0
    (void)printone(out, "Total", total);
3827
0
    assert(narenas * ARENA_SIZE == total);
3828
3829
0
#if WITH_PYMALLOC_RADIX_TREE
3830
0
    fputs("\narena map counts\n", out);
3831
0
#ifdef USE_INTERIOR_NODES
3832
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3833
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3834
0
    fputc('\n', out);
3835
0
#endif
3836
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3837
0
#ifdef USE_INTERIOR_NODES
3838
0
    total += printone(out, "# bytes lost to arena map mid",
3839
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3840
0
    total += printone(out, "# bytes lost to arena map bot",
3841
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3842
0
    (void)printone(out, "Total", total);
3843
0
#endif
3844
0
#endif
3845
3846
0
}
3847
3848
/* Print summary info to "out" about the state of pymalloc's structures.
3849
 * In Py_DEBUG mode, also perform some expensive internal consistency
3850
 * checks.
3851
 *
3852
 * Return 0 if the memory debug hooks are not installed or no statistics was
3853
 * written into out, return 1 otherwise.
3854
 */
3855
int
3856
_PyObject_DebugMallocStats(FILE *out)
3857
0
{
3858
0
#ifdef WITH_MIMALLOC
3859
0
    if (_PyMem_MimallocEnabled()) {
3860
0
        py_mimalloc_print_stats(out);
3861
0
        return 1;
3862
0
    }
3863
0
    else
3864
0
#endif
3865
0
    if (_PyMem_PymallocEnabled()) {
3866
0
        pymalloc_print_stats(out);
3867
0
        return 1;
3868
0
    }
3869
0
    else {
3870
0
        return 0;
3871
0
    }
3872
0
}
3873
3874
#endif /* #ifdef WITH_PYMALLOC */