Coverage Report

Created: 2026-06-01 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Objects/obmalloc.c
Line
Count
Source
1
/* Python's malloc wrappers (see pymem.h) */
2
3
#include "Python.h"
4
#include "pycore_interp.h"        // _PyInterpreterState_HasFeature
5
#include "pycore_mmap.h"          // _PyAnnotateMemoryMap()
6
#include "pycore_object.h"        // _PyDebugAllocatorStats() definition
7
#include "pycore_obmalloc.h"
8
#include "pycore_obmalloc_init.h"
9
#include "pycore_pyerrors.h"      // _Py_FatalErrorFormat()
10
#include "pycore_pymem.h"
11
#include "pycore_pystate.h"       // _PyInterpreterState_GET
12
#include "pycore_stats.h"         // OBJECT_STAT_INC_COND()
13
14
#include <stdlib.h>               // malloc()
15
#include <stdbool.h>
16
#include <stdio.h>                // fopen(), fgets(), sscanf()
17
#include <errno.h>                // errno
18
#ifdef WITH_MIMALLOC
19
// Forward declarations of functions used in our mimalloc modifications
20
static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
21
static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
22
static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
23
static void _PyMem_mi_page_reclaimed(mi_page_t *page);
24
static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
25
#  include "pycore_mimalloc.h"
26
#  include "mimalloc/static.c"
27
#  include "mimalloc/internal.h"  // for stats
28
#endif
29
30
#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
31
#  error "Py_GIL_DISABLED requires WITH_MIMALLOC"
32
#endif
33
34
#undef  uint
35
111M
#define uint pymem_uint
36
37
38
/* Defined in tracemalloc.c */
39
extern void _PyMem_DumpTraceback(int fd, const void *ptr);
40
41
static void _PyObject_DebugDumpAddress(const void *p);
42
static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
43
44
45
static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
46
static void set_up_debug_hooks_unlocked(void);
47
static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
48
static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
49
50
51
/***************************************/
52
/* low-level allocator implementations */
53
/***************************************/
54
55
/* the default raw allocator (wraps malloc) */
56
57
void *
58
_PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
59
1.87M
{
60
    /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
61
       for malloc(0), which would be treated as an error. Some platforms would
62
       return a pointer with no memory behind it, which would break pymalloc.
63
       To solve these problems, allocate an extra byte. */
64
1.87M
    if (size == 0)
65
38.0k
        size = 1;
66
1.87M
    return malloc(size);
67
1.87M
}
68
69
void *
70
_PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
71
261k
{
72
    /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
73
       for calloc(0, 0), which would be treated as an error. Some platforms
74
       would return a pointer with no memory behind it, which would break
75
       pymalloc.  To solve these problems, allocate an extra byte. */
76
261k
    if (nelem == 0 || elsize == 0) {
77
0
        nelem = 1;
78
0
        elsize = 1;
79
0
    }
80
261k
    return calloc(nelem, elsize);
81
261k
}
82
83
void *
84
_PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
85
597k
{
86
597k
    if (size == 0)
87
0
        size = 1;
88
597k
    return realloc(ptr, size);
89
597k
}
90
91
void
92
_PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
93
2.10M
{
94
2.10M
    free(ptr);
95
2.10M
}
96
97
#ifdef WITH_MIMALLOC
98
99
static void
100
_PyMem_mi_page_clear_qsbr(mi_page_t *page)
101
0
{
102
#ifdef Py_GIL_DISABLED
103
    // Clear the QSBR goal and remove the page from the QSBR linked list.
104
    page->qsbr_goal = 0;
105
    if (page->qsbr_node.next != NULL) {
106
        llist_remove(&page->qsbr_node);
107
    }
108
#endif
109
0
}
110
111
// Check if an empty, newly reclaimed page is safe to free now.
112
static bool
113
_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
114
0
{
115
0
    assert(mi_page_all_free(page));
116
#ifdef Py_GIL_DISABLED
117
    assert(page->qsbr_node.next == NULL);
118
    if (page->use_qsbr && page->qsbr_goal != 0) {
119
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
120
        if (tstate == NULL) {
121
            return false;
122
        }
123
        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
124
    }
125
#endif
126
0
    return true;
127
128
0
}
129
130
#ifdef Py_GIL_DISABLED
131
132
// If we are deferring collection of more than this amount of memory for
133
// mimalloc pages, advance the write sequence.  Advancing allows these
134
// pages to be re-used in a different thread or for a different size class.
135
#define QSBR_PAGE_MEM_LIMIT 4096*20
136
137
// Return true if the global write sequence should be advanced for a mimalloc
138
// page that is deferred from collection.
139
static bool
140
should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
141
{
142
    size_t bsize = mi_page_block_size(page);
143
    size_t page_size = page->capacity*bsize;
144
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
145
        qsbr->deferred_page_memory = 0;
146
        return true;
147
    }
148
    qsbr->deferred_page_memory += page_size;
149
    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
150
        qsbr->deferred_page_memory = 0;
151
        return true;
152
    }
153
    return false;
154
}
155
156
static _PyThreadStateImpl *
157
tstate_from_heap(mi_heap_t *heap)
158
{
159
    return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
160
}
161
#endif
162
163
static bool
164
_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
165
0
{
166
#ifdef Py_GIL_DISABLED
167
    assert(mi_page_all_free(page));
168
    if (page->use_qsbr) {
169
        struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
170
        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
171
            _PyMem_mi_page_clear_qsbr(page);
172
            _mi_page_free(page, pq, force);
173
            return true;
174
        }
175
176
        // gh-145615: since we are not freeing this page yet, we want to
177
        // make it available for allocations. Note that the QSBR goal and
178
        // linked list node remain set even if the page is later used for
179
        // an allocation. We only detect and clear the QSBR goal when the
180
        // page becomes empty again (used == 0).
181
        if (mi_page_is_in_full(page)) {
182
            _mi_page_unfull(page);
183
        }
184
185
        _PyMem_mi_page_clear_qsbr(page);
186
        page->retire_expire = 0;
187
188
        if (should_advance_qsbr_for_page(qsbr, page)) {
189
            page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
190
        }
191
        else {
192
            page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
193
        }
194
195
        // We may be freeing a page belonging to a different thread during a
196
        // stop-the-world event. Find the _PyThreadStateImpl for the page.
197
        _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
198
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
199
        return false;
200
    }
201
#endif
202
0
    _mi_page_free(page, pq, force);
203
0
    return true;
204
0
}
205
206
static void
207
_PyMem_mi_page_reclaimed(mi_page_t *page)
208
0
{
209
#ifdef Py_GIL_DISABLED
210
    assert(page->qsbr_node.next == NULL);
211
    if (page->qsbr_goal != 0) {
212
        if (mi_page_all_free(page)) {
213
            assert(page->qsbr_node.next == NULL);
214
            _PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
215
            assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
216
            page->retire_expire = 0;
217
            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
218
        }
219
        else {
220
            page->qsbr_goal = 0;
221
        }
222
    }
223
#endif
224
0
}
225
226
static void
227
_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
228
0
{
229
#ifdef Py_GIL_DISABLED
230
    if (!heap->page_use_qsbr) {
231
        return;
232
    }
233
234
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
235
    struct llist_node *head = &tstate->mimalloc.page_list;
236
    if (llist_empty(head)) {
237
        return;
238
    }
239
240
    struct llist_node *node;
241
    llist_for_each_safe(node, head) {
242
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
243
        if (!mi_page_all_free(page)) {
244
            // We allocated from this page some point after the delayed free
245
            _PyMem_mi_page_clear_qsbr(page);
246
            continue;
247
        }
248
249
        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
250
            return;
251
        }
252
253
        _PyMem_mi_page_clear_qsbr(page);
254
        _mi_page_free(page, mi_page_queue_of(page), false);
255
    }
256
#endif
257
0
}
258
259
void *
260
_PyMem_MiMalloc(void *ctx, size_t size)
261
0
{
262
#ifdef Py_GIL_DISABLED
263
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
264
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
265
    return mi_heap_malloc(heap, size);
266
#else
267
0
    return mi_malloc(size);
268
0
#endif
269
0
}
270
271
void *
272
_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
273
0
{
274
#ifdef Py_GIL_DISABLED
275
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
276
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
277
    return mi_heap_calloc(heap, nelem, elsize);
278
#else
279
0
    return mi_calloc(nelem, elsize);
280
0
#endif
281
0
}
282
283
void *
284
_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
285
0
{
286
#ifdef Py_GIL_DISABLED
287
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
288
    mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
289
    return mi_heap_realloc(heap, ptr, size);
290
#else
291
0
    return mi_realloc(ptr, size);
292
0
#endif
293
0
}
294
295
void
296
_PyMem_MiFree(void *ctx, void *ptr)
297
0
{
298
0
    mi_free(ptr);
299
0
}
300
301
void *
302
_PyObject_MiMalloc(void *ctx, size_t nbytes)
303
0
{
304
#ifdef Py_GIL_DISABLED
305
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
306
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
307
    return mi_heap_malloc(heap, nbytes);
308
#else
309
0
    return mi_malloc(nbytes);
310
0
#endif
311
0
}
312
313
void *
314
_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
315
0
{
316
#ifdef Py_GIL_DISABLED
317
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
318
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
319
    return mi_heap_calloc(heap, nelem, elsize);
320
#else
321
0
    return mi_calloc(nelem, elsize);
322
0
#endif
323
0
}
324
325
326
void *
327
_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
328
0
{
329
#ifdef Py_GIL_DISABLED
330
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
331
    // Implement our own realloc logic so that we can copy PyObject header
332
    // in a thread-safe way.
333
    size_t size = mi_usable_size(ptr);
334
    if (nbytes <= size && nbytes >= (size / 2) && nbytes > 0) {
335
        return ptr;
336
    }
337
338
    mi_heap_t *heap = tstate->mimalloc.current_object_heap;
339
    void* newp = mi_heap_malloc(heap, nbytes);
340
    if (newp == NULL) {
341
        return NULL;
342
    }
343
344
    // Free threaded Python allows access from other threads to the PyObject reference count
345
    // fields for a period of time after the object is freed (see InternalDocs/qsbr.md).
346
    // These fields are typically initialized by PyObject_Init() using relaxed
347
    // atomic stores. We need to copy these fields in a thread-safe way here.
348
    // We use the "debug_offset" to determine how many bytes to copy -- it
349
    // includes the PyObject header and plus any extra pre-headers.
350
    size_t offset = heap->debug_offset;
351
    assert(offset % sizeof(void*) == 0);
352
353
    size_t copy_size = (size < nbytes ? size : nbytes);
354
    if (copy_size >= offset) {
355
        for (size_t i = 0; i != offset; i += sizeof(void*)) {
356
            // Use memcpy to avoid strict-aliasing issues. However, we probably
357
            // still have unavoidable strict-aliasing issues with
358
            // _Py_atomic_store_ptr_relaxed here.
359
            void *word;
360
            memcpy(&word, (char*)ptr + i, sizeof(void*));
361
            _Py_atomic_store_ptr_relaxed((void**)((char*)newp + i), word);
362
        }
363
        _mi_memcpy((char*)newp + offset, (char*)ptr + offset, copy_size - offset);
364
    }
365
    else {
366
        _mi_memcpy(newp, ptr, copy_size);
367
    }
368
    mi_free(ptr);
369
    return newp;
370
#else
371
0
    return mi_realloc(ptr, nbytes);
372
0
#endif
373
0
}
374
375
void
376
_PyObject_MiFree(void *ctx, void *ptr)
377
0
{
378
0
    mi_free(ptr);
379
0
}
380
381
void *
382
_PyMem_MiRawMalloc(void *ctx, size_t size)
383
0
{
384
0
    return mi_malloc(size);
385
0
}
386
387
void *
388
_PyMem_MiRawCalloc(void *ctx, size_t nelem, size_t elsize)
389
0
{
390
0
    return mi_calloc(nelem, elsize);
391
0
}
392
393
void *
394
_PyMem_MiRawRealloc(void *ctx, void *ptr, size_t size)
395
0
{
396
0
    return mi_realloc(ptr, size);
397
0
}
398
399
void
400
_PyMem_MiRawFree(void *ctx, void *ptr)
401
0
{
402
0
    mi_free(ptr);
403
0
}
404
#endif // WITH_MIMALLOC
405
406
407
0
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
408
409
410
#ifdef WITH_MIMALLOC
411
0
#  define MIMALLOC_ALLOC    {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
412
0
#  define MIMALLOC_RAWALLOC {NULL, _PyMem_MiRawMalloc, _PyMem_MiRawCalloc, _PyMem_MiRawRealloc, _PyMem_MiRawFree}
413
0
#  define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
414
#endif
415
416
/* the pymalloc allocator */
417
418
// The actual implementation is further down.
419
420
#if defined(WITH_PYMALLOC)
421
void* _PyObject_Malloc(void *ctx, size_t size);
422
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
423
void _PyObject_Free(void *ctx, void *p);
424
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
425
0
#  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
426
#endif  // WITH_PYMALLOC
427
428
#if defined(Py_GIL_DISABLED)
429
// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
430
#  define PYRAW_ALLOC MIMALLOC_RAWALLOC
431
#  define PYMEM_ALLOC MIMALLOC_ALLOC
432
#  define PYOBJ_ALLOC MIMALLOC_OBJALLOC
433
#elif defined(WITH_PYMALLOC)
434
0
#  define PYRAW_ALLOC MALLOC_ALLOC
435
0
#  define PYMEM_ALLOC PYMALLOC_ALLOC
436
0
#  define PYOBJ_ALLOC PYMALLOC_ALLOC
437
#else
438
#  define PYRAW_ALLOC MALLOC_ALLOC
439
#  define PYMEM_ALLOC MALLOC_ALLOC
440
#  define PYOBJ_ALLOC MALLOC_ALLOC
441
#endif
442
443
444
/* the default debug allocators */
445
446
// The actual implementation is further down.
447
448
void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
449
void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
450
void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
451
void _PyMem_DebugRawFree(void *ctx, void *ptr);
452
453
void* _PyMem_DebugMalloc(void *ctx, size_t size);
454
void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
455
void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
456
void _PyMem_DebugFree(void *ctx, void *p);
457
458
#define PYDBGRAW_ALLOC \
459
0
    {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
460
#define PYDBGMEM_ALLOC \
461
0
    {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
462
#define PYDBGOBJ_ALLOC \
463
0
    {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
464
465
/* default raw allocator (not swappable) */
466
467
void *
468
_PyMem_DefaultRawMalloc(size_t size)
469
140
{
470
#ifdef Py_DEBUG
471
    return _PyMem_DebugRawMalloc(&_PyRuntime.allocators.debug.raw, size);
472
#else
473
140
    return _PyMem_RawMalloc(NULL, size);
474
140
#endif
475
140
}
476
477
void *
478
_PyMem_DefaultRawCalloc(size_t nelem, size_t elsize)
479
0
{
480
#ifdef Py_DEBUG
481
    return _PyMem_DebugRawCalloc(&_PyRuntime.allocators.debug.raw, nelem, elsize);
482
#else
483
0
    return _PyMem_RawCalloc(NULL, nelem, elsize);
484
0
#endif
485
0
}
486
487
void *
488
_PyMem_DefaultRawRealloc(void *ptr, size_t size)
489
0
{
490
#ifdef Py_DEBUG
491
    return _PyMem_DebugRawRealloc(&_PyRuntime.allocators.debug.raw, ptr, size);
492
#else
493
0
    return _PyMem_RawRealloc(NULL, ptr, size);
494
0
#endif
495
0
}
496
497
void
498
_PyMem_DefaultRawFree(void *ptr)
499
160
{
500
#ifdef Py_DEBUG
501
    _PyMem_DebugRawFree(&_PyRuntime.allocators.debug.raw, ptr);
502
#else
503
160
    _PyMem_RawFree(NULL, ptr);
504
160
#endif
505
160
}
506
507
wchar_t*
508
_PyMem_DefaultRawWcsdup(const wchar_t *str)
509
100
{
510
100
    assert(str != NULL);
511
512
100
    size_t len = wcslen(str);
513
100
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
514
0
        return NULL;
515
0
    }
516
517
100
    size_t size = (len + 1) * sizeof(wchar_t);
518
100
    wchar_t *str2 = _PyMem_DefaultRawMalloc(size);
519
100
    if (str2 == NULL) {
520
0
        return NULL;
521
0
    }
522
523
100
    memcpy(str2, str, size);
524
100
    return str2;
525
100
}
526
527
/* the low-level virtual memory allocator */
528
529
#ifdef WITH_PYMALLOC
530
#  ifdef MS_WINDOWS
531
#    include <windows.h>
532
#  elif defined(HAVE_MMAP)
533
#    include <sys/mman.h>
534
#    ifdef MAP_ANONYMOUS
535
#      define ARENAS_USE_MMAP
536
#    endif
537
#  endif
538
#endif
539
540
/* Return the system's default huge page size in bytes, or 0 if it
541
 * cannot be determined.  The result is cached after the first call.
542
 *
543
 * This is Linux-only (/proc/meminfo).  On other systems that define
544
 * MAP_HUGETLB the caller should skip huge pages gracefully. */
545
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
546
static size_t
547
_pymalloc_system_hugepage_size(void)
548
{
549
    static size_t hp_size = 0;
550
    static int initialized = 0;
551
552
    if (initialized) {
553
        return hp_size;
554
    }
555
556
#ifdef __linux__
557
    FILE *f = fopen("/proc/meminfo", "r");
558
    if (f != NULL) {
559
        char line[256];
560
        while (fgets(line, sizeof(line), f)) {
561
            unsigned long size_kb;
562
            if (sscanf(line, "Hugepagesize: %lu kB", &size_kb) == 1) {
563
                hp_size = (size_t)size_kb * 1024;
564
                break;
565
            }
566
        }
567
        fclose(f);
568
    }
569
#endif
570
571
    initialized = 1;
572
    return hp_size;
573
}
574
#endif
575
576
#if (defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)) || \
577
    (defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB))
578
static size_t
579
_pymalloc_round_up_to_multiple(size_t size, size_t multiple)
580
{
581
    if (multiple == 0 || size == 0) {
582
        return size;
583
    }
584
585
    size_t remainder = size % multiple;
586
    if (remainder == 0) {
587
        return size;
588
    }
589
590
    size_t padding = multiple - remainder;
591
    if (size > SIZE_MAX - padding) {
592
        return 0;
593
    }
594
    return size + padding;
595
}
596
#endif
597
598
static size_t
599
_pymalloc_virtual_alloc_size(size_t size)
600
20
{
601
#if defined(MS_WINDOWS) && defined(PYMALLOC_USE_HUGEPAGES)
602
    if (_PyRuntime.allocators.use_hugepages) {
603
        SIZE_T large_page_size = GetLargePageMinimum();
604
        if (large_page_size > 0) {
605
            return _pymalloc_round_up_to_multiple(size, (size_t)large_page_size);
606
        }
607
    }
608
#elif defined(PYMALLOC_USE_HUGEPAGES) && defined(ARENAS_USE_MMAP) && defined(MAP_HUGETLB)
609
    if (_PyRuntime.allocators.use_hugepages) {
610
        size_t hp_size = _pymalloc_system_hugepage_size();
611
        if (hp_size > 0) {
612
            return _pymalloc_round_up_to_multiple(size, hp_size);
613
        }
614
    }
615
#endif
616
20
    return size;
617
20
}
618
619
void *
620
_PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
621
619
{
622
#ifdef MS_WINDOWS
623
#  ifdef PYMALLOC_USE_HUGEPAGES
624
    if (_PyRuntime.allocators.use_hugepages) {
625
        SIZE_T lp_size = GetLargePageMinimum();
626
        if (lp_size > 0 && size % lp_size == 0) {
627
            void *ptr = VirtualAlloc(NULL, size,
628
                            MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES,
629
                            PAGE_READWRITE);
630
            if (ptr != NULL)
631
                return ptr;
632
        }
633
    }
634
    /* Fall back to regular pages */
635
#  endif
636
    return VirtualAlloc(NULL, size,
637
                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
638
#elif defined(ARENAS_USE_MMAP)
639
    void *ptr;
640
#  ifdef PYMALLOC_USE_HUGEPAGES
641
#    ifdef MAP_HUGETLB
642
    if (_PyRuntime.allocators.use_hugepages) {
643
        size_t hp_size = _pymalloc_system_hugepage_size();
644
        /* Only use huge pages if the arena size is a multiple of the
645
         * system's default huge page size.  When the arena is smaller
646
         * than the huge page, mmap still succeeds but silently
647
         * allocates an entire huge page; the subsequent munmap with
648
         * the smaller arena size then fails with EINVAL, leaking
649
         * all of that memory. */
650
        if (hp_size > 0 && size % hp_size == 0) {
651
            ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
652
                       MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
653
            if (ptr != MAP_FAILED) {
654
                assert(ptr != NULL);
655
                (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage");
656
                return ptr;
657
            }
658
        }
659
    }
660
    /* Fall back to regular pages */
661
#    endif
662
#  endif
663
619
    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
664
619
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
665
619
    if (ptr == MAP_FAILED)
666
0
        return NULL;
667
619
    assert(ptr != NULL);
668
619
#ifdef MADV_HUGEPAGE
669
619
    (void)madvise(ptr, size, MADV_HUGEPAGE);
670
619
#endif
671
619
    (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc");
672
619
    return ptr;
673
#else
674
    return malloc(size);
675
#endif
676
619
}
677
678
void
679
_PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
680
#if defined(ARENAS_USE_MMAP)
681
    size_t size
682
#else
683
    size_t Py_UNUSED(size)
684
#endif
685
)
686
499
{
687
#ifdef MS_WINDOWS
688
    /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
689
    if (ptr == NULL) {
690
        return;
691
    }
692
    VirtualFree(ptr, 0, MEM_RELEASE);
693
#elif defined(ARENAS_USE_MMAP)
694
    /* Unlike free(), munmap() does not special-case NULL to noop. */
695
499
    if (ptr == NULL) {
696
0
        return;
697
0
    }
698
499
    if (munmap(ptr, size) < 0) {
699
0
        _Py_FatalErrorFormat(__func__,
700
0
                             "munmap(%p, %zu) failed with errno %d",
701
0
                             ptr, size, errno);
702
0
    }
703
#else
704
    free(ptr);
705
#endif
706
499
}
707
708
/*******************************************/
709
/* end low-level allocator implementations */
710
/*******************************************/
711
712
713
40
#define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
714
9.68M
#define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
715
137M
#define _PyMem (_PyRuntime.allocators.standard.mem)
716
292M
#define _PyObject (_PyRuntime.allocators.standard.obj)
717
0
#define _PyMem_Debug (_PyRuntime.allocators.debug)
718
2.23k
#define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
719
720
721
/***************************/
722
/* managing the allocators */
723
/***************************/
724
725
static int
726
set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
727
                               PyMemAllocatorEx *old_alloc)
728
0
{
729
0
    if (old_alloc != NULL) {
730
0
        get_allocator_unlocked(domain, old_alloc);
731
0
    }
732
733
734
0
    PyMemAllocatorEx new_alloc;
735
0
    switch(domain)
736
0
    {
737
0
    case PYMEM_DOMAIN_RAW:
738
0
        new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
739
0
        break;
740
0
    case PYMEM_DOMAIN_MEM:
741
0
        new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
742
0
        break;
743
0
    case PYMEM_DOMAIN_OBJ:
744
0
        new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
745
0
        break;
746
0
    default:
747
        /* unknown domain */
748
0
        return -1;
749
0
    }
750
0
    set_allocator_unlocked(domain, &new_alloc);
751
0
    if (debug) {
752
0
        set_up_debug_hooks_domain_unlocked(domain);
753
0
    }
754
0
    return 0;
755
0
}
756
757
758
#ifdef Py_DEBUG
759
static const int pydebug = 1;
760
#else
761
static const int pydebug = 0;
762
#endif
763
764
int
765
_PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
766
0
{
767
0
    if (name == NULL || *name == '\0') {
768
        /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
769
           nameions): use default memory allocators */
770
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
771
0
    }
772
0
    else if (strcmp(name, "default") == 0) {
773
0
        *allocator = PYMEM_ALLOCATOR_DEFAULT;
774
0
    }
775
0
    else if (strcmp(name, "debug") == 0) {
776
0
        *allocator = PYMEM_ALLOCATOR_DEBUG;
777
0
    }
778
0
#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
779
0
    else if (strcmp(name, "pymalloc") == 0) {
780
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC;
781
0
    }
782
0
    else if (strcmp(name, "pymalloc_debug") == 0) {
783
0
        *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
784
0
    }
785
0
#endif
786
0
#ifdef WITH_MIMALLOC
787
0
    else if (strcmp(name, "mimalloc") == 0) {
788
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC;
789
0
    }
790
0
    else if (strcmp(name, "mimalloc_debug") == 0) {
791
0
        *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
792
0
    }
793
0
#endif
794
0
#ifndef Py_GIL_DISABLED
795
0
    else if (strcmp(name, "malloc") == 0) {
796
0
        *allocator = PYMEM_ALLOCATOR_MALLOC;
797
0
    }
798
0
    else if (strcmp(name, "malloc_debug") == 0) {
799
0
        *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
800
0
    }
801
0
#endif
802
0
    else {
803
        /* unknown allocator */
804
0
        return -1;
805
0
    }
806
0
    return 0;
807
0
}
808
809
810
static int
811
set_up_allocators_unlocked(PyMemAllocatorName allocator)
812
0
{
813
0
    switch (allocator) {
814
0
    case PYMEM_ALLOCATOR_NOT_SET:
815
        /* do nothing */
816
0
        break;
817
818
0
    case PYMEM_ALLOCATOR_DEFAULT:
819
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
820
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
821
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
822
0
        _PyRuntime.allocators.is_debug_enabled = pydebug;
823
0
        break;
824
825
0
    case PYMEM_ALLOCATOR_DEBUG:
826
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
827
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
828
0
        (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
829
0
        _PyRuntime.allocators.is_debug_enabled = 1;
830
0
        break;
831
832
0
#ifdef WITH_PYMALLOC
833
0
    case PYMEM_ALLOCATOR_PYMALLOC:
834
0
    case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
835
0
    {
836
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
837
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
838
839
0
        PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
840
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
841
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
842
843
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
844
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
845
0
        if (is_debug) {
846
0
            set_up_debug_hooks_unlocked();
847
0
        }
848
0
        break;
849
0
    }
850
0
#endif
851
0
#ifdef WITH_MIMALLOC
852
0
    case PYMEM_ALLOCATOR_MIMALLOC:
853
0
    case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
854
0
    {
855
0
        PyMemAllocatorEx malloc_alloc = MIMALLOC_RAWALLOC;
856
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
857
858
0
        PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
859
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
860
861
0
        PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
862
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
863
864
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
865
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
866
0
        if (is_debug) {
867
0
            set_up_debug_hooks_unlocked();
868
0
        }
869
870
0
        break;
871
0
    }
872
0
#endif
873
874
0
    case PYMEM_ALLOCATOR_MALLOC:
875
0
    case PYMEM_ALLOCATOR_MALLOC_DEBUG:
876
0
    {
877
0
        PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
878
0
        set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
879
0
        set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
880
0
        set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
881
882
0
        int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
883
0
        _PyRuntime.allocators.is_debug_enabled = is_debug;
884
0
        if (is_debug) {
885
0
            set_up_debug_hooks_unlocked();
886
0
        }
887
0
        break;
888
0
    }
889
890
0
    default:
891
        /* unknown allocator */
892
0
        return -1;
893
0
    }
894
895
0
    return 0;
896
0
}
897
898
int
899
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
900
0
{
901
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
902
0
    int res = set_up_allocators_unlocked(allocator);
903
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
904
0
    return res;
905
0
}
906
907
908
static int
909
pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
910
0
{
911
0
    return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
912
0
}
913
914
915
static const char*
916
get_current_allocator_name_unlocked(void)
917
0
{
918
0
    PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
919
0
#ifdef WITH_PYMALLOC
920
0
    PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
921
0
#endif
922
0
#ifdef WITH_MIMALLOC
923
0
    PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
924
0
    PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
925
0
    PyMemAllocatorEx mimalloc_raw = MIMALLOC_RAWALLOC;
926
0
#endif
927
928
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
929
0
        pymemallocator_eq(&_PyMem, &malloc_alloc) &&
930
0
        pymemallocator_eq(&_PyObject, &malloc_alloc))
931
0
    {
932
0
        return "malloc";
933
0
    }
934
0
#ifdef WITH_PYMALLOC
935
0
    if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
936
0
        pymemallocator_eq(&_PyMem, &pymalloc) &&
937
0
        pymemallocator_eq(&_PyObject, &pymalloc))
938
0
    {
939
0
        return "pymalloc";
940
0
    }
941
0
#endif
942
0
#ifdef WITH_MIMALLOC
943
0
    if (pymemallocator_eq(&_PyMem_Raw, &mimalloc_raw) &&
944
0
        pymemallocator_eq(&_PyMem, &mimalloc) &&
945
0
        pymemallocator_eq(&_PyObject, &mimalloc_obj))
946
0
    {
947
0
        return "mimalloc";
948
0
    }
949
0
#endif
950
951
0
    PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
952
0
    PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
953
0
    PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
954
955
0
    if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
956
0
        pymemallocator_eq(&_PyMem, &dbg_mem) &&
957
0
        pymemallocator_eq(&_PyObject, &dbg_obj))
958
0
    {
959
        /* Debug hooks installed */
960
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
961
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
962
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
963
0
        {
964
0
            return "malloc_debug";
965
0
        }
966
0
#ifdef WITH_PYMALLOC
967
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
968
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
969
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
970
0
        {
971
0
            return "pymalloc_debug";
972
0
        }
973
0
#endif
974
0
#ifdef WITH_MIMALLOC
975
0
        if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &mimalloc_raw) &&
976
0
            pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
977
0
            pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
978
0
        {
979
0
            return "mimalloc_debug";
980
0
        }
981
0
#endif
982
0
    }
983
0
    return NULL;
984
0
}
985
986
const char*
987
_PyMem_GetCurrentAllocatorName(void)
988
0
{
989
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
990
0
    const char *name = get_current_allocator_name_unlocked();
991
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
992
0
    return name;
993
0
}
994
995
996
int
997
_PyMem_DebugEnabled(void)
998
0
{
999
0
    return _PyRuntime.allocators.is_debug_enabled;
1000
0
}
1001
1002
#ifdef WITH_PYMALLOC
1003
static int
1004
_PyMem_PymallocEnabled(void)
1005
0
{
1006
0
    if (_PyMem_DebugEnabled()) {
1007
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
1008
0
    }
1009
0
    else {
1010
0
        return (_PyObject.malloc == _PyObject_Malloc);
1011
0
    }
1012
0
}
1013
1014
#ifdef WITH_MIMALLOC
1015
static int
1016
_PyMem_MimallocEnabled(void)
1017
0
{
1018
#ifdef Py_GIL_DISABLED
1019
    return 1;
1020
#else
1021
0
    if (_PyMem_DebugEnabled()) {
1022
0
        return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
1023
0
    }
1024
0
    else {
1025
0
        return (_PyObject.malloc == _PyObject_MiMalloc);
1026
0
    }
1027
0
#endif
1028
0
}
1029
#endif  // WITH_MIMALLOC
1030
1031
#endif  // WITH_PYMALLOC
1032
1033
1034
static void
1035
set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
1036
0
{
1037
0
    PyMemAllocatorEx alloc;
1038
1039
0
    if (domain == PYMEM_DOMAIN_RAW) {
1040
0
        if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
1041
0
            return;
1042
0
        }
1043
1044
0
        get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
1045
0
        alloc.ctx = &_PyMem_Debug.raw;
1046
0
        alloc.malloc = _PyMem_DebugRawMalloc;
1047
0
        alloc.calloc = _PyMem_DebugRawCalloc;
1048
0
        alloc.realloc = _PyMem_DebugRawRealloc;
1049
0
        alloc.free = _PyMem_DebugRawFree;
1050
0
        set_allocator_unlocked(domain, &alloc);
1051
0
    }
1052
0
    else if (domain == PYMEM_DOMAIN_MEM) {
1053
0
        if (_PyMem.malloc == _PyMem_DebugMalloc) {
1054
0
            return;
1055
0
        }
1056
1057
0
        get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
1058
0
        alloc.ctx = &_PyMem_Debug.mem;
1059
0
        alloc.malloc = _PyMem_DebugMalloc;
1060
0
        alloc.calloc = _PyMem_DebugCalloc;
1061
0
        alloc.realloc = _PyMem_DebugRealloc;
1062
0
        alloc.free = _PyMem_DebugFree;
1063
0
        set_allocator_unlocked(domain, &alloc);
1064
0
    }
1065
0
    else if (domain == PYMEM_DOMAIN_OBJ)  {
1066
0
        if (_PyObject.malloc == _PyMem_DebugMalloc) {
1067
0
            return;
1068
0
        }
1069
1070
0
        get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
1071
0
        alloc.ctx = &_PyMem_Debug.obj;
1072
0
        alloc.malloc = _PyMem_DebugMalloc;
1073
0
        alloc.calloc = _PyMem_DebugCalloc;
1074
0
        alloc.realloc = _PyMem_DebugRealloc;
1075
0
        alloc.free = _PyMem_DebugFree;
1076
0
        set_allocator_unlocked(domain, &alloc);
1077
0
    }
1078
0
}
1079
1080
1081
static void
1082
set_up_debug_hooks_unlocked(void)
1083
0
{
1084
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
1085
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
1086
0
    set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
1087
0
    _PyRuntime.allocators.is_debug_enabled = 1;
1088
0
}
1089
1090
void
1091
PyMem_SetupDebugHooks(void)
1092
0
{
1093
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1094
0
    set_up_debug_hooks_unlocked();
1095
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1096
0
}
1097
1098
static void
1099
get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1100
20
{
1101
20
    switch(domain)
1102
20
    {
1103
20
    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
1104
0
    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
1105
0
    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
1106
0
    default:
1107
        /* unknown domain: set all attributes to NULL */
1108
0
        allocator->ctx = NULL;
1109
0
        allocator->malloc = NULL;
1110
0
        allocator->calloc = NULL;
1111
0
        allocator->realloc = NULL;
1112
0
        allocator->free = NULL;
1113
20
    }
1114
20
}
1115
1116
static void
1117
set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1118
0
{
1119
0
    switch(domain)
1120
0
    {
1121
0
    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
1122
0
    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
1123
0
    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
1124
    /* ignore unknown domain */
1125
0
    }
1126
0
}
1127
1128
void
1129
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1130
20
{
1131
20
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1132
20
    get_allocator_unlocked(domain, allocator);
1133
20
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1134
20
}
1135
1136
void
1137
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
1138
0
{
1139
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1140
0
    set_allocator_unlocked(domain, allocator);
1141
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1142
0
}
1143
1144
void
1145
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
1146
0
{
1147
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1148
0
    *allocator = _PyObject_Arena;
1149
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1150
0
}
1151
1152
void
1153
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
1154
0
{
1155
0
    PyMutex_Lock(&ALLOCATORS_MUTEX);
1156
0
    _PyObject_Arena = *allocator;
1157
0
    PyMutex_Unlock(&ALLOCATORS_MUTEX);
1158
0
}
1159
1160
1161
/* Note that there is a possible, but very unlikely, race in any place
1162
 * below where we call one of the allocator functions.  We access two
1163
 * fields in each case:  "malloc", etc. and "ctx".
1164
 *
1165
 * It is unlikely that the allocator will be changed while one of those
1166
 * calls is happening, much less in that very narrow window.
1167
 * Furthermore, the likelihood of a race is drastically reduced by the
1168
 * fact that the allocator may not be changed after runtime init
1169
 * (except with a wrapper).
1170
 *
1171
 * With the above in mind, we currently don't worry about locking
1172
 * around these uses of the runtime-global allocators state. */
1173
1174
1175
/*************************/
1176
/* the "arena" allocator */
1177
/*************************/
1178
1179
void *
1180
_PyObject_VirtualAlloc(size_t size)
1181
20
{
1182
20
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1183
20
    if (alloc_size == 0 && size != 0) {
1184
0
        return NULL;
1185
0
    }
1186
20
    return _PyObject_Arena.alloc(_PyObject_Arena.ctx, alloc_size);
1187
20
}
1188
1189
void
1190
_PyObject_VirtualFree(void *obj, size_t size)
1191
0
{
1192
0
    size_t alloc_size = _pymalloc_virtual_alloc_size(size);
1193
0
    assert(alloc_size != 0 || size == 0);
1194
0
    _PyObject_Arena.free(_PyObject_Arena.ctx, obj, alloc_size);
1195
0
}
1196
1197
1198
/***********************/
1199
/* the "raw" allocator */
1200
/***********************/
1201
1202
void *
1203
PyMem_RawMalloc(size_t size)
1204
1.87M
{
1205
    /*
1206
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1207
     * Most python internals blindly use a signed Py_ssize_t to track
1208
     * things without checking for overflows or negatives.
1209
     * As size_t is unsigned, checking for size < 0 is not required.
1210
     */
1211
1.87M
    if (size > (size_t)PY_SSIZE_T_MAX)
1212
0
        return NULL;
1213
1.87M
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
1214
1.87M
}
1215
1216
void *
1217
PyMem_RawCalloc(size_t nelem, size_t elsize)
1218
261k
{
1219
    /* see PyMem_RawMalloc() */
1220
261k
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1221
0
        return NULL;
1222
261k
    return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
1223
261k
}
1224
1225
void*
1226
PyMem_RawRealloc(void *ptr, size_t new_size)
1227
597k
{
1228
    /* see PyMem_RawMalloc() */
1229
597k
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1230
0
        return NULL;
1231
597k
    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
1232
597k
}
1233
1234
void PyMem_RawFree(void *ptr)
1235
2.10M
{
1236
2.10M
    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
1237
2.10M
}
1238
1239
1240
/***********************/
1241
/* the "mem" allocator */
1242
/***********************/
1243
1244
void *
1245
PyMem_Malloc(size_t size)
1246
21.2M
{
1247
    /* see PyMem_RawMalloc() */
1248
21.2M
    if (size > (size_t)PY_SSIZE_T_MAX)
1249
0
        return NULL;
1250
21.2M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1251
21.2M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1252
21.2M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1253
21.2M
    OBJECT_STAT_INC(allocations);
1254
21.2M
    return _PyMem.malloc(_PyMem.ctx, size);
1255
21.2M
}
1256
1257
void *
1258
PyMem_Calloc(size_t nelem, size_t elsize)
1259
4.65M
{
1260
    /* see PyMem_RawMalloc() */
1261
4.65M
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1262
0
        return NULL;
1263
4.65M
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1264
4.65M
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1265
4.65M
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1266
4.65M
    OBJECT_STAT_INC(allocations);
1267
4.65M
    return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
1268
4.65M
}
1269
1270
void *
1271
PyMem_Realloc(void *ptr, size_t new_size)
1272
10.3M
{
1273
    /* see PyMem_RawMalloc() */
1274
10.3M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1275
0
        return NULL;
1276
10.3M
    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
1277
10.3M
}
1278
1279
void
1280
PyMem_Free(void *ptr)
1281
32.5M
{
1282
32.5M
    OBJECT_STAT_INC(frees);
1283
32.5M
    _PyMem.free(_PyMem.ctx, ptr);
1284
32.5M
}
1285
1286
1287
/***************************/
1288
/* pymem utility functions */
1289
/***************************/
1290
1291
wchar_t*
1292
_PyMem_RawWcsdup(const wchar_t *str)
1293
820
{
1294
820
    assert(str != NULL);
1295
1296
820
    size_t len = wcslen(str);
1297
820
    if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
1298
0
        return NULL;
1299
0
    }
1300
1301
820
    size_t size = (len + 1) * sizeof(wchar_t);
1302
820
    wchar_t *str2 = PyMem_RawMalloc(size);
1303
820
    if (str2 == NULL) {
1304
0
        return NULL;
1305
0
    }
1306
1307
820
    memcpy(str2, str, size);
1308
820
    return str2;
1309
820
}
1310
1311
char *
1312
_PyMem_RawStrdup(const char *str)
1313
82
{
1314
82
    assert(str != NULL);
1315
82
    size_t size = strlen(str) + 1;
1316
82
    char *copy = PyMem_RawMalloc(size);
1317
82
    if (copy == NULL) {
1318
0
        return NULL;
1319
0
    }
1320
82
    memcpy(copy, str, size);
1321
82
    return copy;
1322
82
}
1323
1324
char *
1325
_PyMem_Strdup(const char *str)
1326
0
{
1327
0
    assert(str != NULL);
1328
0
    size_t size = strlen(str) + 1;
1329
0
    char *copy = PyMem_Malloc(size);
1330
0
    if (copy == NULL) {
1331
0
        return NULL;
1332
0
    }
1333
0
    memcpy(copy, str, size);
1334
0
    return copy;
1335
0
}
1336
1337
/***********************************************/
1338
/* Delayed freeing support for Py_GIL_DISABLED */
1339
/***********************************************/
1340
1341
// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
1342
#define WORK_ITEMS_PER_CHUNK 254
1343
1344
// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
1345
struct _mem_work_item {
1346
    uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
1347
    uint64_t qsbr_goal;
1348
};
1349
1350
// A fixed-size buffer of pointers to be freed
1351
struct _mem_work_chunk {
1352
    // Linked list node of chunks in queue
1353
    struct llist_node node;
1354
1355
    Py_ssize_t rd_idx;  // index of next item to read
1356
    Py_ssize_t wr_idx;  // index of next item to write
1357
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
1358
};
1359
1360
static int
1361
work_item_should_decref(uintptr_t ptr)
1362
0
{
1363
0
    return ptr & 0x01;
1364
0
}
1365
1366
static void
1367
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
1368
0
{
1369
0
    if (work_item_should_decref(ptr)) {
1370
0
        PyObject *obj = (PyObject *)(ptr - 1);
1371
#ifdef Py_GIL_DISABLED
1372
        if (cb == NULL) {
1373
            assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
1374
            Py_DECREF(obj);
1375
            return;
1376
        }
1377
        assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
1378
        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1379
        if (refcount == 0) {
1380
            cb(obj, state);
1381
        }
1382
#else
1383
0
        Py_DECREF(obj);
1384
0
#endif
1385
0
    }
1386
0
    else {
1387
0
        PyMem_Free((void *)ptr);
1388
0
    }
1389
0
}
1390
1391
1392
#ifdef Py_GIL_DISABLED
1393
1394
// For deferred advance on free: the number of deferred items before advancing
1395
// the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally
1396
// want to process a chunk before it overflows.
1397
#define QSBR_DEFERRED_LIMIT 127
1398
1399
// If the deferred memory exceeds 1 MiB, advance the write sequence.  This
1400
// helps limit memory usage due to QSBR delaying frees too long.
1401
#define QSBR_FREE_MEM_LIMIT 1024*1024
1402
1403
// Return true if the global write sequence should be advanced for a deferred
1404
// memory free.
1405
static bool
1406
should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
1407
{
1408
    if (size > QSBR_FREE_MEM_LIMIT) {
1409
        qsbr->deferred_count = 0;
1410
        qsbr->deferred_memory = 0;
1411
        qsbr->should_process = true;
1412
        return true;
1413
    }
1414
    qsbr->deferred_count++;
1415
    qsbr->deferred_memory += size;
1416
    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
1417
            qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
1418
        qsbr->deferred_count = 0;
1419
        qsbr->deferred_memory = 0;
1420
        qsbr->should_process = true;
1421
        return true;
1422
    }
1423
    return false;
1424
}
1425
#endif
1426
1427
static void
1428
free_delayed(uintptr_t ptr, size_t size)
1429
0
{
1430
0
#ifndef Py_GIL_DISABLED
1431
0
    free_work_item(ptr, NULL, NULL);
1432
#else
1433
    PyInterpreterState *interp = _PyInterpreterState_GET();
1434
    if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
1435
        interp->stoptheworld.world_stopped)
1436
    {
1437
        // Free immediately during interpreter shutdown or if the world is
1438
        // stopped.
1439
        assert(!interp->stoptheworld.world_stopped || !work_item_should_decref(ptr));
1440
        free_work_item(ptr, NULL, NULL);
1441
        return;
1442
    }
1443
1444
    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1445
    struct llist_node *head = &tstate->mem_free_queue;
1446
1447
    struct _mem_work_chunk *buf = NULL;
1448
    if (!llist_empty(head)) {
1449
        // Try to re-use the last buffer
1450
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
1451
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1452
            // already full
1453
            buf = NULL;
1454
        }
1455
    }
1456
1457
    if (buf == NULL) {
1458
        buf = PyMem_Calloc(1, sizeof(*buf));
1459
        if (buf != NULL) {
1460
            llist_insert_tail(head, &buf->node);
1461
        }
1462
    }
1463
1464
    if (buf == NULL) {
1465
        // failed to allocate a buffer, free immediately
1466
        PyObject *to_dealloc = NULL;
1467
        _PyEval_StopTheWorld(tstate->base.interp);
1468
        if (work_item_should_decref(ptr)) {
1469
            PyObject *obj = (PyObject *)(ptr - 1);
1470
            Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
1471
            if (refcount == 0) {
1472
                to_dealloc = obj;
1473
            }
1474
        }
1475
        else {
1476
            PyMem_Free((void *)ptr);
1477
        }
1478
        _PyEval_StartTheWorld(tstate->base.interp);
1479
        if (to_dealloc != NULL) {
1480
            _Py_Dealloc(to_dealloc);
1481
        }
1482
        return;
1483
    }
1484
1485
    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1486
    uint64_t seq;
1487
    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
1488
        seq = _Py_qsbr_advance(tstate->qsbr->shared);
1489
    }
1490
    else {
1491
        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
1492
    }
1493
    buf->array[buf->wr_idx].ptr = ptr;
1494
    buf->array[buf->wr_idx].qsbr_goal = seq;
1495
    buf->wr_idx++;
1496
1497
    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
1498
        // Normally the processing of delayed items is done from the eval
1499
        // breaker.  Processing here is a safety measure to ensure too much
1500
        // work does not accumulate.
1501
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
1502
    }
1503
#endif
1504
0
}
1505
1506
void
1507
_PyMem_FreeDelayed(void *ptr, size_t size)
1508
0
{
1509
0
    assert(!((uintptr_t)ptr & 0x01));
1510
0
    if (ptr != NULL) {
1511
0
        free_delayed((uintptr_t)ptr, size);
1512
0
    }
1513
0
}
1514
1515
#ifdef Py_GIL_DISABLED
1516
void
1517
_PyObject_XDecRefDelayed(PyObject *ptr)
1518
{
1519
    assert(!((uintptr_t)ptr & 0x01));
1520
    if (ptr != NULL) {
1521
        // We use 0 as the size since we don't have an easy way to know the
1522
        // actual size.  If we are freeing many objects, the write sequence
1523
        // will be advanced due to QSBR_DEFERRED_LIMIT.
1524
        free_delayed(((uintptr_t)ptr)|0x01, 0);
1525
    }
1526
}
1527
#endif
1528
1529
#ifdef Py_GIL_DISABLED
1530
void
1531
_PyObject_XSetRefDelayed(PyObject **ptr, PyObject *value)
1532
{
1533
    PyObject *old = *ptr;
1534
    FT_ATOMIC_STORE_PTR_RELEASE(*ptr, value);
1535
    if (old == NULL) {
1536
        return;
1537
    }
1538
    if (!_Py_IsImmortal(old)) {
1539
         _PyObject_XDecRefDelayed(old);
1540
    }
1541
}
1542
#endif
1543
1544
static struct _mem_work_chunk *
1545
work_queue_first(struct llist_node *head)
1546
0
{
1547
0
    return llist_data(head->next, struct _mem_work_chunk, node);
1548
0
}
1549
1550
static void
1551
process_queue(struct llist_node *head, _PyThreadStateImpl *tstate,
1552
              bool keep_empty, delayed_dealloc_cb cb, void *state)
1553
0
{
1554
0
    while (!llist_empty(head)) {
1555
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1556
1557
0
        if (buf->rd_idx < buf->wr_idx) {
1558
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1559
0
            if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) {
1560
0
                return;
1561
0
            }
1562
1563
0
            buf->rd_idx++;
1564
            // NB: free_work_item may re-enter or execute arbitrary code
1565
0
            free_work_item(item->ptr, cb, state);
1566
0
            continue;
1567
0
        }
1568
1569
0
        assert(buf->rd_idx == buf->wr_idx);
1570
0
        if (keep_empty && buf->node.next == head) {
1571
            // Keep the last buffer in the queue to reduce re-allocations
1572
0
            buf->rd_idx = buf->wr_idx = 0;
1573
0
            return;
1574
0
        }
1575
1576
0
        llist_remove(&buf->node);
1577
0
        PyMem_Free(buf);
1578
0
    }
1579
0
}
1580
1581
static void
1582
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1583
                     _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1584
                     void *state)
1585
0
{
1586
0
    assert(PyMutex_IsLocked(&queue->mutex));
1587
0
    process_queue(&queue->head, tstate, false, cb, state);
1588
1589
0
    int more_work = !llist_empty(&queue->head);
1590
0
    _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
1591
0
}
1592
1593
static void
1594
maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
1595
                           _PyThreadStateImpl *tstate, delayed_dealloc_cb cb,
1596
                           void *state)
1597
0
{
1598
0
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
1599
0
        return;
1600
0
    }
1601
1602
    // Try to acquire the lock, but don't block if it's already held.
1603
0
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
1604
0
        process_interp_queue(queue, tstate, cb, state);
1605
0
        PyMutex_Unlock(&queue->mutex);
1606
0
    }
1607
0
}
1608
1609
void
1610
_PyMem_ProcessDelayed(PyThreadState *tstate)
1611
0
{
1612
0
    PyInterpreterState *interp = tstate->interp;
1613
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1614
1615
0
    tstate_impl->qsbr->should_process = false;
1616
1617
    // Process thread-local work
1618
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
1619
1620
    // Process shared interpreter work
1621
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL);
1622
0
}
1623
1624
void
1625
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
1626
0
{
1627
0
    PyInterpreterState *interp = tstate->interp;
1628
0
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1629
1630
    // Process thread-local work
1631
0
    process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state);
1632
1633
    // Process shared interpreter work
1634
0
    maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state);
1635
0
}
1636
1637
void
1638
_PyMem_AbandonDelayed(PyThreadState *tstate)
1639
0
{
1640
0
    PyInterpreterState *interp = tstate->interp;
1641
0
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
1642
1643
0
    if (llist_empty(queue)) {
1644
0
        return;
1645
0
    }
1646
1647
    // Check if the queue contains one empty buffer
1648
0
    struct _mem_work_chunk *buf = work_queue_first(queue);
1649
0
    if (buf->rd_idx == buf->wr_idx) {
1650
0
        llist_remove(&buf->node);
1651
0
        PyMem_Free(buf);
1652
0
        assert(llist_empty(queue));
1653
0
        return;
1654
0
    }
1655
1656
0
    PyMutex_Lock(&interp->mem_free_queue.mutex);
1657
1658
    // Merge the thread's work queue into the interpreter's work queue.
1659
0
    llist_concat(&interp->mem_free_queue.head, queue);
1660
1661
    // Process the merged queue now (see gh-130794).
1662
0
    _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
1663
0
    process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL);
1664
1665
0
    PyMutex_Unlock(&interp->mem_free_queue.mutex);
1666
1667
0
    assert(llist_empty(queue));  // the thread's queue is now empty
1668
0
}
1669
1670
void
1671
_PyMem_FiniDelayed(PyInterpreterState *interp)
1672
0
{
1673
0
    struct llist_node *head = &interp->mem_free_queue.head;
1674
0
    while (!llist_empty(head)) {
1675
0
        struct _mem_work_chunk *buf = work_queue_first(head);
1676
1677
0
        if (buf->rd_idx < buf->wr_idx) {
1678
            // Free the remaining items immediately. There should be no other
1679
            // threads accessing the memory at this point during shutdown.
1680
0
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
1681
0
            buf->rd_idx++;
1682
            // NB: free_work_item may re-enter or execute arbitrary code
1683
0
            free_work_item(item->ptr, NULL, NULL);
1684
0
            continue;
1685
0
        }
1686
1687
0
        llist_remove(&buf->node);
1688
0
        PyMem_Free(buf);
1689
0
    }
1690
0
}
1691
1692
/**************************/
1693
/* the "object" allocator */
1694
/**************************/
1695
1696
void *
1697
PyObject_Malloc(size_t size)
1698
71.3M
{
1699
    /* see PyMem_RawMalloc() */
1700
71.3M
    if (size > (size_t)PY_SSIZE_T_MAX)
1701
0
        return NULL;
1702
71.3M
    OBJECT_STAT_INC_COND(allocations512, size < 512);
1703
71.3M
    OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
1704
71.3M
    OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
1705
71.3M
    OBJECT_STAT_INC(allocations);
1706
71.3M
    return _PyObject.malloc(_PyObject.ctx, size);
1707
71.3M
}
1708
1709
void *
1710
PyObject_Calloc(size_t nelem, size_t elsize)
1711
0
{
1712
    /* see PyMem_RawMalloc() */
1713
0
    if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
1714
0
        return NULL;
1715
0
    OBJECT_STAT_INC_COND(allocations512, elsize < 512);
1716
0
    OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
1717
0
    OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
1718
0
    OBJECT_STAT_INC(allocations);
1719
0
    return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
1720
0
}
1721
1722
void *
1723
PyObject_Realloc(void *ptr, size_t new_size)
1724
4.15M
{
1725
    /* see PyMem_RawMalloc() */
1726
4.15M
    if (new_size > (size_t)PY_SSIZE_T_MAX)
1727
0
        return NULL;
1728
4.15M
    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
1729
4.15M
}
1730
1731
void
1732
PyObject_Free(void *ptr)
1733
70.9M
{
1734
70.9M
    OBJECT_STAT_INC(frees);
1735
70.9M
    _PyObject.free(_PyObject.ctx, ptr);
1736
70.9M
}
1737
1738
1739
/* Use __builtin_expect() where available to reduce overhead of
1740
   the valgrind checks */
1741
#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))) && defined(__OPTIMIZE__)
1742
679M
#  define UNLIKELY(value) __builtin_expect((value), 0)
1743
312M
#  define LIKELY(value) __builtin_expect((value), 1)
1744
#else
1745
#  define UNLIKELY(value) (value)
1746
#  define LIKELY(value) (value)
1747
#endif
1748
1749
#ifdef WITH_PYMALLOC
1750
1751
#ifdef WITH_VALGRIND
1752
#include <valgrind/valgrind.h>
1753
1754
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
1755
static int running_on_valgrind = -1;
1756
#endif
1757
1758
typedef struct _obmalloc_state OMState;
1759
1760
/* obmalloc state for main interpreter and shared by all interpreters without
1761
 * their own obmalloc state.  By not explicitly initializing this structure, it
1762
 * will be allocated in the BSS which is a small performance win.  The radix
1763
 * tree arrays are fairly large but are sparsely used.  */
1764
static struct _obmalloc_state obmalloc_state_main;
1765
static bool obmalloc_state_initialized;
1766
1767
static inline int
1768
has_own_state(PyInterpreterState *interp)
1769
0
{
1770
0
    return (_Py_IsMainInterpreter(interp) ||
1771
0
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
1772
0
            _Py_IsMainInterpreterFinalizing(interp));
1773
0
}
1774
1775
static inline OMState *
1776
get_state(void)
1777
222M
{
1778
222M
    PyInterpreterState *interp = _PyInterpreterState_GET();
1779
222M
    assert(interp->obmalloc != NULL); // otherwise not initialized or freed
1780
222M
    return interp->obmalloc;
1781
222M
}
1782
1783
// These macros all rely on a local "state" variable.
1784
111M
#define usedpools (state->pools.used)
1785
378k
#define allarenas (state->mgmt.arenas)
1786
124
#define maxarenas (state->mgmt.maxarenas)
1787
2.81k
#define unused_arena_objects (state->mgmt.unused_arena_objects)
1788
3.23M
#define usable_arenas (state->mgmt.usable_arenas)
1789
2.33M
#define nfp2lasta (state->mgmt.nfp2lasta)
1790
1.86k
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
1791
599
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
1792
766
#define narenas_highwater (state->mgmt.narenas_highwater)
1793
4.19M
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
1794
1795
#ifdef WITH_MIMALLOC
1796
static bool count_blocks(
1797
    const mi_heap_t* heap, const mi_heap_area_t* area,
1798
    void* block, size_t block_size, void* allocated_blocks)
1799
0
{
1800
0
    *(size_t *)allocated_blocks += area->used;
1801
0
    return 1;
1802
0
}
1803
1804
static Py_ssize_t
1805
get_mimalloc_allocated_blocks(PyInterpreterState *interp)
1806
0
{
1807
0
    size_t allocated_blocks = 0;
1808
#ifdef Py_GIL_DISABLED
1809
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, t) {
1810
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
1811
        for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
1812
            mi_heap_t *heap = &tstate->mimalloc.heaps[i];
1813
            mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1814
        }
1815
    }
1816
1817
    mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
1818
    for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
1819
        _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
1820
                                        &allocated_blocks);
1821
    }
1822
#else
1823
    // TODO(sgross): this only counts the current thread's blocks.
1824
0
    mi_heap_t *heap = mi_heap_get_default();
1825
0
    mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
1826
0
#endif
1827
0
    return allocated_blocks;
1828
0
}
1829
#endif
1830
1831
Py_ssize_t
1832
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
1833
0
{
1834
0
#ifdef WITH_MIMALLOC
1835
0
    if (_PyMem_MimallocEnabled()) {
1836
0
        return get_mimalloc_allocated_blocks(interp);
1837
0
    }
1838
0
#endif
1839
1840
#ifdef Py_DEBUG
1841
    assert(has_own_state(interp));
1842
#else
1843
0
    if (!has_own_state(interp)) {
1844
0
        _Py_FatalErrorFunc(__func__,
1845
0
                           "the interpreter doesn't have its own allocator");
1846
0
    }
1847
0
#endif
1848
0
    OMState *state = interp->obmalloc;
1849
1850
0
    if (state == NULL) {
1851
0
        return 0;
1852
0
    }
1853
1854
0
    Py_ssize_t n = raw_allocated_blocks;
1855
    /* add up allocated blocks for used pools */
1856
0
    for (uint i = 0; i < maxarenas; ++i) {
1857
        /* Skip arenas which are not allocated. */
1858
0
        if (allarenas[i].address == 0) {
1859
0
            continue;
1860
0
        }
1861
1862
0
        uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
1863
1864
        /* visit every pool in the arena */
1865
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
1866
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
1867
0
            poolp p = (poolp)base;
1868
0
            n += p->ref.count;
1869
0
        }
1870
0
    }
1871
0
    return n;
1872
0
}
1873
1874
static void free_obmalloc_arenas(PyInterpreterState *interp);
1875
1876
void
1877
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
1878
0
{
1879
0
#ifdef WITH_MIMALLOC
1880
0
    if (_PyMem_MimallocEnabled()) {
1881
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1882
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1883
0
        return;
1884
0
    }
1885
0
#endif
1886
0
    if (has_own_state(interp) && interp->obmalloc != NULL) {
1887
0
        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
1888
0
        assert(has_own_state(interp) || leaked == 0);
1889
0
        interp->runtime->obmalloc.interpreter_leaks += leaked;
1890
0
        if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
1891
            // free the obmalloc arenas and radix tree nodes.  If leaked > 0
1892
            // then some of the memory allocated by obmalloc has not been
1893
            // freed.  It might be safe to free the arenas in that case but
1894
            // it's possible that extension modules are still using that
1895
            // memory.  So, it is safer to not free and to leak.  Perhaps there
1896
            // should be warning when this happens.  It should be possible to
1897
            // use a tool like "-fsanitize=address" to track down these leaks.
1898
0
            free_obmalloc_arenas(interp);
1899
0
        }
1900
0
    }
1901
0
}
1902
1903
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
1904
1905
/* We preserve the number of blocks leaked during runtime finalization,
1906
   so they can be reported if the runtime is initialized again. */
1907
// XXX We don't lose any information by dropping this,
1908
// so we should consider doing so.
1909
static Py_ssize_t last_final_leaks = 0;
1910
1911
void
1912
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
1913
0
{
1914
0
    last_final_leaks = get_num_global_allocated_blocks(runtime);
1915
0
    runtime->obmalloc.interpreter_leaks = 0;
1916
0
}
1917
1918
static Py_ssize_t
1919
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
1920
0
{
1921
0
    Py_ssize_t total = 0;
1922
0
    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
1923
0
        PyInterpreterState *interp = _PyInterpreterState_Main();
1924
0
        if (interp == NULL) {
1925
            /* We are at the very end of runtime finalization.
1926
               We can't rely on finalizing->interp since that thread
1927
               state is probably already freed, so we don't worry
1928
               about it. */
1929
0
            assert(PyInterpreterState_Head() == NULL);
1930
0
        }
1931
0
        else {
1932
0
            assert(interp != NULL);
1933
            /* It is probably the last interpreter but not necessarily. */
1934
0
            assert(PyInterpreterState_Next(interp) == NULL);
1935
0
            total += _PyInterpreterState_GetAllocatedBlocks(interp);
1936
0
        }
1937
0
    }
1938
0
    else {
1939
0
        _PyEval_StopTheWorldAll(&_PyRuntime);
1940
0
        HEAD_LOCK(runtime);
1941
0
        PyInterpreterState *interp = PyInterpreterState_Head();
1942
0
        assert(interp != NULL);
1943
#ifdef Py_DEBUG
1944
        int got_main = 0;
1945
#endif
1946
0
        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
1947
#ifdef Py_DEBUG
1948
            if (_Py_IsMainInterpreter(interp)) {
1949
                assert(!got_main);
1950
                got_main = 1;
1951
                assert(has_own_state(interp));
1952
            }
1953
#endif
1954
0
            if (has_own_state(interp)) {
1955
0
                total += _PyInterpreterState_GetAllocatedBlocks(interp);
1956
0
            }
1957
0
        }
1958
0
        HEAD_UNLOCK(runtime);
1959
0
        _PyEval_StartTheWorldAll(&_PyRuntime);
1960
#ifdef Py_DEBUG
1961
        assert(got_main);
1962
#endif
1963
0
    }
1964
0
    total += runtime->obmalloc.interpreter_leaks;
1965
0
    total += last_final_leaks;
1966
0
    return total;
1967
0
}
1968
1969
Py_ssize_t
1970
_Py_GetGlobalAllocatedBlocks(void)
1971
0
{
1972
0
    return get_num_global_allocated_blocks(&_PyRuntime);
1973
0
}
1974
1975
#if WITH_PYMALLOC_RADIX_TREE
1976
/*==========================================================================*/
1977
/* radix tree for tracking arena usage. */
1978
1979
342M
#define arena_map_root (state->usage.arena_map_root)
1980
#ifdef USE_INTERIOR_NODES
1981
20
#define arena_map_mid_count (state->usage.arena_map_mid_count)
1982
20
#define arena_map_bot_count (state->usage.arena_map_bot_count)
1983
#endif
1984
1985
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1986
 * it cannot be created */
1987
static inline Py_ALWAYS_INLINE arena_map_bot_t *
1988
arena_map_get(OMState *state, pymem_block *p, int create)
1989
114M
{
1990
114M
#ifdef USE_INTERIOR_NODES
1991
    /* sanity check that IGNORE_BITS is correct */
1992
114M
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1993
114M
    int i1 = MAP_TOP_INDEX(p);
1994
114M
    if (arena_map_root.ptrs[i1] == NULL) {
1995
20
        if (!create) {
1996
0
            return NULL;
1997
0
        }
1998
20
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1999
20
        if (n == NULL) {
2000
0
            return NULL;
2001
0
        }
2002
20
        arena_map_root.ptrs[i1] = n;
2003
20
        arena_map_mid_count++;
2004
20
    }
2005
114M
    int i2 = MAP_MID_INDEX(p);
2006
114M
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
2007
2.68M
        if (!create) {
2008
2.68M
            return NULL;
2009
2.68M
        }
2010
20
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
2011
20
        if (n == NULL) {
2012
0
            return NULL;
2013
0
        }
2014
20
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
2015
20
        arena_map_bot_count++;
2016
20
    }
2017
112M
    return arena_map_root.ptrs[i1]->ptrs[i2];
2018
#else
2019
    return &arena_map_root;
2020
#endif
2021
114M
}
2022
2023
2024
/* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
2025
 * away 24 bits of the address.  That reduces the space requirement of
2026
 * the tree compared to similar radix tree page-map schemes.  In
2027
 * exchange for slashing the space requirement, it needs more
2028
 * computation to check an address.
2029
 *
2030
 * Tracking coverage is done by "ideal" arena address.  It is easier to
2031
 * explain in decimal so let's say that the arena size is 100 bytes.
2032
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
2033
 * pointer address is inside an actual arena, we have to check two ideal
2034
 * arena addresses.  E.g. if pointer is 357, we need to check 200 and
2035
 * 300.  In the rare case that an arena is aligned in the ideal way
2036
 * (e.g. base address of arena is 200) then we only have to check one
2037
 * ideal address.
2038
 *
2039
 * The tree nodes for 200 and 300 both store the address of arena.
2040
 * There are two cases: the arena starts at a lower ideal arena and
2041
 * extends to this one, or the arena starts in this arena and extends to
2042
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
2043
 * these two cases.
2044
 */
2045
2046
2047
/* mark or unmark addresses covered by arena */
2048
static int
2049
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
2050
1.09k
{
2051
    /* sanity check that IGNORE_BITS is correct */
2052
1.09k
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
2053
1.09k
    arena_map_bot_t *n_hi = arena_map_get(
2054
1.09k
            state, (pymem_block *)arena_base, is_used);
2055
1.09k
    if (n_hi == NULL) {
2056
0
        assert(is_used); /* otherwise node should already exist */
2057
0
        return 0; /* failed to allocate space for node */
2058
0
    }
2059
1.09k
    int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
2060
1.09k
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
2061
1.09k
    if (tail == 0) {
2062
        /* is ideal arena address */
2063
31
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
2064
31
    }
2065
1.06k
    else {
2066
        /* arena_base address is not ideal (aligned to arena size) and
2067
         * so it potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
2068
         * for the next arena.  Note that it might be in different MAP_TOP
2069
         * and MAP_MID nodes as well so we need to call arena_map_get()
2070
         * again (do the full tree traversal).
2071
         */
2072
1.06k
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
2073
1.06k
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
2074
        /* If arena_base is a legit arena address, so is arena_base_next - 1
2075
         * (last address in arena).  If arena_base_next overflows then it
2076
         * must overflow to 0.  However, that would mean arena_base was
2077
         * "ideal" and we should not be in this case. */
2078
1.06k
        assert(arena_base < arena_base_next);
2079
1.06k
        arena_map_bot_t *n_lo = arena_map_get(
2080
1.06k
                state, (pymem_block *)arena_base_next, is_used);
2081
1.06k
        if (n_lo == NULL) {
2082
0
            assert(is_used); /* otherwise should already exist */
2083
0
            n_hi->arenas[i3].tail_hi = 0;
2084
0
            return 0; /* failed to allocate space for node */
2085
0
        }
2086
1.06k
        int i3_next = MAP_BOT_INDEX(arena_base_next);
2087
1.06k
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
2088
1.06k
    }
2089
1.09k
    return 1;
2090
1.09k
}
2091
2092
/* Return true if 'p' is a pointer inside an obmalloc arena.
2093
 * _PyObject_Free() calls this so it needs to be very fast. */
2094
static int
2095
arena_map_is_used(OMState *state, pymem_block *p)
2096
114M
{
2097
114M
    arena_map_bot_t *n = arena_map_get(state, p, 0);
2098
114M
    if (n == NULL) {
2099
2.68M
        return 0;
2100
2.68M
    }
2101
112M
    int i3 = MAP_BOT_INDEX(p);
2102
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
2103
112M
    int32_t hi = n->arenas[i3].tail_hi;
2104
112M
    int32_t lo = n->arenas[i3].tail_lo;
2105
112M
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
2106
112M
    return (tail < lo) || (tail >= hi && hi != 0);
2107
114M
}
2108
2109
/* end of radix tree logic */
2110
/*==========================================================================*/
2111
#endif /* WITH_PYMALLOC_RADIX_TREE */
2112
2113
2114
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
2115
 * allocate a new arena, and return the address of an arena_object
2116
 * describing the new arena.  It's expected that the caller will set
2117
 * `usable_arenas` to the return value.
2118
 */
2119
static struct arena_object*
2120
new_arena(OMState *state)
2121
599
{
2122
599
    struct arena_object* arenaobj;
2123
599
    uint excess;        /* number of bytes above pool alignment */
2124
599
    void *address;
2125
2126
599
    int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
2127
599
    if (debug_stats == -1) {
2128
20
        const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
2129
20
        debug_stats = (opt != NULL && *opt != '\0');
2130
20
        _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
2131
20
    }
2132
599
    if (debug_stats) {
2133
0
        _PyObject_DebugMallocStats(stderr);
2134
0
    }
2135
2136
599
    if (unused_arena_objects == NULL) {
2137
24
        uint i;
2138
24
        uint numarenas;
2139
24
        size_t nbytes;
2140
2141
        /* Double the number of arena objects on each allocation.
2142
         * Note that it's possible for `numarenas` to overflow.
2143
         */
2144
24
        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
2145
24
        if (numarenas <= maxarenas)
2146
0
            return NULL;                /* overflow */
2147
#if SIZEOF_SIZE_T <= SIZEOF_INT
2148
        if (numarenas > SIZE_MAX / sizeof(*allarenas))
2149
            return NULL;                /* overflow */
2150
#endif
2151
24
        nbytes = numarenas * sizeof(*allarenas);
2152
24
        arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
2153
24
        if (arenaobj == NULL)
2154
0
            return NULL;
2155
24
        allarenas = arenaobj;
2156
2157
        /* We might need to fix pointers that were copied.  However,
2158
         * new_arena only gets called when all the pages in the
2159
         * previous arenas are full.  Thus, there are *no* pointers
2160
         * into the old array. Thus, we don't have to worry about
2161
         * invalid pointers.  Just to be sure, some asserts:
2162
         */
2163
24
        assert(usable_arenas == NULL);
2164
24
        assert(unused_arena_objects == NULL);
2165
2166
        /* Put the new arenas on the unused_arena_objects list. */
2167
472
        for (i = maxarenas; i < numarenas; ++i) {
2168
448
            allarenas[i].address = 0;              /* mark as unassociated */
2169
448
            allarenas[i].nextarena = i < numarenas - 1 ?
2170
448
                                        &allarenas[i+1] : NULL;
2171
448
        }
2172
2173
        /* Update globals. */
2174
24
        unused_arena_objects = &allarenas[maxarenas];
2175
24
        maxarenas = numarenas;
2176
24
    }
2177
2178
    /* Take the next available arena object off the head of the list. */
2179
599
    assert(unused_arena_objects != NULL);
2180
599
    arenaobj = unused_arena_objects;
2181
599
    unused_arena_objects = arenaobj->nextarena;
2182
599
    assert(arenaobj->address == 0);
2183
599
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
2184
599
#if WITH_PYMALLOC_RADIX_TREE
2185
599
    if (address != NULL) {
2186
599
        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
2187
            /* marking arena in radix tree failed, abort */
2188
0
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
2189
0
            address = NULL;
2190
0
        }
2191
599
    }
2192
599
#endif
2193
599
    if (address == NULL) {
2194
        /* The allocation failed: return NULL after putting the
2195
         * arenaobj back.
2196
         */
2197
0
        arenaobj->nextarena = unused_arena_objects;
2198
0
        unused_arena_objects = arenaobj;
2199
0
        return NULL;
2200
0
    }
2201
599
    arenaobj->address = (uintptr_t)address;
2202
2203
599
    ++narenas_currently_allocated;
2204
599
    ++ntimes_arena_allocated;
2205
599
    if (narenas_currently_allocated > narenas_highwater)
2206
167
        narenas_highwater = narenas_currently_allocated;
2207
599
    arenaobj->freepools = NULL;
2208
    /* pool_address <- first pool-aligned address in the arena
2209
       nfreepools <- number of whole pools that fit after alignment */
2210
599
    arenaobj->pool_address = (pymem_block*)arenaobj->address;
2211
599
    arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
2212
599
    excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
2213
599
    if (excess != 0) {
2214
130
        --arenaobj->nfreepools;
2215
130
        arenaobj->pool_address += POOL_SIZE - excess;
2216
130
    }
2217
599
    arenaobj->ntotalpools = arenaobj->nfreepools;
2218
2219
599
    return arenaobj;
2220
599
}
2221
2222
2223
2224
#if WITH_PYMALLOC_RADIX_TREE
2225
/* Return true if and only if P is an address that was allocated by
2226
   pymalloc.  When the radix tree is used, 'poolp' is unused.
2227
 */
2228
static bool
2229
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
2230
114M
{
2231
114M
    return arena_map_is_used(state, p);
2232
114M
}
2233
#else
2234
/*
2235
address_in_range(P, POOL)
2236
2237
Return true if and only if P is an address that was allocated by pymalloc.
2238
POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
2239
(the caller is asked to compute this because the macro expands POOL more than
2240
once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
2241
variable and pass the latter to the macro; because address_in_range is
2242
called on every alloc/realloc/free, micro-efficiency is important here).
2243
2244
Tricky:  Let B be the arena base address associated with the pool, B =
2245
arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
2246
2247
    B <= P < B + ARENA_SIZE
2248
2249
Subtracting B throughout, this is true iff
2250
2251
    0 <= P-B < ARENA_SIZE
2252
2253
By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
2254
2255
Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
2256
before the first arena has been allocated.  `arenas` is still NULL in that
2257
case.  We're relying on that maxarenas is also 0 in that case, so that
2258
(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
2259
into a NULL arenas.
2260
2261
Details:  given P and POOL, the arena_object corresponding to P is AO =
2262
arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
2263
stores, etc), POOL is the correct address of P's pool, AO.address is the
2264
correct base address of the pool's arena, and P must be within ARENA_SIZE of
2265
AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
2266
(NULL)).  Therefore address_in_range correctly reports that obmalloc
2267
controls P.
2268
2269
Now suppose obmalloc does not control P (e.g., P was obtained via a direct
2270
call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
2271
in this case -- it may even be uninitialized trash.  If the trash arenaindex
2272
is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
2273
control P.
2274
2275
Else arenaindex is < maxarena, and AO is read up.  If AO corresponds to an
2276
allocated arena, obmalloc controls all the memory in slice AO.address :
2277
AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
2278
so P doesn't lie in that slice, so the macro correctly reports that P is not
2279
controlled by obmalloc.
2280
2281
Finally, if P is not controlled by obmalloc and AO corresponds to an unused
2282
arena_object (one not currently associated with an allocated arena),
2283
AO.address is 0, and the second test in the macro reduces to:
2284
2285
    P < ARENA_SIZE
2286
2287
If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
2288
that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
2289
of the test still passes, and the third clause (AO.address != 0) is necessary
2290
to get the correct result:  AO.address is 0 in this case, so the macro
2291
correctly reports that P is not controlled by obmalloc (despite that P lies in
2292
slice AO.address : AO.address + ARENA_SIZE).
2293
2294
Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
2295
2.5, arenas were never free()'ed, and an arenaindex < maxarena always
2296
corresponded to a currently-allocated arena, so the "P is not controlled by
2297
obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
2298
was impossible.
2299
2300
Note that the logic is excruciating, and reading up possibly uninitialized
2301
memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
2302
creates problems for some memory debuggers.  The overwhelming advantage is
2303
that this test determines whether an arbitrary address is controlled by
2304
obmalloc in a small constant time, independent of the number of arenas
2305
obmalloc controls.  Since this test is needed at every entry point, it's
2306
extremely desirable that it be this fast.
2307
*/
2308
2309
static bool _Py_NO_SANITIZE_ADDRESS
2310
            _Py_NO_SANITIZE_THREAD
2311
            _Py_NO_SANITIZE_MEMORY
2312
address_in_range(OMState *state, void *p, poolp pool)
2313
{
2314
    // Since address_in_range may be reading from memory which was not allocated
2315
    // by Python, it is important that pool->arenaindex is read only once, as
2316
    // another thread may be concurrently modifying the value without holding
2317
    // the GIL. The following dance forces the compiler to read pool->arenaindex
2318
    // only once.
2319
    uint arenaindex = *((volatile uint *)&pool->arenaindex);
2320
    return arenaindex < maxarenas &&
2321
        (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
2322
        allarenas[arenaindex].address != 0;
2323
}
2324
2325
#endif /* !WITH_PYMALLOC_RADIX_TREE */
2326
2327
/*==========================================================================*/
2328
2329
// Called when freelist is exhausted.  Extend the freelist if there is
2330
// space for a block.  Otherwise, remove this pool from usedpools.
2331
static void
2332
pymalloc_pool_extend(poolp pool, uint size)
2333
38.9M
{
2334
38.9M
    if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
2335
        /* There is room for another block. */
2336
33.4M
        pool->freeblock = (pymem_block*)pool + pool->nextoffset;
2337
33.4M
        pool->nextoffset += INDEX2SIZE(size);
2338
33.4M
        *(pymem_block **)(pool->freeblock) = NULL;
2339
33.4M
        return;
2340
33.4M
    }
2341
2342
    /* Pool is full, unlink from used pools. */
2343
5.57M
    poolp next;
2344
5.57M
    next = pool->nextpool;
2345
5.57M
    pool = pool->prevpool;
2346
5.57M
    next->prevpool = pool;
2347
5.57M
    pool->nextpool = next;
2348
5.57M
}
2349
2350
/* called when pymalloc_alloc can not allocate a block from usedpool.
2351
 * This function takes new pool and allocate a block from it.
2352
 */
2353
static void*
2354
allocate_from_new_pool(OMState *state, uint size)
2355
342k
{
2356
    /* There isn't a pool of the right size class immediately
2357
     * available:  use a free pool.
2358
     */
2359
342k
    if (UNLIKELY(usable_arenas == NULL)) {
2360
        /* No arena has a free pool:  allocate a new arena. */
2361
#ifdef WITH_MEMORY_LIMITS
2362
        if (narenas_currently_allocated >= MAX_ARENAS) {
2363
            return NULL;
2364
        }
2365
#endif
2366
599
        usable_arenas = new_arena(state);
2367
599
        if (usable_arenas == NULL) {
2368
0
            return NULL;
2369
0
        }
2370
599
        usable_arenas->nextarena = usable_arenas->prevarena = NULL;
2371
599
        assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
2372
599
        nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
2373
599
    }
2374
342k
    assert(usable_arenas->address != 0);
2375
2376
    /* This arena already had the smallest nfreepools value, so decreasing
2377
     * nfreepools doesn't change that, and we don't need to rearrange the
2378
     * usable_arenas list.  However, if the arena becomes wholly allocated,
2379
     * we need to remove its arena_object from usable_arenas.
2380
     */
2381
342k
    assert(usable_arenas->nfreepools > 0);
2382
342k
    if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
2383
        /* It's the last of this size, so there won't be any. */
2384
340k
        nfp2lasta[usable_arenas->nfreepools] = NULL;
2385
340k
    }
2386
    /* If any free pools will remain, it will be the new smallest. */
2387
342k
    if (usable_arenas->nfreepools > 1) {
2388
336k
        assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
2389
336k
        nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
2390
336k
    }
2391
2392
    /* Try to get a cached free pool. */
2393
342k
    poolp pool = usable_arenas->freepools;
2394
342k
    if (LIKELY(pool != NULL)) {
2395
        /* Unlink from cached pools. */
2396
305k
        usable_arenas->freepools = pool->nextpool;
2397
305k
        usable_arenas->nfreepools--;
2398
305k
        if (UNLIKELY(usable_arenas->nfreepools == 0)) {
2399
            /* Wholly allocated:  remove. */
2400
5.30k
            assert(usable_arenas->freepools == NULL);
2401
5.30k
            assert(usable_arenas->nextarena == NULL ||
2402
5.30k
                   usable_arenas->nextarena->prevarena ==
2403
5.30k
                   usable_arenas);
2404
5.30k
            usable_arenas = usable_arenas->nextarena;
2405
5.30k
            if (usable_arenas != NULL) {
2406
4.80k
                usable_arenas->prevarena = NULL;
2407
4.80k
                assert(usable_arenas->address != 0);
2408
4.80k
            }
2409
5.30k
        }
2410
300k
        else {
2411
            /* nfreepools > 0:  it must be that freepools
2412
             * isn't NULL, or that we haven't yet carved
2413
             * off all the arena's pools for the first
2414
             * time.
2415
             */
2416
300k
            assert(usable_arenas->freepools != NULL ||
2417
300k
                   usable_arenas->pool_address <=
2418
300k
                   (pymem_block*)usable_arenas->address +
2419
300k
                       ARENA_SIZE - POOL_SIZE);
2420
300k
        }
2421
305k
    }
2422
37.3k
    else {
2423
        /* Carve off a new pool. */
2424
37.3k
        assert(usable_arenas->nfreepools > 0);
2425
37.3k
        assert(usable_arenas->freepools == NULL);
2426
37.3k
        pool = (poolp)usable_arenas->pool_address;
2427
37.3k
        assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
2428
37.3k
                                 ARENA_SIZE - POOL_SIZE);
2429
37.3k
        pool->arenaindex = (uint)(usable_arenas - allarenas);
2430
37.3k
        assert(&allarenas[pool->arenaindex] == usable_arenas);
2431
37.3k
        pool->szidx = DUMMY_SIZE_IDX;
2432
37.3k
        usable_arenas->pool_address += POOL_SIZE;
2433
37.3k
        --usable_arenas->nfreepools;
2434
2435
37.3k
        if (usable_arenas->nfreepools == 0) {
2436
566
            assert(usable_arenas->nextarena == NULL ||
2437
566
                   usable_arenas->nextarena->prevarena ==
2438
566
                   usable_arenas);
2439
            /* Unlink the arena:  it is completely allocated. */
2440
566
            usable_arenas = usable_arenas->nextarena;
2441
566
            if (usable_arenas != NULL) {
2442
39
                usable_arenas->prevarena = NULL;
2443
39
                assert(usable_arenas->address != 0);
2444
39
            }
2445
566
        }
2446
37.3k
    }
2447
2448
    /* Frontlink to used pools. */
2449
342k
    pymem_block *bp;
2450
342k
    poolp next = usedpools[size + size]; /* == prev */
2451
342k
    pool->nextpool = next;
2452
342k
    pool->prevpool = next;
2453
342k
    next->nextpool = pool;
2454
342k
    next->prevpool = pool;
2455
342k
    pool->ref.count = 1;
2456
342k
    if (pool->szidx == size) {
2457
        /* Luckily, this pool last contained blocks
2458
         * of the same size class, so its header
2459
         * and free list are already initialized.
2460
         */
2461
171k
        bp = pool->freeblock;
2462
171k
        assert(bp != NULL);
2463
171k
        pool->freeblock = *(pymem_block **)bp;
2464
171k
        return bp;
2465
171k
    }
2466
    /*
2467
     * Initialize the pool header, set up the free list to
2468
     * contain just the second block, and return the first
2469
     * block.
2470
     */
2471
171k
    pool->szidx = size;
2472
171k
    size = INDEX2SIZE(size);
2473
171k
    bp = (pymem_block *)pool + POOL_OVERHEAD;
2474
171k
    pool->nextoffset = POOL_OVERHEAD + (size << 1);
2475
171k
    pool->maxnextoffset = POOL_SIZE - size;
2476
171k
    pool->freeblock = bp + size;
2477
171k
    *(pymem_block **)(pool->freeblock) = NULL;
2478
171k
    return bp;
2479
342k
}
2480
2481
/* pymalloc allocator
2482
2483
   Return a pointer to newly allocated memory if pymalloc allocated memory.
2484
2485
   Return NULL if pymalloc failed to allocate the memory block: on bigger
2486
   requests, on error in the code below (as a last chance to serve the request)
2487
   or when the max memory limit has been reached.
2488
*/
2489
static inline void*
2490
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
2491
107M
{
2492
#ifdef WITH_VALGRIND
2493
    if (UNLIKELY(running_on_valgrind == -1)) {
2494
        running_on_valgrind = RUNNING_ON_VALGRIND;
2495
    }
2496
    if (UNLIKELY(running_on_valgrind)) {
2497
        return NULL;
2498
    }
2499
#endif
2500
2501
107M
    if (UNLIKELY(nbytes == 0)) {
2502
38.0k
        return NULL;
2503
38.0k
    }
2504
107M
    if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
2505
2.06M
        return NULL;
2506
2.06M
    }
2507
2508
105M
    uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
2509
105M
    poolp pool = usedpools[size + size];
2510
105M
    pymem_block *bp;
2511
2512
105M
    if (LIKELY(pool != pool->nextpool)) {
2513
        /*
2514
         * There is a used pool for this size class.
2515
         * Pick up the head block of its free list.
2516
         */
2517
105M
        ++pool->ref.count;
2518
105M
        bp = pool->freeblock;
2519
105M
        assert(bp != NULL);
2520
2521
105M
        if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
2522
            // Reached the end of the free list, try to extend it.
2523
38.9M
            pymalloc_pool_extend(pool, size);
2524
38.9M
        }
2525
105M
    }
2526
342k
    else {
2527
        /* There isn't a pool of the right size class immediately
2528
         * available:  use a free pool.
2529
         */
2530
342k
        bp = allocate_from_new_pool(state, size);
2531
342k
    }
2532
2533
105M
    return (void *)bp;
2534
105M
}
2535
2536
2537
void *
2538
_PyObject_Malloc(void *ctx, size_t nbytes)
2539
102M
{
2540
102M
    OMState *state = get_state();
2541
102M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2542
102M
    if (LIKELY(ptr != NULL)) {
2543
101M
        return ptr;
2544
101M
    }
2545
2546
1.84M
    ptr = PyMem_RawMalloc(nbytes);
2547
1.84M
    if (ptr != NULL) {
2548
1.84M
        raw_allocated_blocks++;
2549
1.84M
    }
2550
1.84M
    return ptr;
2551
102M
}
2552
2553
2554
void *
2555
_PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2556
4.65M
{
2557
4.65M
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2558
4.65M
    size_t nbytes = nelem * elsize;
2559
2560
4.65M
    OMState *state = get_state();
2561
4.65M
    void* ptr = pymalloc_alloc(state, ctx, nbytes);
2562
4.65M
    if (LIKELY(ptr != NULL)) {
2563
4.39M
        memset(ptr, 0, nbytes);
2564
4.39M
        return ptr;
2565
4.39M
    }
2566
2567
257k
    ptr = PyMem_RawCalloc(nelem, elsize);
2568
257k
    if (ptr != NULL) {
2569
257k
        raw_allocated_blocks++;
2570
257k
    }
2571
257k
    return ptr;
2572
4.65M
}
2573
2574
2575
static void
2576
insert_to_usedpool(OMState *state, poolp pool)
2577
5.57M
{
2578
5.57M
    assert(pool->ref.count > 0);            /* else the pool is empty */
2579
2580
5.57M
    uint size = pool->szidx;
2581
5.57M
    poolp next = usedpools[size + size];
2582
5.57M
    poolp prev = next->prevpool;
2583
2584
    /* insert pool before next:   prev <-> pool <-> next */
2585
5.57M
    pool->nextpool = next;
2586
5.57M
    pool->prevpool = prev;
2587
5.57M
    next->prevpool = pool;
2588
5.57M
    prev->nextpool = pool;
2589
5.57M
}
2590
2591
static void
2592
insert_to_freepool(OMState *state, poolp pool)
2593
339k
{
2594
339k
    poolp next = pool->nextpool;
2595
339k
    poolp prev = pool->prevpool;
2596
339k
    next->prevpool = prev;
2597
339k
    prev->nextpool = next;
2598
2599
    /* Link the pool to freepools.  This is a singly-linked
2600
     * list, and pool->prevpool isn't used there.
2601
     */
2602
339k
    struct arena_object *ao = &allarenas[pool->arenaindex];
2603
339k
    pool->nextpool = ao->freepools;
2604
339k
    ao->freepools = pool;
2605
339k
    uint nf = ao->nfreepools;
2606
    /* If this is the rightmost arena with this number of free pools,
2607
     * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2608
     * are no arenas in usable_arenas with that value.
2609
     */
2610
339k
    struct arena_object* lastnf = nfp2lasta[nf];
2611
339k
    assert((nf == 0 && lastnf == NULL) ||
2612
339k
           (nf > 0 &&
2613
339k
            lastnf != NULL &&
2614
339k
            lastnf->nfreepools == nf &&
2615
339k
            (lastnf->nextarena == NULL ||
2616
339k
             nf < lastnf->nextarena->nfreepools)));
2617
339k
    if (lastnf == ao) {  /* it is the rightmost */
2618
316k
        struct arena_object* p = ao->prevarena;
2619
316k
        nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2620
316k
    }
2621
339k
    ao->nfreepools = ++nf;
2622
2623
    /* All the rest is arena management.  We just freed
2624
     * a pool, and there are 4 cases for arena mgmt:
2625
     * 1. If all the pools are free, return the arena to
2626
     *    the system free().  Except if this is the last
2627
     *    arena in the list, keep it to avoid thrashing:
2628
     *    keeping one wholly free arena in the list avoids
2629
     *    pathological cases where a simple loop would
2630
     *    otherwise provoke needing to allocate and free an
2631
     *    arena on every iteration.  See bpo-37257.
2632
     * 2. If this is the only free pool in the arena,
2633
     *    add the arena back to the `usable_arenas` list.
2634
     * 3. If the "next" arena has a smaller count of free
2635
     *    pools, we have to "slide this arena right" to
2636
     *    restore that usable_arenas is sorted in order of
2637
     *    nfreepools.
2638
     * 4. Else there's nothing more to do.
2639
     */
2640
339k
    if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2641
        /* Case 1.  First unlink ao from usable_arenas.
2642
         */
2643
499
        assert(ao->prevarena == NULL ||
2644
499
               ao->prevarena->address != 0);
2645
499
        assert(ao ->nextarena == NULL ||
2646
499
               ao->nextarena->address != 0);
2647
2648
        /* Fix the pointer in the prevarena, or the
2649
         * usable_arenas pointer.
2650
         */
2651
499
        if (ao->prevarena == NULL) {
2652
42
            usable_arenas = ao->nextarena;
2653
42
            assert(usable_arenas == NULL ||
2654
42
                   usable_arenas->address != 0);
2655
42
        }
2656
457
        else {
2657
457
            assert(ao->prevarena->nextarena == ao);
2658
457
            ao->prevarena->nextarena =
2659
457
                ao->nextarena;
2660
457
        }
2661
        /* Fix the pointer in the nextarena. */
2662
499
        if (ao->nextarena != NULL) {
2663
499
            assert(ao->nextarena->prevarena == ao);
2664
499
            ao->nextarena->prevarena =
2665
499
                ao->prevarena;
2666
499
        }
2667
        /* Record that this arena_object slot is
2668
         * available to be reused.
2669
         */
2670
499
        ao->nextarena = unused_arena_objects;
2671
499
        unused_arena_objects = ao;
2672
2673
499
#if WITH_PYMALLOC_RADIX_TREE
2674
        /* mark arena region as not under control of obmalloc */
2675
499
        arena_map_mark_used(state, ao->address, 0);
2676
499
#endif
2677
2678
        /* Free the entire arena. */
2679
499
        _PyObject_Arena.free(_PyObject_Arena.ctx,
2680
499
                             (void *)ao->address, ARENA_SIZE);
2681
499
        ao->address = 0;                        /* mark unassociated */
2682
499
        --narenas_currently_allocated;
2683
2684
499
        return;
2685
499
    }
2686
2687
339k
    if (nf == 1) {
2688
        /* Case 2.  Put ao at the head of
2689
         * usable_arenas.  Note that because
2690
         * ao->nfreepools was 0 before, ao isn't
2691
         * currently on the usable_arenas list.
2692
         */
2693
5.83k
        ao->nextarena = usable_arenas;
2694
5.83k
        ao->prevarena = NULL;
2695
5.83k
        if (usable_arenas)
2696
5.38k
            usable_arenas->prevarena = ao;
2697
5.83k
        usable_arenas = ao;
2698
5.83k
        assert(usable_arenas->address != 0);
2699
5.83k
        if (nfp2lasta[1] == NULL) {
2700
5.71k
            nfp2lasta[1] = ao;
2701
5.71k
        }
2702
2703
5.83k
        return;
2704
5.83k
    }
2705
2706
    /* If this arena is now out of order, we need to keep
2707
     * the list sorted.  The list is kept sorted so that
2708
     * the "most full" arenas are used first, which allows
2709
     * the nearly empty arenas to be completely freed.  In
2710
     * a few un-scientific tests, it seems like this
2711
     * approach allowed a lot more memory to be freed.
2712
     */
2713
    /* If this is the only arena with nf, record that. */
2714
333k
    if (nfp2lasta[nf] == NULL) {
2715
313k
        nfp2lasta[nf] = ao;
2716
313k
    } /* else the rightmost with nf doesn't change */
2717
    /* If this was the rightmost of the old size, it remains in place. */
2718
333k
    if (ao == lastnf) {
2719
        /* Case 4.  Nothing to do. */
2720
316k
        return;
2721
316k
    }
2722
    /* If ao were the only arena in the list, the last block would have
2723
     * gotten us out.
2724
     */
2725
333k
    assert(ao->nextarena != NULL);
2726
2727
    /* Case 3:  We have to move the arena towards the end of the list,
2728
     * because it has more free pools than the arena to its right.  It needs
2729
     * to move to follow lastnf.
2730
     * First unlink ao from usable_arenas.
2731
     */
2732
16.9k
    if (ao->prevarena != NULL) {
2733
        /* ao isn't at the head of the list */
2734
14.5k
        assert(ao->prevarena->nextarena == ao);
2735
14.5k
        ao->prevarena->nextarena = ao->nextarena;
2736
14.5k
    }
2737
2.36k
    else {
2738
        /* ao is at the head of the list */
2739
2.36k
        assert(usable_arenas == ao);
2740
2.36k
        usable_arenas = ao->nextarena;
2741
2.36k
    }
2742
16.9k
    ao->nextarena->prevarena = ao->prevarena;
2743
    /* And insert after lastnf. */
2744
16.9k
    ao->prevarena = lastnf;
2745
16.9k
    ao->nextarena = lastnf->nextarena;
2746
16.9k
    if (ao->nextarena != NULL) {
2747
16.7k
        ao->nextarena->prevarena = ao;
2748
16.7k
    }
2749
16.9k
    lastnf->nextarena = ao;
2750
    /* Verify that the swaps worked. */
2751
16.9k
    assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2752
16.9k
    assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2753
16.9k
    assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2754
16.9k
    assert((usable_arenas == ao && ao->prevarena == NULL)
2755
16.9k
           || ao->prevarena->nextarena == ao);
2756
16.9k
}
2757
2758
/* Free a memory block allocated by pymalloc_alloc().
2759
   Return 1 if it was freed.
2760
   Return 0 if the block was not allocated by pymalloc_alloc(). */
2761
static inline int
2762
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
2763
107M
{
2764
107M
    assert(p != NULL);
2765
2766
#ifdef WITH_VALGRIND
2767
    if (UNLIKELY(running_on_valgrind > 0)) {
2768
        return 0;
2769
    }
2770
#endif
2771
2772
107M
    poolp pool = POOL_ADDR(p);
2773
107M
    if (UNLIKELY(!address_in_range(state, p, pool))) {
2774
2.09M
        return 0;
2775
2.09M
    }
2776
    /* We allocated this address. */
2777
2778
    /* Link p to the start of the pool's freeblock list.  Since
2779
     * the pool had at least the p block outstanding, the pool
2780
     * wasn't empty (so it's already in a usedpools[] list, or
2781
     * was full and is in no list -- it's not in the freeblocks
2782
     * list in any case).
2783
     */
2784
107M
    assert(pool->ref.count > 0);            /* else it was empty */
2785
105M
    pymem_block *lastfree = pool->freeblock;
2786
105M
    *(pymem_block **)p = lastfree;
2787
105M
    pool->freeblock = (pymem_block *)p;
2788
105M
    pool->ref.count--;
2789
2790
105M
    if (UNLIKELY(lastfree == NULL)) {
2791
        /* Pool was full, so doesn't currently live in any list:
2792
         * link it to the front of the appropriate usedpools[] list.
2793
         * This mimics LRU pool usage for new allocations and
2794
         * targets optimal filling when several pools contain
2795
         * blocks of the same size class.
2796
         */
2797
5.57M
        insert_to_usedpool(state, pool);
2798
5.57M
        return 1;
2799
5.57M
    }
2800
2801
    /* freeblock wasn't NULL, so the pool wasn't full,
2802
     * and the pool is in a usedpools[] list.
2803
     */
2804
99.4M
    if (LIKELY(pool->ref.count != 0)) {
2805
        /* pool isn't empty:  leave it in usedpools */
2806
99.1M
        return 1;
2807
99.1M
    }
2808
2809
    /* Pool is now empty:  unlink from usedpools, and
2810
     * link to the front of freepools.  This ensures that
2811
     * previously freed pools will be allocated later
2812
     * (being not referenced, they are perhaps paged out).
2813
     */
2814
339k
    insert_to_freepool(state, pool);
2815
339k
    return 1;
2816
99.4M
}
2817
2818
2819
void
2820
_PyObject_Free(void *ctx, void *p)
2821
107M
{
2822
    /* PyObject_Free(NULL) has no effect */
2823
107M
    if (p == NULL) {
2824
56.7k
        return;
2825
56.7k
    }
2826
2827
107M
    OMState *state = get_state();
2828
107M
    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
2829
        /* pymalloc didn't allocate this address */
2830
2.09M
        PyMem_RawFree(p);
2831
2.09M
        raw_allocated_blocks--;
2832
2.09M
    }
2833
107M
}
2834
2835
2836
/* pymalloc realloc.
2837
2838
   If nbytes==0, then as the Python docs promise, we do not treat this like
2839
   free(p), and return a non-NULL result.
2840
2841
   Return 1 if pymalloc reallocated memory and wrote the new pointer into
2842
   newptr_p.
2843
2844
   Return 0 if pymalloc didn't allocated p. */
2845
static int
2846
pymalloc_realloc(OMState *state, void *ctx,
2847
                 void **newptr_p, void *p, size_t nbytes)
2848
7.83M
{
2849
7.83M
    void *bp;
2850
7.83M
    poolp pool;
2851
7.83M
    size_t size;
2852
2853
7.83M
    assert(p != NULL);
2854
2855
#ifdef WITH_VALGRIND
2856
    /* Treat running_on_valgrind == -1 the same as 0 */
2857
    if (UNLIKELY(running_on_valgrind > 0)) {
2858
        return 0;
2859
    }
2860
#endif
2861
2862
7.83M
    pool = POOL_ADDR(p);
2863
7.83M
    if (!address_in_range(state, p, pool)) {
2864
        /* pymalloc is not managing this block.
2865
2866
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2867
           over this block.  However, if we do, we need to copy the valid data
2868
           from the C-managed block to one of our blocks, and there's no
2869
           portable way to know how much of the memory space starting at p is
2870
           valid.
2871
2872
           As bug 1185883 pointed out the hard way, it's possible that the
2873
           C-managed block is "at the end" of allocated VM space, so that a
2874
           memory fault can occur if we try to copy nbytes bytes starting at p.
2875
           Instead we punt: let C continue to manage this block. */
2876
597k
        return 0;
2877
597k
    }
2878
2879
    /* pymalloc is in charge of this block */
2880
7.23M
    size = INDEX2SIZE(pool->szidx);
2881
7.23M
    if (nbytes <= size) {
2882
        /* The block is staying the same or shrinking.
2883
2884
           If it's shrinking, there's a tradeoff: it costs cycles to copy the
2885
           block to a smaller size class, but it wastes memory not to copy it.
2886
2887
           The compromise here is to copy on shrink only if at least 25% of
2888
           size can be shaved off. */
2889
3.93M
        if (4 * nbytes > 3 * size) {
2890
            /* It's the same, or shrinking and new/old > 3/4. */
2891
3.52M
            *newptr_p = p;
2892
3.52M
            return 1;
2893
3.52M
        }
2894
402k
        size = nbytes;
2895
402k
    }
2896
2897
3.70M
    bp = _PyObject_Malloc(ctx, nbytes);
2898
3.70M
    if (bp != NULL) {
2899
3.70M
        memcpy(bp, p, size);
2900
3.70M
        _PyObject_Free(ctx, p);
2901
3.70M
    }
2902
3.70M
    *newptr_p = bp;
2903
3.70M
    return 1;
2904
7.23M
}
2905
2906
2907
void *
2908
_PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2909
14.4M
{
2910
14.4M
    void *ptr2;
2911
2912
14.4M
    if (ptr == NULL) {
2913
6.63M
        return _PyObject_Malloc(ctx, nbytes);
2914
6.63M
    }
2915
2916
7.83M
    OMState *state = get_state();
2917
7.83M
    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
2918
7.23M
        return ptr2;
2919
7.23M
    }
2920
2921
597k
    return PyMem_RawRealloc(ptr, nbytes);
2922
7.83M
}
2923
2924
#else   /* ! WITH_PYMALLOC */
2925
2926
/*==========================================================================*/
2927
/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2928
 * only be used by extensions that are compiled with pymalloc enabled. */
2929
2930
Py_ssize_t
2931
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2932
{
2933
    return 0;
2934
}
2935
2936
Py_ssize_t
2937
_Py_GetGlobalAllocatedBlocks(void)
2938
{
2939
    return 0;
2940
}
2941
2942
void
2943
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
2944
{
2945
    return;
2946
}
2947
2948
void
2949
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
2950
{
2951
    return;
2952
}
2953
2954
#endif /* WITH_PYMALLOC */
2955
2956
2957
/*==========================================================================*/
2958
/* A x-platform debugging allocator.  This doesn't manage memory directly,
2959
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2960
 */
2961
2962
/* Uncomment this define to add the "serialno" field */
2963
/* #define PYMEM_DEBUG_SERIALNO */
2964
2965
#ifdef PYMEM_DEBUG_SERIALNO
2966
static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2967
2968
/* serialno is always incremented via calling this routine.  The point is
2969
 * to supply a single place to set a breakpoint.
2970
 */
2971
static void
2972
bumpserialno(void)
2973
{
2974
    ++serialno;
2975
}
2976
#endif
2977
2978
0
#define SST SIZEOF_SIZE_T
2979
2980
#ifdef PYMEM_DEBUG_SERIALNO
2981
#  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2982
#else
2983
0
#  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2984
#endif
2985
2986
/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2987
static size_t
2988
read_size_t(const void *p)
2989
0
{
2990
0
    const uint8_t *q = (const uint8_t *)p;
2991
0
    size_t result = *q++;
2992
0
    int i;
2993
2994
0
    for (i = SST; --i > 0; ++q)
2995
0
        result = (result << 8) | *q;
2996
0
    return result;
2997
0
}
2998
2999
/* Write n as a big-endian size_t, MSB at address p, LSB at
3000
 * p + sizeof(size_t) - 1.
3001
 */
3002
static void
3003
write_size_t(void *p, size_t n)
3004
0
{
3005
0
    uint8_t *q = (uint8_t *)p + SST - 1;
3006
0
    int i;
3007
3008
0
    for (i = SST; --i >= 0; --q) {
3009
0
        *q = (uint8_t)(n & 0xff);
3010
0
        n >>= 8;
3011
0
    }
3012
0
}
3013
3014
static void
3015
fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
3016
               bool is_alloc)
3017
0
{
3018
#ifdef Py_GIL_DISABLED
3019
    if (api->api_id == 'o') {
3020
        // Don't overwrite the first few bytes of a PyObject allocation in the
3021
        // free-threaded build
3022
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
3023
        size_t debug_offset;
3024
        if (is_alloc) {
3025
            debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
3026
        }
3027
        else {
3028
            char *alloc = (char *)data - 2*SST;  // start of the allocation
3029
            debug_offset = _mi_ptr_page(alloc)->debug_offset;
3030
        }
3031
        debug_offset -= 2*SST;  // account for pymalloc extra bytes
3032
        if (debug_offset < nbytes) {
3033
            memset((char *)data + debug_offset, c, nbytes - debug_offset);
3034
        }
3035
        return;
3036
    }
3037
#endif
3038
0
    memset(data, c, nbytes);
3039
0
}
3040
3041
/* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
3042
   fills them with useful stuff, here calling the underlying malloc's result p:
3043
3044
p[0: S]
3045
    Number of bytes originally asked for.  This is a size_t, big-endian (easier
3046
    to read in a memory dump).
3047
p[S]
3048
    API ID.  See PEP 445.  This is a character, but seems undocumented.
3049
p[S+1: 2*S]
3050
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch under- writes and reads.
3051
p[2*S: 2*S+n]
3052
    The requested memory, filled with copies of PYMEM_CLEANBYTE.
3053
    Used to catch reference to uninitialized memory.
3054
    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
3055
    handled the request itself.
3056
p[2*S+n: 2*S+n+S]
3057
    Copies of PYMEM_FORBIDDENBYTE.  Used to catch over- writes and reads.
3058
p[2*S+n+S: 2*S+n+2*S]
3059
    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
3060
    and _PyMem_DebugRealloc.
3061
    This is a big-endian size_t.
3062
    If "bad memory" is detected later, the serial number gives an
3063
    excellent way to set a breakpoint on the next run, to capture the
3064
    instant at which this block was passed out.
3065
3066
If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
3067
for 3 * S extra bytes, and omits the last serialno field.
3068
*/
3069
3070
static void *
3071
_PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
3072
0
{
3073
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3074
0
    uint8_t *p;           /* base address of malloc'ed pad block */
3075
0
    uint8_t *data;        /* p + 2*SST == pointer to data bytes */
3076
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3077
0
    size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
3078
3079
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3080
        /* integer overflow: can't represent total as a Py_ssize_t */
3081
0
        return NULL;
3082
0
    }
3083
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3084
3085
    /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
3086
                ^--- p    ^--- data   ^--- tail
3087
       S: nbytes stored as size_t
3088
       I: API identifier (1 byte)
3089
       F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
3090
       C: Clean bytes used later to store actual data
3091
       N: Serial number stored as size_t
3092
3093
       If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
3094
       is omitted. */
3095
3096
0
    if (use_calloc) {
3097
0
        p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
3098
0
    }
3099
0
    else {
3100
0
        p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
3101
0
    }
3102
0
    if (p == NULL) {
3103
0
        return NULL;
3104
0
    }
3105
0
    data = p + 2*SST;
3106
3107
#ifdef PYMEM_DEBUG_SERIALNO
3108
    bumpserialno();
3109
#endif
3110
3111
    /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
3112
0
    write_size_t(p, nbytes);
3113
0
    p[SST] = (uint8_t)api->api_id;
3114
0
    memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3115
3116
0
    if (nbytes > 0 && !use_calloc) {
3117
0
        fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
3118
0
    }
3119
3120
    /* at tail, write pad (SST bytes) and serialno (SST bytes) */
3121
0
    tail = data + nbytes;
3122
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3123
#ifdef PYMEM_DEBUG_SERIALNO
3124
    write_size_t(tail + SST, serialno);
3125
#endif
3126
3127
0
    return data;
3128
0
}
3129
3130
void *
3131
_PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
3132
0
{
3133
0
    return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3134
0
}
3135
3136
void *
3137
_PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
3138
0
{
3139
0
    size_t nbytes;
3140
0
    assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
3141
0
    nbytes = nelem * elsize;
3142
0
    return _PyMem_DebugRawAlloc(1, ctx, nbytes);
3143
0
}
3144
3145
3146
/* The debug free first checks the 2*SST bytes on each end for sanity (in
3147
   particular, that the FORBIDDENBYTEs with the api ID are still intact).
3148
   Then fills the original bytes with PYMEM_DEADBYTE.
3149
   Then calls the underlying free.
3150
*/
3151
void
3152
_PyMem_DebugRawFree(void *ctx, void *p)
3153
0
{
3154
    /* PyMem_Free(NULL) has no effect */
3155
0
    if (p == NULL) {
3156
0
        return;
3157
0
    }
3158
3159
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3160
0
    uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
3161
0
    size_t nbytes;
3162
3163
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3164
0
    nbytes = read_size_t(q);
3165
0
    nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
3166
0
    memset(q, PYMEM_DEADBYTE, 2*SST);
3167
0
    fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
3168
0
    api->alloc.free(api->alloc.ctx, q);
3169
0
}
3170
3171
3172
void *
3173
_PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
3174
0
{
3175
0
    if (p == NULL) {
3176
0
        return _PyMem_DebugRawAlloc(0, ctx, nbytes);
3177
0
    }
3178
3179
0
    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
3180
0
    uint8_t *head;        /* base address of malloc'ed pad block */
3181
0
    uint8_t *data;        /* pointer to data bytes */
3182
0
    uint8_t *r;
3183
0
    uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
3184
0
    size_t total;         /* 2 * SST + nbytes + 2 * SST */
3185
0
    size_t original_nbytes;
3186
0
#define ERASED_SIZE 64
3187
3188
0
    _PyMem_DebugCheckAddress(__func__, api->api_id, p);
3189
3190
0
    data = (uint8_t *)p;
3191
0
    head = data - 2*SST;
3192
0
    original_nbytes = read_size_t(head);
3193
0
    if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
3194
        /* integer overflow: can't represent total as a Py_ssize_t */
3195
0
        return NULL;
3196
0
    }
3197
0
    total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
3198
3199
0
    tail = data + original_nbytes;
3200
#ifdef PYMEM_DEBUG_SERIALNO
3201
    size_t block_serialno = read_size_t(tail + SST);
3202
#endif
3203
0
#ifndef Py_GIL_DISABLED
3204
    /* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
3205
       ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
3206
     */
3207
0
    uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
3208
0
    if (original_nbytes <= sizeof(save)) {
3209
0
        memcpy(save, data, original_nbytes);
3210
0
        memset(data - 2 * SST, PYMEM_DEADBYTE,
3211
0
               original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
3212
0
    }
3213
0
    else {
3214
0
        memcpy(save, data, ERASED_SIZE);
3215
0
        memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
3216
0
        memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
3217
0
        memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
3218
0
               ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
3219
0
    }
3220
0
#endif
3221
3222
    /* Resize and add decorations. */
3223
0
    r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
3224
0
    if (r == NULL) {
3225
        /* if realloc() failed: rewrite header and footer which have
3226
           just been erased */
3227
0
        nbytes = original_nbytes;
3228
0
    }
3229
0
    else {
3230
0
        head = r;
3231
#ifdef PYMEM_DEBUG_SERIALNO
3232
        bumpserialno();
3233
        block_serialno = serialno;
3234
#endif
3235
0
    }
3236
0
    data = head + 2*SST;
3237
3238
0
    write_size_t(head, nbytes);
3239
0
    head[SST] = (uint8_t)api->api_id;
3240
0
    memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
3241
3242
0
    tail = data + nbytes;
3243
0
    memset(tail, PYMEM_FORBIDDENBYTE, SST);
3244
#ifdef PYMEM_DEBUG_SERIALNO
3245
    write_size_t(tail + SST, block_serialno);
3246
#endif
3247
3248
0
#ifndef Py_GIL_DISABLED
3249
    /* Restore saved bytes. */
3250
0
    if (original_nbytes <= sizeof(save)) {
3251
0
        memcpy(data, save, Py_MIN(nbytes, original_nbytes));
3252
0
    }
3253
0
    else {
3254
0
        size_t i = original_nbytes - ERASED_SIZE;
3255
0
        memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
3256
0
        if (nbytes > i) {
3257
0
            memcpy(data + i, &save[ERASED_SIZE],
3258
0
                   Py_MIN(nbytes - i, ERASED_SIZE));
3259
0
        }
3260
0
    }
3261
0
#endif
3262
3263
0
    if (r == NULL) {
3264
0
        return NULL;
3265
0
    }
3266
3267
0
    if (nbytes > original_nbytes) {
3268
        /* growing: mark new extra memory clean */
3269
0
        memset(data + original_nbytes, PYMEM_CLEANBYTE,
3270
0
               nbytes - original_nbytes);
3271
0
    }
3272
3273
0
    return data;
3274
0
}
3275
3276
static inline void
3277
_PyMem_DebugCheckGIL(const char *func)
3278
0
{
3279
0
    PyThreadState *tstate = _PyThreadState_GET();
3280
0
    if (tstate == NULL) {
3281
0
#ifndef Py_GIL_DISABLED
3282
0
        _Py_FatalErrorFunc(func,
3283
0
                           "Python memory allocator called "
3284
0
                           "without holding the GIL");
3285
#else
3286
        _Py_FatalErrorFunc(func,
3287
                           "Python memory allocator called "
3288
                           "without an active thread state. "
3289
                           "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?");
3290
#endif
3291
0
    }
3292
0
}
3293
3294
void *
3295
_PyMem_DebugMalloc(void *ctx, size_t nbytes)
3296
0
{
3297
0
    _PyMem_DebugCheckGIL(__func__);
3298
0
    return _PyMem_DebugRawMalloc(ctx, nbytes);
3299
0
}
3300
3301
void *
3302
_PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
3303
0
{
3304
0
    _PyMem_DebugCheckGIL(__func__);
3305
0
    return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
3306
0
}
3307
3308
3309
void
3310
_PyMem_DebugFree(void *ctx, void *ptr)
3311
0
{
3312
0
    _PyMem_DebugCheckGIL(__func__);
3313
0
    _PyMem_DebugRawFree(ctx, ptr);
3314
0
}
3315
3316
3317
void *
3318
_PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
3319
0
{
3320
0
    _PyMem_DebugCheckGIL(__func__);
3321
0
    return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
3322
0
}
3323
3324
/* Check the forbidden bytes on both ends of the memory allocated for p.
3325
 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
3326
 * and call Py_FatalError to kill the program.
3327
 * The API id, is also checked.
3328
 */
3329
static void
3330
_PyMem_DebugCheckAddress(const char *func, char api, const void *p)
3331
0
{
3332
0
    assert(p != NULL);
3333
3334
0
    const uint8_t *q = (const uint8_t *)p;
3335
0
    size_t nbytes;
3336
0
    const uint8_t *tail;
3337
0
    int i;
3338
0
    char id;
3339
3340
    /* Check the API id */
3341
0
    id = (char)q[-SST];
3342
0
    if (id != api) {
3343
0
        _PyObject_DebugDumpAddress(p);
3344
0
        _Py_FatalErrorFormat(func,
3345
0
                             "bad ID: Allocated using API '%c', "
3346
0
                             "verified using API '%c'",
3347
0
                             id, api);
3348
0
    }
3349
3350
    /* Check the stuff at the start of p first:  if there's underwrite
3351
     * corruption, the number-of-bytes field may be nuts, and checking
3352
     * the tail could lead to a segfault then.
3353
     */
3354
0
    for (i = SST-1; i >= 1; --i) {
3355
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3356
0
            _PyObject_DebugDumpAddress(p);
3357
0
            _Py_FatalErrorFunc(func, "bad leading pad byte");
3358
0
        }
3359
0
    }
3360
3361
0
    nbytes = read_size_t(q - 2*SST);
3362
0
    tail = q + nbytes;
3363
0
    for (i = 0; i < SST; ++i) {
3364
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3365
0
            _PyObject_DebugDumpAddress(p);
3366
0
            _Py_FatalErrorFunc(func, "bad trailing pad byte");
3367
0
        }
3368
0
    }
3369
0
}
3370
3371
/* Display info to stderr about the memory block at p. */
3372
static void
3373
_PyObject_DebugDumpAddress(const void *p)
3374
0
{
3375
0
    const uint8_t *q = (const uint8_t *)p;
3376
0
    const uint8_t *tail;
3377
0
    size_t nbytes;
3378
0
    int i;
3379
0
    int ok;
3380
0
    char id;
3381
3382
0
    fprintf(stderr, "Debug memory block at address p=%p:", p);
3383
0
    if (p == NULL) {
3384
0
        fprintf(stderr, "\n");
3385
0
        return;
3386
0
    }
3387
0
    id = (char)q[-SST];
3388
0
    fprintf(stderr, " API '%c'\n", id);
3389
3390
0
    nbytes = read_size_t(q - 2*SST);
3391
0
    fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
3392
3393
    /* In case this is nuts, check the leading pad bytes first. */
3394
0
    fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
3395
0
    ok = 1;
3396
0
    for (i = 1; i <= SST-1; ++i) {
3397
0
        if (*(q-i) != PYMEM_FORBIDDENBYTE) {
3398
0
            ok = 0;
3399
0
            break;
3400
0
        }
3401
0
    }
3402
0
    if (ok)
3403
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3404
0
    else {
3405
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3406
0
            PYMEM_FORBIDDENBYTE);
3407
0
        for (i = SST-1; i >= 1; --i) {
3408
0
            const uint8_t byte = *(q-i);
3409
0
            fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
3410
0
            if (byte != PYMEM_FORBIDDENBYTE)
3411
0
                fputs(" *** OUCH", stderr);
3412
0
            fputc('\n', stderr);
3413
0
        }
3414
3415
0
        fputs("    Because memory is corrupted at the start, the "
3416
0
              "count of bytes requested\n"
3417
0
              "       may be bogus, and checking the trailing pad "
3418
0
              "bytes may segfault.\n", stderr);
3419
0
    }
3420
3421
0
    tail = q + nbytes;
3422
0
    fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
3423
0
    ok = 1;
3424
0
    for (i = 0; i < SST; ++i) {
3425
0
        if (tail[i] != PYMEM_FORBIDDENBYTE) {
3426
0
            ok = 0;
3427
0
            break;
3428
0
        }
3429
0
    }
3430
0
    if (ok)
3431
0
        fputs("FORBIDDENBYTE, as expected.\n", stderr);
3432
0
    else {
3433
0
        fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
3434
0
                PYMEM_FORBIDDENBYTE);
3435
0
        for (i = 0; i < SST; ++i) {
3436
0
            const uint8_t byte = tail[i];
3437
0
            fprintf(stderr, "        at tail+%d: 0x%02x",
3438
0
                    i, byte);
3439
0
            if (byte != PYMEM_FORBIDDENBYTE)
3440
0
                fputs(" *** OUCH", stderr);
3441
0
            fputc('\n', stderr);
3442
0
        }
3443
0
    }
3444
3445
#ifdef PYMEM_DEBUG_SERIALNO
3446
    size_t serial = read_size_t(tail + SST);
3447
    fprintf(stderr,
3448
            "    The block was made by call #%zu to debug malloc/realloc.\n",
3449
            serial);
3450
#endif
3451
3452
0
    if (nbytes > 0) {
3453
0
        i = 0;
3454
0
        fputs("    Data at p:", stderr);
3455
        /* print up to 8 bytes at the start */
3456
0
        while (q < tail && i < 8) {
3457
0
            fprintf(stderr, " %02x", *q);
3458
0
            ++i;
3459
0
            ++q;
3460
0
        }
3461
        /* and up to 8 at the end */
3462
0
        if (q < tail) {
3463
0
            if (tail - q > 8) {
3464
0
                fputs(" ...", stderr);
3465
0
                q = tail - 8;
3466
0
            }
3467
0
            while (q < tail) {
3468
0
                fprintf(stderr, " %02x", *q);
3469
0
                ++q;
3470
0
            }
3471
0
        }
3472
0
        fputc('\n', stderr);
3473
0
    }
3474
0
    fputc('\n', stderr);
3475
3476
0
    fflush(stderr);
3477
0
    _PyMem_DumpTraceback(fileno(stderr), p);
3478
0
}
3479
3480
3481
static size_t
3482
printone(FILE *out, const char* msg, size_t value)
3483
0
{
3484
0
    int i, k;
3485
0
    char buf[100];
3486
0
    size_t origvalue = value;
3487
3488
0
    fputs(msg, out);
3489
0
    for (i = (int)strlen(msg); i < 35; ++i)
3490
0
        fputc(' ', out);
3491
0
    fputc('=', out);
3492
3493
    /* Write the value with commas. */
3494
0
    i = 22;
3495
0
    buf[i--] = '\0';
3496
0
    buf[i--] = '\n';
3497
0
    k = 3;
3498
0
    do {
3499
0
        size_t nextvalue = value / 10;
3500
0
        unsigned int digit = (unsigned int)(value - nextvalue * 10);
3501
0
        value = nextvalue;
3502
0
        buf[i--] = (char)(digit + '0');
3503
0
        --k;
3504
0
        if (k == 0 && value && i >= 0) {
3505
0
            k = 3;
3506
0
            buf[i--] = ',';
3507
0
        }
3508
0
    } while (value && i >= 0);
3509
3510
0
    while (i >= 0)
3511
0
        buf[i--] = ' ';
3512
0
    fputs(buf, out);
3513
3514
0
    return origvalue;
3515
0
}
3516
3517
void
3518
_PyDebugAllocatorStats(FILE *out,
3519
                       const char *block_name, int num_blocks, size_t sizeof_block)
3520
0
{
3521
0
    char buf1[128];
3522
0
    char buf2[128];
3523
0
    PyOS_snprintf(buf1, sizeof(buf1),
3524
0
                  "%d %ss * %zd bytes each",
3525
0
                  num_blocks, block_name, sizeof_block);
3526
0
    PyOS_snprintf(buf2, sizeof(buf2),
3527
0
                  "%48s ", buf1);
3528
0
    (void)printone(out, buf2, num_blocks * sizeof_block);
3529
0
}
3530
3531
// Return true if the obmalloc state structure is heap allocated,
3532
// by PyMem_RawCalloc().  For the main interpreter, this structure
3533
// allocated in the BSS.  Allocating that way gives some memory savings
3534
// and a small performance win (at least on a demand paged OS).  On
3535
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
3536
// memory is for the arena_map_top array.  Since normally only one entry
3537
// of that array is used, only one page of resident memory is actually
3538
// used, rather than the full 256 kB.
3539
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
3540
0
{
3541
0
#if WITH_PYMALLOC
3542
0
    return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
3543
#else
3544
    return false;
3545
#endif
3546
0
}
3547
3548
#ifdef WITH_PYMALLOC
3549
static void
3550
init_obmalloc_pools(PyInterpreterState *interp)
3551
20
{
3552
    // initialize the obmalloc->pools structure.  This must be done
3553
    // before the obmalloc alloc/free functions can be called.
3554
20
    poolp temp[OBMALLOC_USED_POOLS_SIZE] =
3555
20
        _obmalloc_pools_INIT(interp->obmalloc->pools);
3556
20
    memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
3557
20
}
3558
#endif /* WITH_PYMALLOC */
3559
3560
int _PyMem_init_obmalloc(PyInterpreterState *interp)
3561
20
{
3562
20
#ifdef WITH_PYMALLOC
3563
    /* Initialize obmalloc, but only for subinterpreters,
3564
       since the main interpreter is initialized statically. */
3565
20
    if (_Py_IsMainInterpreter(interp)
3566
0
            || _PyInterpreterState_HasFeature(interp,
3567
20
                                              Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
3568
20
        interp->obmalloc = &obmalloc_state_main;
3569
20
        if (!obmalloc_state_initialized) {
3570
20
            init_obmalloc_pools(interp);
3571
20
            obmalloc_state_initialized = true;
3572
20
        }
3573
20
    } else {
3574
0
        interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
3575
0
        if (interp->obmalloc == NULL) {
3576
0
            return -1;
3577
0
        }
3578
0
        init_obmalloc_pools(interp);
3579
0
    }
3580
20
#endif /* WITH_PYMALLOC */
3581
20
    return 0; // success
3582
20
}
3583
3584
3585
#ifdef WITH_PYMALLOC
3586
3587
static void
3588
free_obmalloc_arenas(PyInterpreterState *interp)
3589
0
{
3590
0
    OMState *state = interp->obmalloc;
3591
0
    for (uint i = 0; i < maxarenas; ++i) {
3592
        // free each obmalloc memory arena
3593
0
        struct arena_object *ao = &allarenas[i];
3594
0
        _PyObject_Arena.free(_PyObject_Arena.ctx,
3595
0
                             (void *)ao->address, ARENA_SIZE);
3596
0
    }
3597
    // free the array containing pointers to all arenas
3598
0
    PyMem_RawFree(allarenas);
3599
0
#if WITH_PYMALLOC_RADIX_TREE
3600
0
#ifdef USE_INTERIOR_NODES
3601
    // Free the middle and bottom nodes of the radix tree.  These are allocated
3602
    // by arena_map_mark_used() but not freed when arenas are freed.
3603
0
    for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
3604
0
         arena_map_mid_t *mid = arena_map_root.ptrs[i1];
3605
0
         if (mid == NULL) {
3606
0
             continue;
3607
0
         }
3608
0
         for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
3609
0
            arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
3610
0
            if (bot == NULL) {
3611
0
                continue;
3612
0
            }
3613
0
            PyMem_RawFree(bot);
3614
0
         }
3615
0
         PyMem_RawFree(mid);
3616
0
    }
3617
0
#endif
3618
0
#endif
3619
0
}
3620
3621
#ifdef Py_DEBUG
3622
/* Is target in the list?  The list is traversed via the nextpool pointers.
3623
 * The list may be NULL-terminated, or circular.  Return 1 if target is in
3624
 * list, else 0.
3625
 */
3626
static int
3627
pool_is_in_list(const poolp target, poolp list)
3628
{
3629
    poolp origlist = list;
3630
    assert(target != NULL);
3631
    if (list == NULL)
3632
        return 0;
3633
    do {
3634
        if (target == list)
3635
            return 1;
3636
        list = list->nextpool;
3637
    } while (list != NULL && list != origlist);
3638
    return 0;
3639
}
3640
#endif
3641
3642
#ifdef WITH_MIMALLOC
3643
struct _alloc_stats {
3644
    size_t allocated_blocks;
3645
    size_t allocated_bytes;
3646
    size_t allocated_with_overhead;
3647
    size_t bytes_reserved;
3648
    size_t bytes_committed;
3649
};
3650
3651
static bool _collect_alloc_stats(
3652
    const mi_heap_t* heap, const mi_heap_area_t* area,
3653
    void* block, size_t block_size, void* arg)
3654
0
{
3655
0
    struct _alloc_stats *stats = (struct _alloc_stats *)arg;
3656
0
    stats->allocated_blocks += area->used;
3657
0
    stats->allocated_bytes += area->used * area->block_size;
3658
0
    stats->allocated_with_overhead += area->used * area->full_block_size;
3659
0
    stats->bytes_reserved += area->reserved;
3660
0
    stats->bytes_committed += area->committed;
3661
0
    return 1;
3662
0
}
3663
3664
static void
3665
py_mimalloc_print_stats(FILE *out)
3666
0
{
3667
0
    fprintf(out, "Small block threshold = %zu, in %u size classes.\n",
3668
0
        (size_t)MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
3669
0
    fprintf(out, "Medium block threshold = %zu\n",
3670
0
            (size_t)MI_MEDIUM_OBJ_SIZE_MAX);
3671
0
    fprintf(out, "Large object max size = %zu\n",
3672
0
            (size_t)MI_LARGE_OBJ_SIZE_MAX);
3673
3674
0
    mi_heap_t *heap = mi_heap_get_default();
3675
0
    struct _alloc_stats stats;
3676
0
    memset(&stats, 0, sizeof(stats));
3677
0
    mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
3678
3679
0
    fprintf(out, "    Allocated Blocks: %zd\n", stats.allocated_blocks);
3680
0
    fprintf(out, "    Allocated Bytes: %zd\n", stats.allocated_bytes);
3681
0
    fprintf(out, "    Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
3682
0
    fprintf(out, "    Bytes Reserved: %zd\n", stats.bytes_reserved);
3683
0
    fprintf(out, "    Bytes Committed: %zd\n", stats.bytes_committed);
3684
0
}
3685
#endif
3686
3687
3688
static void
3689
pymalloc_print_stats(FILE *out)
3690
0
{
3691
0
    OMState *state = get_state();
3692
3693
0
    uint i;
3694
0
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
3695
    /* # of pools, allocated blocks, and free blocks per class index */
3696
0
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3697
0
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3698
0
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
3699
    /* total # of allocated bytes in used and full pools */
3700
0
    size_t allocated_bytes = 0;
3701
    /* total # of available bytes in used pools */
3702
0
    size_t available_bytes = 0;
3703
    /* # of free pools + pools not yet carved out of current arena */
3704
0
    uint numfreepools = 0;
3705
    /* # of bytes for arena alignment padding */
3706
0
    size_t arena_alignment = 0;
3707
    /* # of bytes in used and full pools used for pool_headers */
3708
0
    size_t pool_header_bytes = 0;
3709
    /* # of bytes in used and full pools wasted due to quantization,
3710
     * i.e. the necessarily leftover space at the ends of used and
3711
     * full pools.
3712
     */
3713
0
    size_t quantization = 0;
3714
    /* # of arenas actually allocated. */
3715
0
    size_t narenas = 0;
3716
    /* running total -- should equal narenas * ARENA_SIZE */
3717
0
    size_t total;
3718
0
    char buf[128];
3719
3720
0
    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
3721
0
            SMALL_REQUEST_THRESHOLD, numclasses);
3722
3723
0
    for (i = 0; i < numclasses; ++i)
3724
0
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
3725
3726
    /* Because full pools aren't linked to from anything, it's easiest
3727
     * to march over all the arenas.  If we're lucky, most of the memory
3728
     * will be living in full pools -- would be a shame to miss them.
3729
     */
3730
0
    for (i = 0; i < maxarenas; ++i) {
3731
0
        uintptr_t base = allarenas[i].address;
3732
3733
        /* Skip arenas which are not allocated. */
3734
0
        if (allarenas[i].address == (uintptr_t)NULL)
3735
0
            continue;
3736
0
        narenas += 1;
3737
3738
0
        numfreepools += allarenas[i].nfreepools;
3739
3740
        /* round up to pool alignment */
3741
0
        if (base & (uintptr_t)POOL_SIZE_MASK) {
3742
0
            arena_alignment += POOL_SIZE;
3743
0
            base &= ~(uintptr_t)POOL_SIZE_MASK;
3744
0
            base += POOL_SIZE;
3745
0
        }
3746
3747
        /* visit every pool in the arena */
3748
0
        assert(base <= (uintptr_t) allarenas[i].pool_address);
3749
0
        for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
3750
0
            poolp p = (poolp)base;
3751
0
            const uint sz = p->szidx;
3752
0
            uint freeblocks;
3753
3754
0
            if (p->ref.count == 0) {
3755
                /* currently unused */
3756
#ifdef Py_DEBUG
3757
                assert(pool_is_in_list(p, allarenas[i].freepools));
3758
#endif
3759
0
                continue;
3760
0
            }
3761
0
            ++numpools[sz];
3762
0
            numblocks[sz] += p->ref.count;
3763
0
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
3764
0
            numfreeblocks[sz] += freeblocks;
3765
#ifdef Py_DEBUG
3766
            if (freeblocks > 0)
3767
                assert(pool_is_in_list(p, usedpools[sz + sz]));
3768
#endif
3769
0
        }
3770
0
    }
3771
0
    assert(narenas == narenas_currently_allocated);
3772
3773
0
    fputc('\n', out);
3774
0
    fputs("class   size   num pools   blocks in use  avail blocks\n"
3775
0
          "-----   ----   ---------   -------------  ------------\n",
3776
0
          out);
3777
3778
0
    for (i = 0; i < numclasses; ++i) {
3779
0
        size_t p = numpools[i];
3780
0
        size_t b = numblocks[i];
3781
0
        size_t f = numfreeblocks[i];
3782
0
        uint size = INDEX2SIZE(i);
3783
0
        if (p == 0) {
3784
0
            assert(b == 0 && f == 0);
3785
0
            continue;
3786
0
        }
3787
0
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3788
0
                i, size, p, b, f);
3789
0
        allocated_bytes += b * size;
3790
0
        available_bytes += f * size;
3791
0
        pool_header_bytes += p * POOL_OVERHEAD;
3792
0
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3793
0
    }
3794
0
    fputc('\n', out);
3795
#ifdef PYMEM_DEBUG_SERIALNO
3796
    if (_PyMem_DebugEnabled()) {
3797
        (void)printone(out, "# times object malloc called", serialno);
3798
    }
3799
#endif
3800
0
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3801
0
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3802
0
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
3803
0
    (void)printone(out, "# arenas allocated current", narenas);
3804
3805
0
    PyOS_snprintf(buf, sizeof(buf),
3806
0
                  "%zu arenas * %d bytes/arena",
3807
0
                  narenas, ARENA_SIZE);
3808
0
    (void)printone(out, buf, narenas * ARENA_SIZE);
3809
3810
0
    fputc('\n', out);
3811
3812
    /* Account for what all of those arena bytes are being used for. */
3813
0
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3814
0
    total += printone(out, "# bytes in available blocks", available_bytes);
3815
3816
0
    PyOS_snprintf(buf, sizeof(buf),
3817
0
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3818
0
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3819
3820
0
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3821
0
    total += printone(out, "# bytes lost to quantization", quantization);
3822
0
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3823
0
    (void)printone(out, "Total", total);
3824
0
    assert(narenas * ARENA_SIZE == total);
3825
3826
0
#if WITH_PYMALLOC_RADIX_TREE
3827
0
    fputs("\narena map counts\n", out);
3828
0
#ifdef USE_INTERIOR_NODES
3829
0
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3830
0
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3831
0
    fputc('\n', out);
3832
0
#endif
3833
0
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3834
0
#ifdef USE_INTERIOR_NODES
3835
0
    total += printone(out, "# bytes lost to arena map mid",
3836
0
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
3837
0
    total += printone(out, "# bytes lost to arena map bot",
3838
0
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
3839
0
    (void)printone(out, "Total", total);
3840
0
#endif
3841
0
#endif
3842
3843
0
}
3844
3845
/* Print summary info to "out" about the state of pymalloc's structures.
3846
 * In Py_DEBUG mode, also perform some expensive internal consistency
3847
 * checks.
3848
 *
3849
 * Return 0 if the memory debug hooks are not installed or no statistics was
3850
 * written into out, return 1 otherwise.
3851
 */
3852
int
3853
_PyObject_DebugMallocStats(FILE *out)
3854
0
{
3855
0
#ifdef WITH_MIMALLOC
3856
0
    if (_PyMem_MimallocEnabled()) {
3857
0
        py_mimalloc_print_stats(out);
3858
0
        return 1;
3859
0
    }
3860
0
    else
3861
0
#endif
3862
0
    if (_PyMem_PymallocEnabled()) {
3863
0
        pymalloc_print_stats(out);
3864
0
        return 1;
3865
0
    }
3866
0
    else {
3867
0
        return 0;
3868
0
    }
3869
0
}
3870
3871
#endif /* #ifdef WITH_PYMALLOC */