Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Objects/mimalloc/os.c
Line
Count
Source
1
/* ----------------------------------------------------------------------------
2
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3
This is free software; you can redistribute it and/or modify it under the
4
terms of the MIT license. A copy of the license can be found in the file
5
"LICENSE" at the root of this distribution.
6
-----------------------------------------------------------------------------*/
7
#include "mimalloc.h"
8
#include "mimalloc/internal.h"
9
#include "mimalloc/atomic.h"
10
#include "mimalloc/prim.h"
11
12
13
/* -----------------------------------------------------------
14
  Initialization.
15
  On Windows, initializes support for aligned allocation and
16
  large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
17
----------------------------------------------------------- */
18
19
static mi_os_mem_config_t mi_os_mem_config = {
20
  4096,   // page size
21
  0,      // large page size (usually 2MiB)
22
  4096,   // allocation granularity
23
  true,   // has overcommit?  (if true we use MAP_NORESERVE on mmap systems)
24
  false,  // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
25
  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
26
};
27
28
0
bool _mi_os_has_overcommit(void) {
29
0
  return mi_os_mem_config.has_overcommit;
30
0
}
31
32
0
bool _mi_os_has_virtual_reserve(void) {
33
0
  return mi_os_mem_config.has_virtual_reserve;
34
0
}
35
36
37
// OS (small) page size
38
0
size_t _mi_os_page_size(void) {
39
0
  return mi_os_mem_config.page_size;
40
0
}
41
42
// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB)
43
0
size_t _mi_os_large_page_size(void) {
44
0
  return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
45
0
}
46
47
0
bool _mi_os_use_large_page(size_t size, size_t alignment) {
48
  // if we have access, check the size and alignment requirements
49
0
  if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
50
0
  return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0);
51
0
}
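
As an illustrative aside (not part of os.c): assuming large OS pages are enabled and have a 2 MiB page size, the check above reduces to a divisibility test on both the size and the alignment. A minimal standalone sketch:

/* Illustrative sketch only: the divisibility rule for large OS pages,
   with a hard-coded 2 MiB size standing in for mi_os_mem_config.large_page_size
   and the option check dropped. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool use_large_page(size_t size, size_t alignment) {
  const size_t large = (size_t)2 << 20;                 // assumed 2 MiB large pages
  return (size % large) == 0 && (alignment % large) == 0;
}

int main(void) {
  printf("%d\n", use_large_page((size_t)4 << 20, (size_t)2 << 20));  // 1: both are 2 MiB multiples
  printf("%d\n", use_large_page((size_t)3 << 20, (size_t)2 << 20));  // 0: 3 MiB is not
  return 0;
}
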
52
53
// round to a good OS allocation size (bounded by max 12.5% waste)
54
0
size_t _mi_os_good_alloc_size(size_t size) {
55
0
  size_t align_size;
56
0
  if (size < 512*MI_KiB) align_size = _mi_os_page_size();
57
0
  else if (size < 2*MI_MiB) align_size = 64*MI_KiB;
58
0
  else if (size < 8*MI_MiB) align_size = 256*MI_KiB;
59
0
  else if (size < 32*MI_MiB) align_size = 1*MI_MiB;
60
0
  else align_size = 4*MI_MiB;
61
0
  if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow?
62
0
  return _mi_align_up(size, align_size);
63
0
}
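
The 12.5% bound mentioned above follows from the buckets: the granularity is always at most 1/8 of the bucket's lower limit (64 KiB/512 KiB, 256 KiB/2 MiB, 1 MiB/8 MiB, 4 MiB/32 MiB). A standalone restatement of the same rounding, with plain constants in place of MI_KiB/MI_MiB and _mi_align_up (illustrative only, not part of os.c):

#include <stdint.h>
#include <stdio.h>

static size_t align_up(size_t sz, size_t align) {
  return (sz + align - 1) & ~(align - 1);               // align must be a power of two
}

static size_t good_alloc_size(size_t size, size_t os_page_size) {
  size_t align_size;
  if      (size < 512u * 1024)       align_size = os_page_size;      // < 512 KiB: page granularity
  else if (size < 2u * 1024 * 1024)  align_size = 64u * 1024;        // < 2 MiB:   64 KiB
  else if (size < 8u * 1024 * 1024)  align_size = 256u * 1024;       // < 8 MiB:   256 KiB
  else if (size < 32u * 1024 * 1024) align_size = 1u * 1024 * 1024;  // < 32 MiB:  1 MiB
  else                               align_size = 4u * 1024 * 1024;  // otherwise: 4 MiB
  if (size >= SIZE_MAX - align_size) return size;                    // possible overflow
  return align_up(size, align_size);
}

int main(void) {
  printf("%zu\n", good_alloc_size(100u * 1024, 4096));          // 102400 (already a page multiple)
  printf("%zu\n", good_alloc_size(3u * 1024 * 1024 + 1, 4096)); // 3407872 (next 256 KiB boundary)
  return 0;
}
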
64
65
16
void _mi_os_init(void) {
66
16
  _mi_prim_mem_init(&mi_os_mem_config);
67
16
}
68
69
70
/* -----------------------------------------------------------
71
  Util
72
-------------------------------------------------------------- */
73
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
74
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);
75
76
0
static void* mi_align_up_ptr(void* p, size_t alignment) {
77
0
  return (void*)_mi_align_up((uintptr_t)p, alignment);
78
0
}
79
80
0
static void* mi_align_down_ptr(void* p, size_t alignment) {
81
0
  return (void*)_mi_align_down((uintptr_t)p, alignment);
82
0
}
83
84
85
/* -----------------------------------------------------------
86
  aligned hinting
87
-------------------------------------------------------------- */
88
89
// On 64-bit systems, we can do efficient aligned allocation by using
90
// the 2TiB to 30TiB area to allocate those.
91
#if (MI_INTPTR_SIZE >= 8)
92
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
93
94
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
95
// If this returns NULL, the OS will determine the address but on some OS's that may not be
96
// properly aligned which can be more costly as it needs to be adjusted afterwards.
97
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
98
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
99
//  in the middle of the 2TiB - 6TiB address range (see issue #372))
100
101
0
#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
102
0
#define MI_HINT_AREA ((uintptr_t)4 << 40)  // up to 6TiB  (since before win8 there is "only" 8TiB available to processes)
103
0
#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
104
105
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
106
0
{
107
0
  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
108
0
  size = _mi_align_up(size, MI_SEGMENT_SIZE);
109
0
  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
110
  #if (MI_SECURE>0)
111
  size += MI_SEGMENT_SIZE;        // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
112
  #endif
113
114
0
  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
115
0
  if (hint == 0 || hint > MI_HINT_MAX) {   // wrap or initialize
116
0
    uintptr_t init = MI_HINT_BASE;
117
0
    #if (MI_SECURE>0 || MI_DEBUG==0)       // security: randomize start of aligned allocations unless in debug mode
118
0
    mi_heap_t* heap = mi_prim_get_default_heap();
119
    // gh-123022: default heap may not be initialized in CPython in background threads
120
0
    if (mi_heap_is_initialized(heap)) {
121
0
      uintptr_t r = _mi_heap_random_next(heap);
122
0
      init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
123
0
    }
124
0
    #endif
125
0
    uintptr_t expected = hint + size;
126
0
    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
127
0
    hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
128
0
  }
129
0
  if (hint%try_alignment != 0) return NULL;
130
0
  return (void*)hint;
131
0
}
132
#else
133
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
134
  MI_UNUSED(try_alignment); MI_UNUSED(size);
135
  return NULL;
136
}
137
#endif
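
A simplified, single-threaded sketch of the hint arithmetic above (illustrative only, not part of os.c): it drops the atomics, the randomized start, and the alignment/size checks, and only shows how the running base starts at 2 TiB and is reset once it passes 30 TiB.

#include <stdint.h>
#include <stdio.h>

#define HINT_BASE ((uintptr_t)2 << 40)    // 2 TiB start of the hint area
#define HINT_MAX  ((uintptr_t)30 << 40)   // wrap after 30 TiB (32 TiB+ is used for huge pages)

static uintptr_t hint_base = 0;           // single-threaded stand-in for the atomic aligned_base

static uintptr_t next_hint(size_t size) {
  uintptr_t hint = hint_base;             // fetch-and-add stand-in
  hint_base += size;
  if (hint == 0 || hint > HINT_MAX) {     // first use, or wrapped past the area
    hint = HINT_BASE;
    hint_base = HINT_BASE + size;
  }
  return hint;
}

int main(void) {
  printf("%#llx\n", (unsigned long long)next_hint((size_t)4 << 20));  // 0x20000000000 (2 TiB)
  printf("%#llx\n", (unsigned long long)next_hint((size_t)4 << 20));  // 0x20000400000 (2 TiB + 4 MiB)
  return 0;
}
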
138
139
140
/* -----------------------------------------------------------
141
  Free memory
142
-------------------------------------------------------------- */
143
144
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
145
146
0
static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
147
0
  MI_UNUSED(tld_stats);
148
0
  mi_assert_internal((size % _mi_os_page_size()) == 0);
149
0
  if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
150
0
  int err = _mi_prim_free(addr, size);
151
0
  if (err != 0) {
152
0
    _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
153
0
  }
154
0
  mi_stats_t* stats = &_mi_stats_main;
155
0
  if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
156
0
  _mi_stat_decrease(&stats->reserved, size);
157
0
}
158
159
0
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
160
0
  if (mi_memkind_is_os(memid.memkind)) {
161
0
    size_t csize = _mi_os_good_alloc_size(size);
162
0
    void* base = addr;
163
    // different base? (due to alignment)
164
0
    if (memid.mem.os.base != NULL) {
165
0
      mi_assert(memid.mem.os.base <= addr);
166
0
      mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
167
0
      base = memid.mem.os.base;
168
0
      csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
169
0
    }
170
    // free it
171
0
    if (memid.memkind == MI_MEM_OS_HUGE) {
172
0
      mi_assert(memid.is_pinned);
173
0
      mi_os_free_huge_os_pages(base, csize, tld_stats);
174
0
    }
175
0
    else {
176
0
      mi_os_prim_free(base, csize, still_committed, tld_stats);
177
0
    }
178
0
  }
179
0
  else {
180
    // nothing to do
181
0
    mi_assert(memid.memkind < MI_MEM_OS);
182
0
  }
183
0
}
184
185
0
void  _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
186
0
  _mi_os_free_ex(p, size, true, memid, tld_stats);
187
0
}
188
189
190
/* -----------------------------------------------------------
191
   Primitive allocation from the OS.
192
-------------------------------------------------------------- */
193
194
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
195
0
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
196
0
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
197
0
  mi_assert_internal(is_zero != NULL);
198
0
  mi_assert_internal(is_large != NULL);
199
0
  if (size == 0) return NULL;
200
0
  if (!commit) { allow_large = false; }
201
0
  if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning
202
203
0
  *is_zero = false;
204
0
  void* p = NULL;
205
0
  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
206
0
  if (err != 0) {
207
0
    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
208
0
  }
209
0
  mi_stat_counter_increase(stats->mmap_calls, 1);
210
0
  if (p != NULL) {
211
0
    _mi_stat_increase(&stats->reserved, size);
212
0
    if (commit) {
213
0
      _mi_stat_increase(&stats->committed, size);
214
      // seems needed for asan (or `mimalloc-test-api` fails)
215
      #ifdef MI_TRACK_ASAN
216
      if (*is_zero) { mi_track_mem_defined(p,size); }
217
               else { mi_track_mem_undefined(p,size); }
218
      #endif
219
0
    }
220
0
  }
221
0
  return p;
222
0
}
223
224
225
// Primitive aligned allocation from the OS.
226
// This function guarantees the allocated memory is aligned.
227
0
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) {
228
0
  mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
229
0
  mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
230
0
  mi_assert_internal(is_large != NULL);
231
0
  mi_assert_internal(is_zero != NULL);
232
0
  mi_assert_internal(base != NULL);
233
0
  if (!commit) allow_large = false;
234
0
  if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
235
0
  size = _mi_align_up(size, _mi_os_page_size());
236
237
  // try first with a hint (this will be aligned directly on Win 10+ or BSD)
238
0
  void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
239
0
  if (p == NULL) return NULL;
240
241
  // aligned already?
242
0
  if (((uintptr_t)p % alignment) == 0) {
243
0
    *base = p;
244
0
  }
245
0
  else {
246
    // if not aligned, free it, overallocate, and unmap around it
247
    // NOTE(sgross): this warning causes issues in Python tests
248
    // _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
249
0
    mi_os_prim_free(p, size, commit, stats);
250
0
    if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
251
0
    const size_t over_size = size + alignment;
252
253
0
    if (mi_os_mem_config.must_free_whole) {  // win32 VirtualAlloc cannot free parts of an allocated block
254
      // over-allocate uncommitted (virtual) memory
255
0
      p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
256
0
      if (p == NULL) return NULL;
257
258
      // set p to the aligned part in the full region
259
      // note: this is dangerous on Windows as VirtualFree needs the actual base pointer
260
      // this is handled, though, by the `base` field stored in the memid
261
0
      *base = p; // remember the base
262
0
      p = mi_align_up_ptr(p, alignment);
263
264
      // explicitly commit only the aligned part
265
0
      if (commit) {
266
0
        _mi_os_commit(p, size, NULL, stats);
267
0
      }
268
0
    }
269
0
    else  { // mmap can free inside an allocation
270
      // overallocate...
271
0
      p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
272
0
      if (p == NULL) return NULL;
273
274
      // and selectively unmap parts around the over-allocated area. (noop on sbrk)
275
0
      void* aligned_p = mi_align_up_ptr(p, alignment);
276
0
      size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
277
0
      size_t mid_size = _mi_align_up(size, _mi_os_page_size());
278
0
      size_t post_size = over_size - pre_size - mid_size;
279
0
      mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
280
0
      if (pre_size > 0)  { mi_os_prim_free(p, pre_size, commit, stats); }
281
0
      if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); }
282
      // we can return the aligned pointer on `mmap` (and sbrk) systems
283
0
      p = aligned_p;
284
0
      *base = aligned_p; // since we freed the pre part, `*base == p`.
285
0
    }
286
0
  }
287
288
0
  mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0));
289
0
  return p;
290
0
}
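
A worked example of the mmap over-allocation path above (illustrative only, not part of os.c; the address and sizes are hypothetical): the mapping is over-allocated by `alignment` bytes, and the unaligned head and tail are freed around the aligned middle part.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uintptr_t p         = (uintptr_t)0x7f3a12345000ull;   // hypothetical page-aligned mmap result
  const size_t    size      = (size_t)8 << 20;                // requested size: 8 MiB
  const size_t    alignment = (size_t)4 << 20;                // requested alignment: 4 MiB
  const size_t    over_size = size + alignment;               // actually mapped: 12 MiB

  const uintptr_t aligned_p = (p + alignment - 1) & ~(uintptr_t)(alignment - 1);
  const size_t    pre_size  = (size_t)(aligned_p - p);          // freed before the aligned part
  const size_t    mid_size  = size;                             // kept (size is already page aligned)
  const size_t    post_size = over_size - pre_size - mid_size;  // freed after the aligned part

  printf("pre=%zu mid=%zu post=%zu\n", pre_size, mid_size, post_size);
  // pre_size + post_size == alignment, and aligned_p is a 4 MiB multiple.
  return 0;
}
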
291
292
293
/* -----------------------------------------------------------
294
  OS API: alloc and alloc_aligned
295
----------------------------------------------------------- */
296
297
0
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
298
0
  MI_UNUSED(tld_stats);
299
0
  *memid = _mi_memid_none();
300
0
  mi_stats_t* stats = &_mi_stats_main;
301
0
  if (size == 0) return NULL;
302
0
  size = _mi_os_good_alloc_size(size);
303
0
  bool os_is_large = false;
304
0
  bool os_is_zero  = false;
305
0
  void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
306
0
  if (p != NULL) {
307
0
    *memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
308
0
  }
309
0
  return p;
310
0
}
311
312
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
313
0
{
314
0
  MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
315
0
  MI_UNUSED(tld_stats);
316
0
  *memid = _mi_memid_none();
317
0
  if (size == 0) return NULL;
318
0
  size = _mi_os_good_alloc_size(size);
319
0
  alignment = _mi_align_up(alignment, _mi_os_page_size());
320
321
0
  bool os_is_large = false;
322
0
  bool os_is_zero  = false;
323
0
  void* os_base = NULL;
324
0
  void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ );
325
0
  if (p != NULL) {
326
0
    *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
327
0
    memid->mem.os.base = os_base;
328
0
    memid->mem.os.alignment = alignment;
329
0
  }
330
0
  return p;
331
0
}
332
333
/* -----------------------------------------------------------
334
  OS aligned allocation with an offset. This is used
335
  for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
336
  page where the object can be aligned at an offset from the start of the segment.
337
  As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
338
  to use the actual start of the memory region.
339
----------------------------------------------------------- */
340
341
0
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
342
0
  mi_assert(offset <= MI_SEGMENT_SIZE);
343
0
  mi_assert(offset <= size);
344
0
  mi_assert((alignment % _mi_os_page_size()) == 0);
345
0
  *memid = _mi_memid_none();
346
0
  if (offset > MI_SEGMENT_SIZE) return NULL;
347
0
  if (offset == 0) {
348
    // regular aligned allocation
349
0
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
350
0
  }
351
0
  else {
352
    // overallocate to align at an offset
353
0
    const size_t extra = _mi_align_up(offset, alignment) - offset;
354
0
    const size_t oversize = size + extra;
355
0
    void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
356
0
    if (start == NULL) return NULL;
357
358
0
    void* const p = (uint8_t*)start + extra;
359
0
    mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
360
    // decommit the overallocation at the start
361
0
    if (commit && extra > _mi_os_page_size()) {
362
0
      _mi_os_decommit(start, extra, tld_stats);
363
0
    }
364
0
    return p;
365
0
  }
366
0
}
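
A worked example of the `extra` computation above (illustrative only, not part of os.c): with a 1 MiB offset and a 4 MiB alignment, 3 MiB of slack is prepended so that `p + offset` lands exactly on an alignment boundary.

#include <stdio.h>

static size_t align_up(size_t x, size_t align) {
  return (x + align - 1) & ~(align - 1);       // align must be a power of two
}

int main(void) {
  const size_t alignment = (size_t)4 << 20;                       // 4 MiB
  const size_t offset    = (size_t)1 << 20;                       // 1 MiB
  const size_t size      = (size_t)16 << 20;                      // requested size
  const size_t extra     = align_up(offset, alignment) - offset;  // 3 MiB of slack
  const size_t oversize  = size + extra;                          // 19 MiB actually requested

  printf("extra=%zu oversize=%zu\n", extra, oversize);
  // `start` is alignment-aligned, so (start + extra) + offset == start + 4 MiB is aligned too.
  return 0;
}
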
367
368
/* -----------------------------------------------------------
369
  OS memory API: reset, commit, decommit, protect, unprotect.
370
----------------------------------------------------------- */
371
372
// OS page align within a given area, either conservative (pages inside the area only),
373
// or not (straddling pages outside the area is possible)
374
0
static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) {
375
0
  mi_assert(addr != NULL && size > 0);
376
0
  if (newsize != NULL) *newsize = 0;
377
0
  if (size == 0 || addr == NULL) return NULL;
378
379
  // page align conservatively within the range
380
0
  void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
381
0
    : mi_align_down_ptr(addr, _mi_os_page_size()));
382
0
  void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
383
0
    : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
384
0
  ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
385
0
  if (diff <= 0) return NULL;
386
387
0
  mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size));
388
0
  if (newsize != NULL) *newsize = (size_t)diff;
389
0
  return start;
390
0
}
391
392
0
static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) {
393
0
  return mi_os_page_align_areax(true, addr, size, newsize);
394
0
}
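
An illustrative comparison of the two alignment modes above (not part of os.c; the address is hypothetical): the conservative variant keeps only the pages fully inside the range (used below for decommit, reset, and protect), while the non-conservative one covers every page the range touches (used for commit).

#include <stdint.h>
#include <stdio.h>

#define PAGE 4096u

static uintptr_t align_up(uintptr_t x)   { return (x + PAGE - 1) & ~(uintptr_t)(PAGE - 1); }
static uintptr_t align_down(uintptr_t x) { return x & ~(uintptr_t)(PAGE - 1); }

int main(void) {
  const uintptr_t addr = 0x10000100;       // hypothetical, not page aligned
  const size_t    size = 10000;            // touches three pages

  uintptr_t cs = align_up(addr),   ce = align_down(addr + size);  // conservative
  uintptr_t ns = align_down(addr), ne = align_up(addr + size);    // non-conservative

  printf("conservative:     %#llx..%#llx (%llu bytes)\n",
         (unsigned long long)cs, (unsigned long long)ce, (unsigned long long)(ce - cs));
  printf("non-conservative: %#llx..%#llx (%llu bytes)\n",
         (unsigned long long)ns, (unsigned long long)ne, (unsigned long long)(ne - ns));
  return 0;
}
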
395
396
0
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
397
0
  MI_UNUSED(tld_stats);
398
0
  mi_stats_t* stats = &_mi_stats_main;
399
0
  if (is_zero != NULL) { *is_zero = false; }
400
0
  _mi_stat_increase(&stats->committed, size);  // use size for precise commit vs. decommit
401
0
  _mi_stat_counter_increase(&stats->commit_calls, 1);
402
403
  // page align range
404
0
  size_t csize;
405
0
  void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
406
0
  if (csize == 0) return true;
407
408
  // commit
409
0
  bool os_is_zero = false;
410
0
  int err = _mi_prim_commit(start, csize, &os_is_zero);
411
0
  if (err != 0) {
412
0
    _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
413
0
    return false;
414
0
  }
415
0
  if (os_is_zero && is_zero != NULL) {
416
0
    *is_zero = true;
417
0
    mi_assert_expensive(mi_mem_is_zero(start, csize));
418
0
  }
419
  // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
420
  #ifdef MI_TRACK_ASAN
421
  if (os_is_zero) { mi_track_mem_defined(start,csize); }
422
             else { mi_track_mem_undefined(start,csize); }
423
  #endif
424
0
  return true;
425
0
}
426
427
0
static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) {
428
0
  MI_UNUSED(tld_stats);
429
0
  mi_stats_t* stats = &_mi_stats_main;
430
0
  mi_assert_internal(needs_recommit!=NULL);
431
0
  _mi_stat_decrease(&stats->committed, size);
432
433
  // page align
434
0
  size_t csize;
435
0
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
436
0
  if (csize == 0) return true;
437
438
  // decommit
439
0
  *needs_recommit = true;
440
0
  int err = _mi_prim_decommit(start,csize,needs_recommit);
441
0
  if (err != 0) {
442
0
    _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
443
0
  }
444
0
  mi_assert_internal(err == 0);
445
0
  return (err == 0);
446
0
}
447
448
0
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
449
0
  bool needs_recommit;
450
0
  return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats);
451
0
}
452
453
454
// Signal to the OS that the address range is no longer in use
455
// but may be used later again. This will release physical memory
456
// pages and reduce swapping while keeping the memory committed.
457
// We page align to a conservative area inside the range to reset.
458
0
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
459
  // page align conservatively within the range
460
0
  size_t csize;
461
0
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
462
0
  if (csize == 0) return true;  // || _mi_os_is_huge_reserved(addr)
463
0
  _mi_stat_increase(&stats->reset, csize);
464
0
  _mi_stat_counter_increase(&stats->reset_calls, 1);
465
466
  #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN
467
  memset(start, 0, csize); // pretend it is eagerly reset
468
  #endif
469
470
0
  int err = _mi_prim_reset(start, csize);
471
0
  if (err != 0) {
472
0
    _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
473
0
  }
474
0
  return (err == 0);
475
0
}
476
477
478
// either resets or decommits memory, returns true if the memory needs
479
// to be recommitted if it is to be re-used later on.
480
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
481
0
{
482
0
  if (mi_option_get(mi_option_purge_delay) < 0) return false;  // is purging allowed?
483
0
  _mi_stat_counter_increase(&stats->purge_calls, 1);
484
0
  _mi_stat_increase(&stats->purged, size);
485
486
0
  if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
487
0
      !_mi_preloading())                                   // don't decommit during preloading (unsafe)
488
0
  {
489
0
    bool needs_recommit = true;
490
0
    mi_os_decommit_ex(p, size, &needs_recommit, stats);
491
0
    return needs_recommit;
492
0
  }
493
0
  else {
494
0
    if (allow_reset) {  // this can sometimes be not allowed if the range is not fully committed
495
0
      _mi_os_reset(p, size, stats);
496
0
    }
497
0
    return false;  // needs no recommit
498
0
  }
499
0
}
500
501
// either resets or decommits memory, returns true if the memory needs
502
// to be recommitted if it is to be re-used later on.
503
0
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) {
504
0
  return _mi_os_purge_ex(p, size, true, stats);
505
0
}
506
507
// Protect a region in memory to be not accessible.
508
0
static  bool mi_os_protectx(void* addr, size_t size, bool protect) {
509
  // page align conservatively within the range
510
0
  size_t csize = 0;
511
0
  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
512
0
  if (csize == 0) return false;
513
  /*
514
  if (_mi_os_is_huge_reserved(addr)) {
515
          _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
516
  }
517
  */
518
0
  int err = _mi_prim_protect(start,csize,protect);
519
0
  if (err != 0) {
520
0
    _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize);
521
0
  }
522
0
  return (err == 0);
523
0
}
524
525
0
bool _mi_os_protect(void* addr, size_t size) {
526
0
  return mi_os_protectx(addr, size, true);
527
0
}
528
529
0
bool _mi_os_unprotect(void* addr, size_t size) {
530
0
  return mi_os_protectx(addr, size, false);
531
0
}
532
533
534
535
/* ----------------------------------------------------------------------------
536
Support for allocating huge OS pages (1GiB) that are reserved up-front
537
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
538
-----------------------------------------------------------------------------*/
539
0
#define MI_HUGE_OS_PAGE_SIZE  (MI_GiB)
540
541
542
#if (MI_INTPTR_SIZE >= 8)
543
// To ensure proper alignment, use our own area for huge OS pages
544
static mi_decl_cache_align _Atomic(uintptr_t)  mi_huge_start; // = 0
545
546
// Claim an aligned address range for huge pages
547
0
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
548
0
  if (total_size != NULL) *total_size = 0;
549
0
  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
550
551
0
  uintptr_t start = 0;
552
0
  uintptr_t end = 0;
553
0
  uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
554
0
  do {
555
0
    start = huge_start;
556
0
    if (start == 0) {
557
      // Initialize the start address after the 32TiB area
558
0
      start = ((uintptr_t)32 << 40);  // 32TiB virtual start address
559
0
    #if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of huge pages unless in debug mode
560
0
      mi_heap_t* heap = mi_prim_get_default_heap();
561
      // gh-123022: default heap may not be initialized in CPython in background threads
562
0
      if (mi_heap_is_initialized(heap)) {
563
0
        uintptr_t r = _mi_heap_random_next(heap);
564
0
        start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12bits)*1GiB == between 0 to 4TiB
565
0
      }
566
0
    #endif
567
0
    }
568
0
    end = start + size;
569
0
    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
570
0
  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
571
572
0
  if (total_size != NULL) *total_size = size;
573
0
  return (uint8_t*)start;
574
0
}
575
#else
576
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
577
  MI_UNUSED(pages);
578
  if (total_size != NULL) *total_size = 0;
579
  return NULL;
580
}
581
#endif
582
583
// Allocate MI_SEGMENT_SIZE aligned huge pages
584
0
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
585
0
  *memid = _mi_memid_none();
586
0
  if (psize != NULL) *psize = 0;
587
0
  if (pages_reserved != NULL) *pages_reserved = 0;
588
0
  size_t size = 0;
589
0
  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
590
0
  if (start == NULL) return NULL; // or 32-bit systems
591
592
  // Allocate one page at a time but try to place them contiguously
593
  // We allocate one page at a time to be able to abort if it takes too long
594
  // or to at least allocate as many as available on the system.
595
0
  mi_msecs_t start_t = _mi_clock_start();
596
0
  size_t page = 0;
597
0
  bool all_zero = true;
598
0
  while (page < pages) {
599
    // allocate a page
600
0
    bool is_zero = false;
601
0
    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
602
0
    void* p = NULL;
603
0
    int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
604
0
    if (!is_zero) { all_zero = false;  }
605
0
    if (err != 0) {
606
0
      _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
607
0
      break;
608
0
    }
609
610
    // Did we succeed at a contiguous address?
611
0
    if (p != addr) {
612
      // no success, issue a warning and break
613
0
      if (p != NULL) {
614
0
        _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
615
0
        mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
616
0
      }
617
0
      break;
618
0
    }
619
620
    // success, record it
621
0
    page++;  // increase before timeout check (see issue #711)
622
0
    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
623
0
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
624
625
    // check for timeout
626
0
    if (max_msecs > 0) {
627
0
      mi_msecs_t elapsed = _mi_clock_end(start_t);
628
0
      if (page >= 1) {
629
0
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
630
0
        if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
631
0
          elapsed = max_msecs + 1;
632
0
        }
633
0
      }
634
0
      if (elapsed > max_msecs) {
635
0
        _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page);
636
0
        break;
637
0
      }
638
0
    }
639
0
  }
640
0
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
641
0
  if (pages_reserved != NULL) { *pages_reserved = page; }
642
0
  if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
643
0
  if (page != 0) {
644
0
    mi_assert(start != NULL);
645
0
    *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
646
0
    memid->memkind = MI_MEM_OS_HUGE;
647
0
    mi_assert(memid->is_pinned);
648
    #ifdef MI_TRACK_ASAN
649
    if (all_zero) { mi_track_mem_defined(start,size); }
650
    #endif
651
0
  }
652
0
  return (page == 0 ? NULL : start);
653
0
}
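
A small worked example of the timeout estimate in the loop above (illustrative only, not part of os.c; the numbers are hypothetical): after each page the projected total time is compared against twice the budget, and the loop gives up early when it is clearly going to overrun.

#include <stdio.h>

int main(void) {
  const long max_msecs = 1000;   // hypothetical budget: 1 second
  const long pages     = 16;     // want 16 huge pages (16 GiB)
  long       page      = 1;      // pages allocated so far (incremented before the check)
  long       elapsed   = 400;    // suppose 400 ms have elapsed so far

  long estimate = (elapsed / (page + 1)) * pages;   // 200 * 16 = 3200 ms projected
  if (estimate > 2 * max_msecs) {                   // 3200 > 2000: clearly overrunning
    elapsed = max_msecs + 1;                        // force the timeout branch
  }
  printf("estimate=%ld ms, timed out=%d\n", estimate, elapsed > max_msecs);
  return 0;
}
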
654
655
// free every huge page in a range individually (as we allocated per page)
656
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
657
0
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
658
0
  if (p==NULL || size==0) return;
659
0
  uint8_t* base = (uint8_t*)p;
660
0
  while (size >= MI_HUGE_OS_PAGE_SIZE) {
661
0
    mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
662
0
    size -= MI_HUGE_OS_PAGE_SIZE;
663
0
    base += MI_HUGE_OS_PAGE_SIZE;
664
0
  }
665
0
}
666
667
/* ----------------------------------------------------------------------------
668
Support NUMA aware allocation
669
-----------------------------------------------------------------------------*/
670
671
_Atomic(size_t)  _mi_numa_node_count; // = 0   // cache the node count
672
673
0
size_t _mi_os_numa_node_count_get(void) {
674
0
  size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
675
0
  if (count <= 0) {
676
0
    long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
677
0
    if (ncount > 0) {
678
0
      count = (size_t)ncount;
679
0
    }
680
0
    else {
681
0
      count = _mi_prim_numa_node_count(); // or detect dynamically
682
0
      if (count == 0) count = 1;
683
0
    }
684
0
    mi_atomic_store_release(&_mi_numa_node_count, count); // save it
685
0
    _mi_verbose_message("using %zd numa regions\n", count);
686
0
  }
687
0
  return count;
688
0
}
689
690
0
int _mi_os_numa_node_get(mi_os_tld_t* tld) {
691
0
  MI_UNUSED(tld);
692
0
  size_t numa_count = _mi_os_numa_node_count();
693
0
  if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
694
  // never more than the node count and >= 0
695
0
  size_t numa_node = _mi_prim_numa_node();
696
0
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
697
0
  return (int)numa_node;
698
0
}