Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Objects/mimalloc/prim/unix/prim.c
Line | Count | Source
  1 |       | /* ----------------------------------------------------------------------------
  2 |       | Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
  3 |       | This is free software; you can redistribute it and/or modify it under the
  4 |       | terms of the MIT license. A copy of the license can be found in the file
  5 |       | "LICENSE" at the root of this distribution.
  6 |       | -----------------------------------------------------------------------------*/
  7 |       |
  8 |       | // This file is included in `src/prim/prim.c`
  9 |       |
 10 |       | #ifndef _DEFAULT_SOURCE
 11 |       | #define _DEFAULT_SOURCE   // ensure mmap flags and syscall are defined
 12 |       | #endif
 13 |       |
 14 |       | #if defined(__sun)
 15 |       | // illumos provides the new mman.h api when any of these are defined,
 16 |       | // otherwise the old api based on caddr_t, which predates the void-pointer one.
 17 |       | // stock solaris provides only the former, so we choose to discard those
 18 |       | // flags only here rather than project-wide.
 19 |       | #undef _XOPEN_SOURCE
 20 |       | #undef _POSIX_C_SOURCE
 21 |       | #endif
 22 |       |
 23 |       | #include "mimalloc.h"
 24 |       | #include "mimalloc/internal.h"
 25 |       | #include "mimalloc/atomic.h"
 26 |       | #include "mimalloc/prim.h"
 27 |       |
 28 |       | #include <sys/mman.h>  // mmap
 29 |       | #include <unistd.h>    // sysconf
 30 |       | #include <fcntl.h>     // open, close, read, access
 31 |       |
 32 |       | #if defined(__linux__)
 33 |       |   #include <features.h>
 34 |       |   #include <fcntl.h>
 35 |       |   #if defined(__GLIBC__)
 36 |       |   #include <linux/mman.h> // linux mmap flags
 37 |       |   #else
 38 |       |   #include <sys/mman.h>
 39 |       |   #endif
 40 |       | #elif defined(__APPLE__)
 41 |       |   #include <TargetConditionals.h>
 42 |       |   #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
 43 |       |   #include <mach/vm_statistics.h>
 44 |       |   #endif
 45 |       | #elif defined(__FreeBSD__) || defined(__DragonFly__)
 46 |       |   #include <sys/param.h>
 47 |       |   #if __FreeBSD_version >= 1200000
 48 |       |   #include <sys/cpuset.h>
 49 |       |   #include <sys/domainset.h>
 50 |       |   #endif
 51 |       |   #include <sys/sysctl.h>
 52 |       | #endif
 53 |       |
 54 |       | #if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__sun) && !defined(__NetBSD__)
 55 |       |   #define MI_HAS_SYSCALL_H
 56 |       |   #include <sys/syscall.h>
 57 |       | #endif
 58 |       |
 59 |       | //------------------------------------------------------------------------------------
 60 |       | // Use syscalls for some primitives to allow for libraries that override open/read/close etc.
 61 |       | // and do allocation themselves; using syscalls prevents recursion when mimalloc is
 62 |       | // still initializing (issue #713)
 63 |       | //------------------------------------------------------------------------------------
 64 |       |
 65 |       | #if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
 66 |       |
 67 |    16 | static int mi_prim_open(const char* fpath, int open_flags) {
 68 |    16 |   return syscall(SYS_open,fpath,open_flags,0);
 69 |    16 | }
 70 |    16 | static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
 71 |    16 |   return syscall(SYS_read,fd,buf,bufsize);
 72 |    16 | }
 73 |    16 | static int mi_prim_close(int fd) {
 74 |    16 |   return syscall(SYS_close,fd);
 75 |    16 | }
 76 |     0 | static int mi_prim_access(const char *fpath, int mode) {
 77 |     0 |   return syscall(SYS_access,fpath,mode);
 78 |     0 | }
 79 |       |
 80 |       | #elif !defined(__APPLE__) && !defined(_AIX) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__sun) && !defined(__NetBSD__) // avoid unused warnings
 81 |       |
 82 |       | static int mi_prim_open(const char* fpath, int open_flags) {
 83 |       |   return open(fpath,open_flags);
 84 |       | }
 85 |       | static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) {
 86 |       |   return read(fd,buf,bufsize);
 87 |       | }
 88 |       | static int mi_prim_close(int fd) {
 89 |       |   return close(fd);
 90 |       | }
 91 |       | static int mi_prim_access(const char *fpath, int mode) {
 92 |       |   return access(fpath,mode);
 93 |       | }
 94 |       |
 95 |       | #endif
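
The recursion hazard mentioned above can be made concrete. A library interposed via LD_PRELOAD may override open() and allocate inside its wrapper; if mimalloc called the libc open() while still bootstrapping, the wrapper's malloc() would re-enter the not-yet-initialized allocator. The raw syscall()-based primitives above sidestep that path. A minimal, purely hypothetical interposer (not part of mimalloc) that would trigger such recursion:

    // hypothetical LD_PRELOAD interposer: allocates before forwarding open()
    #define _GNU_SOURCE
    #include <dlfcn.h>
    #include <stdlib.h>

    int open(const char* path, int flags, ...) {
      void* scratch = malloc(64);  // allocation inside the wrapper...
      int (*real_open)(const char*, int, ...) =
          (int (*)(const char*, int, ...))dlsym(RTLD_NEXT, "open");
      int fd = real_open(path, flags);  // ...before reaching the real open()
      free(scratch);
      return fd;
    }
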
 96 |       |
 97 |       |
 98 |       |
 99 |       | //---------------------------------------------
100 |       | // init
101 |       | //---------------------------------------------
102 |       |
103 |    16 | static bool unix_detect_overcommit(void) {
104 |    16 |   bool os_overcommit = true;
105 |    16 | #if defined(__linux__)
106 |    16 |   int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY);
107 |    16 |   if (fd >= 0) {
108 |    16 |     char buf[32] = {0};
109 |    16 |     ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf));
110 |    16 |     mi_prim_close(fd);
111 |       |     // <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting>
112 |       |     // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE)
113 |    16 |     if (nread >= 1) {
114 |    16 |       os_overcommit = (buf[0] == '0' || buf[0] == '1');
115 |    16 |     }
116 |    16 |   }
117 |       | #elif defined(__FreeBSD__)
118 |       |   int val = 0;
119 |       |   size_t olen = sizeof(val);
120 |       |   if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) {
121 |       |     os_overcommit = (val != 0);
122 |       |   }
123 |       | #else
124 |       |   // default: overcommit is true
125 |       | #endif
126 |    16 |   return os_overcommit;
127 |    16 | }
128 |       |
129 |    16 | void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
130 |    16 |   long psize = sysconf(_SC_PAGESIZE);
131 |    16 |   if (psize > 0) {
132 |    16 |     config->page_size = (size_t)psize;
133 |    16 |     config->alloc_granularity = (size_t)psize;
134 |    16 |   }
135 |    16 |   config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
136 |    16 |   config->has_overcommit = unix_detect_overcommit();
137 |    16 |   config->must_free_whole = false;    // mmap can free in parts
138 |    16 |   config->has_virtual_reserve = true; // todo: check if this is true for NetBSD (for anonymous mmap with PROT_NONE)
139 |    16 | }
140 |       |
141 |       |
142 |       | //---------------------------------------------
143 |       | // free
144 |       | //---------------------------------------------
145 |       |
146 |     0 | int _mi_prim_free(void* addr, size_t size ) {
147 |     0 |   bool err = (munmap(addr, size) == -1);
148 |     0 |   return (err ? errno : 0);
149 |     0 | }
150 |       |
151 |       |
152 |       | //---------------------------------------------
153 |       | // mmap
154 |       | //---------------------------------------------
155 |       |
156 |     0 | static int unix_madvise(void* addr, size_t size, int advice) {
157 |       |   #if defined(__sun)
158 |       |   return madvise((caddr_t)addr, size, advice);  // Solaris needs cast (issue #520)
159 |       |   #else
160 |     0 |   return madvise(addr, size, advice);
161 |     0 |   #endif
162 |     0 | }
163 |       |
164 |     0 | static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
165 |     0 |   MI_UNUSED(try_alignment);
166 |     0 |   void* p = NULL;
167 |       |   #if defined(MAP_ALIGNED)  // BSD
168 |       |   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
169 |       |     size_t n = mi_bsr(try_alignment);
170 |       |     if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
171 |       |       p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
172 |       |       if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
173 |       |         int err = errno;
174 |       |         _mi_verbose_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
175 |       |       }
176 |       |       if (p!=MAP_FAILED) return p;
177 |       |       // fall back to regular mmap
178 |       |     }
179 |       |   }
180 |       |   #elif defined(MAP_ALIGN)  // Solaris
181 |       |   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
182 |       |     p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);  // addr parameter is the required alignment
183 |       |     if (p!=MAP_FAILED) return p;
184 |       |     // fall back to regular mmap
185 |       |   }
186 |       |   #endif
187 |     0 |   #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
188 |       |   // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations
189 |     0 |   if (addr == NULL) {
190 |     0 |     void* hint = _mi_os_get_aligned_hint(try_alignment, size);
191 |     0 |     if (hint != NULL) {
192 |     0 |       p = mmap(hint, size, protect_flags, flags, fd, 0);
193 |     0 |       if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
194 |       |         #if MI_TRACK_ENABLED  // asan sometimes does not instrument errno correctly?
195 |       |         int err = 0;
196 |       |         #else
197 |     0 |         int err = errno;
198 |     0 |         #endif
199 |     0 |         _mi_verbose_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint);
200 |     0 |       }
201 |     0 |       if (p!=MAP_FAILED) return p;
202 |       |       // fall back to regular mmap
203 |     0 |     }
204 |     0 |   }
205 |     0 |   #endif
206 |       |   // regular mmap
207 |     0 |   p = mmap(addr, size, protect_flags, flags, fd, 0);
208 |     0 |   if (p!=MAP_FAILED) return p;
209 |       |   // failed to allocate
210 |     0 |   return NULL;
211 |     0 | }
212 |       |
213 |     0 | static int unix_mmap_fd(void) {
214 |       |   #if defined(VM_MAKE_TAG)
215 |       |   // macOS: tag anonymous pages with a specific ID (all IDs up to 98 are officially assigned, and the LLVM sanitizers took 99)
216 |       |   int os_tag = (int)mi_option_get(mi_option_os_tag);
217 |       |   if (os_tag < 100 || os_tag > 255) { os_tag = 100; }
218 |       |   return VM_MAKE_TAG(os_tag);
219 |       |   #else
220 |     0 |   return -1;
221 |     0 |   #endif
222 |     0 | }
223 |       |
224 |     0 | static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) {
225 |       |   #if !defined(MAP_ANONYMOUS)
226 |       |   #define MAP_ANONYMOUS  MAP_ANON
227 |       |   #endif
228 |       |   #if !defined(MAP_NORESERVE)
229 |       |   #define MAP_NORESERVE  0
230 |       |   #endif
231 |     0 |   void* p = NULL;
232 |     0 |   const int fd = unix_mmap_fd();
233 |     0 |   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
234 |     0 |   if (_mi_os_has_overcommit()) {
235 |     0 |     flags |= MAP_NORESERVE;
236 |     0 |   }
237 |       |   #if defined(PROT_MAX)
238 |       |   protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
239 |       |   #endif
240 |       |   // huge page allocation
241 |     0 |   if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) {
242 |     0 |     static _Atomic(size_t) large_page_try_ok; // = 0;
243 |     0 |     size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
244 |     0 |     if (!large_only && try_ok > 0) {
245 |       |       // If the OS is not configured for large OS pages, or the user does not have
246 |       |       // enough permission, the `mmap` will always fail (but it might also fail for other reasons).
247 |       |       // Therefore, once a large page allocation fails, we don't try again for `large_page_try_ok` attempts
248 |       |       // to avoid too many failing calls to mmap.
249 |     0 |       mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
250 |     0 |     }
251 |     0 |     else {
252 |     0 |       int lflags = flags & ~MAP_NORESERVE;  // using NORESERVE on huge pages seems to fail on Linux
253 |     0 |       int lfd = fd;
254 |       |       #ifdef MAP_ALIGNED_SUPER
255 |       |       lflags |= MAP_ALIGNED_SUPER;
256 |       |       #endif
257 |     0 |       #ifdef MAP_HUGETLB
258 |     0 |       lflags |= MAP_HUGETLB;
259 |     0 |       #endif
260 |     0 |       #ifdef MAP_HUGE_1GB
261 |     0 |       static bool mi_huge_pages_available = true;
262 |     0 |       if ((size % MI_GiB) == 0 && mi_huge_pages_available) {
263 |     0 |         lflags |= MAP_HUGE_1GB;
264 |     0 |       }
265 |     0 |       else
266 |     0 |       #endif
267 |     0 |       {
268 |     0 |         #ifdef MAP_HUGE_2MB
269 |     0 |         lflags |= MAP_HUGE_2MB;
270 |     0 |         #endif
271 |     0 |       }
272 |       |       #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB
273 |       |       lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
274 |       |       #endif
275 |     0 |       if (large_only || lflags != flags) {
276 |       |         // try large OS page allocation
277 |     0 |         *is_large = true;
278 |     0 |         p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
279 |     0 |         #ifdef MAP_HUGE_1GB
280 |     0 |         if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
281 |     0 |           mi_huge_pages_available = false; // don't try huge 1GiB pages again
282 |     0 |           _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
283 |     0 |           lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
284 |     0 |           p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
285 |     0 |         }
286 |     0 |         #endif
287 |     0 |         if (large_only) return p;
288 |     0 |         if (p == NULL) {
289 |     0 |           mi_atomic_store_release(&large_page_try_ok, (size_t)8);  // on error, don't try again for the next N allocations
290 |     0 |         }
291 |     0 |       }
292 |     0 |     }
293 |     0 |   }
294 |       |   // regular allocation
295 |     0 |   if (p == NULL) {
296 |     0 |     *is_large = false;
297 |     0 |     p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd);
298 |     0 |     if (p != NULL) {
299 |     0 |       #if defined(MADV_HUGEPAGE)
300 |       |       // Many Linux systems don't allow MAP_HUGETLB but instead support
301 |       |       // transparent huge pages (THP). Generally, calling `madvise` with MADV_HUGEPAGE
302 |       |       // is not required since properly aligned allocations will already use large pages
303 |       |       // if available in that case -- in particular for our large regions (in `memory.c`).
304 |       |       // However, some systems only allow THP if called with explicit `madvise`, so
305 |       |       // when large OS pages are enabled for mimalloc, we call `madvise` anyway.
306 |     0 |       if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
307 |     0 |         if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) {
308 |     0 |           *is_large = true; // possibly
309 |     0 |         };
310 |     0 |       }
311 |       |       #elif defined(__sun)
312 |       |       if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
313 |       |         struct memcntl_mha cmd = {0};
314 |       |         cmd.mha_pagesize = 2*MI_MiB;
315 |       |         cmd.mha_cmd = MHA_MAPSIZE_VA;
316 |       |         if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
317 |       |           *is_large = true;
318 |       |         }
319 |       |       }
320 |       |       #endif
321 |     0 |     }
322 |     0 |   }
323 |     0 |   return p;
324 |     0 | }
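
The MADV_HUGEPAGE path above is the transparent-huge-page (THP) pattern in miniature. A minimal standalone sketch of the same idea (assumes Linux; madvise is advisory and may fail harmlessly on kernels without THP):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void) {
      const size_t size = 4 * 1024 * 1024;   // a multiple of the 2 MiB THP size
      void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) { perror("mmap"); return 1; }
    #if defined(MADV_HUGEPAGE)
      if (madvise(p, size, MADV_HUGEPAGE) != 0) {  // ask for THP backing
        perror("madvise(MADV_HUGEPAGE)");
      }
    #endif
      munmap(p, size);
      return 0;
    }
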
325 |       |
326 |       | // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
327 |     0 | int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
328 |     0 |   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
329 |     0 |   mi_assert_internal(commit || !allow_large);
330 |     0 |   mi_assert_internal(try_alignment > 0);
331 |       |
332 |     0 |   *is_zero = true;
333 |     0 |   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
334 |     0 |   *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
335 |     0 |   return (*addr != NULL ? 0 : errno);
336 |     0 | }
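
Since `try_alignment` is only a hint, a caller that needs guaranteed alignment must handle a misaligned result itself. A common technique (and roughly what mimalloc's portable OS layer does on top of this primitive) is to over-allocate by the alignment and trim. A sketch, assuming `alignment` is a power of two and a multiple of the page size; `mmap_aligned` is an illustrative name, not a mimalloc function:

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>

    static void* mmap_aligned(size_t size, size_t alignment) {
      size_t over = size + alignment;       // over-allocate by the alignment
      uint8_t* p = (uint8_t*)mmap(NULL, over, PROT_READ | PROT_WRITE,
                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;
      uintptr_t base    = (uintptr_t)p;
      uintptr_t aligned = (base + alignment - 1) & ~(uintptr_t)(alignment - 1);
      size_t pre  = (size_t)(aligned - base);
      size_t post = over - pre - size;
      if (pre  > 0) munmap(p, pre);                         // trim the head
      if (post > 0) munmap((void*)(aligned + size), post);  // trim the tail
      return (void*)aligned;
    }
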
337 |       |
338 |       |
339 |       | //---------------------------------------------
340 |       | // Commit/Reset
341 |       | //---------------------------------------------
342 |       |
343 |     0 | static void unix_mprotect_hint(int err) {
344 |       |   #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page
345 |       |   if (err == ENOMEM) {
346 |       |     _mi_warning_message("The next warning may be caused by a low memory map limit.\n"
347 |       |                         "  On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n"
348 |       |                         "  For example: sudo sysctl -w vm.max_map_count=262144\n");
349 |       |   }
350 |       |   #else
351 |     0 |   MI_UNUSED(err);
352 |     0 |   #endif
353 |     0 | }
354 |       |
355 |     0 | int _mi_prim_commit(void* start, size_t size, bool* is_zero) {
356 |       |   // commit: ensure we can access the area
357 |       |   // note: we might expect that *is_zero could be true since the memory
358 |       |   // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but
359 |       |   // we sometimes call commit on a range that is still partially committed,
360 |       |   // and `mprotect` does not zero the range.
361 |     0 |   *is_zero = false;
362 |     0 |   int err = mprotect(start, size, (PROT_READ | PROT_WRITE));
363 |     0 |   if (err != 0) {
364 |     0 |     err = errno;
365 |     0 |     unix_mprotect_hint(err);
366 |     0 |   }
367 |     0 |   return err;
368 |     0 | }
369 |       |
370 |     0 | int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
371 |     0 |   int err = 0;
372 |       |   // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
373 |     0 |   err = unix_madvise(start, size, MADV_DONTNEED);
374 |     0 |   #if !MI_DEBUG && !MI_SECURE
375 |     0 |     *needs_recommit = false;
376 |       |   #else
377 |       |     *needs_recommit = true;
378 |       |     mprotect(start, size, PROT_NONE);
379 |       |   #endif
380 |       |   /*
381 |       |   // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss)
382 |       |   *needs_recommit = true;
383 |       |   const int fd = unix_mmap_fd();
384 |       |   void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0);
385 |       |   if (p != start) { err = errno; }
386 |       |   */
387 |     0 |   return err;
388 |     0 | }
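
The commit/decommit pair above can be exercised in isolation. A sketch of the round-trip (assumes Linux/POSIX and page-aligned sizes): reserve with PROT_NONE, commit with mprotect, then decommit with MADV_DONTNEED while keeping the address range reserved.

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void) {
      size_t size = 16 * (size_t)sysconf(_SC_PAGESIZE);
      void* p = mmap(NULL, size, PROT_NONE,                 // reserve only
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
      if (p == MAP_FAILED) { perror("mmap"); return 1; }
      if (mprotect(p, size, PROT_READ | PROT_WRITE) != 0) { // commit
        perror("mprotect"); return 1;
      }
      ((char*)p)[0] = 1;                                    // touch a page
      if (madvise(p, size, MADV_DONTNEED) != 0) {           // decommit, keep range
        perror("madvise"); return 1;
      }
      munmap(p, size);
      return 0;
    }
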
389 |       |
390 |     0 | int _mi_prim_reset(void* start, size_t size) {
391 |       |   // We try to use `MADV_FREE` as that is the fastest. A drawback, though, is that it
392 |       |   // will not reduce the `rss` stats in tools like `top` even though the memory is available
393 |       |   // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that
394 |       |   // `MADV_DONTNEED` is used by default instead.
395 |     0 |   #if defined(MADV_FREE)
396 |     0 |   static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
397 |     0 |   int oadvice = (int)mi_atomic_load_relaxed(&advice);
398 |     0 |   int err;
399 |     0 |   while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0;  };
400 |     0 |   if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
401 |       |     // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
402 |     0 |     mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
403 |     0 |     err = unix_madvise(start, size, MADV_DONTNEED);
404 |     0 |   }
405 |       |   #else
406 |       |   int err = unix_madvise(start, size, MADV_DONTNEED);
407 |       |   #endif
408 |     0 |   return err;
409 |     0 | }
410 |       |
411 |     0 | int _mi_prim_protect(void* start, size_t size, bool protect) {
412 |     0 |   int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE));
413 |     0 |   if (err != 0) { err = errno; }
414 |     0 |   unix_mprotect_hint(err);
415 |     0 |   return err;
416 |     0 | }
417 |       |
418 |       |
419 |       |
420 |       | //---------------------------------------------
421 |       | // Huge page allocation
422 |       | //---------------------------------------------
423 |       |
424 |       | #if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__)
425 |       |
426 |       | #ifndef MPOL_PREFERRED
427 |     0 | #define MPOL_PREFERRED 1
428 |       | #endif
429 |       |
430 |       | #if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind)
431 |     0 | static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
432 |     0 |   return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
433 |     0 | }
434 |       | #else
435 |       | static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
436 |       |   MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags);
437 |       |   return 0;
438 |       | }
439 |       | #endif
440 |       |
441 |     0 | int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
442 |     0 |   bool is_large = true;
443 |     0 |   *is_zero = true;
444 |     0 |   *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
445 |     0 |   if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
446 |     0 |     unsigned long numa_mask = (1UL << numa_node);
447 |       |     // TODO: does `mbind` work correctly for huge OS pages? should we
448 |       |     // use `set_mempolicy` before calling mmap instead?
449 |       |     // see: <https://lkml.org/lkml/2017/2/9/875>
450 |     0 |     long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
451 |     0 |     if (err != 0) {
452 |     0 |       err = errno;
453 |     0 |       _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err);
454 |     0 |     }
455 |     0 |   }
456 |     0 |   return (*addr != NULL ? 0 : errno);
457 |     0 | }
458 |       |
459 |       | #else
460 |       |
461 |       | int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
462 |       |   MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
463 |       |   *is_zero = false;
464 |       |   *addr = NULL;
465 |       |   return ENOMEM;
466 |       | }
467 |       |
468 |       | #endif
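
For reference, the same mbind pattern works outside mimalloc. A sketch (assumes Linux with NUMA support; uses the raw SYS_mbind syscall just as the wrapper above does) that maps a region and asks the kernel to prefer node 0:

    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef MPOL_PREFERRED
    #define MPOL_PREFERRED 1
    #endif

    int main(void) {
      size_t size = 2 * 1024 * 1024;
      void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) { perror("mmap"); return 1; }
      unsigned long mask = 1UL << 0;            // prefer NUMA node 0
      if (syscall(SYS_mbind, p, size, MPOL_PREFERRED,
                  &mask, sizeof(mask) * 8, 0) != 0) {
        perror("mbind");                        // e.g. on non-NUMA kernels
      }
      munmap(p, size);
      return 0;
    }
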
469 |       |
470 |       | //---------------------------------------------
471 |       | // NUMA nodes
472 |       | //---------------------------------------------
473 |       |
474 |       | #if defined(__linux__)
475 |       |
476 |       | #include <stdio.h>    // snprintf
477 |       |
478 |     0 | size_t _mi_prim_numa_node(void) {
479 |     0 |   #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu)
480 |     0 |     unsigned long node = 0;
481 |     0 |     unsigned long ncpu = 0;
482 |     0 |     long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
483 |     0 |     if (err != 0) return 0;
484 |     0 |     return node;
485 |       |   #else
486 |       |     return 0;
487 |       |   #endif
488 |     0 | }
489 |       |
490 |     0 | size_t _mi_prim_numa_node_count(void) {
491 |     0 |   char buf[128];
492 |     0 |   unsigned node = 0;
493 |     0 |   for(node = 0; node < 256; node++) {
494 |       |     // enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
495 |     0 |     snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
496 |     0 |     if (mi_prim_access(buf,R_OK) != 0) break;
497 |     0 |   }
498 |     0 |   return (node+1);
499 |     0 | }
500 |       |
501 |       | #elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000
502 |       |
503 |       | size_t _mi_prim_numa_node(void) {
504 |       |   domainset_t dom;
505 |       |   size_t node;
506 |       |   int policy;
507 |       |   if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul;
508 |       |   for (node = 0; node < MAXMEMDOM; node++) {
509 |       |     if (DOMAINSET_ISSET(node, &dom)) return node;
510 |       |   }
511 |       |   return 0ul;
512 |       | }
513 |       |
514 |       | size_t _mi_prim_numa_node_count(void) {
515 |       |   size_t ndomains = 0;
516 |       |   size_t len = sizeof(ndomains);
517 |       |   if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul;
518 |       |   return ndomains;
519 |       | }
520 |       |
521 |       | #elif defined(__DragonFly__)
522 |       |
523 |       | size_t _mi_prim_numa_node(void) {
524 |       |   // TODO: DragonFly does not seem to provide any userland means to get this information.
525 |       |   return 0ul;
526 |       | }
527 |       |
528 |       | size_t _mi_prim_numa_node_count(void) {
529 |       |   size_t ncpus = 0, nvirtcoresperphys = 0;
530 |       |   size_t len = sizeof(size_t);
531 |       |   if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul;
532 |       |   if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul;
533 |       |   return nvirtcoresperphys * ncpus;
534 |       | }
535 |       |
536 |       | #else
537 |       |
538 |       | size_t _mi_prim_numa_node(void) {
539 |       |   return 0;
540 |       | }
541 |       |
542 |       | size_t _mi_prim_numa_node_count(void) {
543 |       |   return 1;
544 |       | }
545 |       |
546 |       | #endif
547 |       |
548 |       | // ----------------------------------------------------------------
549 |       | // Clock
550 |       | // ----------------------------------------------------------------
551 |       |
552 |       | #include <time.h>
553 |       |
554 |       | #if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)
555 |       |
556 |    48 | mi_msecs_t _mi_prim_clock_now(void) {
557 |    48 |   struct timespec t;
558 |    48 |   #ifdef CLOCK_MONOTONIC
559 |    48 |   clock_gettime(CLOCK_MONOTONIC, &t);
560 |       |   #else
561 |       |   clock_gettime(CLOCK_REALTIME, &t);
562 |       |   #endif
563 |    48 |   return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
564 |    48 | }
565 |       |
566 |       | #else
567 |       |
568 |       | // low resolution timer
569 |       | mi_msecs_t _mi_prim_clock_now(void) {
570 |       |   #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
571 |       |   return (mi_msecs_t)clock();
572 |       |   #elif (CLOCKS_PER_SEC < 1000)
573 |       |   return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
574 |       |   #else
575 |       |   return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
576 |       |   #endif
577 |       | }
578 |       |
579 |       | #endif
580 |       |
581 |       |
582 |       |
583 |       |
584 |       | //----------------------------------------------------------------
585 |       | // Process info
586 |       | //----------------------------------------------------------------
587 |       |
588 |       | #if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)
589 |       | #include <stdio.h>
590 |       | #include <unistd.h>
591 |       | #include <sys/resource.h>
592 |       |
593 |       | #if defined(__APPLE__)
594 |       | #include <mach/mach.h>
595 |       | #endif
596 |       |
597 |       | #if defined(__HAIKU__)
598 |       | #include <kernel/OS.h>
599 |       | #endif
600 |       |
601 |     0 | static mi_msecs_t timeval_secs(const struct timeval* tv) {
602 |     0 |   return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
603 |     0 | }
604 |       |
605 |       | void _mi_prim_process_info(mi_process_info_t* pinfo)
606 |     0 | {
607 |     0 |   struct rusage rusage;
608 |     0 |   getrusage(RUSAGE_SELF, &rusage);
609 |     0 |   pinfo->utime = timeval_secs(&rusage.ru_utime);
610 |     0 |   pinfo->stime = timeval_secs(&rusage.ru_stime);
611 |     0 | #if !defined(__HAIKU__)
612 |     0 |   pinfo->page_faults = rusage.ru_majflt;
613 |     0 | #endif
614 |       | #if defined(__HAIKU__)
615 |       |   // Haiku does not (yet?) have a way to
616 |       |   // get these stats per process
617 |       |   thread_info tid;
618 |       |   area_info mem;
619 |       |   ssize_t c;
620 |       |   get_thread_info(find_thread(0), &tid);
621 |       |   while (get_next_area_info(tid.team, &c, &mem) == B_OK) {
622 |       |     pinfo->peak_rss += mem.ram_size;
623 |       |   }
624 |       |   pinfo->page_faults = 0;
625 |       | #elif defined(__APPLE__)
626 |       |   pinfo->peak_rss = rusage.ru_maxrss;         // macos reports in bytes
627 |       |   #ifdef MACH_TASK_BASIC_INFO
628 |       |   struct mach_task_basic_info info;
629 |       |   mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
630 |       |   if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
631 |       |     pinfo->current_rss = (size_t)info.resident_size;
632 |       |   }
633 |       |   #else
634 |       |   struct task_basic_info info;
635 |       |   mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT;
636 |       |   if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) {
637 |       |     pinfo->current_rss = (size_t)info.resident_size;
638 |       |   }
639 |       |   #endif
640 |       | #else
641 |     0 |   pinfo->peak_rss = rusage.ru_maxrss * 1024;  // Linux/BSD report in KiB
642 |     0 | #endif
643 |       |   // use defaults for commit
644 |     0 | }
645 |       |
646 |       | #else
647 |       |
648 |       | #ifndef __wasi__
649 |       | // WebAssembly instances are not processes
650 |       | #pragma message("define a way to get process info")
651 |       | #endif
652 |       |
653 |       | void _mi_prim_process_info(mi_process_info_t* pinfo)
654 |       | {
655 |       |   // use defaults
656 |       |   MI_UNUSED(pinfo);
657 |       | }
658 |       |
659 |       | #endif
660 |       |
661 |       |
662 |       | //----------------------------------------------------------------
663 |       | // Output
664 |       | //----------------------------------------------------------------
665 |       |
666 |     0 | void _mi_prim_out_stderr( const char* msg ) {
667 |     0 |   fputs(msg,stderr);
668 |     0 | }
669 |       |
670 |       |
671 |       | //----------------------------------------------------------------
672 |       | // Environment
673 |       | //----------------------------------------------------------------
674 |       |
675 |       | #if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0)
676 |       | // On POSIX systems, use `environ` to access environment variables
677 |       | // even before the C runtime is initialized.
678 |       | #if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>)
679 |       | #include <crt_externs.h>
680 |       | static char** mi_get_environ(void) {
681 |       |   return (*_NSGetEnviron());
682 |       | }
683 |       | #else
684 |       | extern char** environ;
685 |   512 | static char** mi_get_environ(void) {
686 |   512 |   return environ;
687 |   512 | }
688 |       | #endif
689 |   512 | bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
690 |   512 |   if (name==NULL) return false;
691 |   512 |   const size_t len = _mi_strlen(name);
692 |   512 |   if (len == 0) return false;
693 |   512 |   char** env = mi_get_environ();
694 |   512 |   if (env == NULL) return false;
695 |       |   // compare up to 10000 entries
696 | 18.4k |   for (int i = 0; i < 10000 && env[i] != NULL; i++) {
697 | 17.9k |     const char* s = env[i];
698 | 17.9k |     if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive
699 |       |       // found it
700 |     0 |       _mi_strlcpy(result, s + len + 1, result_size);
701 |     0 |       return true;
702 |     0 |     }
703 | 17.9k |   }
704 |   512 |   return false;
705 |   512 | }
706 |       | #else
707 |       | // fallback: use the standard C `getenv`; this cannot be used while initializing the C runtime
708 |       | bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
709 |       |   // cannot call getenv() when still initializing the C runtime.
710 |       |   if (_mi_preloading()) return false;
711 |       |   const char* s = getenv(name);
712 |       |   if (s == NULL) {
713 |       |     // we check the upper case name too.
714 |       |     char buf[64+1];
715 |       |     size_t len = _mi_strnlen(name,sizeof(buf)-1);
716 |       |     for (size_t i = 0; i < len; i++) {
717 |       |       buf[i] = _mi_toupper(name[i]);
718 |       |     }
719 |       |     buf[len] = 0;
720 |       |     s = getenv(buf);
721 |       |   }
722 |       |   if (s == NULL || _mi_strnlen(s,result_size) >= result_size)  return false;
723 |       |   _mi_strlcpy(result, s, result_size);
724 |       |   return true;
725 |       | }
726 |       | #endif  // !MI_USE_ENVIRON
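
A hypothetical usage sketch of `_mi_prim_getenv`: because it copies into a caller-supplied buffer and performs no allocation, it is safe to call during early startup. The variable name is just an example here:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    bool _mi_prim_getenv(const char* name, char* result, size_t result_size);

    int main(void) {
      char buf[128];
      if (_mi_prim_getenv("MIMALLOC_VERBOSE", buf, sizeof(buf))) {
        printf("MIMALLOC_VERBOSE=%s\n", buf);  // value copied into our buffer
      }
      return 0;
    }
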
727 |       |
728 |       |
729 |       | //----------------------------------------------------------------
730 |       | // Random
731 |       | //----------------------------------------------------------------
732 |       |
733 |       | #if defined(__APPLE__)
734 |       |
735 |       | #include <AvailabilityMacros.h>
736 |       | #if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
737 |       | #include <CommonCrypto/CommonCryptoError.h>
738 |       | #include <CommonCrypto/CommonRandom.h>
739 |       | #endif
740 |       | bool _mi_prim_random_buf(void* buf, size_t buf_len) {
741 |       |   #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
742 |       |     // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
743 |       |     // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
744 |       |     return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
745 |       |   #else
746 |       |     // fall back on older macOS
747 |       |     arc4random_buf(buf, buf_len);
748 |       |     return true;
749 |       |   #endif
750 |       | }
751 |       |
752 |       | #elif defined(__ANDROID__) || defined(__DragonFly__) || \
753 |       |       defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
754 |       |       defined(__sun)
755 |       |
756 |       | #include <stdlib.h>
757 |       | bool _mi_prim_random_buf(void* buf, size_t buf_len) {
758 |       |   arc4random_buf(buf, buf_len);
759 |       |   return true;
760 |       | }
761 |       |
762 |       | #elif defined(__linux__) || defined(__HAIKU__)
763 |       |
764 |       | #include <sys/types.h>
765 |       | #include <sys/stat.h>
766 |       | #include <fcntl.h>
767 |       | #include <errno.h>
768 |       |
769 |    16 | bool _mi_prim_random_buf(void* buf, size_t buf_len) {
770 |       |   // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
771 |       |   // and for the latter the actual `getrandom` call is not always defined.
772 |       |   // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
773 |       |   // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
774 |    16 |   #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom)
775 |    16 |     #ifndef GRND_NONBLOCK
776 |    16 |     #define GRND_NONBLOCK (1)
777 |    16 |     #endif
778 |    16 |     static _Atomic(uintptr_t) no_getrandom; // = 0
779 |    16 |     if (mi_atomic_load_acquire(&no_getrandom)==0) {
780 |    16 |       ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
781 |    16 |       if (ret >= 0) return (buf_len == (size_t)ret);
782 |     0 |       if (errno != ENOSYS) return false;
783 |     0 |       mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom
784 |     0 |     }
785 |     0 |   #endif
786 |     0 |   int flags = O_RDONLY;
787 |     0 |   #if defined(O_CLOEXEC)
788 |     0 |   flags |= O_CLOEXEC;
789 |     0 |   #endif
790 |     0 |   int fd = mi_prim_open("/dev/urandom", flags);
791 |     0 |   if (fd < 0) return false;
792 |     0 |   size_t count = 0;
793 |     0 |   while(count < buf_len) {
794 |     0 |     ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count);
795 |     0 |     if (ret<=0) {
796 |     0 |       if (errno!=EAGAIN && errno!=EINTR) break;
797 |     0 |     }
798 |     0 |     else {
799 |     0 |       count += ret;
800 |     0 |     }
801 |     0 |   }
802 |     0 |   mi_prim_close(fd);
803 |     0 |   return (count==buf_len);
804 |     0 | }
805 |       |
806 |       | #else
807 |       |
808 |       | bool _mi_prim_random_buf(void* buf, size_t buf_len) {
809 |       |   return false;
810 |       | }
811 |       |
812 |       | #endif
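
A hypothetical usage sketch of `_mi_prim_random_buf`, e.g. for seeding PRNG state (mimalloc uses OS entropy like this to initialize its internal random state):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    bool _mi_prim_random_buf(void* buf, size_t buf_len);

    int main(void) {
      uint64_t seed[2];
      if (!_mi_prim_random_buf(seed, sizeof(seed))) {
        fprintf(stderr, "no OS entropy available\n");
        return 1;
      }
      printf("seed: %016llx %016llx\n",
             (unsigned long long)seed[0], (unsigned long long)seed[1]);
      return 0;
    }
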
813 |       |
814 |       |
815 |       | //----------------------------------------------------------------
816 |       | // Thread init/done
817 |       | //----------------------------------------------------------------
818 |       |
819 |       | #if defined(MI_USE_PTHREADS)
820 |       |
821 |       | // use pthread local storage keys to detect thread ending
822 |       | // (and used with MI_TLS_PTHREADS for the default heap)
823 |       | pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
824 |       |
825 |     0 | static void mi_pthread_done(void* value) {
826 |     0 |   if (value!=NULL) {
827 |     0 |     _mi_thread_done((mi_heap_t*)value);
828 |     0 |   }
829 |     0 | }
830 |       |
831 |    16 | void _mi_prim_thread_init_auto_done(void) {
832 |    16 |   mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
833 |    16 |   pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
834 |    16 | }
835 |       |
836 |     0 | void _mi_prim_thread_done_auto_done(void) {
837 |       |   // nothing to do
838 |     0 | }
839 |       |
840 |    16 | void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
841 |    16 |   if (_mi_heap_default_key != (pthread_key_t)(-1)) {  // can happen during recursive invocation on FreeBSD
842 |    16 |     pthread_setspecific(_mi_heap_default_key, heap);
843 |    16 |   }
844 |    16 | }
845 |       |
846 |       | #else
847 |       |
848 |       | void _mi_prim_thread_init_auto_done(void) {
849 |       |   // nothing
850 |       | }
851 |       |
852 |       | void _mi_prim_thread_done_auto_done(void) {
853 |       |   // nothing
854 |       | }
855 |       |
856 |       | void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
857 |       |   MI_UNUSED(heap);
858 |       | }
859 |       |
860 |       | #endif
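
The pthread-key idiom above is worth seeing in isolation: a destructor registered with pthread_key_create runs at thread exit for every thread whose key value is non-NULL, which is how mimalloc observes thread termination. A minimal standalone sketch:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t key;

    static void on_thread_exit(void* value) {
      printf("thread done: %s\n", (const char*)value);  // runs at thread exit
    }

    static void* worker(void* arg) {
      (void)arg;
      pthread_setspecific(key, "worker");  // non-NULL => destructor will fire
      return NULL;
    }

    int main(void) {
      pthread_key_create(&key, &on_thread_exit);
      pthread_t t;
      pthread_create(&t, NULL, &worker, NULL);
      pthread_join(t, NULL);
      pthread_key_delete(key);
      return 0;
    }
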