/src/cpython/Include/internal/mimalloc/mimalloc/prim.h
Line | Count | Source |
1 | | /* ---------------------------------------------------------------------------- |
2 | | Copyright (c) 2018-2023, Microsoft Research, Daan Leijen |
3 | | This is free software; you can redistribute it and/or modify it under the |
4 | | terms of the MIT license. A copy of the license can be found in the file |
5 | | "LICENSE" at the root of this distribution. |
6 | | -----------------------------------------------------------------------------*/ |
7 | | #pragma once |
8 | | #ifndef MIMALLOC_PRIM_H |
9 | | #define MIMALLOC_PRIM_H |
10 | | |
11 | | |
12 | | // -------------------------------------------------------------------------- |
13 | | // This file specifies the primitive portability API. |
14 | | // Each OS/host needs to implement these primitives, see `src/prim` |
15 | | // for implementations on Windows, macOS, WASI, and Linux/Unix. |
16 | | // |
17 | | // note: on all primitive functions, we always have result parameters != NULL, and: |
18 | | // addr != NULL and page aligned |
19 | | // size > 0 and page aligned |
20 | | // the return value is an `int` error code where 0 means success. |
21 | | // -------------------------------------------------------------------------- |
22 | | |
23 | | // OS memory configuration (the type of the `config` argument of `_mi_prim_mem_init`) |
24 | | typedef struct mi_os_mem_config_s { |
25 | | size_t page_size; // OS page size, e.g. 4KiB |
26 | | size_t large_page_size; // large OS page size, e.g. 2MiB |
27 | | size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) |
28 | | bool has_overcommit; // can we reserve more memory than can be actually committed? |
29 | | bool must_free_whole; // must allocated blocks be freed as a whole? (false for mmap, true for VirtualAlloc) |
30 | | bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) |
31 | | } mi_os_mem_config_t; |
32 | | |
33 | | // Initialize. `config` is passed as a non-const pointer; presumably the implementation fills it in (TODO confirm in `src/prim`). |
34 | | void _mi_prim_mem_init( mi_os_mem_config_t* config ); |
35 | | |
36 | | // Free OS memory. Returns error code or 0 on success. |
37 | | int _mi_prim_free(void* addr, size_t size ); |
38 | | |
39 | | // Allocate OS memory. Return NULL on error (NOTE(review): the return type is `int`, so "NULL" presumably refers to `*addr` -- confirm). |
40 | | // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. |
41 | | // If `commit` is false, the virtual memory range only needs to be reserved (with no access) |
42 | | // which will later be committed explicitly using `_mi_prim_commit`. |
43 | | // `is_zero` is set to true if the memory was zero initialized (as on most OS's); `is_large` is presumably set to true if large OS pages were used (TODO confirm). |
44 | | // pre: !commit => !allow_large |
45 | | // try_alignment >= _mi_os_page_size() and a power of 2 |
46 | | int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); |
47 | | |
48 | | // Commit memory in the range [addr, addr+size). Returns error code or 0 on success. |
49 | | // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. |
50 | | // `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) |
51 | | int _mi_prim_commit(void* addr, size_t size, bool* is_zero); |
52 | | |
53 | | // Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true |
54 | | // if the memory would need to be re-committed before it is used again. For example, on Windows this is always true, |
55 | | // but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. |
56 | | // pre: needs_recommit != NULL |
57 | | int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); |
58 | | |
59 | | // Reset memory. The range stays accessible but the content might be reset. |
60 | | // Returns error code or 0 on success. |
61 | | int _mi_prim_reset(void* addr, size_t size); |
62 | | |
63 | | // Protect memory. Returns error code or 0 on success. Presumably `protect == false` lifts the protection again (TODO confirm). |
64 | | int _mi_prim_protect(void* addr, size_t size, bool protect); |
65 | | |
66 | | // Allocate huge (1GiB) pages possibly associated with a NUMA node. Returns error code or 0 on success. |
67 | | // `is_zero` is set to true if the memory was zero initialized (as on most OS's) |
68 | | // pre: size > 0 and a multiple of 1GiB. |
69 | | // numa_node is either negative (don't care), or a numa node number. |
70 | | int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); |
71 | | |
72 | | // Return the current NUMA node |
73 | | size_t _mi_prim_numa_node(void); |
74 | | |
75 | | // Return the number of logical NUMA nodes |
76 | | size_t _mi_prim_numa_node_count(void); |
77 | | |
78 | | // Clock ticks: return the current clock value as an `mi_msecs_t` (presumably milliseconds, going by the type name). |
79 | | mi_msecs_t _mi_prim_clock_now(void); |
80 | | |
81 | | // Return process information (only for statistics). NOTE(review): the field notes below are inferred from the field names -- confirm against the implementations in `src/prim`. |
82 | | typedef struct mi_process_info_s { |
83 | | mi_msecs_t elapsed; /* presumably elapsed time */ |
84 | | mi_msecs_t utime; /* presumably user time */ |
85 | | mi_msecs_t stime; /* presumably system time */ |
86 | | size_t current_rss; /* presumably current resident set size */ |
87 | | size_t peak_rss; /* presumably peak resident set size */ |
88 | | size_t current_commit; /* presumably currently committed memory */ |
89 | | size_t peak_commit; /* presumably peak committed memory */ |
90 | | size_t page_faults; /* presumably number of page faults */ |
91 | | } mi_process_info_t; |
92 | | /* the result is passed back through the non-const `pinfo` pointer */ |
93 | | void _mi_prim_process_info(mi_process_info_t* pinfo); |
94 | | |
95 | | // Default stderr output. (only for warnings etc. with verbose enabled) |
96 | | // pre: msg != NULL && _mi_strlen(msg) > 0 |
97 | | void _mi_prim_out_stderr( const char* msg ); |
98 | | |
99 | | // Get an environment variable. (only for options) Presumably returns true if the variable was found and copied into `result` (TODO confirm). |
100 | | // pre: name != NULL, result != NULL, result_size >= 64 |
101 | | bool _mi_prim_getenv(const char* name, char* result, size_t result_size); |
102 | | |
103 | | |
104 | | // Fill the buffer `buf` (of `buf_len` bytes) with strong randomness; return `false` on error or if |
105 | | // there is no strong randomization available. |
106 | | bool _mi_prim_random_buf(void* buf, size_t buf_len); |
107 | | |
108 | | // Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. |
109 | | void _mi_prim_thread_init_auto_done(void); |
110 | | |
111 | | // Called on process exit and may take action to clean up resources associated with the thread auto done. |
112 | | void _mi_prim_thread_done_auto_done(void); |
113 | | |
114 | | // Called when the default heap for a thread changes; `heap` is presumably the new default heap (TODO confirm). |
115 | | void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); |
116 | | |
117 | | |
118 | | //------------------------------------------------------------------- |
119 | | // Thread id: `_mi_prim_thread_id()` |
120 | | // |
121 | | // Getting the thread id should be performant as it is called in the |
122 | | // fast path of `_mi_free` and we specialize for various platforms as |
123 | | // inlined definitions. Regular code should call `init.c:_mi_thread_id()`. |
124 | | // We only require _mi_prim_thread_id() to return a unique id |
125 | | // for each thread (unequal to zero). |
126 | | //------------------------------------------------------------------- |
127 | | |
128 | | // defined in `init.c`; do not use these directly |
129 | | extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from |
130 | | extern bool _mi_process_is_initialized; // has mi_process_init been called? |
131 | | /* forward declaration; exactly one definition from the #if/#elif/#else chain that follows is compiled */ |
132 | | static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; |
133 | | |
134 | | #ifdef MI_PRIM_THREAD_ID |
135 | | /* a port can supply its own thread id primitive through the `MI_PRIM_THREAD_ID` macro, which takes precedence */ |
136 | | static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { |
137 | | return MI_PRIM_THREAD_ID(); |
138 | | } |
139 | | |
140 | | #elif defined(_WIN32) |
141 | | /* <windows.h> is included here (with WIN32_LEAN_AND_MEAN defined first) for `NtCurrentTeb` */ |
142 | | #define WIN32_LEAN_AND_MEAN |
143 | | #include <windows.h> |
144 | | static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { |
145 | | // Windows: works on Intel and ARM in both 32- and 64-bit; the pointer returned by `NtCurrentTeb` is used as the thread id |
146 | | return (uintptr_t)NtCurrentTeb(); |
147 | | } |
148 | | |
149 | | // We use assembly for a fast thread id on the main platforms. The TLS layout depends on |
150 | | // both the OS and libc implementation so we use specific tests for each main platform. |
151 | | // If you test on another platform and it works please send a PR :-) |
152 | | // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. |
153 | | #elif defined(__GNUC__) && ( \ |
154 | | (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ |
155 | | || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ |
156 | | || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ |
157 | | || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ |
158 | | || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ |
159 | | ) |
160 | | /* Read the pointer-sized value in TLS slot `slot`: a GS/FS-relative load at byte offset `slot*sizeof(void*)` on x86, and `tcb[slot]` off the thread register on ARM. Every architecture admitted by the test above has a branch below, so `res` is always assigned. */ |
161 | 72 | static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { |
162 | 72 | void* res; |
163 | 72 | const size_t ofs = (slot*sizeof(void*)); |
164 | | #if defined(__i386__) |
165 | | __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS |
166 | | #elif defined(__APPLE__) && defined(__x86_64__) |
167 | | __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOS uses GS |
168 | | #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) |
169 | | __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI |
170 | | #elif defined(__x86_64__) |
171 | | __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS |
172 | | #elif defined(__arm__) |
173 | | void** tcb; MI_UNUSED(ofs); /* the byte offset is not needed here: the slot is indexed directly */ |
174 | | __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); /* read coprocessor p15 (c13, c0, 3) and clear the low 2 bits */ |
175 | | res = tcb[slot]; |
176 | | #elif defined(__aarch64__) |
177 | | void** tcb; MI_UNUSED(ofs); /* the byte offset is not needed here: the slot is indexed directly */ |
178 | | #if defined(__APPLE__) // M1, issue #343 |
179 | | __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); /* read tpidrro_el0 and clear the low 3 bits */ |
180 | | #else |
181 | | __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); /* read tpidr_el0 unmodified */ |
182 | | #endif |
183 | | res = tcb[slot]; |
184 | | #endif |
185 | 72 | return res; |
186 | 72 | } |
187 | | |
188 | | // Store `value` into TLS slot `slot`, using the same per-architecture addressing as `mi_prim_tls_slot`; setting a tls slot is only used on macOS for now |
189 | 0 | static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { |
190 | 0 | const size_t ofs = (slot*sizeof(void*)); |
191 | 0 | #if defined(__i386__) |
192 | 0 | __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS |
193 | 0 | #elif defined(__APPLE__) && defined(__x86_64__) |
194 | 0 | __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS |
195 | 0 | #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) |
196 | 0 | __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI |
197 | 0 | #elif defined(__x86_64__) |
198 | 0 | __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS |
199 | 0 | #elif defined(__arm__) |
200 | 0 | void** tcb; MI_UNUSED(ofs); /* the byte offset is not needed here: the slot is indexed directly */ |
201 | 0 | __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); /* read coprocessor p15 (c13, c0, 3) and clear the low 2 bits */ |
202 | 0 | tcb[slot] = value; |
203 | 0 | #elif defined(__aarch64__) |
204 | 0 | void** tcb; MI_UNUSED(ofs); /* the byte offset is not needed here: the slot is indexed directly */ |
205 | 0 | #if defined(__APPLE__) // M1, issue #343 |
206 | 0 | __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); /* read tpidrro_el0 and clear the low 3 bits */ |
207 | 0 | #else |
208 | 0 | __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); /* read tpidr_el0 unmodified */ |
209 | 0 | #endif |
210 | 0 | tcb[slot] = value; |
211 | 0 | #endif |
212 | 0 | } |
213 | | /* Thread id for the assembly-supported platforms: the value of a fixed TLS slot, read through `mi_prim_tls_slot` */ |
214 | 72 | static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { |
215 | | #if defined(__BIONIC__) |
216 | | // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id |
217 | | // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 |
218 | | return (uintptr_t)mi_prim_tls_slot(1); |
219 | | #else |
220 | | // on all our other targets, slot 0 is the thread id |
221 | | // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h |
222 | | // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 |
223 | 72 | return (uintptr_t)mi_prim_tls_slot(0); |
224 | 72 | #endif |
225 | 72 | } |
226 | | |
227 | | #else |
228 | | |
229 | | // otherwise use portable C, taking the address of the thread local variable `_mi_heap_default` as the id (this is still very fast on most platforms). |
230 | | static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { |
231 | | return (uintptr_t)&_mi_heap_default; |
232 | | } |
233 | | |
234 | | #endif |
235 | | |
236 | | |
237 | | |
238 | | /* ---------------------------------------------------------------------------------------- |
239 | | The thread local default heap: `mi_prim_get_default_heap()` |
240 | | This is inlined here as it is on the fast path for allocation functions. |
241 | | |
242 | | On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a |
243 | | __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures |
244 | | that the storage will always be available (allocated on the thread stacks). |
245 | | |
246 | | On some platforms though we cannot use that when overriding `malloc` since the underlying |
247 | | TLS implementation (or the loader) will itself call `malloc` on a first access and recurse. |
248 | | We try to circumvent this in an efficient way: |
249 | | - macOS : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On macOS, the |
250 | | loader itself calls `malloc` even before the modules are initialized. |
251 | | - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). |
252 | | - DragonFly: defaults are working but seem slow compared to FreeBSD (see PR #323) |
253 | | ------------------------------------------------------------------------------------------- */ |
254 | | |
255 | | static inline mi_heap_t* mi_prim_get_default_heap(void); |
256 | | /* When overriding malloc, select per platform where the default heap pointer is kept: at most one of MI_TLS_SLOT, MI_TLS_PTHREAD_SLOT_OFS or MI_TLS_PTHREAD gets defined here. */ |
257 | | #if defined(MI_MALLOC_OVERRIDE) |
258 | | #if defined(__APPLE__) // macOS |
259 | | #define MI_TLS_SLOT 89 // seems unused? |
260 | | // #define MI_TLS_RECURSE_GUARD 1 |
261 | | // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) |
262 | | // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h> |
263 | | #elif defined(__OpenBSD__) |
264 | | // use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16) |
265 | | // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371> |
266 | | #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) |
267 | | // #elif defined(__DragonFly__) |
268 | | // #warning "mimalloc is not working correctly on DragonFly yet." |
269 | | // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458> |
270 | | #elif defined(__ANDROID__) |
271 | | // See issue #381 |
272 | | #define MI_TLS_PTHREAD |
273 | | #endif |
274 | | #endif |
275 | | |
276 | | |
277 | | #if defined(MI_TLS_SLOT) |
278 | | /* The default heap pointer is kept in TLS slot MI_TLS_SLOT; when the slot holds NULL the address of `_mi_heap_empty` is returned instead. */ |
279 | | static inline mi_heap_t* mi_prim_get_default_heap(void) { |
280 | | mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); |
281 | | if mi_unlikely(heap == NULL) { |
282 | | #ifdef __GNUC__ |
283 | | __asm(""); // prevent conditional load of the address of _mi_heap_empty |
284 | | #endif |
285 | | heap = (mi_heap_t*)&_mi_heap_empty; |
286 | | } |
287 | | return heap; |
288 | | } |
289 | | |
290 | | #elif defined(MI_TLS_PTHREAD_SLOT_OFS) |
291 | | /* Address of the heap slot: MI_TLS_PTHREAD_SLOT_OFS bytes past the value of `pthread_self()`; on DragonFly a NULL `pthread_self()` yields NULL. */ |
292 | | static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { |
293 | | pthread_t self = pthread_self(); |
294 | | #if defined(__DragonFly__) |
295 | | if (self==NULL) return NULL; |
296 | | #endif |
297 | | return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); |
298 | | } |
299 | | /* NULL slot address -> `_mi_heap_main_get()`; slot holding NULL -> `&_mi_heap_empty`; otherwise the heap stored in the slot. */ |
300 | | static inline mi_heap_t* mi_prim_get_default_heap(void) { |
301 | | mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); |
302 | | if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); |
303 | | mi_heap_t* heap = *pheap; |
304 | | if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; |
305 | | return heap; |
306 | | } |
307 | | |
308 | | #elif defined(MI_TLS_PTHREAD) |
309 | | /* The default heap is stored under the pthread key `_mi_heap_default_key`. While the key still equals (pthread_key_t)(-1) the result of `_mi_heap_main_get()` is used; a NULL result maps to `&_mi_heap_empty`. */ |
310 | | extern pthread_key_t _mi_heap_default_key; |
311 | | static inline mi_heap_t* mi_prim_get_default_heap(void) { |
312 | | mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); |
313 | | return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); |
314 | | } |
315 | | |
316 | | #else // default using a thread local variable; used on most platforms. |
317 | | /* Returns the thread local `_mi_heap_default`. If MI_TLS_RECURSE_GUARD is defined, `_mi_heap_main_get()` is returned instead for as long as `_mi_process_is_initialized` is false. */ |
318 | 108 | static inline mi_heap_t* mi_prim_get_default_heap(void) { |
319 | | #if defined(MI_TLS_RECURSE_GUARD) |
320 | | if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); |
321 | | #endif |
322 | 108 | return _mi_heap_default; |
323 | 108 | } |
324 | | |
325 | | #endif // mi_prim_get_default_heap() |
326 | | |
327 | | |
328 | | |
329 | | #endif // MIMALLOC_PRIM_H |