Line | Count | Source |
1 | | /* |
2 | | * Memory management functions. |
3 | | * |
4 | | * Copyright 2000-2007 Willy Tarreau <w@1wt.eu> |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU General Public License |
8 | | * as published by the Free Software Foundation; either version |
9 | | * 2 of the License, or (at your option) any later version. |
10 | | * |
11 | | */ |
12 | | |
13 | | #include <errno.h> |
14 | | |
15 | | #include <import/plock.h> |
16 | | |
17 | | #include <haproxy/activity.h> |
18 | | #include <haproxy/api.h> |
19 | | #include <haproxy/applet-t.h> |
20 | | #include <haproxy/cfgparse.h> |
21 | | #include <haproxy/channel.h> |
22 | | #include <haproxy/cli.h> |
23 | | #include <haproxy/errors.h> |
24 | | #include <haproxy/global.h> |
25 | | #include <haproxy/list.h> |
26 | | #include <haproxy/pool.h> |
27 | | #include <haproxy/pool-os.h> |
28 | | #include <haproxy/sc_strm.h> |
29 | | #include <haproxy/stats-t.h> |
30 | | #include <haproxy/stconn.h> |
31 | | #include <haproxy/thread.h> |
32 | | #include <haproxy/tools.h> |
33 | | |
34 | | |
35 | | /* These ones are initialized per-thread on startup by init_pools() */ |
36 | | THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */ |
37 | | THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */ |
38 | | |
39 | | struct list pools __read_mostly = LIST_HEAD_INIT(pools); |
40 | | int mem_poison_byte __read_mostly = 'P'; |
41 | | int pool_trim_in_progress = 0; |
42 | | uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */ |
43 | | #if defined(DEBUG_FAIL_ALLOC) && (DEBUG_FAIL_ALLOC > 0) |
44 | | POOL_DBG_FAIL_ALLOC | |
45 | | #endif |
46 | | #if defined(DEBUG_DONT_SHARE_POOLS) && (DEBUG_DONT_SHARE_POOLS > 0) |
47 | | POOL_DBG_DONT_MERGE | |
48 | | #endif |
49 | | #if defined(DEBUG_POOL_INTEGRITY) && (DEBUG_POOL_INTEGRITY > 0) |
50 | | POOL_DBG_COLD_FIRST | |
51 | | POOL_DBG_INTEGRITY | |
52 | | #endif |
53 | | #if defined(CONFIG_HAP_NO_GLOBAL_POOLS) |
54 | | POOL_DBG_NO_GLOBAL | |
55 | | #endif |
56 | | #if defined(DEBUG_NO_POOLS) && (DEBUG_NO_POOLS > 0) |
57 | | POOL_DBG_NO_CACHE | |
58 | | #endif |
59 | | #if defined(DEBUG_POOL_TRACING) && (DEBUG_POOL_TRACING > 0) |
60 | | POOL_DBG_CALLER | |
61 | | #endif |
62 | | #if defined(DEBUG_MEMORY_POOLS) && (DEBUG_MEMORY_POOLS > 0) |
63 | | POOL_DBG_TAG | |
64 | | #endif |
65 | | #if defined(DEBUG_UAF) && (DEBUG_UAF > 0) |
66 | | POOL_DBG_NO_CACHE | |
67 | | POOL_DBG_UAF | |
68 | | #endif |
69 | | 0; |
70 | | |
71 | | static const struct { |
72 | | uint flg; |
73 | | const char *set; |
74 | | const char *clr; |
75 | | const char *hlp; |
76 | | } dbg_options[] = { |
77 | | /* flg, set, clr, hlp */ |
78 | | { POOL_DBG_FAIL_ALLOC, "fail", "no-fail", "randomly fail allocations" }, |
79 | | { POOL_DBG_DONT_MERGE, "no-merge", "merge", "disable merging of similar pools" }, |
80 | | { POOL_DBG_COLD_FIRST, "cold-first", "hot-first", "pick cold objects first" }, |
81 | | { POOL_DBG_INTEGRITY, "integrity", "no-integrity", "enable cache integrity checks" }, |
82 | | { POOL_DBG_NO_GLOBAL, "no-global", "global", "disable global shared cache" }, |
83 | | { POOL_DBG_NO_CACHE, "no-cache", "cache", "disable thread-local cache" }, |
84 | | { POOL_DBG_CALLER, "caller", "no-caller", "save caller information in cache" }, |
85 | | { POOL_DBG_TAG, "tag", "no-tag", "add tag at end of allocated objects" }, |
86 | | { POOL_DBG_POISON, "poison", "no-poison", "poison newly allocated objects" }, |
87 | | { POOL_DBG_UAF, "uaf", "no-uaf", "enable use-after-free checks (slow)" }, |
88 | | { POOL_DBG_BACKUP, "backup", "no-backup", "compare object contents on re-alloc" }, |
89 | | { 0 /* end */ } |
90 | | }; |
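/* Illustrative mapping (editor's sketch, values not compiled): a command line
 * such as "-dM no-merge,integrity" walks the table above and would set
 * POOL_DBG_DONT_MERGE and POOL_DBG_INTEGRITY in pool_debugging, while
 * "-dM merge" would clear POOL_DBG_DONT_MERGE; the actual parsing is done by
 * pool_parse_debugging() further below.
 */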
91 | | |
92 | | /* describes a snapshot of a pool line about to be dumped by "show pools" */ |
93 | | struct pool_dump_info { |
94 | | const struct pool_head *entry; |
95 | | ulong alloc_items; |
96 | | ulong alloc_bytes; |
97 | | ulong used_items; |
98 | | ulong cached_items; |
99 | | ulong need_avg; |
100 | | ulong failed_items; |
101 | | }; |
102 | | |
103 | | /* context used by "show pools" */ |
104 | | struct show_pools_ctx { |
105 | | char *prefix; /* if non-null, match this prefix name for the pool */ |
106 | | int how; /* bits 0..3: 0=no sort, 1=by name, 2=by item size, 3=by total alloc */ |
107 | | int maxcnt; /* 0=no limit, other=max number of output entries */ |
108 | | }; |
109 | | |
110 | | static int mem_fail_rate __read_mostly = 0; |
111 | | static int using_default_allocator __read_mostly = 1; // linked-in allocator or LD_PRELOADed one ? |
112 | | static int disable_trim __read_mostly = 0; |
113 | | static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL; |
114 | | static int(*_malloc_trim)(size_t) = NULL; |
115 | | |
116 | | /* returns the pool hash bucket an object should use based on its pointer. |
117 | | * Objects need a consistent bucket assignment so that they may be |
118 | | * allocated on one thread and released on another one. Thus only the |
119 | | * pointer is usable. |
120 | | */ |
121 | | static forceinline unsigned int pool_pbucket(const void *ptr) |
122 | 0 | { |
123 | 0 | return ptr_hash(ptr, CONFIG_HAP_POOL_BUCKETS_BITS); |
124 | 0 | } |
125 | | |
126 | | /* returns the pool hash bucket to use for the current thread. This should only |
127 | | * be used when no pointer is available (e.g. count alloc failures). |
128 | | */ |
129 | | static forceinline unsigned int pool_tbucket(void) |
130 | 0 | { |
131 | 0 | return tid % CONFIG_HAP_POOL_BUCKETS; |
132 | 0 | } |
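/* Editor's sketch (not part of the build): one possible shape of such a
 * pointer-to-bucket hash, assuming 64-bit pointers. The real ptr_hash() used
 * above lives in the haproxy headers and may differ; the function name below
 * is hypothetical.
 */
static inline unsigned int sketch_ptr_hash(const void *ptr, unsigned int bits)
{
	unsigned long long x = (unsigned long long)(size_t)ptr;

	x ^= x >> 33;                 /* fold high bits into the low ones */
	x *= 0xff51afd7ed558ccdULL;   /* 64-bit multiplicative scrambling */
	x ^= x >> 33;
	return bits ? (unsigned int)(x >> (64 - bits)) : 0;
}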
133 | | |
134 | | /* ask the allocator to trim memory pools. |
135 | | * This must run under thread isolation so that competing threads trying to |
136 | | * allocate or release memory do not prevent the allocator from completing |
137 | | * its job. We just have to be careful as callers might already be isolated |
138 | | * themselves. |
139 | | */ |
140 | | void trim_all_pools(void) |
141 | 0 | { |
142 | 0 | int isolated = thread_isolated(); |
143 | |
|
144 | 0 | if (!isolated) |
145 | 0 | thread_isolate(); |
146 | |
|
147 | 0 | malloc_trim(0); |
148 | |
|
149 | 0 | if (!isolated) |
150 | 0 | thread_release(); |
151 | 0 | } |
152 | | |
153 | | /* check if we're using the same allocator as the one that provides |
154 | | * malloc_trim() and mallinfo(). The principle is that on glibc, both |
155 | | * malloc_trim() and mallinfo() are provided, and using mallinfo() we |
156 | | * can check if malloc() is performed through glibc or any other one |
157 | | * the executable was linked against (e.g. jemalloc). Prior to this we |
158 | | * have to check whether we're running on jemalloc by verifying if the |
159 | | * mallctl() function is provided. Its pointer will be used later. |
160 | | */ |
161 | | static void detect_allocator(void) |
162 | 0 | { |
163 | 0 | #if defined(__ELF__) |
164 | 0 | extern int mallctl(const char *, void *, size_t *, void *, size_t) __attribute__((weak)); |
165 | |
|
166 | 0 | my_mallctl = mallctl; |
167 | 0 | #endif |
168 | 0 | if (!my_mallctl) { |
169 | | /* trick: we won't enter here if mallctl() is known at link |
169 | | * time. This allows us to detect whether the symbol was changed since |
171 | | * the program was linked, indicating it's not running on the |
172 | | * expected allocator (due to an LD_PRELOAD) and that we must |
173 | | * be extra cautious and avoid some optimizations that are |
174 | | * known to break such as malloc_trim(). |
175 | | */ |
176 | 0 | my_mallctl = get_sym_curr_addr("mallctl"); |
177 | 0 | using_default_allocator = (my_mallctl == NULL); |
178 | 0 | } |
179 | |
|
180 | 0 | if (!my_mallctl) { |
181 | 0 | #if defined(HA_HAVE_MALLOC_TRIM) |
182 | | #ifdef HA_HAVE_MALLINFO2 |
183 | | struct mallinfo2 mi1, mi2; |
184 | | #else |
185 | 0 | struct mallinfo mi1, mi2; |
186 | 0 | #endif |
187 | 0 | void *ptr; |
188 | |
|
189 | | #ifdef HA_HAVE_MALLINFO2 |
190 | | mi1 = mallinfo2(); |
191 | | #else |
192 | 0 | mi1 = mallinfo(); |
193 | 0 | #endif |
194 | 0 | ptr = DISGUISE(malloc(1)); |
195 | | #ifdef HA_HAVE_MALLINFO2 |
196 | | mi2 = mallinfo2(); |
197 | | #else |
198 | 0 | mi2 = mallinfo(); |
199 | 0 | #endif |
200 | 0 | free(DISGUISE(ptr)); |
201 | |
|
202 | 0 | using_default_allocator = !!memcmp(&mi1, &mi2, sizeof(mi1)); |
203 | | #elif defined(HA_HAVE_MALLOC_ZONE) |
204 | | using_default_allocator = (malloc_default_zone() != NULL); |
205 | | #endif |
206 | 0 | } |
207 | | |
208 | | /* detect presence of malloc_trim() */ |
209 | 0 | _malloc_trim = get_sym_next_addr("malloc_trim"); |
210 | 0 | } |
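/* Editor's sketch of the weak-symbol probe used by detect_allocator() above,
 * assuming an ELF toolchain (the wrapper name is made up): when no allocator
 * in the link exports mallctl(), the weak reference resolves to NULL at run
 * time, otherwise it points into that allocator (typically jemalloc).
 */
extern int mallctl(const char *, void *, size_t *, void *, size_t) __attribute__((weak));

static inline int sketch_linked_with_jemalloc(void)
{
	return mallctl != NULL;  /* non-NULL only if the allocator provides it */
}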
211 | | |
212 | | /* replace the libc's malloc_trim() so that we can also intercept the calls |
213 | | * from child libraries when the allocator is not the default one. |
214 | | */ |
215 | | int malloc_trim(size_t pad) |
216 | 0 | { |
217 | 0 | int ret = 0; |
218 | |
|
219 | 0 | if (disable_trim) |
220 | 0 | return ret; |
221 | | |
222 | 0 | HA_ATOMIC_INC(&pool_trim_in_progress); |
223 | |
|
224 | 0 | if (my_mallctl) { |
225 | | /* here we're on jemalloc and malloc_trim() is called either |
226 | | * by haproxy or another dependency (the worst case that |
227 | | * normally crashes). Instead of just failing, we can actually |
228 | | * emulate it so let's do it now. |
229 | | */ |
230 | 0 | unsigned int i, narenas = 0; |
231 | 0 | size_t len = sizeof(narenas); |
232 | |
|
233 | 0 | if (my_mallctl("arenas.narenas", &narenas, &len, NULL, 0) == 0) { |
234 | 0 | for (i = 0; i < narenas; i ++) { |
235 | 0 | char mib[32] = {0}; |
236 | 0 | snprintf(mib, sizeof(mib), "arena.%u.purge", i); |
237 | 0 | (void)my_mallctl(mib, NULL, NULL, NULL, 0); |
238 | 0 | ret = 1; // success |
239 | 0 | } |
240 | 0 | } |
241 | 0 | } |
242 | 0 | else if (!using_default_allocator) { |
243 | | /* special allocators that can be LD_PRELOADed end here */ |
244 | 0 | ret = 0; // did nothing |
245 | 0 | } |
246 | 0 | else if (_malloc_trim) { |
247 | | /* we're typically on glibc and not overridden */ |
248 | 0 | ret = _malloc_trim(pad); |
249 | 0 | } |
250 | | #if defined(HA_HAVE_MALLOC_ZONE) |
251 | | else { |
252 | | /* we're on MacOS, there's an equivalent mechanism */ |
253 | | vm_address_t *zones; |
254 | | unsigned int i, nzones; |
255 | | |
256 | | if (malloc_get_all_zones(0, NULL, &zones, &nzones) == KERN_SUCCESS) { |
257 | | for (i = 0; i < nzones; i ++) { |
258 | | malloc_zone_t *zone = (malloc_zone_t *)zones[i]; |
259 | | |
260 | | /* we cannot purge anonymous zones */ |
261 | | if (zone->zone_name) { |
262 | | malloc_zone_pressure_relief(zone, 0); |
263 | | ret = 1; // success |
264 | | } |
265 | | } |
266 | | } |
267 | | } |
268 | | #endif |
269 | 0 | HA_ATOMIC_DEC(&pool_trim_in_progress); |
270 | | |
271 | | /* here we have ret=0 if nothing was released, or 1 if some were */ |
272 | 0 | return ret; |
273 | 0 | } |
274 | | |
275 | | static int mem_should_fail(const struct pool_head *pool) |
276 | 0 | { |
277 | 0 | int ret = 0; |
278 | |
|
279 | 0 | if (mem_fail_rate > 0 && !(global.mode & MODE_STARTING)) { |
280 | 0 | if (mem_fail_rate > statistical_prng_range(100)) |
281 | 0 | ret = 1; |
282 | 0 | else |
283 | 0 | ret = 0; |
284 | 0 | } |
285 | 0 | return ret; |
286 | 0 | } |
287 | | |
288 | | /* Try to find an existing shared pool with the same characteristics and |
289 | | * returns it, otherwise creates this one. NULL is returned if no memory |
290 | | * is available for a new creation. Two flags are supported : |
291 | | * - MEM_F_SHARED to indicate that the pool may be shared with other users |
292 | | * - MEM_F_EXACT to indicate that the size must not be rounded up |
293 | | * The name must be a stable pointer during all the program's life time. |
294 | | * The file and line are passed to store the registration location in the |
295 | | * registration struct. Use create_pool() instead which does it for free. |
296 | | * The alignment will be stored as-is in the registration. |
297 | | */ |
298 | | struct pool_head *create_pool_with_loc(const char *name, unsigned int size, |
299 | | unsigned int align, unsigned int flags, |
300 | | const char *file, unsigned int line) |
301 | 0 | { |
302 | 0 | struct pool_registration *reg; |
303 | 0 | struct pool_head *pool; |
304 | |
|
305 | 0 | reg = calloc(1, sizeof(*reg)); |
306 | 0 | if (!reg) |
307 | 0 | return NULL; |
308 | | |
309 | 0 | reg->name = name; |
310 | 0 | reg->file = file; |
311 | 0 | reg->line = line; |
312 | 0 | reg->size = size; |
313 | 0 | reg->flags = flags; |
314 | 0 | reg->align = align; |
315 | |
|
316 | 0 | pool = create_pool_from_reg(name, reg); |
317 | 0 | if (!pool) |
318 | 0 | free(reg); |
319 | 0 | return pool; |
320 | 0 | } |
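/* Editor's sketch (hypothetical function, not part of the build): typical
 * registration of a shared pool through the allocator above. create_pool()
 * is the usual wrapper and fills in __FILE__/__LINE__ itself.
 */
static struct pool_head *sketch_make_example_pool(void)
{
	/* 64-byte objects, default alignment, shareable with similar pools */
	return create_pool_with_loc("example", 64, 0, MEM_F_SHARED,
	                            __FILE__, __LINE__);
}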
321 | | |
322 | | /* create a pool from a pool registration. All configuration is taken from |
323 | | * there. The alignment will automatically be raised to sizeof(void*) or the |
324 | | * next power of two so that it's always possible to lazily pass alignof() or |
325 | | * sizeof(). Alignments are always respected when merging pools. |
326 | | */ |
327 | | struct pool_head *create_pool_from_reg(const char *name, struct pool_registration *reg) |
328 | 0 | { |
329 | 0 | unsigned int extra_mark, extra_caller, extra; |
330 | 0 | unsigned int flags = reg->flags; |
331 | 0 | unsigned int size = reg->size; |
332 | 0 | unsigned int alignment = reg->align; |
333 | 0 | struct pool_head *pool = NULL; |
334 | 0 | struct pool_head *entry; |
335 | 0 | struct list *start; |
336 | 0 | unsigned int align; |
337 | 0 | unsigned int best_diff; |
338 | 0 | int thr __maybe_unused; |
339 | | |
340 | | /* extend alignment if needed */ |
341 | 0 | if (alignment < sizeof(void*)) |
342 | 0 | alignment = sizeof(void*); |
343 | 0 | else if (alignment & (alignment - 1)) { |
344 | | /* not power of two! round up to next power of two by filling |
345 | | * all LSB in O(log(log(N))) then increment the result. |
346 | | */ |
347 | 0 | int shift = 1; |
348 | 0 | do { |
349 | 0 | alignment |= alignment >> shift; |
350 | 0 | shift *= 2; |
351 | 0 | } while (alignment & (alignment + 1)); |
352 | 0 | alignment++; |
353 | 0 | } |
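	/* Worked example (editor's note): alignment=24 (0b11000) enters the
	 * loop above; pass 1: 24|12 = 28 (0b11100); pass 2: 28|7 = 31
	 * (0b11111), all low bits now set, so the loop stops and the final
	 * increment yields 32, the next power of two.
	 */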
354 | |
|
355 | 0 | if (reg->type_align && alignment < reg->type_align) { |
356 | 0 | ha_alert("BUG in the code: at %s:%u, requested creation of pool '%s' aligned to %u " |
357 | 0 | "while type requires alignment of %u! Please report to developers. Aborting.\n", |
358 | 0 | reg->file, reg->line, name, alignment, reg->type_align); |
359 | 0 | return NULL; |
360 | 0 | } |
361 | | |
362 | 0 | extra_mark = (pool_debugging & POOL_DBG_TAG) ? POOL_EXTRA_MARK : 0; |
363 | 0 | extra_caller = (pool_debugging & POOL_DBG_CALLER) ? POOL_EXTRA_CALLER : 0; |
364 | 0 | extra = extra_mark + extra_caller; |
365 | |
|
366 | 0 | if (!(pool_debugging & POOL_DBG_NO_CACHE)) { |
367 | | /* we'll store two lists there, we need the room for this. Let's |
368 | | * make sure it's always OK even when including the extra word |
369 | | * that is stored after the pci struct. |
370 | | */ |
371 | 0 | if (size + extra - extra_caller < sizeof(struct pool_cache_item)) |
372 | 0 | size = sizeof(struct pool_cache_item) + extra_caller - extra; |
373 | 0 | } |
374 | | |
375 | | /* Now we know our size is set to the strict minimum possible. It may |
376 | | * be OK for elements allocated with an exact size (e.g. buffers), but |
377 | | * we're going to round the size up 16 bytes to merge almost identical |
378 | | * pools together. We only round up to 16 when the debugging tag is |
379 | | * not added, since the tag must stay adjacent to the data to detect |
380 | | * overflows; with the tag we only round up to a word for alignment. |
381 | | */ |
382 | 0 | if (!(flags & MEM_F_EXACT)) { |
383 | 0 | align = (pool_debugging & POOL_DBG_TAG) ? sizeof(void *) : 16; |
384 | 0 | size = ((size + align - 1) & -align); |
385 | 0 | } |
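	/* Worked example (editor's note): without the tag, align=16, so a
	 * requested size of 33 becomes ((33 + 15) & -16) = 48, letting pools
	 * of nearly identical sizes fall into the same rounded class.
	 */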
386 | |
|
387 | 0 | if (pool_debugging & POOL_DBG_BACKUP) { |
388 | | /* keep a full backup of the pool at release time. We need |
389 | | * a word-aligned size, so it's fine to do it now. |
390 | | */ |
391 | 0 | extra += size; |
392 | 0 | } |
393 | | |
394 | | /* TODO: thread: we do not lock pool list for now because all pools are |
395 | | * created during HAProxy startup (so before threads creation) */ |
396 | 0 | start = &pools; |
397 | 0 | best_diff = ~0U; |
398 | |
|
399 | 0 | list_for_each_entry(entry, &pools, list) { |
400 | 0 | if (entry->size == size || |
401 | 0 | (!(flags & MEM_F_EXACT) && !pool_allocated(entry) && |
402 | | /* size within 1% of avg size */ |
403 | 0 | (((ullong)entry->sum_size * 100ULL < (ullong)size * entry->users * 101ULL && |
404 | 0 | (ullong)entry->sum_size * 101ULL > (ullong)size * entry->users * 100ULL) || |
405 | | /* or +/- 16 compared to the current avg size */ |
406 | 0 | (entry->sum_size - 16 * entry->users < size * entry->users && |
407 | 0 | entry->sum_size + 16 * entry->users > size * entry->users)))) { |
408 | | |
409 | | /* either we can share this place and we take it, or |
410 | | * we look for a shareable one or for the next position |
411 | | * before which we will insert a new one. |
412 | | */ |
413 | 0 | if ((flags & entry->flags & MEM_F_SHARED) && |
414 | 0 | (!(pool_debugging & POOL_DBG_DONT_MERGE) || |
415 | 0 | strcmp(name, entry->name) == 0)) { |
416 | | /* we can share this one */ |
417 | 0 | uint diff = (abs((int)(size * entry->users - entry->size)) + entry->users / 2) / entry->users; |
418 | | |
419 | | /* the principle here is: |
420 | | * - if the best pool is smaller and the current |
421 | | * candidate larger, we prefer the larger one |
422 | | * so as not to grow an existing pool; |
423 | | * - otherwise we go for the smallest distance |
424 | | * from the existing one. |
425 | | */ |
426 | 0 | if (!pool || entry->size == size || |
427 | 0 | (pool->size != size && |
428 | 0 | ((pool->size < size && entry->size >= size) || |
429 | 0 | (diff == best_diff && entry->size >= size) || |
430 | 0 | (diff < best_diff)))) { |
431 | 0 | best_diff = diff; |
432 | 0 | pool = entry; |
433 | 0 | } |
434 | 0 | } |
435 | 0 | } |
436 | 0 | else if (entry->size > size) { |
437 | | /* insert before this one */ |
438 | 0 | start = &entry->list; |
439 | 0 | } |
440 | 0 | } |
441 | |
|
442 | 0 | if (!pool) { |
443 | 0 | void *pool_addr; |
444 | |
|
445 | 0 | pool_addr = calloc(1, sizeof(*pool) + __alignof__(*pool)); |
446 | 0 | if (!pool_addr) |
447 | 0 | goto fail; |
448 | | |
449 | | /* always provide an aligned pool */ |
450 | 0 | pool = (struct pool_head*)((((size_t)pool_addr) + __alignof__(*pool)) & -(size_t)__alignof__(*pool)); |
451 | 0 | pool->base_addr = pool_addr; // keep it, it's the address to free later |
452 | |
|
453 | 0 | if (name) |
454 | 0 | strlcpy2(pool->name, name, sizeof(pool->name)); |
455 | 0 | pool->alloc_sz = size + extra; |
456 | 0 | pool->size = size; |
457 | 0 | pool->align = alignment; |
458 | 0 | pool->flags = flags; |
459 | 0 | LIST_APPEND(start, &pool->list); |
460 | 0 | LIST_INIT(&pool->regs); |
461 | |
|
462 | 0 | if (!(pool_debugging & POOL_DBG_NO_CACHE)) { |
463 | | /* update per-thread pool cache if necessary */ |
464 | 0 | for (thr = 0; thr < MAX_THREADS; thr++) { |
465 | 0 | LIST_INIT(&pool->cache[thr].list); |
466 | 0 | pool->cache[thr].tid = thr; |
467 | 0 | pool->cache[thr].pool = pool; |
468 | 0 | } |
469 | 0 | } |
470 | 0 | } |
471 | 0 | else { |
472 | | /* we found the best one */ |
473 | 0 | if (size > pool->size) { |
474 | 0 | pool->size = size; |
475 | 0 | pool->alloc_sz = size + extra; |
476 | 0 | } |
477 | 0 | if (alignment > pool->align) |
478 | 0 | pool->align = alignment; |
479 | 0 | DPRINTF(stderr, "Sharing %s with %s\n", name, pool->name); |
480 | 0 | } |
481 | | |
482 | 0 | LIST_APPEND(&pool->regs, ®->list); |
483 | 0 | pool->users++; |
484 | 0 | pool->sum_size += size; |
485 | |
|
486 | 0 | fail: |
487 | 0 | return pool; |
488 | 0 | } |
489 | | |
490 | | /* Tries to allocate an object for the pool <pool> using the system's allocator |
491 | | * and directly returns it. The pool's allocated counter is checked but NOT |
492 | | * updated; this is left to the caller, and no other checks are performed. |
493 | | */ |
494 | | void *pool_get_from_os_noinc(struct pool_head *pool) |
495 | 0 | { |
496 | 0 | if (!pool->limit || pool_allocated(pool) < pool->limit) { |
497 | 0 | void *ptr; |
498 | |
|
499 | 0 | if ((pool_debugging & POOL_DBG_UAF) || (pool->flags & MEM_F_UAF)) |
500 | 0 | ptr = pool_alloc_area_uaf(pool->alloc_sz, pool->align); |
501 | 0 | else |
502 | 0 | ptr = pool_alloc_area(pool->alloc_sz, pool->align); |
503 | 0 | if (ptr) |
504 | 0 | return ptr; |
505 | 0 | _HA_ATOMIC_INC(&pool->buckets[pool_tbucket()].failed); |
506 | 0 | } |
507 | 0 | activity[tid].pool_fail++; |
508 | 0 | return NULL; |
509 | |
|
510 | 0 | } |
511 | | |
512 | | /* Releases a pool item back to the operating system but DOES NOT update |
513 | | * the allocation counter, it's left to the caller to do it. It may be |
514 | | * done before or after, it doesn't matter, the function does not use it. |
515 | | */ |
516 | | void pool_put_to_os_nodec(struct pool_head *pool, void *ptr) |
517 | 0 | { |
518 | 0 | if ((pool_debugging & POOL_DBG_UAF) || (pool->flags & MEM_F_UAF)) |
519 | 0 | pool_free_area_uaf(ptr, pool->alloc_sz); |
520 | 0 | else |
521 | 0 | pool_free_area(ptr, pool->alloc_sz); |
522 | 0 | } |
523 | | |
524 | | /* Tries to allocate an object for the pool <pool> using the system's allocator |
525 | | * and directly returns it. The pool's counters are updated but the object is |
526 | | * never cached, so this is usable with and without local or shared caches. |
527 | | */ |
528 | | void *pool_alloc_nocache(struct pool_head *pool, const void *caller) |
529 | 0 | { |
530 | 0 | void *ptr = NULL; |
531 | 0 | uint bucket; |
532 | 0 | uint used; |
533 | |
|
534 | 0 | ptr = pool_get_from_os_noinc(pool); |
535 | 0 | if (!ptr) |
536 | 0 | return NULL; |
537 | | |
538 | 0 | bucket = pool_pbucket(ptr); |
539 | |
|
540 | 0 | _HA_ATOMIC_INC(&pool->buckets[bucket].allocated); |
541 | 0 | used = _HA_ATOMIC_FETCH_ADD(&pool->buckets[bucket].used, 1); |
542 | 0 | swrate_add_scaled_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used, POOL_AVG_SAMPLES/4); |
543 | | |
544 | | /* keep track of where the element was allocated from */ |
545 | 0 | POOL_DEBUG_SET_MARK(pool, ptr); |
546 | 0 | POOL_DEBUG_TRACE_CALLER(pool, (struct pool_cache_item *)ptr, caller); |
547 | 0 | return ptr; |
548 | 0 | } |
549 | | |
550 | | /* Release a pool item back to the OS and keeps the pool's counters up to date. |
551 | | * This is always defined even when pools are not enabled (their usage stats |
552 | | * are maintained). |
553 | | */ |
554 | | void pool_free_nocache(struct pool_head *pool, void *ptr) |
555 | 0 | { |
556 | 0 | uint bucket = pool_pbucket(ptr); |
557 | 0 | uint used; |
558 | |
|
559 | 0 | used = _HA_ATOMIC_SUB_FETCH(&pool->buckets[bucket].used, 1); |
560 | 0 | _HA_ATOMIC_DEC(&pool->buckets[bucket].allocated); |
561 | 0 | swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used); |
562 | |
|
563 | 0 | pool_put_to_os_nodec(pool, ptr); |
564 | 0 | } |
565 | | |
566 | | /* Copies the contents of <item> to the reserved area after it to have a backup. |
567 | | * The item part is left untouched. |
568 | | */ |
569 | | void pool_copy_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size) |
570 | 0 | { |
571 | 0 | ulong *ptr = (ulong *)item; |
572 | 0 | ulong extra; |
573 | |
|
574 | 0 | if (size <= sizeof(*item)) |
575 | 0 | return; |
576 | | |
577 | 0 | extra = !!(pool_debugging & (POOL_DBG_TAG|POOL_DBG_CALLER)); |
578 | 0 | memcpy(&ptr[size/sizeof(*ptr) + extra], ptr, size); |
579 | 0 | } |
580 | | |
581 | | /* Updates <pch>'s fill_pattern and fills the free area after <item> with it, |
582 | | * up to <size> bytes. The item part is left untouched. |
583 | | */ |
584 | | void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size) |
585 | 0 | { |
586 | 0 | ulong *ptr = (ulong *)item; |
587 | 0 | uint ofs; |
588 | 0 | ulong u; |
589 | |
|
590 | 0 | if (size <= sizeof(*item)) |
591 | 0 | return; |
592 | | |
593 | | /* Upgrade the fill_pattern to change about half of the bits |
594 | | * (to be sure to catch static flag corruption), and apply it. |
595 | | */ |
596 | 0 | u = pch->fill_pattern += ~0UL / 3; // 0x55...55 |
597 | 0 | ofs = sizeof(*item) / sizeof(*ptr); |
598 | 0 | while (ofs < size / sizeof(*ptr)) |
599 | 0 | ptr[ofs++] = u; |
600 | 0 | } |
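/* Editor's note on the pattern arithmetic above: ~0UL/3 is 0x5555...55, so
 * each call advances fill_pattern by that constant; successive patterns
 * (e.g. 0x00.. -> 0x55.. -> 0xAA.. -> 0xFF..) differ in roughly half their
 * bits, which catches bytes that stay stuck at a fixed value.
 */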
601 | | |
602 | | /* check for a pool_cache_item integrity after extracting it from the cache. It |
603 | | * must have been previously initialized using either pool_fill_pattern() or |
604 | | * pool_copy_pattern(). If any corruption is detected, the function provokes an |
605 | | * immediate crash. |
606 | | */ |
607 | | void pool_check_pattern(struct pool_cache_head *pch, struct pool_head *pool, struct pool_cache_item *item, const void *caller) |
608 | 0 | { |
609 | 0 | const ulong *ptr = (const ulong *)item; |
610 | 0 | uint size = pool->size; |
611 | 0 | uint ofs; |
612 | 0 | ulong u; |
613 | |
|
614 | 0 | if (size <= sizeof(*item)) |
615 | 0 | return; |
616 | | |
617 | 0 | if (pool_debugging & POOL_DBG_INTEGRITY) { |
618 | | /* let's check that all words past *item are equal */ |
619 | 0 | ofs = sizeof(*item) / sizeof(*ptr); |
620 | 0 | u = ptr[ofs++]; |
621 | 0 | while (ofs < size / sizeof(*ptr)) { |
622 | 0 | if (unlikely(ptr[ofs] != u)) { |
623 | 0 | pool_inspect_item("cache corruption detected", pool, item, caller, ofs * sizeof(*ptr)); |
624 | 0 | ABORT_NOW(); |
625 | 0 | } |
626 | 0 | ofs++; |
627 | 0 | } |
628 | 0 | } else { |
629 | | /* the pattern was backed up */ |
630 | 0 | ofs = sizeof(*item) / sizeof(*ptr); |
631 | 0 | u = !!(pool_debugging & (POOL_DBG_TAG|POOL_DBG_CALLER)); |
632 | 0 | while (ofs < size / sizeof(*ptr)) { |
633 | 0 | if (unlikely(ptr[ofs] != ptr[size/sizeof(*ptr) + u + ofs])) { |
634 | 0 | pool_inspect_item("cache corruption detected", pool, item, caller, ofs * sizeof(*ptr)); |
635 | 0 | ABORT_NOW(); |
636 | 0 | } |
637 | 0 | ofs++; |
638 | 0 | } |
639 | 0 | } |
640 | 0 | } |
641 | | |
642 | | /* removes up to <count> items from the end of the local pool cache <ph> for |
643 | | * pool <pool>. The shared pool is refilled with these objects in the limit |
644 | | * of the number of acceptable objects, and the rest will be released to the |
645 | | * OS. It is not a problem if <count> is larger than the number of objects in |
646 | | * the local cache. The counters are automatically updated. Must not be used |
647 | | * with pools disabled. |
648 | | */ |
649 | | static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count) |
650 | 0 | { |
651 | 0 | struct pool_cache_item *item; |
652 | 0 | struct pool_item *pi, *head = NULL; |
653 | 0 | void *caller = __builtin_return_address(0); |
654 | 0 | uint released = 0; |
655 | 0 | uint cluster = 0; |
656 | 0 | uint to_free_max; |
657 | 0 | uint bucket; |
658 | 0 | uint used; |
659 | |
|
660 | 0 | BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); |
661 | | |
662 | | /* Note: this will be zero when global pools are disabled */ |
663 | 0 | to_free_max = pool_releasable(pool); |
664 | |
|
665 | 0 | while (released < count && !LIST_ISEMPTY(&ph->list)) { |
666 | 0 | item = LIST_PREV(&ph->list, typeof(item), by_pool); |
667 | 0 | BUG_ON(&item->by_pool == &ph->list); |
668 | 0 | if (unlikely(pool_debugging & (POOL_DBG_INTEGRITY|POOL_DBG_BACKUP))) |
669 | 0 | pool_check_pattern(ph, pool, item, caller); |
670 | 0 | LIST_DELETE(&item->by_pool); |
671 | 0 | LIST_DELETE(&item->by_lru); |
672 | |
|
673 | 0 | bucket = pool_pbucket(item); |
674 | 0 | used = _HA_ATOMIC_SUB_FETCH(&pool->buckets[bucket].used, 1); |
675 | 0 | swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, used); |
676 | |
|
677 | 0 | if (to_free_max > released || cluster) { |
678 | | /* will never match when global pools are disabled */ |
679 | 0 | pi = (struct pool_item *)item; |
680 | 0 | pi->next = NULL; |
681 | 0 | pi->down = head; |
682 | 0 | head = pi; |
683 | 0 | cluster++; |
684 | 0 | if (cluster >= CONFIG_HAP_POOL_CLUSTER_SIZE) { |
685 | | /* enough to make a cluster */ |
686 | 0 | pool_put_to_shared_cache(pool, head); |
687 | 0 | cluster = 0; |
688 | 0 | head = NULL; |
689 | 0 | } |
690 | 0 | } else { |
691 | | /* does pool_free_nocache() with a known bucket */ |
692 | 0 | _HA_ATOMIC_DEC(&pool->buckets[bucket].allocated); |
693 | 0 | pool_put_to_os_nodec(pool, item); |
694 | 0 | } |
695 | |
|
696 | 0 | released++; |
697 | 0 | } |
698 | | |
699 | | /* incomplete cluster left */ |
700 | 0 | if (cluster) |
701 | 0 | pool_put_to_shared_cache(pool, head); |
702 | |
|
703 | 0 | ph->count -= released; |
704 | 0 | pool_cache_count -= released; |
705 | 0 | pool_cache_bytes -= released * pool->size; |
706 | 0 | } |
707 | | |
708 | | /* Evicts some of the oldest objects from one local cache, until its number of |
709 | | * objects is no more than 16+1/8 of the total number of locally cached objects |
710 | | * or the total size of the local cache is no more than 75% of its maximum (i.e. |
711 | | * we don't want a single cache to use all the cache for itself). For this, the |
712 | | * list is scanned in reverse. If <full> is non-null, all objects are evicted. |
713 | | * Must not be used when pools are disabled. |
714 | | */ |
715 | | void pool_evict_from_local_cache(struct pool_head *pool, int full) |
716 | 0 | { |
717 | 0 | struct pool_cache_head *ph = &pool->cache[tid]; |
718 | |
|
719 | 0 | BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); |
720 | | |
721 | 0 | while ((ph->count && full) || |
722 | 0 | (ph->count >= CONFIG_HAP_POOL_CLUSTER_SIZE && |
723 | 0 | ph->count >= 16 + pool_cache_count / 8 && |
724 | 0 | pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) { |
725 | 0 | pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE); |
726 | 0 | } |
727 | 0 | } |
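/* Editor's worked example for the thresholds above (the numbers are made up):
 * with pool_cache_count = 800 objects across this thread's caches and
 * global.tune.pool_cache_size = 524288, a non-full eviction keeps trimming
 * this pool by CONFIG_HAP_POOL_CLUSTER_SIZE objects per round as long as it
 * holds at least 16 + 800/8 = 116 objects and the whole local cache stays
 * above 3/4 * 524288 = 393216 bytes.
 */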
728 | | |
729 | | /* Evicts some of the oldest objects from the local cache, pushing them to the |
730 | | * global pool. Must not be used when pools are disabled. |
731 | | */ |
732 | | void pool_evict_from_local_caches() |
733 | 0 | { |
734 | 0 | struct pool_cache_item *item; |
735 | 0 | struct pool_cache_head *ph; |
736 | 0 | struct pool_head *pool; |
737 | |
|
738 | 0 | BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); |
739 | | |
740 | 0 | do { |
741 | 0 | item = LIST_PREV(&th_ctx->pool_lru_head, struct pool_cache_item *, by_lru); |
742 | 0 | BUG_ON(&item->by_lru == &th_ctx->pool_lru_head); |
743 | | /* note: by definition we remove oldest objects so they also are the |
744 | | * oldest in their own pools, thus their next is the pool's head. |
745 | | */ |
746 | 0 | ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list); |
747 | 0 | BUG_ON(ph->tid != tid); |
748 | | |
749 | 0 | pool = container_of(ph - tid, struct pool_head, cache); |
750 | 0 | BUG_ON(pool != ph->pool); |
751 | | |
752 | 0 | pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE); |
753 | 0 | } while (pool_cache_bytes > global.tune.pool_cache_size * 7 / 8); |
754 | 0 | } |
755 | | |
756 | | /* Frees an object to the local cache, possibly pushing oldest objects to the |
757 | | * shared cache, which itself may decide to release some of them to the OS. |
758 | | * While it is unspecified what the object becomes past this point, it is |
759 | | * guaranteed to be released from the users' perspective. A caller address may |
760 | | * be passed and stored into the area when DEBUG_POOL_TRACING is set. Must not |
761 | | * be used with pools disabled. |
762 | | */ |
763 | | void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller) |
764 | 0 | { |
765 | 0 | struct pool_cache_item *item = (struct pool_cache_item *)ptr; |
766 | 0 | struct pool_cache_head *ph = &pool->cache[tid]; |
767 | |
|
768 | 0 | BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); |
769 | | |
770 | 0 | LIST_INSERT(&ph->list, &item->by_pool); |
771 | 0 | LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru); |
772 | 0 | POOL_DEBUG_TRACE_CALLER(pool, item, caller); |
773 | 0 | ph->count++; |
774 | 0 | if (unlikely(pool_debugging & POOL_DBG_BACKUP)) |
775 | 0 | pool_copy_pattern(ph, item, pool->size); |
776 | |
|
777 | 0 | if (unlikely(pool_debugging & POOL_DBG_INTEGRITY)) |
778 | 0 | pool_fill_pattern(ph, item, pool->size); |
779 | |
|
780 | 0 | pool_cache_count++; |
781 | 0 | pool_cache_bytes += pool->size; |
782 | |
|
783 | 0 | if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) { |
784 | 0 | if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE) |
785 | 0 | pool_evict_from_local_cache(pool, 0); |
786 | 0 | if (pool_cache_bytes > global.tune.pool_cache_size) |
787 | 0 | pool_evict_from_local_caches(); |
788 | 0 | } |
789 | 0 | } |
790 | | |
791 | | /* Tries to refill the local cache <pch> from the shared one for pool <pool>. |
792 | | * This is only used when pools are in use and shared pools are enabled. No |
793 | | * malloc() is attempted, and poisoning is never performed. The purpose is to |
794 | | * get the fastest possible refilling so that the caller can easily check if |
795 | | * the cache has enough objects for its use. Must not be used when pools are |
796 | | * disabled. |
797 | | */ |
798 | | void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch) |
799 | 0 | { |
800 | 0 | struct pool_cache_item *item; |
801 | 0 | struct pool_item *ret, *down; |
802 | 0 | uint bucket; |
803 | 0 | uint count; |
804 | |
|
805 | 0 | BUG_ON(pool_debugging & POOL_DBG_NO_CACHE); |
806 | | |
807 | | /* we'll need to reference the first element to figure the next one. We |
808 | | * must temporarily lock it so that nobody allocates then releases it, |
809 | | * or the dereference could fail. In order to limit the locking, |
810 | | * threads start from a bucket that depends on their ID. |
811 | | */ |
812 | | |
813 | 0 | bucket = pool_tbucket(); |
814 | 0 | ret = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list); |
815 | 0 | count = 0; |
816 | 0 | do { |
817 | | /* look for an apparently non-busy entry. If we hit a busy pool |
818 | | * we retry with another random bucket. And if we encounter a |
819 | | * NULL, we retry once with another random bucket. This is in |
820 | | * order to prevent object accumulation in other buckets. |
821 | | */ |
822 | 0 | while (unlikely(ret == POOL_BUSY || (ret == NULL && count++ < 1))) { |
823 | 0 | bucket = statistical_prng() % CONFIG_HAP_POOL_BUCKETS; |
824 | 0 | ret = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list); |
825 | 0 | } |
826 | 0 | if (ret == NULL) |
827 | 0 | return; |
828 | 0 | } while (unlikely((ret = _HA_ATOMIC_XCHG(&pool->buckets[bucket].free_list, POOL_BUSY)) == POOL_BUSY)); |
829 | | |
830 | 0 | if (unlikely(ret == NULL)) { |
831 | 0 | HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, NULL); |
832 | 0 | return; |
833 | 0 | } |
834 | | |
835 | | /* this releases the lock */ |
836 | 0 | HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, ret->next); |
837 | | |
838 | | /* now store the retrieved object(s) into the local cache. Note that |
839 | | * they don't all have the same hash and that it doesn't necessarily |
840 | | * match the one from the pool. |
841 | | */ |
842 | 0 | count = 0; |
843 | 0 | for (; ret; ret = down) { |
844 | 0 | down = ret->down; |
845 | 0 | item = (struct pool_cache_item *)ret; |
846 | 0 | POOL_DEBUG_TRACE_CALLER(pool, item, NULL); |
847 | 0 | LIST_INSERT(&pch->list, &item->by_pool); |
848 | 0 | LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru); |
849 | 0 | _HA_ATOMIC_INC(&pool->buckets[pool_pbucket(item)].used); |
850 | 0 | count++; |
851 | 0 | if (unlikely(pool_debugging & POOL_DBG_INTEGRITY)) |
852 | 0 | pool_fill_pattern(pch, item, pool->size); |
853 | |
|
854 | 0 | } |
855 | 0 | pch->count += count; |
856 | 0 | pool_cache_count += count; |
857 | 0 | pool_cache_bytes += count * pool->size; |
858 | 0 | } |
859 | | |
860 | | /* Adds pool item cluster <item> to the shared cache, which contains <count> |
861 | | * elements. The caller is advised to first check using pool_releasable() if |
862 | | * it's wise to add this series of objects there. Both the pool and the item's |
863 | | * head must be valid. |
864 | | */ |
865 | | void pool_put_to_shared_cache(struct pool_head *pool, struct pool_item *item) |
866 | 0 | { |
867 | 0 | struct pool_item *free_list; |
868 | 0 | uint bucket = pool_pbucket(item); |
869 | | |
870 | | /* we prefer to put the item into the entry that corresponds to its own |
871 | | * hash so that on return it remains in the right place, but that's not |
872 | | * mandatory. |
873 | | */ |
874 | 0 | free_list = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list); |
875 | 0 | do { |
876 | | /* look for an apparently non-busy entry */ |
877 | 0 | while (unlikely(free_list == POOL_BUSY)) { |
878 | 0 | bucket = (bucket + 1) % CONFIG_HAP_POOL_BUCKETS; |
879 | 0 | free_list = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list); |
880 | 0 | } |
881 | 0 | _HA_ATOMIC_STORE(&item->next, free_list); |
882 | 0 | __ha_barrier_atomic_store(); |
883 | 0 | } while (!_HA_ATOMIC_CAS(&pool->buckets[bucket].free_list, &free_list, item)); |
884 | 0 | __ha_barrier_atomic_store(); |
885 | 0 | } |
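/* Editor's note on clusters (layout inferred from pool_evict_last_items() and
 * pool_flush() in this file): up to CONFIG_HAP_POOL_CLUSTER_SIZE items are
 * chained through their ->down pointer, and only cluster heads are linked
 * together through ->next in the shared free_list:
 *
 *    free_list -> [head] -next-> [head] -next-> NULL
 *                   |down          |down
 *                 [item]         [item]
 *                   |down          |down
 *                  NULL           NULL
 */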
886 | | |
887 | | /* |
888 | | * This function frees whatever can be freed in pool <pool>. |
889 | | */ |
890 | | void pool_flush(struct pool_head *pool) |
891 | 0 | { |
892 | 0 | struct pool_item *next, *temp, *down; |
893 | 0 | uint bucket; |
894 | |
|
895 | 0 | if (!pool || (pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL))) |
896 | 0 | return; |
897 | | |
898 | | /* The loop below atomically detaches the head of the free list and |
899 | | * replaces it with a NULL. Then the list can be released. |
900 | | */ |
901 | 0 | for (bucket = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) { |
902 | 0 | next = pool->buckets[bucket].free_list; |
903 | 0 | while (1) { |
904 | 0 | while (unlikely(next == POOL_BUSY)) |
905 | 0 | next = (void*)pl_wait_new_long((ulong*)&pool->buckets[bucket].free_list, (ulong)next); |
906 | |
|
907 | 0 | if (next == NULL) |
908 | 0 | break; |
909 | | |
910 | 0 | next = _HA_ATOMIC_XCHG(&pool->buckets[bucket].free_list, POOL_BUSY); |
911 | 0 | if (next != POOL_BUSY) { |
912 | 0 | HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, NULL); |
913 | 0 | break; |
914 | 0 | } |
915 | 0 | } |
916 | |
|
917 | 0 | while (next) { |
918 | 0 | temp = next; |
919 | 0 | next = temp->next; |
920 | 0 | for (; temp; temp = down) { |
921 | 0 | down = temp->down; |
922 | 0 | _HA_ATOMIC_DEC(&pool->buckets[pool_pbucket(temp)].allocated); |
923 | 0 | pool_put_to_os_nodec(pool, temp); |
924 | 0 | } |
925 | 0 | } |
926 | 0 | } |
927 | | /* here, we should have pool->allocated == pool->used */ |
928 | 0 | } |
929 | | |
930 | | /* |
931 | | * This function frees whatever can be freed in all pools, but respecting |
932 | | * the minimum thresholds imposed by owners. It makes sure to be alone to |
933 | | * run by using thread_isolate(). <pool_ctx> is unused. |
934 | | */ |
935 | | void pool_gc(struct pool_head *pool_ctx) |
936 | 0 | { |
937 | 0 | struct pool_head *entry; |
938 | 0 | int isolated = thread_isolated(); |
939 | |
|
940 | 0 | if (!isolated) |
941 | 0 | thread_isolate(); |
942 | |
|
943 | 0 | list_for_each_entry(entry, &pools, list) { |
944 | 0 | struct pool_item *temp, *down; |
945 | 0 | uint allocated = pool_allocated(entry); |
946 | 0 | uint used = pool_used(entry); |
947 | 0 | int bucket = 0; |
948 | |
|
949 | 0 | while ((int)(allocated - used) > (int)entry->minavail) { |
950 | | /* ok let's find next entry to evict */ |
951 | 0 | while (!entry->buckets[bucket].free_list && bucket < CONFIG_HAP_POOL_BUCKETS) |
952 | 0 | bucket++; |
953 | |
|
954 | 0 | if (bucket >= CONFIG_HAP_POOL_BUCKETS) |
955 | 0 | break; |
956 | | |
957 | 0 | temp = entry->buckets[bucket].free_list; |
958 | 0 | entry->buckets[bucket].free_list = temp->next; |
959 | 0 | for (; temp; temp = down) { |
960 | 0 | down = temp->down; |
961 | 0 | allocated--; |
962 | 0 | _HA_ATOMIC_DEC(&entry->buckets[pool_pbucket(temp)].allocated); |
963 | 0 | pool_put_to_os_nodec(entry, temp); |
964 | 0 | } |
965 | 0 | } |
966 | 0 | } |
967 | |
|
968 | 0 | trim_all_pools(); |
969 | |
|
970 | 0 | if (!isolated) |
971 | 0 | thread_release(); |
972 | 0 | } |
973 | | |
974 | | /* |
975 | | * Returns a pointer to type <type> taken from the pool <pool_type> or |
976 | | * dynamically allocated. In the first case, <pool_type> is updated to point to |
977 | | * the next element in the list. <flags> is a binary-OR of POOL_F_* flags. |
978 | | * Prefer using pool_alloc() which does the right thing without flags. |
979 | | */ |
980 | | void *__pool_alloc(struct pool_head *pool, unsigned int flags) |
981 | 0 | { |
982 | 0 | void *p = NULL; |
983 | 0 | void *caller = __builtin_return_address(0); |
984 | |
|
985 | 0 | if (unlikely(pool_debugging & POOL_DBG_FAIL_ALLOC)) |
986 | 0 | if (!(flags & POOL_F_NO_FAIL) && mem_should_fail(pool)) |
987 | 0 | return NULL; |
988 | | |
989 | 0 | if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p) |
990 | 0 | p = pool_get_from_cache(pool, caller); |
991 | |
|
992 | 0 | if (unlikely(!p)) |
993 | 0 | p = pool_alloc_nocache(pool, caller); |
994 | |
|
995 | 0 | if (likely(p)) { |
996 | | #ifdef USE_MEMORY_PROFILING |
997 | | if (unlikely(profiling & HA_PROF_MEMORY)) { |
998 | | extern struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1]; |
999 | | struct memprof_stats *bin; |
1000 | | |
1001 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_P_ALLOC); |
1002 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
1003 | | _HA_ATOMIC_ADD(&bin->alloc_tot, pool->size); |
1004 | | _HA_ATOMIC_STORE(&bin->info, pool); |
1005 | | /* replace the caller with the allocated bin: this way |
1006 | | * the pool_free() call will be able to update our |
1007 | | * entry. We only do it for non-colliding entries though, |
1008 | | * since these ones store the true caller location. |
1009 | | */ |
1010 | | if (bin >= &memprof_stats[0] && bin < &memprof_stats[MEMPROF_HASH_BUCKETS]) |
1011 | | POOL_DEBUG_TRACE_CALLER(pool, (struct pool_cache_item *)p, bin); |
1012 | | } |
1013 | | #endif |
1014 | 0 | if (unlikely(flags & POOL_F_MUST_ZERO)) |
1015 | 0 | memset(p, 0, pool->size); |
1016 | 0 | else if (unlikely(!(flags & POOL_F_NO_POISON) && (pool_debugging & POOL_DBG_POISON))) |
1017 | 0 | memset(p, mem_poison_byte, pool->size); |
1018 | 0 | } |
1019 | 0 | return p; |
1020 | 0 | } |
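/* Editor's sketch (hypothetical pool and wrapper name): a typical allocation
 * through the function above. pool_alloc() is the usual entry point;
 * POOL_F_MUST_ZERO returns a zeroed object as handled in the code above.
 */
static void *sketch_alloc_zeroed(struct pool_head *example_pool)
{
	return __pool_alloc(example_pool, POOL_F_MUST_ZERO);
}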
1021 | | |
1022 | | /* |
1023 | | * Puts a memory area back to the corresponding pool. <ptr> must be valid. Using |
1024 | | * pool_free() is preferred. |
1025 | | */ |
1026 | | void __pool_free(struct pool_head *pool, void *ptr) |
1027 | 0 | { |
1028 | 0 | const void *caller = __builtin_return_address(0); |
1029 | | |
1030 | | /* we'll get late corruption if we refill to the wrong pool or double-free */ |
1031 | 0 | POOL_DEBUG_CHECK_MARK(pool, ptr, caller); |
1032 | 0 | POOL_DEBUG_RESET_MARK(pool, ptr); |
1033 | | |
1034 | | #ifdef USE_MEMORY_PROFILING |
1035 | | if (unlikely(profiling & HA_PROF_MEMORY) && ptr) { |
1036 | | extern struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1]; |
1037 | | struct memprof_stats *bin; |
1038 | | |
1039 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_P_FREE); |
1040 | | _HA_ATOMIC_ADD(&bin->free_calls, 1); |
1041 | | _HA_ATOMIC_ADD(&bin->free_tot, pool->size); |
1042 | | _HA_ATOMIC_STORE(&bin->info, pool); |
1043 | | |
1044 | | /* check if the caller is an allocator, and if so, let's update |
1045 | | * its free() count. |
1046 | | */ |
1047 | | bin = *(struct memprof_stats**)(((char *)ptr) + pool->alloc_sz - sizeof(void*)); |
1048 | | if (bin >= &memprof_stats[0] && bin < &memprof_stats[MEMPROF_HASH_BUCKETS]) { |
1049 | | _HA_ATOMIC_ADD(&bin->free_calls, 1); |
1050 | | _HA_ATOMIC_ADD(&bin->free_tot, pool->size); |
1051 | | } |
1052 | | } |
1053 | | #endif |
1054 | | |
1055 | 0 | if (unlikely((pool_debugging & POOL_DBG_NO_CACHE) || |
1056 | 0 | (pool->flags & MEM_F_UAF) || |
1057 | 0 | global.tune.pool_cache_size < pool->size)) { |
1058 | 0 | pool_free_nocache(pool, ptr); |
1059 | 0 | return; |
1060 | 0 | } |
1061 | | |
1062 | 0 | pool_put_to_cache(pool, ptr, caller); |
1063 | 0 | } |
1064 | | |
1065 | | /* |
1066 | | * This function destroys a pool by freeing it completely, unless it's still |
1067 | | * in use. This should be called only under extreme circumstances. It always |
1068 | | * returns NULL if the resulting pool is empty, easing the clearing of the old |
1069 | | * pointer, otherwise it returns the pool. |
1070 | | * |
1071 | | */ |
1072 | | void *pool_destroy(struct pool_head *pool) |
1073 | 0 | { |
1074 | 0 | if (pool) { |
1075 | 0 | if (!(pool_debugging & POOL_DBG_NO_CACHE)) |
1076 | 0 | pool_evict_from_local_cache(pool, 1); |
1077 | |
|
1078 | 0 | pool_flush(pool); |
1079 | 0 | if (pool_used(pool)) |
1080 | 0 | return pool; |
1081 | 0 | pool->users--; |
1082 | 0 | if (!pool->users) { |
1083 | | /* remove all registrations at once */ |
1084 | 0 | struct pool_registration *reg, *back; |
1085 | |
|
1086 | 0 | list_for_each_entry_safe(reg, back, &pool->regs, list) { |
1087 | 0 | LIST_DELETE(®->list); |
1088 | 0 | if (!(reg->flags & MEM_F_STATREG)) |
1089 | 0 | free(reg); |
1090 | 0 | } |
1091 | |
|
1092 | 0 | LIST_DELETE(&pool->list); |
1093 | | |
1094 | | /* note that if used == 0, the cache is empty */ |
1095 | 0 | free(pool->base_addr); |
1096 | 0 | } |
1097 | | |
1098 | | /* make sure this pool is no longer referenced in memory profiling */ |
1099 | 0 | memprof_remove_stale_info(pool); |
1100 | 0 | } |
1101 | 0 | return NULL; |
1102 | 0 | } |
1103 | | |
1104 | | /* This destroys all pools on exit. It is *not* thread safe. */ |
1105 | | void pool_destroy_all() |
1106 | 0 | { |
1107 | 0 | struct pool_head *entry, *back; |
1108 | |
|
1109 | 0 | list_for_each_entry_safe(entry, back, &pools, list) { |
1110 | | /* there's only one occurrence of each pool in the list, |
1112 | | * and we're exiting instead of looping on the whole |
1112 | | * list just to decrement users, force it to 1 here. |
1113 | | */ |
1114 | 0 | entry->users = 1; |
1115 | 0 | pool_destroy(entry); |
1116 | 0 | } |
1117 | 0 | } |
1118 | | |
1119 | | /* carefully inspects an item upon fatal error and emit diagnostics. |
1120 | | * If ofs < 0, no hint is provided regarding the content location. However if |
1121 | | * ofs >= 0, then we also try to inspect around that place where corruption |
1122 | | * was detected. |
1123 | | */ |
1124 | | void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller, ssize_t ofs) |
1125 | 0 | { |
1126 | 0 | const struct pool_head *the_pool = NULL; |
1127 | |
|
1128 | 0 | chunk_printf(&trash, |
1129 | 0 | "FATAL: pool inconsistency detected in thread %d: %s.\n" |
1130 | 0 | " caller: %p (", |
1131 | 0 | tid + 1, msg, caller); |
1132 | |
|
1133 | 0 | resolve_sym_name(&trash, NULL, caller); |
1134 | |
|
1135 | 0 | chunk_appendf(&trash, |
1136 | 0 | ")\n" |
1137 | 0 | " item: %p\n" |
1138 | 0 | " pool: %p ('%s', size %u, real %u, users %u)\n", |
1139 | 0 | item, pool, pool->name, pool->size, pool->alloc_sz, pool->users); |
1140 | |
|
1141 | 0 | if (ofs >= 0) { |
1142 | 0 | chunk_printf(&trash, "Contents around first corrupted address relative to pool item:.\n"); |
1143 | 0 | dump_area_with_syms(&trash, item, item + ofs, NULL, NULL, NULL); |
1144 | 0 | } |
1145 | |
|
1146 | 0 | if (pool_debugging & POOL_DBG_TAG) { |
1147 | 0 | const void **pool_mark; |
1148 | 0 | struct pool_head *ph; |
1149 | 0 | const void *tag; |
1150 | |
|
1151 | 0 | pool_mark = (const void **)(((char *)item) + pool->size); |
1152 | 0 | tag = may_access(pool_mark) ? *pool_mark : NULL; |
1153 | 0 | if (tag == pool) { |
1154 | 0 | chunk_appendf(&trash, " tag: @%p = %p (%s)\n", pool_mark, tag, pool->name); |
1155 | 0 | the_pool = pool; |
1156 | 0 | } |
1157 | 0 | else { |
1158 | 0 | if (!may_access(pool_mark)) |
1159 | 0 | chunk_appendf(&trash, "Tag not accessible. "); |
1160 | 0 | else |
1161 | 0 | chunk_appendf(&trash, "Tag does not match (%p). ", tag); |
1162 | |
|
1163 | 0 | list_for_each_entry(ph, &pools, list) { |
1164 | 0 | pool_mark = (const void **)(((char *)item) + ph->size); |
1165 | 0 | if (!may_access(pool_mark)) |
1166 | 0 | continue; |
1167 | 0 | tag = *pool_mark; |
1168 | |
|
1169 | 0 | if (tag == ph) { |
1170 | 0 | if (!the_pool) |
1171 | 0 | chunk_appendf(&trash, "Possible origin pool(s):\n"); |
1172 | |
|
1173 | 0 | chunk_appendf(&trash, " tag: @%p = %p (%s, size %u, real %u, users %u)\n", |
1174 | 0 | pool_mark, tag, ph->name, ph->size, ph->alloc_sz, ph->users); |
1175 | 0 | if (!the_pool || the_pool->size < ph->size) |
1176 | 0 | the_pool = ph; |
1177 | 0 | } |
1178 | 0 | } |
1179 | |
|
1180 | 0 | if (!the_pool) { |
1181 | 0 | chunk_appendf(&trash, |
1182 | 0 | "Tag does not match any other pool.\n"); |
1183 | |
|
1184 | 0 | pool_mark = (const void **)(((char *)item) + pool->size); |
1185 | 0 | if (resolve_sym_name(&trash, "Resolving the tag as a pool_free() location: ", *pool_mark)) |
1186 | 0 | chunk_appendf(&trash, "\n"); |
1187 | 0 | else |
1188 | 0 | chunk_appendf(&trash, " (no match).\n"); |
1189 | |
|
1190 | 0 | dump_area_with_syms(&trash, item, pool_mark, pool, "pool", pool->name); |
1191 | 0 | } |
1192 | 0 | } |
1193 | 0 | } |
1194 | |
|
1195 | 0 | if (pool_debugging & POOL_DBG_CALLER) { |
1196 | 0 | struct buffer *trash2 = get_trash_chunk(); |
1197 | 0 | const struct pool_head *ph; |
1198 | 0 | const void **pool_mark; |
1199 | 0 | const void *tag, *rec_tag; |
1200 | |
|
1201 | 0 | ph = the_pool ? the_pool : pool; |
1202 | 0 | pool_mark = (const void **)(((char *)item) + ph->alloc_sz - sizeof(void*)); |
1203 | 0 | rec_tag = may_access(pool_mark) ? *pool_mark : NULL; |
1204 | |
|
1205 | 0 | if (rec_tag && resolve_sym_name(trash2, NULL, rec_tag)) |
1206 | 0 | chunk_appendf(&trash, |
1207 | 0 | "Recorded caller if pool '%s':\n @%p (+%04u) = %p (%s)\n", |
1208 | 0 | ph->name, pool_mark, (uint)(ph->alloc_sz - sizeof(void*)), |
1209 | 0 | rec_tag, trash2->area); |
1210 | |
|
1211 | 0 | if (!the_pool) { |
1212 | | /* the pool couldn't be formally verified */ |
1213 | 0 | chunk_appendf(&trash, "Other possible callers:\n"); |
1214 | 0 | list_for_each_entry(ph, &pools, list) { |
1215 | 0 | if (ph == pool) |
1216 | 0 | continue; |
1217 | 0 | pool_mark = (const void **)(((char *)item) + ph->alloc_sz - sizeof(void*)); |
1218 | 0 | if (!may_access(pool_mark)) |
1219 | 0 | continue; |
1220 | 0 | tag = *pool_mark; |
1221 | 0 | if (tag == rec_tag) |
1222 | 0 | continue; |
1223 | | |
1224 | | /* see if we can resolve something */ |
1225 | 0 | chunk_printf(trash2, "@%p (+%04u) = %p (", pool_mark, (uint)(ph->alloc_sz - sizeof(void*)), tag); |
1226 | 0 | if (resolve_sym_name(trash2, NULL, tag)) { |
1227 | 0 | chunk_appendf(trash2, ")"); |
1228 | 0 | chunk_appendf(&trash, |
1229 | 0 | " %s [as pool %s, size %u, real %u, users %u]\n", |
1230 | 0 | trash2->area, ph->name, ph->size, ph->alloc_sz, ph->users); |
1231 | 0 | } |
1232 | 0 | } |
1233 | 0 | } |
1234 | 0 | } |
1235 | |
|
1236 | 0 | chunk_appendf(&trash, "\n"); |
1237 | 0 | DISGUISE(write(2, trash.area, trash.data)); |
1238 | 0 | } |
1239 | | |
1240 | | /* used by qsort in "show pools" to sort by name */ |
1241 | | static int cmp_dump_pools_name(const void *a, const void *b) |
1242 | 0 | { |
1243 | 0 | const struct pool_dump_info *l = (const struct pool_dump_info *)a; |
1244 | 0 | const struct pool_dump_info *r = (const struct pool_dump_info *)b; |
1245 | |
|
1246 | 0 | return strcmp(l->entry->name, r->entry->name); |
1247 | 0 | } |
1248 | | |
1249 | | /* used by qsort in "show pools" to sort by item size */ |
1250 | | static int cmp_dump_pools_size(const void *a, const void *b) |
1251 | 0 | { |
1252 | 0 | const struct pool_dump_info *l = (const struct pool_dump_info *)a; |
1253 | 0 | const struct pool_dump_info *r = (const struct pool_dump_info *)b; |
1254 | |
|
1255 | 0 | if (l->entry->size > r->entry->size) |
1256 | 0 | return -1; |
1257 | 0 | else if (l->entry->size < r->entry->size) |
1258 | 0 | return 1; |
1259 | 0 | else |
1260 | 0 | return 0; |
1261 | 0 | } |
1262 | | |
1263 | | /* used by qsort in "show pools" to sort by usage */ |
1264 | | static int cmp_dump_pools_usage(const void *a, const void *b) |
1265 | 0 | { |
1266 | 0 | const struct pool_dump_info *l = (const struct pool_dump_info *)a; |
1267 | 0 | const struct pool_dump_info *r = (const struct pool_dump_info *)b; |
1268 | |
|
1269 | 0 | if (l->alloc_bytes > r->alloc_bytes) |
1270 | 0 | return -1; |
1271 | 0 | else if (l->alloc_bytes < r->alloc_bytes) |
1272 | 0 | return 1; |
1273 | 0 | else |
1274 | 0 | return 0; |
1275 | 0 | } |
1276 | | |
1277 | | /* will not dump more than this number of entries. Anything beyond this will |
1278 | | * likely not fit into a regular output buffer anyway. |
1279 | | */ |
1280 | 0 | #define POOLS_MAX_DUMPED_ENTRIES 1024 |
1281 | | |
1282 | | /* This function dumps memory usage information into the trash buffer. |
1283 | | * It may sort by a criterion if bits 0..3 of <how> are non-zero, and |
1284 | | * limit the number of output lines if <max> is non-zero. It may limit |
1285 | | * only to pools whose names start with <pfx> if <pfx> is non-null. |
1286 | | */ |
1287 | | void dump_pools_to_trash(int how, int max, const char *pfx) |
1288 | 0 | { |
1289 | 0 | struct pool_dump_info pool_info[POOLS_MAX_DUMPED_ENTRIES]; |
1290 | 0 | struct pool_head *entry; |
1291 | 0 | unsigned long long allocated, used; |
1292 | 0 | int nbpools, i; |
1293 | 0 | unsigned long long cached_bytes = 0; |
1294 | 0 | uint cached = 0; |
1295 | 0 | uint alloc_items; |
1296 | 0 | int by_what = how & 0xF; // bits 0..3 = sorting criterion |
1297 | 0 | int detailed = !!(how & 0x10); // print details |
1298 | |
|
1299 | 0 | allocated = used = nbpools = 0; |
1300 | |
|
1301 | 0 | list_for_each_entry(entry, &pools, list) { |
1302 | 0 | if (nbpools >= POOLS_MAX_DUMPED_ENTRIES) |
1303 | 0 | break; |
1304 | | |
1305 | 0 | alloc_items = pool_allocated(entry); |
1306 | | /* do not dump unused entries when sorting by usage */ |
1307 | 0 | if (by_what == 3 && !alloc_items) |
1308 | 0 | continue; |
1309 | | |
1310 | | /* verify the pool name if a prefix is requested */ |
1311 | 0 | if (pfx && strncmp(entry->name, pfx, strlen(pfx)) != 0) |
1312 | 0 | continue; |
1313 | | |
1314 | 0 | if (!(pool_debugging & POOL_DBG_NO_CACHE)) { |
1315 | 0 | for (cached = i = 0; i < global.nbthread; i++) |
1316 | 0 | cached += entry->cache[i].count; |
1317 | 0 | } |
1318 | 0 | pool_info[nbpools].entry = entry; |
1319 | 0 | pool_info[nbpools].alloc_items = alloc_items; |
1320 | 0 | pool_info[nbpools].alloc_bytes = (ulong)entry->size * alloc_items; |
1321 | 0 | pool_info[nbpools].used_items = pool_used(entry); |
1322 | 0 | pool_info[nbpools].cached_items = cached; |
1323 | 0 | pool_info[nbpools].need_avg = swrate_avg(pool_needed_avg(entry), POOL_AVG_SAMPLES); |
1324 | 0 | pool_info[nbpools].failed_items = pool_failed(entry); |
1325 | 0 | nbpools++; |
1326 | 0 | } |
1327 | |
|
1328 | 0 | if (by_what == 1) /* sort by name */ |
1329 | 0 | qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_name); |
1330 | 0 | else if (by_what == 2) /* sort by item size */ |
1331 | 0 | qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_size); |
1332 | 0 | else if (by_what == 3) /* sort by total usage */ |
1333 | 0 | qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_usage); |
1334 | |
|
1335 | 0 | chunk_printf(&trash, "Dumping pools usage"); |
1336 | 0 | if (!max || max >= POOLS_MAX_DUMPED_ENTRIES) |
1337 | 0 | max = POOLS_MAX_DUMPED_ENTRIES; |
1338 | 0 | if (nbpools >= max) |
1339 | 0 | chunk_appendf(&trash, " (limited to the first %u entries)", max); |
1340 | 0 | chunk_appendf(&trash, ". Use SIGQUIT to flush them.\n"); |
1341 | |
|
1342 | 0 | for (i = 0; i < nbpools && i < max; i++) { |
1343 | 0 | chunk_appendf(&trash, " - Pool %s (%u bytes/%u) : %lu allocated (%lu bytes), %lu used" |
1344 | 0 | " (~%lu by thread caches)" |
1345 | 0 | ", needed_avg %lu, %lu failures, %u users, @%p%s\n", |
1346 | 0 | pool_info[i].entry->name, pool_info[i].entry->size, pool_info[i].entry->align, |
1347 | 0 | pool_info[i].alloc_items, pool_info[i].alloc_bytes, |
1348 | 0 | pool_info[i].used_items, pool_info[i].cached_items, |
1349 | 0 | pool_info[i].need_avg, pool_info[i].failed_items, |
1350 | 0 | pool_info[i].entry->users, pool_info[i].entry, |
1351 | 0 | (pool_info[i].entry->flags & MEM_F_SHARED) ? " [SHARED]" : ""); |
1352 | |
|
1353 | 0 | cached_bytes += pool_info[i].cached_items * (ulong)pool_info[i].entry->size; |
1354 | 0 | allocated += pool_info[i].alloc_items * (ulong)pool_info[i].entry->size; |
1355 | 0 | used += pool_info[i].used_items * (ulong)pool_info[i].entry->size; |
1356 | |
|
1357 | 0 | if (detailed) { |
1358 | 0 | struct pool_registration *reg; |
1359 | 0 | list_for_each_entry(reg, &pool_info[i].entry->regs, list) { |
1360 | 0 | chunk_appendf(&trash, " > %-12s: size=%u flags=%#x align=%u", reg->name, reg->size, reg->flags, reg->align); |
1361 | 0 | if (reg->file && reg->line) |
1362 | 0 | chunk_appendf(&trash, " [%s:%u]", reg->file, reg->line); |
1363 | 0 | chunk_appendf(&trash, "\n"); |
1364 | 0 | } |
1365 | 0 | } |
1366 | 0 | } |
1367 | |
1368 | 0 | chunk_appendf(&trash, "Total: %d pools, %llu bytes allocated, %llu used" |
1369 | 0 | " (~%llu by thread caches)" |
1370 | 0 | ".\n", |
1371 | 0 | nbpools, allocated, used, cached_bytes |
1372 | 0 | ); |
1373 | 0 | } |
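
/* Editor's note: an illustrative, made-up example (not captured from a live
 * process) of one per-pool line produced by the chunk_appendf() format above,
 * shown here to make the output easier to recognize; it is emitted as a
 * single line:
 *
 *   - Pool buffer (32768 bytes/8) : 120 allocated (3932160 bytes), 90 used
 *     (~24 by thread caches), needed_avg 85, 0 failures, 1 users, @0x55d2... [SHARED]
 */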
1374 | | |
1375 | | /* Dump statistics on pools usage. */ |
1376 | | void dump_pools(void) |
1377 | 0 | { |
1378 | 0 | dump_pools_to_trash(0, 0, NULL); |
1379 | 0 | qfprintf(stderr, "%s", trash.area); |
1380 | 0 | } |
1381 | | |
1382 | | /* This function returns the total number of failed pool allocations */ |
1383 | | int pool_total_failures() |
1384 | 0 | { |
1385 | 0 | struct pool_head *entry; |
1386 | 0 | int failed = 0; |
1387 | |
1388 | 0 | list_for_each_entry(entry, &pools, list) |
1389 | 0 | failed += pool_failed(entry); |
1390 | 0 | return failed; |
1391 | 0 | } |
1392 | | |
1393 | | /* This function returns the total amount of memory allocated in pools (in bytes) */ |
1394 | | unsigned long long pool_total_allocated() |
1395 | 0 | { |
1396 | 0 | struct pool_head *entry; |
1397 | 0 | unsigned long long allocated = 0; |
1398 | |
1399 | 0 | list_for_each_entry(entry, &pools, list) |
1400 | 0 | allocated += pool_allocated(entry) * (ullong)entry->size; |
1401 | 0 | return allocated; |
1402 | 0 | } |
1403 | | |
1404 | | /* This function returns the total amount of memory used in pools (in bytes) */ |
1405 | | unsigned long long pool_total_used() |
1406 | 0 | { |
1407 | 0 | struct pool_head *entry; |
1408 | 0 | unsigned long long used = 0; |
1409 | |
1410 | 0 | list_for_each_entry(entry, &pools, list) |
1411 | 0 | used += pool_used(entry) * (ullong)entry->size; |
1412 | 0 | return used; |
1413 | 0 | } |
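
/* Editor's note: a minimal usage sketch, not part of the original source,
 * showing how the three totals above can be combined from a debugging hook.
 * It assumes <stdio.h> is available; the function name is illustrative only.
 */
static void report_pool_totals_example(void)
{
	fprintf(stderr, "pools: %llu bytes allocated, %llu bytes used, %d failed allocations\n",
	        pool_total_allocated(), pool_total_used(), pool_total_failures());
}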
1414 | | |
1415 | | /* This function parses a string made of a set of debugging features as |
1416 | | * specified after -dM on the command line, and will set pool_debugging |
1417 | | * accordingly. On success it returns a strictly positive value. It may return
1418 | | * zero with the first warning in <err>, -1 with a help message in <err>, or -2
1419 | | * with the first error in <err>. <err> is undefined
1420 | | * on success, and will be non-null and locally allocated on help/error/warning. |
1421 | | * The caller must free it. Warnings are used to report features that were not |
1422 | | * enabled at build time, and errors are used to report unknown features. |
1423 | | */ |
1424 | | int pool_parse_debugging(const char *str, char **err) |
1425 | 0 | { |
1426 | 0 | struct ist args; |
1427 | 0 | char *end; |
1428 | 0 | uint new_dbg; |
1429 | 0 | int v; |
1430 | | |
1431 | | |
1432 | | /* if it's empty or starts with a number, it's the mem poisoning byte */
1433 | 0 | v = strtol(str, &end, 0); |
1434 | 0 | if (!*end || *end == ',') { |
1435 | 0 | mem_poison_byte = *str ? v : 'P'; |
1436 | 0 | if (mem_poison_byte >= 0) |
1437 | 0 | pool_debugging |= POOL_DBG_POISON; |
1438 | 0 | else |
1439 | 0 | pool_debugging &= ~POOL_DBG_POISON; |
1440 | 0 | str = end; |
1441 | 0 | } |
1442 | |
1443 | 0 | new_dbg = pool_debugging; |
1444 | |
1445 | 0 | for (args = ist(str); istlen(args); args = istadv(istfind(args, ','), 1)) { |
1446 | 0 | struct ist feat = iststop(args, ','); |
1447 | |
1448 | 0 | if (!istlen(feat)) |
1449 | 0 | continue; |
1450 | | |
1451 | 0 | if (isteq(feat, ist("help"))) { |
1452 | 0 | ha_free(err); |
1453 | 0 | memprintf(err, |
1454 | 0 | "-dM alone enables memory poisoning with byte 0x50 on allocation. A numeric\n"
1455 | 0 | "value may be appended immediately after -dM to use another value (0 supported).\n" |
1456 | 0 | "Then an optional list of comma-delimited keywords may be appended to set or\n" |
1457 | 0 | "clear some debugging options ('*' marks the current setting):\n\n" |
1458 | 0 | " set clear description\n" |
1459 | 0 | " -----------------+-----------------+-----------------------------------------\n"); |
1460 | |
1461 | 0 | for (v = 0; dbg_options[v].flg; v++) { |
1462 | 0 | memprintf(err, "%s %c %-15s|%c %-15s| %s\n", |
1463 | 0 | *err, |
1464 | 0 | (pool_debugging & dbg_options[v].flg) ? '*' : ' ', |
1465 | 0 | dbg_options[v].set, |
1466 | 0 | (pool_debugging & dbg_options[v].flg) ? ' ' : '*', |
1467 | 0 | dbg_options[v].clr, |
1468 | 0 | dbg_options[v].hlp); |
1469 | 0 | } |
1470 | |
1471 | 0 | memprintf(err, |
1472 | 0 | "%s -----------------+-----------------+-----------------------------------------\n" |
1473 | 0 | "Examples:\n" |
1474 | 0 | " Disable merging and enable poisoning with byte 'P': -dM0x50,no-merge\n"
1475 | 0 | " Randomly fail allocations: -dMfail\n" |
1476 | 0 | " Detect out-of-bound corruptions: -dMno-merge,tag\n" |
1477 | 0 | " Detect post-free cache corruptions: -dMno-merge,cold-first,integrity,caller\n" |
1478 | 0 | " Detect all cache corruptions: -dMno-merge,cold-first,integrity,tag,caller\n" |
1479 | 0 | " Detect UAF (disables cache, very slow): -dMuaf\n" |
1480 | 0 | " Detect post-cache UAF: -dMuaf,cache,no-merge,cold-first,integrity,tag,caller\n",
1482 | 0 | *err); |
1483 | 0 | return -1; |
1484 | 0 | } |
1485 | | |
1486 | 0 | for (v = 0; dbg_options[v].flg; v++) { |
1487 | 0 | if (isteq(feat, ist(dbg_options[v].set))) { |
1488 | 0 | new_dbg |= dbg_options[v].flg; |
1489 | | /* UAF implicitly disables caching, but it's |
1490 | | * still possible to forcefully re-enable it. |
1491 | | */ |
1492 | 0 | if (dbg_options[v].flg == POOL_DBG_UAF) |
1493 | 0 | new_dbg |= POOL_DBG_NO_CACHE; |
1494 | | /* fail should preset the tune.fail-alloc ratio to 1% */ |
1495 | 0 | if (dbg_options[v].flg == POOL_DBG_FAIL_ALLOC) |
1496 | 0 | mem_fail_rate = 1; |
1497 | 0 | break; |
1498 | 0 | } |
1499 | 0 | else if (isteq(feat, ist(dbg_options[v].clr))) { |
1500 | 0 | new_dbg &= ~dbg_options[v].flg; |
1501 | | /* no-fail should reset the tune.fail-alloc ratio */ |
1502 | 0 | if (dbg_options[v].flg == POOL_DBG_FAIL_ALLOC) |
1503 | 0 | mem_fail_rate = 0; |
1504 | 0 | break; |
1505 | 0 | } |
1506 | 0 | } |
1507 | |
1508 | 0 | if (!dbg_options[v].flg) { |
1509 | 0 | memprintf(err, "unknown pool debugging feature <%.*s>", (int)istlen(feat), istptr(feat)); |
1510 | 0 | return -2; |
1511 | 0 | } |
1512 | 0 | } |
1513 | | |
1514 | 0 | pool_debugging = new_dbg; |
1515 | 0 | return 1; |
1516 | 0 | } |
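
/* Editor's note: a hedged usage sketch, not part of the original source,
 * showing how a caller can honour the return contract documented above:
 * >0 success, 0 warning, -1 help text, -2 unknown feature, with <err>
 * allocated in every non-success case and left for the caller to free.
 * The call site and function name are illustrative only.
 */
static int apply_dM_argument_example(const char *arg)
{
	char *err = NULL;
	int ret = pool_parse_debugging(arg, &err);

	if (ret > 0)
		return 0;                       /* applied; <err> is undefined here */

	if (ret == 0)
		ha_warning("-dM: %s\n", err);   /* feature not enabled at build time */
	else if (ret == -1)
		printf("%s\n", err);            /* help text requested */
	else
		ha_alert("-dM: %s\n", err);     /* unknown feature */

	ha_free(&err);
	return ret < 0 ? -1 : 0;
}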
1517 | | |
1518 | | /* parse a "show pools" command. It returns 1 on failure, 0 if it starts to dump. */ |
1519 | | static int cli_parse_show_pools(char **args, char *payload, struct appctx *appctx, void *private) |
1520 | 0 | { |
1521 | 0 | struct show_pools_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); |
1522 | 0 | int arg; |
1523 | |
1524 | 0 | for (arg = 2; *args[arg]; arg++) { |
1525 | 0 | if (strcmp(args[arg], "byname") == 0) { |
1526 | 0 | ctx->how = (ctx->how & ~0xF) | 1; // sort output by name |
1527 | 0 | } |
1528 | 0 | else if (strcmp(args[arg], "bysize") == 0) { |
1529 | 0 | ctx->how = (ctx->how & ~0xF) | 2; // sort output by item size |
1530 | 0 | } |
1531 | 0 | else if (strcmp(args[arg], "byusage") == 0) { |
1532 | 0 | ctx->how = (ctx->how & ~0xF) | 3; // sort output by total allocated size |
1533 | 0 | } |
1534 | 0 | else if (strcmp(args[arg], "detailed") == 0) { |
1535 | 0 | ctx->how |= 0x10; // print detailed registrations |
1536 | 0 | } |
1537 | 0 | else if (strcmp(args[arg], "match") == 0 && *args[arg+1]) { |
1538 | 0 | ctx->prefix = strdup(args[arg+1]); // only pools starting with this |
1539 | 0 | if (!ctx->prefix) |
1540 | 0 | return cli_err(appctx, "Out of memory.\n"); |
1541 | 0 | arg++; |
1542 | 0 | } |
1543 | 0 | else if (isdigit((unsigned char)*args[arg])) { |
1544 | 0 | ctx->maxcnt = atoi(args[arg]); // number of entries to dump |
1545 | 0 | } |
1546 | 0 | else |
1547 | 0 | return cli_err(appctx, "Expects either 'byname', 'bysize', 'byusage', 'match <pfx>', 'detailed', or a max number of output lines.\n"); |
1548 | 0 | } |
1549 | 0 | return 0; |
1550 | 0 | } |
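
/* Editor's note: illustrative invocations accepted by the parser above, issued
 * over the stats socket; the socket path and the "h1" pool prefix are examples
 * only:
 *
 *   $ echo "show pools byusage 20" | socat stdio /var/run/haproxy.sock
 *   $ echo "show pools byname match h1" | socat stdio /var/run/haproxy.sock
 *   $ echo "show pools detailed bysize 10" | socat stdio /var/run/haproxy.sock
 */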
1551 | | |
1552 | | /* release the "show pools" context */ |
1553 | | static void cli_release_show_pools(struct appctx *appctx) |
1554 | 0 | { |
1555 | 0 | struct show_pools_ctx *ctx = appctx->svcctx; |
1556 | |
1557 | 0 | ha_free(&ctx->prefix); |
1558 | 0 | } |
1559 | | |
1560 | | /* This function dumps memory usage information onto the stream connector's |
1561 | | * read buffer. It returns 0 as long as it does not complete, non-zero upon |
1562 | | * completion. No state is used. |
1563 | | */ |
1564 | | static int cli_io_handler_dump_pools(struct appctx *appctx) |
1565 | 0 | { |
1566 | 0 | struct show_pools_ctx *ctx = appctx->svcctx; |
1567 | |
1568 | 0 | dump_pools_to_trash(ctx->how, ctx->maxcnt, ctx->prefix); |
1569 | 0 | if (applet_putchk(appctx, &trash) == -1) |
1570 | 0 | return 0; |
1571 | 0 | return 1; |
1572 | 0 | } |
1573 | | |
1574 | | /* callback used to create early pool <name> from registration <reg> and store
1575 | | * the resulting pointer into <ptr>. If the allocation fails, it exits after
1576 | | * emitting an error message.
1577 | | */ |
1578 | | void create_pool_callback(struct pool_head **ptr, char *name, struct pool_registration *reg) |
1579 | 0 | { |
1580 | 0 | *ptr = create_pool_from_reg(name, reg); |
1581 | 0 | if (!*ptr) { |
1582 | 0 | ha_alert("Failed to allocate pool '%s' of size %u : %s. Aborting.\n", |
1583 | 0 | name, reg->size, strerror(errno)); |
1584 | 0 | exit(1); |
1585 | 0 | } |
1586 | 0 | } |
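
/* Editor's note: this callback is normally reached through the pool
 * registration macros rather than called directly. A typical declaration from
 * another compilation unit might look like the (hypothetical) line below,
 * where "example" and struct example are placeholders:
 *
 *   DECLARE_POOL(pool_head_example, "example", sizeof(struct example));
 *
 * which registers an initcall that ends up invoking create_pool_callback()
 * with the matching pool_registration at startup.
 */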
1587 | | |
1588 | | /* Initializes all per-thread arrays on startup */ |
1589 | | static void init_pools() |
1590 | 0 | { |
1591 | 0 | int thr; |
1592 | |
1593 | 0 | for (thr = 0; thr < MAX_THREADS; thr++) { |
1594 | 0 | LIST_INIT(&ha_thread_ctx[thr].pool_lru_head); |
1595 | 0 | } |
1596 | |
1597 | 0 | detect_allocator(); |
1598 | 0 | } |
1599 | | |
1600 | | INITCALL0(STG_PREPARE, init_pools); |
1601 | | |
1602 | | /* Report in the build options when running with a replaced memory allocator */
1603 | | static void pools_register_build_options(void) |
1604 | 0 | { |
1605 | 0 | if (!using_default_allocator) { |
1606 | 0 | char *ptr = NULL; |
1607 | 0 | memprintf(&ptr, "Running with a replaced memory allocator (e.g. via LD_PRELOAD)."); |
1608 | 0 | hap_register_build_opts(ptr, 1); |
1609 | 0 | mark_tainted(TAINTED_REPLACED_MEM_ALLOCATOR); |
1610 | 0 | } |
1611 | 0 | } |
1612 | | INITCALL0(STG_REGISTER, pools_register_build_options); |
1613 | | |
1614 | | /* register cli keywords */ |
1615 | | static struct cli_kw_list cli_kws = {{ },{ |
1616 | | { { "show", "pools", NULL }, "show pools [by*] [match <pfx>] [nb] : report information about the memory pools usage", cli_parse_show_pools, cli_io_handler_dump_pools, cli_release_show_pools }, |
1617 | | {{},} |
1618 | | }}; |
1619 | | |
1620 | | INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); |
1621 | | |
1622 | | |
1623 | | /* config parser for global "tune.fail-alloc" */ |
1624 | | static int mem_parse_global_fail_alloc(char **args, int section_type, struct proxy *curpx, |
1625 | | const struct proxy *defpx, const char *file, int line, |
1626 | | char **err) |
1627 | 0 | { |
1628 | 0 | if (too_many_args(1, args, err, NULL)) |
1629 | 0 | return -1; |
1630 | 0 | mem_fail_rate = atoi(args[1]); |
1631 | 0 | if (mem_fail_rate < 0 || mem_fail_rate > 100) { |
1632 | 0 | memprintf(err, "'%s' expects a numeric value between 0 and 100.", args[0]); |
1633 | 0 | return -1; |
1634 | 0 | } |
1635 | 0 | return 0; |
1636 | 0 | } |
1637 | | |
1638 | | /* config parser for global "tune.memory.hot-size" */ |
1639 | | static int mem_parse_global_hot_size(char **args, int section_type, struct proxy *curpx, |
1640 | | const struct proxy *defpx, const char *file, int line, |
1641 | | char **err) |
1642 | 0 | { |
1643 | 0 | long size; |
1644 | |
1645 | 0 | if (too_many_args(1, args, err, NULL)) |
1646 | 0 | return -1; |
1647 | | |
1648 | 0 | size = atol(args[1]); |
1649 | 0 | if (size <= 0) { |
1650 | 0 | memprintf(err, "'%s' expects a strictly positive value.", args[0]); |
1651 | 0 | return -1; |
1652 | 0 | } |
1653 | | |
1654 | 0 | global.tune.pool_cache_size = size; |
1655 | 0 | return 0; |
1656 | 0 | } |
1657 | | |
1658 | | /* config parser for global "no-memory-trimming" */ |
1659 | | static int mem_parse_global_no_mem_trim(char **args, int section_type, struct proxy *curpx, |
1660 | | const struct proxy *defpx, const char *file, int line, |
1661 | | char **err) |
1662 | 0 | { |
1663 | 0 | if (too_many_args(0, args, err, NULL)) |
1664 | 0 | return -1; |
1665 | 0 | disable_trim = 1; |
1666 | 0 | return 0; |
1667 | 0 | } |
1668 | | |
1669 | | /* register global config keywords */ |
1670 | | static struct cfg_kw_list mem_cfg_kws = {ILH, { |
1671 | | { CFG_GLOBAL, "tune.fail-alloc", mem_parse_global_fail_alloc }, |
1672 | | { CFG_GLOBAL, "tune.memory.hot-size", mem_parse_global_hot_size }, |
1673 | | { CFG_GLOBAL, "no-memory-trimming", mem_parse_global_no_mem_trim }, |
1674 | | { 0, NULL, NULL } |
1675 | | }}; |
1676 | | |
1677 | | INITCALL1(STG_REGISTER, cfg_register_keywords, &mem_cfg_kws); |
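
/* Editor's note: an illustrative "global" section fragment exercising the
 * three keywords registered above; the values are arbitrary examples:
 *
 *   global
 *       tune.fail-alloc 10            # fail ~10% of pool allocations (debugging aid)
 *       tune.memory.hot-size 524288   # per-thread hot cache size, in bytes
 *       no-memory-trimming            # never ask the allocator to trim its memory
 */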
1678 | | |
1679 | | /* |
1680 | | * Local variables: |
1681 | | * c-indent-level: 8 |
1682 | | * c-basic-offset: 8 |
1683 | | * End: |
1684 | | */ |