/src/haproxy/src/activity.c
Line | Count | Source |
1 | | /* |
2 | | * activity measurement functions. |
3 | | * |
4 | | * Copyright 2000-2018 Willy Tarreau <w@1wt.eu> |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU General Public License |
8 | | * as published by the Free Software Foundation; either version |
9 | | * 2 of the License, or (at your option) any later version. |
10 | | * |
11 | | */ |
12 | | |
13 | | #include <errno.h> |
14 | | #include <haproxy/activity-t.h> |
15 | | #include <haproxy/api.h> |
16 | | #include <haproxy/applet.h> |
17 | | #include <haproxy/cfgparse.h> |
18 | | #include <haproxy/clock.h> |
19 | | #include <haproxy/channel.h> |
20 | | #include <haproxy/cli.h> |
21 | | #include <haproxy/freq_ctr.h> |
22 | | #include <haproxy/listener.h> |
23 | | #include <haproxy/sc_strm.h> |
24 | | #include <haproxy/stconn.h> |
25 | | #include <haproxy/tools.h> |
26 | | |
27 | | /* CLI context for the "show profiling" command */ |
28 | | struct show_prof_ctx { |
29 | | int dump_step; /* 0,1,2,4,5,6; see cli_iohandler_show_profiling() */ |
30 | | int linenum; /* next line to be dumped (starts at 0) */ |
31 | | int maxcnt; /* max line count per step (0=not set) */ |
32 | | int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time, 3=sort by ctx */ |
33 | | int aggr; /* 0=dump raw, 1=aggregate on callee */ |
34 | | /* 4-byte hole here */ |
35 | | struct sched_activity *tmp_activity; /* dynamically allocated during dumps */ |
36 | | struct memprof_stats *tmp_memstats; /* dynamically allocated during dumps */ |
37 | | }; |
38 | | |
39 | | /* CLI context for the "show activity" command */ |
40 | | struct show_activity_ctx { |
41 | | int thr; /* thread ID to show or -1 for all */ |
42 | | int line; /* line number being dumped */ |
43 | | int col; /* column being dumped, 0 to nbt+1 */ |
44 | | }; |
45 | | |
46 | | #if defined(DEBUG_MEM_STATS) |
47 | | /* these ones are macros in bug.h when DEBUG_MEM_STATS is set, and will |
48 | | * prevent the new ones from being redefined. |
49 | | */ |
50 | | #undef calloc |
51 | | #undef malloc |
52 | | #undef realloc |
53 | | #undef strdup |
54 | | #endif |
55 | | |
56 | | /* bit field of profiling options. Beware, may be modified at runtime! */ |
57 | | unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF; |
58 | | |
59 | | /* start/stop dates of profiling */ |
60 | | uint64_t prof_task_start_ns = 0; |
61 | | uint64_t prof_task_stop_ns = 0; |
62 | | uint64_t prof_mem_start_ns = 0; |
63 | | uint64_t prof_mem_stop_ns = 0; |
64 | | |
65 | | /* One struct per thread containing all collected measurements */ |
66 | | struct activity activity[MAX_THREADS] = { }; |
67 | | |
68 | | /* One struct per function pointer hash entry (SCHED_ACT_HASH_BUCKETS values, 0=collision) */ |
69 | | struct sched_activity sched_activity[SCHED_ACT_HASH_BUCKETS] = { }; |
70 | | |
71 | | |
72 | | #ifdef USE_MEMORY_PROFILING |
73 | | |
74 | | static const char *const memprof_methods[MEMPROF_METH_METHODS] = { |
75 | | "unknown", "malloc", "calloc", "realloc", "strdup", "free", "p_alloc", "p_free", |
76 | | "strndup", "valloc", "aligned_valloc", "posix_memalign", "memalign", "pvalloc", |
77 | | }; |
78 | | |
79 | | /* last one is for hash collisions ("others") and has no caller address */ |
80 | | struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1] = { }; |
81 | | |
82 | | /* used to detect recursive calls */ |
83 | | static THREAD_LOCAL int in_memprof = 0; |
84 | | |
85 | | /* These ones are used by glibc and will be called early. They are in charge of |
86 | | * initializing the handlers with the original functions. |
87 | | */ |
88 | | static void *memprof_malloc_initial_handler(size_t size); |
89 | | static void *memprof_calloc_initial_handler(size_t nmemb, size_t size); |
90 | | static void *memprof_realloc_initial_handler(void *ptr, size_t size); |
91 | | static char *memprof_strdup_initial_handler(const char *s); |
92 | | static void memprof_free_initial_handler(void *ptr); |
93 | | |
94 | | /* these ones are optional but may be used by some dependencies */ |
95 | | static char *memprof_strndup_initial_handler(const char *s, size_t n); |
96 | | static void *memprof_valloc_initial_handler(size_t sz); |
97 | | static void *memprof_pvalloc_initial_handler(size_t sz); |
98 | | static void *memprof_memalign_initial_handler(size_t al, size_t sz); |
99 | | static void *memprof_aligned_alloc_initial_handler(size_t al, size_t sz); |
100 | | static int memprof_posix_memalign_initial_handler(void **ptr, size_t al, size_t sz); |
101 | | |
102 | | /* Fallback handlers for the main alloc/free functions. They are preset to |
103 | | * the initializer in order to save a test in the function's critical path. |
104 | | */ |
105 | | static void *(*memprof_malloc_handler)(size_t size) = memprof_malloc_initial_handler; |
106 | | static void *(*memprof_calloc_handler)(size_t nmemb, size_t size) = memprof_calloc_initial_handler; |
107 | | static void *(*memprof_realloc_handler)(void *ptr, size_t size) = memprof_realloc_initial_handler; |
108 | | static char *(*memprof_strdup_handler)(const char *s) = memprof_strdup_initial_handler; |
109 | | static void (*memprof_free_handler)(void *ptr) = memprof_free_initial_handler; |
110 | | |
111 | | /* these ones are optional but may be used by some dependencies */ |
112 | | static char *(*memprof_strndup_handler)(const char *s, size_t n) = memprof_strndup_initial_handler; |
113 | | static void *(*memprof_valloc_handler)(size_t sz) = memprof_valloc_initial_handler; |
114 | | static void *(*memprof_pvalloc_handler)(size_t sz) = memprof_pvalloc_initial_handler; |
115 | | static void *(*memprof_memalign_handler)(size_t al, size_t sz) = memprof_memalign_initial_handler; |
116 | | static void *(*memprof_aligned_alloc_handler)(size_t al, size_t sz) = memprof_aligned_alloc_initial_handler; |
117 | | static int (*memprof_posix_memalign_handler)(void **ptr, size_t al, size_t sz) = memprof_posix_memalign_initial_handler; |
118 | | |
119 | | /* Used to force to die if it's not possible to retrieve the allocation |
120 | | * functions. We cannot even use stdio in this case. |
121 | | */ |
122 | | static __attribute__((noreturn)) void memprof_die(const char *msg) |
123 | | { |
124 | | DISGUISE(write(2, msg, strlen(msg))); |
125 | | exit(1); |
126 | | } |
127 | | |
128 | | /* Resolve original allocation functions and initialize all handlers. |
129 | | * This must be called very early at boot, before the very first malloc() |
130 | | * call, and is not thread-safe! It's not even possible to use stdio there. |
131 | | * Worse, we have to account for the risk of reentrance from dlsym() when |
132 | | * it tries to prepare its error messages. Here it's handled by in_memprof |
133 | | * that makes allocators return NULL. dlsym() handles it gracefully. An |
134 | | * alternate approach consists in calling aligned_alloc() from these places |
135 | | * but that would mean not being able to intercept it later if considered |
136 | | * useful to do so. |
137 | | */ |
138 | | static void memprof_init() |
139 | | { |
140 | | in_memprof++; |
141 | | memprof_malloc_handler = get_sym_next_addr("malloc"); |
142 | | if (!memprof_malloc_handler) |
143 | | memprof_die("FATAL: malloc() function not found.\n"); |
144 | | |
145 | | memprof_calloc_handler = get_sym_next_addr("calloc"); |
146 | | if (!memprof_calloc_handler) |
147 | | memprof_die("FATAL: calloc() function not found.\n"); |
148 | | |
149 | | memprof_realloc_handler = get_sym_next_addr("realloc"); |
150 | | if (!memprof_realloc_handler) |
151 | | memprof_die("FATAL: realloc() function not found.\n"); |
152 | | |
153 | | memprof_strdup_handler = get_sym_next_addr("strdup"); |
154 | | if (!memprof_strdup_handler) |
155 | | memprof_die("FATAL: strdup() function not found.\n"); |
156 | | |
157 | | memprof_free_handler = get_sym_next_addr("free"); |
158 | | if (!memprof_free_handler) |
159 | | memprof_die("FATAL: free() function not found.\n"); |
160 | | |
161 | | /* these ones are not always implemented, rarely used and may not exist |
162 | | * so we don't fail on them. |
163 | | */ |
164 | | memprof_strndup_handler = get_sym_next_addr("strndup"); |
165 | | memprof_valloc_handler = get_sym_next_addr("valloc"); |
166 | | memprof_pvalloc_handler = get_sym_next_addr("pvalloc"); |
167 | | memprof_memalign_handler = get_sym_next_addr("memalign"); |
168 | | memprof_aligned_alloc_handler = get_sym_next_addr("aligned_alloc"); |
169 | | memprof_posix_memalign_handler = get_sym_next_addr("posix_memalign"); |
170 | | |
171 | | in_memprof--; |
172 | | } |
173 | | |
174 | | /* the initial handlers will initialize all regular handlers and will call the |
175 | | * one they correspond to. A single one of these functions will typically be |
176 | | * called, though it's unknown which one (as any might be called before main). |
177 | | */ |
178 | | static void *memprof_malloc_initial_handler(size_t size) |
179 | | { |
180 | | if (in_memprof) { |
181 | | /* it's likely that dlsym() needs malloc(), let's fail */ |
182 | | return NULL; |
183 | | } |
184 | | |
185 | | memprof_init(); |
186 | | return memprof_malloc_handler(size); |
187 | | } |
188 | | |
189 | | static void *memprof_calloc_initial_handler(size_t nmemb, size_t size) |
190 | | { |
191 | | if (in_memprof) { |
192 | | /* it's likely that dlsym() needs calloc(), let's fail */ |
193 | | return NULL; |
194 | | } |
195 | | memprof_init(); |
196 | | return memprof_calloc_handler(nmemb, size); |
197 | | } |
198 | | |
199 | | static void *memprof_realloc_initial_handler(void *ptr, size_t size) |
200 | | { |
201 | | if (in_memprof) { |
202 | | /* it's likely that dlsym() needs realloc(), let's fail */ |
203 | | return NULL; |
204 | | } |
205 | | |
206 | | memprof_init(); |
207 | | return memprof_realloc_handler(ptr, size); |
208 | | } |
209 | | |
210 | | static char *memprof_strdup_initial_handler(const char *s) |
211 | | { |
212 | | if (in_memprof) { |
213 | | /* probably that dlsym() needs strdup(), let's fail */ |
214 | | return NULL; |
215 | | } |
216 | | |
217 | | memprof_init(); |
218 | | return memprof_strdup_handler(s); |
219 | | } |
220 | | |
221 | | static void memprof_free_initial_handler(void *ptr) |
222 | | { |
223 | | memprof_init(); |
224 | | memprof_free_handler(ptr); |
225 | | } |
226 | | |
227 | | /* optional handlers */ |
228 | | |
229 | | static char *memprof_strndup_initial_handler(const char *s, size_t n) |
230 | | { |
231 | | if (in_memprof) { |
232 | | /* probably that dlsym() needs strndup(), let's fail */ |
233 | | return NULL; |
234 | | } |
235 | | |
236 | | memprof_init(); |
237 | | return memprof_strndup_handler(s, n); |
238 | | } |
239 | | |
240 | | static void *memprof_valloc_initial_handler(size_t sz) |
241 | | { |
242 | | if (in_memprof) { |
243 | | /* probably that dlsym() needs valloc(), let's fail */ |
244 | | return NULL; |
245 | | } |
246 | | |
247 | | memprof_init(); |
248 | | return memprof_valloc_handler(sz); |
249 | | } |
250 | | |
251 | | static void *memprof_pvalloc_initial_handler(size_t sz) |
252 | | { |
253 | | if (in_memprof) { |
254 | | /* probably that dlsym() needs pvalloc(), let's fail */ |
255 | | return NULL; |
256 | | } |
257 | | |
258 | | memprof_init(); |
259 | | return memprof_pvalloc_handler(sz); |
260 | | } |
261 | | |
262 | | static void *memprof_memalign_initial_handler(size_t al, size_t sz) |
263 | | { |
264 | | if (in_memprof) { |
265 | | /* probably that dlsym() needs memalign(), let's fail */ |
266 | | return NULL; |
267 | | } |
268 | | |
269 | | memprof_init(); |
270 | | return memprof_memalign_handler(al, sz); |
271 | | } |
272 | | |
273 | | static void *memprof_aligned_alloc_initial_handler(size_t al, size_t sz) |
274 | | { |
275 | | if (in_memprof) { |
276 | | /* probably that dlsym() needs aligned_alloc(), let's fail */ |
277 | | return NULL; |
278 | | } |
279 | | |
280 | | memprof_init(); |
281 | | return memprof_aligned_alloc_handler(al, sz); |
282 | | } |
283 | | |
284 | | static int memprof_posix_memalign_initial_handler(void **ptr, size_t al, size_t sz) |
285 | | { |
286 | | if (in_memprof) { |
287 | | /* probably that dlsym() needs posix_memalign(), let's fail */ |
288 | | return ENOMEM; |
289 | | } |
290 | | |
291 | | memprof_init(); |
292 | | return memprof_posix_memalign_handler(ptr, al, sz); |
293 | | } |
294 | | |
295 | | /* Assign a bin for the memprof_stats to the return address. May perform a few |
296 | | * attempts before finding the right one, but always succeeds (in the worst |
297 | | * case, returns a default bin). The caller address is atomically set except |
298 | | * for the default one which is never set. |
299 | | */ |
300 | | struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth) |
301 | | { |
302 | | int retries = 16; // up to 16 consecutive entries may be tested. |
303 | | const void *old; |
304 | | unsigned int bin; |
305 | | ullong hash; |
306 | | |
307 | | if (unlikely(!ra)) { |
308 | | bin = MEMPROF_HASH_BUCKETS; |
309 | | goto leave; |
310 | | } |
311 | | hash = _ptr2_hash_arg(ra, th_ctx->exec_ctx.pointer, th_ctx->exec_ctx.type); |
312 | | for (bin = _ptr_hash_reduce(hash, MEMPROF_HASH_BITS); |
313 | | memprof_stats[bin].caller != ra || |
314 | | memprof_stats[bin].exec_ctx.type != th_ctx->exec_ctx.type || |
315 | | memprof_stats[bin].exec_ctx.pointer != th_ctx->exec_ctx.pointer; |
316 | | bin = (bin + (hash | 1)) & (MEMPROF_HASH_BUCKETS - 1)) { |
317 | | if (!--retries) { |
318 | | bin = MEMPROF_HASH_BUCKETS; |
319 | | break; |
320 | | } |
321 | | |
322 | | old = NULL; |
323 | | if (!memprof_stats[bin].caller && |
324 | | HA_ATOMIC_CAS(&memprof_stats[bin].caller, &old, ra)) { |
325 | | memprof_stats[bin].exec_ctx = th_ctx->exec_ctx; |
326 | | memprof_stats[bin].method = meth; |
327 | | break; |
328 | | } |
329 | | } |
330 | | leave: |
331 | | return &memprof_stats[bin]; |
332 | | } |
333 | | |
334 | | /* This is the new global malloc() function. It must optimize for the normal |
335 | | * case (i.e. profiling disabled) hence the first test to permit a direct jump. |
336 | | * It must remain simple to guarantee the lack of reentrance. stdio is not |
337 | | * possible there even for debugging. The reported size is the really allocated |
338 | | * one as returned by malloc_usable_size(), because this will allow it to be |
339 | | * compared to the one before realloc() or free(). This is a GNU and jemalloc |
340 | | * extension but other systems may also store this size in ptr[-1]. |
341 | | */ |
342 | | void *malloc(size_t size) |
343 | | { |
344 | | struct memprof_stats *bin; |
345 | | void *ret; |
346 | | |
347 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
348 | | return memprof_malloc_handler(size); |
349 | | |
350 | | ret = memprof_malloc_handler(size); |
351 | | size = malloc_usable_size(ret) + sizeof(void *); |
352 | | |
353 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_MALLOC); |
354 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
355 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
356 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
357 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
358 | | return ret; |
359 | | } |
360 | | |
361 | | /* This is the new global calloc() function. It must optimize for the normal |
362 | | * case (i.e. profiling disabled) hence the first test to permit a direct jump. |
363 | | * It must remain simple to guarantee the lack of reentrance. stdio is not |
364 | | * possible there even for debugging. The reported size is the really allocated |
365 | | * one as returned by malloc_usable_size(), because this will allow it to be |
366 | | * compared to the one before realloc() or free(). This is a GNU and jemalloc |
367 | | * extension but other systems may also store this size in ptr[-1]. |
368 | | */ |
369 | | void *calloc(size_t nmemb, size_t size) |
370 | | { |
371 | | struct memprof_stats *bin; |
372 | | void *ret; |
373 | | |
374 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
375 | | return memprof_calloc_handler(nmemb, size); |
376 | | |
377 | | ret = memprof_calloc_handler(nmemb, size); |
378 | | size = malloc_usable_size(ret) + sizeof(void *); |
379 | | |
380 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_CALLOC); |
381 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
382 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
383 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
384 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
385 | | return ret; |
386 | | } |
387 | | |
388 | | /* This is the new global realloc() function. It must optimize for the normal |
389 | | * case (i.e. profiling disabled) hence the first test to permit a direct jump. |
390 | | * It must remain simple to guarantee the lack of reentrance. stdio is not |
391 | | * possible there even for debugging. The reported size is the really allocated |
392 | | * one as returned by malloc_usable_size(), because this will allow it to be |
393 | | * compared to the one before realloc() or free(). This is a GNU and jemalloc |
394 | | * extension but other systems may also store this size in ptr[-1]. |
395 | | * Depending on the old vs new size, it's considered as an allocation or a free |
396 | | * (or neither if the size remains the same). |
397 | | */ |
398 | | void *realloc(void *ptr, size_t size) |
399 | | { |
400 | | struct memprof_stats *bin; |
401 | | size_t size_before; |
402 | | void *ret; |
403 | | |
404 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
405 | | return memprof_realloc_handler(ptr, size); |
406 | | |
407 | | size_before = malloc_usable_size(ptr); |
408 | | ret = memprof_realloc_handler(ptr, size); |
409 | | size = malloc_usable_size(ret); |
410 | | |
411 | | /* only count the extra link for new allocations */ |
412 | | if (!ptr) |
413 | | size += sizeof(void *); |
414 | | |
415 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_REALLOC); |
416 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
417 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
418 | | if (size > size_before) { |
419 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
420 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size - size_before); |
421 | | } else if (size < size_before) { |
422 | | _HA_ATOMIC_ADD(&bin->free_calls, 1); |
423 | | _HA_ATOMIC_ADD(&bin->free_tot, size_before - size); |
424 | | } |
425 | | return ret; |
426 | | } |
427 | | |
428 | | /* This is the new global strdup() function. It must optimize for the normal |
429 | | * case (i.e. profiling disabled) hence the first test to permit a direct jump. |
430 | | * It must remain simple to guarantee the lack of reentrance. stdio is not |
431 | | * possible there even for debugging. The reported size is the really allocated |
432 | | * one as returned by malloc_usable_size(), because this will allow it to be |
433 | | * compared to the one before realloc() or free(). This is a GNU and jemalloc |
434 | | * extension but other systems may also store this size in ptr[-1]. |
435 | | */ |
436 | | char *strdup(const char *s) |
437 | | { |
438 | | struct memprof_stats *bin; |
439 | | size_t size; |
440 | | char *ret; |
441 | | |
442 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
443 | | return memprof_strdup_handler(s); |
444 | | |
445 | | ret = memprof_strdup_handler(s); |
446 | | size = malloc_usable_size(ret) + sizeof(void *); |
447 | | |
448 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_STRDUP); |
449 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
450 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
451 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
452 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
453 | | return ret; |
454 | | } |
455 | | |
456 | | /* This is the new global free() function. It must optimize for the normal |
457 | | * case (i.e. profiling disabled) hence the first test to permit a direct jump. |
458 | | * It must remain simple to guarantee the lack of reentrance. stdio is not |
459 | | * possible there even for debugging. The reported size is the really allocated |
460 | | * one as returned by malloc_usable_size(), because this will allow it to be |
461 | | * compared to the one before realloc() or free(). This is a GNU and jemalloc |
462 | | * extension but other systems may also store this size in ptr[-1]. Since |
463 | | * free() is often called on NULL pointers to collect garbage at the end of |
464 | | * many functions or during config parsing, as a special case free(NULL) |
465 | | * doesn't update any stats. |
466 | | */ |
467 | | void free(void *ptr) |
468 | | { |
469 | | struct memprof_stats *bin; |
470 | | size_t size_before; |
471 | | |
472 | | if (likely(!(profiling & HA_PROF_MEMORY) || !ptr)) { |
473 | | memprof_free_handler(ptr); |
474 | | return; |
475 | | } |
476 | | |
477 | | size_before = malloc_usable_size(ptr) + sizeof(void *); |
478 | | memprof_free_handler(ptr); |
479 | | |
480 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_FREE); |
481 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
482 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
483 | | _HA_ATOMIC_ADD(&bin->free_calls, 1); |
484 | | _HA_ATOMIC_ADD(&bin->free_tot, size_before); |
485 | | } |
486 | | |
487 | | /* optional handlers below, essentially to monitor libs activities */ |
488 | | |
489 | | char *strndup(const char *s, size_t size) |
490 | | { |
491 | | struct memprof_stats *bin; |
492 | | char *ret; |
493 | | |
494 | | if (!memprof_strndup_handler) |
495 | | return NULL; |
496 | | |
497 | | ret = memprof_strndup_handler(s, size); |
498 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
499 | | return ret; |
500 | | |
501 | | size = malloc_usable_size(ret) + sizeof(void *); |
502 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_STRNDUP); |
503 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
504 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
505 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
506 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
507 | | return ret; |
508 | | } |
509 | | |
510 | | void *valloc(size_t size) |
511 | | { |
512 | | struct memprof_stats *bin; |
513 | | void *ret; |
514 | | |
515 | | if (!memprof_valloc_handler) |
516 | | return NULL; |
517 | | |
518 | | ret = memprof_valloc_handler(size); |
519 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
520 | | return ret; |
521 | | |
522 | | size = malloc_usable_size(ret) + sizeof(void *); |
523 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_VALLOC); |
524 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
525 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
526 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
527 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
528 | | return ret; |
529 | | } |
530 | | |
531 | | void *pvalloc(size_t size) |
532 | | { |
533 | | struct memprof_stats *bin; |
534 | | void *ret; |
535 | | |
536 | | if (!memprof_pvalloc_handler) |
537 | | return NULL; |
538 | | |
539 | | ret = memprof_pvalloc_handler(size); |
540 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
541 | | return ret; |
542 | | |
543 | | size = malloc_usable_size(ret) + sizeof(void *); |
544 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_PVALLOC); |
545 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
546 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
547 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
548 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
549 | | return ret; |
550 | | } |
551 | | |
552 | | void *memalign(size_t align, size_t size) |
553 | | { |
554 | | struct memprof_stats *bin; |
555 | | void *ret; |
556 | | |
557 | | if (!memprof_memalign_handler) |
558 | | return NULL; |
559 | | |
560 | | ret = memprof_memalign_handler(align, size); |
561 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
562 | | return ret; |
563 | | |
564 | | size = malloc_usable_size(ret) + sizeof(void *); |
565 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_MEMALIGN); |
566 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
567 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
568 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
569 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
570 | | return ret; |
571 | | } |
572 | | |
573 | | void *aligned_alloc(size_t align, size_t size) |
574 | | { |
575 | | struct memprof_stats *bin; |
576 | | void *ret; |
577 | | |
578 | | if (!memprof_aligned_alloc_handler) |
579 | | return NULL; |
580 | | |
581 | | ret = memprof_aligned_alloc_handler(align, size); |
582 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
583 | | return ret; |
584 | | |
585 | | size = malloc_usable_size(ret) + sizeof(void *); |
586 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_ALIGNED_ALLOC); |
587 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
588 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
589 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
590 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
591 | | return ret; |
592 | | } |
593 | | |
594 | | int posix_memalign(void **ptr, size_t align, size_t size) |
595 | | { |
596 | | struct memprof_stats *bin; |
597 | | int ret; |
598 | | |
599 | | if (!memprof_posix_memalign_handler) |
600 | | return ENOMEM; |
601 | | |
602 | | ret = memprof_posix_memalign_handler(ptr, align, size); |
603 | | if (likely(!(profiling & HA_PROF_MEMORY))) |
604 | | return ret; |
605 | | |
606 | | if (ret != 0) // error |
607 | | return ret; |
608 | | |
609 | | size = malloc_usable_size(*ptr) + sizeof(void *); |
610 | | bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_POSIX_MEMALIGN); |
611 | | if (unlikely(th_ctx->lock_level & 0x7F)) |
612 | | _HA_ATOMIC_ADD(&bin->locked_calls, 1); |
613 | | _HA_ATOMIC_ADD(&bin->alloc_calls, 1); |
614 | | _HA_ATOMIC_ADD(&bin->alloc_tot, size); |
615 | | return ret; |
616 | | } |
617 | | |
618 | | /* remove info from entries matching <info>. This needs to be used by callers |
619 | | * of pool_destroy() so that we don't keep a reference to a dead pool. Nothing |
620 | | * is done if <info> is NULL. |
621 | | */ |
622 | | void memprof_remove_stale_info(const void *info) |
623 | | { |
624 | | int i; |
625 | | |
626 | | if (!info) |
627 | | return; |
628 | | |
629 | | for (i = 0; i < MEMPROF_HASH_BUCKETS; i++) { |
630 | | if (_HA_ATOMIC_LOAD(&memprof_stats[i].info) == info) |
631 | | _HA_ATOMIC_STORE(&memprof_stats[i].info, NULL); |
632 | | } |
633 | | } |
634 | | |
635 | | #endif // USE_MEMORY_PROFILING |
636 | | |
637 | | /* Updates the current thread's statistics about stolen CPU time. The unit for |
638 | | * <stolen> is half-milliseconds. |
639 | | */ |
640 | | void report_stolen_time(uint64_t stolen) |
641 | 0 | { |
642 | 0 | activity[tid].cpust_total += stolen; |
643 | 0 | update_freq_ctr(&activity[tid].cpust_1s, stolen); |
644 | 0 | update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen); |
645 | 0 | } |
646 | | |
647 | | /* Update avg_loop value for the current thread and possibly decide to enable |
648 | | * task-level profiling on the current thread based on its average run time. |
649 | | * The <run_time> argument is the number of microseconds elapsed since the |
650 | | * last time poll() returned. |
651 | | */ |
652 | | void activity_count_runtime(uint32_t run_time) |
653 | 0 | { |
654 | 0 | uint32_t up, down; |
655 | | |
656 | | /* 1 millisecond per loop on average over last 1024 iterations is |
657 | | * enough to turn on profiling. |
658 | | */ |
659 | 0 | up = 1000; |
660 | 0 | down = up * 99 / 100; |
661 | |
|
662 | 0 | run_time = swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time); |
663 | | |
664 | | /* In automatic mode, reaching the "up" threshold on average switches |
665 | | * profiling to "on" when automatic, and going back below the "down" |
666 | | * threshold switches to off. The forced modes don't check the load. |
667 | | */ |
668 | 0 | if (!(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)) { |
669 | 0 | if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON || |
670 | 0 | ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON && |
671 | 0 | swrate_avg(run_time, TIME_STATS_SAMPLES) >= up))) { |
672 | |
|
673 | 0 | if (profiling & HA_PROF_TASKS_LOCK) |
674 | 0 | _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_L); |
675 | 0 | else |
676 | 0 | _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_L); |
677 | |
|
678 | 0 | if (profiling & HA_PROF_TASKS_MEM) |
679 | 0 | _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_M); |
680 | 0 | else |
681 | 0 | _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_M); |
682 | |
|
683 | 0 | _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING); |
684 | 0 | } |
685 | 0 | } else { |
686 | 0 | if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF || |
687 | 0 | ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF && |
688 | 0 | swrate_avg(run_time, TIME_STATS_SAMPLES) <= down))) |
689 | 0 | _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING); |
690 | 0 | } |
691 | 0 | } |
692 | | |
693 | | #ifdef USE_MEMORY_PROFILING |
694 | | /* config parser for global "profiling.memory", accepts "on" or "off" */ |
695 | | static int cfg_parse_prof_memory(char **args, int section_type, struct proxy *curpx, |
696 | | const struct proxy *defpx, const char *file, int line, |
697 | | char **err) |
698 | | { |
699 | | if (too_many_args(1, args, err, NULL)) |
700 | | return -1; |
701 | | |
702 | | if (strcmp(args[1], "on") == 0) { |
703 | | profiling |= HA_PROF_MEMORY; |
704 | | HA_ATOMIC_STORE(&prof_mem_start_ns, now_ns); |
705 | | } |
706 | | else if (strcmp(args[1], "off") == 0) |
707 | | profiling &= ~HA_PROF_MEMORY; |
708 | | else { |
709 | | memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]); |
710 | | return -1; |
711 | | } |
712 | | return 0; |
713 | | } |
714 | | #endif // USE_MEMORY_PROFILING |
715 | | |
716 | | /* config parser for global "profiling.tasks", accepts "on", "off", "auto", |
717 | | * "lock", "no-lock", "memory", "no-memory". |
718 | | */ |
719 | | static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx, |
720 | | const struct proxy *defpx, const char *file, int line, |
721 | | char **err) |
722 | 0 | { |
723 | 0 | int arg; |
724 | |
|
725 | 0 | for (arg = 1; *args[arg]; arg++) { |
726 | 0 | if (strcmp(args[arg], "on") == 0) { |
727 | 0 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON; |
728 | 0 | HA_ATOMIC_STORE(&prof_task_start_ns, now_ns); |
729 | 0 | } |
730 | 0 | else if (strcmp(args[arg], "auto") == 0) { |
731 | 0 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF; |
732 | 0 | HA_ATOMIC_STORE(&prof_task_start_ns, now_ns); |
733 | 0 | } |
734 | 0 | else if (strcmp(args[arg], "off") == 0) |
735 | 0 | profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF; |
736 | 0 | else if (strcmp(args[arg], "lock") == 0) |
737 | 0 | profiling |= HA_PROF_TASKS_LOCK; |
738 | 0 | else if (strcmp(args[arg], "no-lock") == 0) |
739 | 0 | profiling &= ~HA_PROF_TASKS_LOCK; |
740 | 0 | else if (strcmp(args[arg], "memory") == 0) |
741 | 0 | profiling |= HA_PROF_TASKS_MEM; |
742 | 0 | else if (strcmp(args[arg], "no-memory") == 0) |
743 | 0 | profiling &= ~HA_PROF_TASKS_MEM; |
744 | 0 | else |
745 | 0 | break; |
746 | 0 | } |
747 | | |
748 | | /* either no arg or invalid arg */ |
749 | 0 | if (arg == 1 || *args[arg]) { |
750 | 0 | memprintf(err, "'%s' expects a combination of either 'on', 'auto', 'off', 'lock', 'no-lock', 'memory', or 'no-memory', but got '%s'.", args[0], args[arg]); |
751 | 0 | return -1; |
752 | 0 | } |
753 | 0 | return 0; |
754 | 0 | } |
755 | | |
/* parse a "set profiling" command. It always returns 1 (the CLI convention
 * for a command that was fully handled), possibly after queueing an error
 * message via cli_err(). Requires admin level. Two sub-commands exist:
 *   "set profiling memory {on|off}"   (only if built with USE_MEMORY_PROFILING)
 *   "set profiling tasks {on|auto|off|lock|no-lock|memory|no-memory}..."
 * Enabling a profiler records the start timestamp and flushes the matching
 * stats array so that a fresh measurement period begins.
 */
static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
{
	int arg;

	if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
		return 1;

	if (strcmp(args[2], "memory") == 0) {
#ifdef USE_MEMORY_PROFILING
		if (strcmp(args[3], "on") == 0) {
			unsigned int old = profiling;
			int i;

			/* atomically set HA_PROF_MEMORY; CAS loop because other
			 * threads may be flipping other profiling bits in parallel.
			 */
			while (!_HA_ATOMIC_CAS(&profiling, &old, old | HA_PROF_MEMORY))
				;

			/* mark the beginning of a new measurement period */
			HA_ATOMIC_STORE(&prof_mem_start_ns, now_ns);
			HA_ATOMIC_STORE(&prof_mem_stop_ns, 0);

			/* also flush current profiling stats */
			for (i = 0; i < sizeof(memprof_stats) / sizeof(memprof_stats[0]); i++) {
				HA_ATOMIC_STORE(&memprof_stats[i].locked_calls, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].alloc_calls, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].free_calls, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].alloc_tot, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].free_tot, 0);
				HA_ATOMIC_STORE(&memprof_stats[i].caller, NULL);
			}
		}
		else if (strcmp(args[3], "off") == 0) {
			unsigned int old = profiling;

			/* atomically clear HA_PROF_MEMORY */
			while (!_HA_ATOMIC_CAS(&profiling, &old, old & ~HA_PROF_MEMORY))
				;

			/* record the stop date only if a period was running */
			if (HA_ATOMIC_LOAD(&prof_mem_start_ns))
				HA_ATOMIC_STORE(&prof_mem_stop_ns, now_ns);
		}
		else
			return cli_err(appctx, "Expects either 'on' or 'off'.\n");
		return 1;
#else
		return cli_err(appctx, "Memory profiling not compiled in.\n");
#endif
	}

	if (strcmp(args[2], "tasks") != 0)
		return cli_err(appctx, "Expects either 'tasks' or 'memory'.\n");

	/* "set profiling tasks": accept any combination of keywords below */
	for (arg = 3; *args[arg]; arg++) {
		if (strcmp(args[arg], "on") == 0) {
			unsigned int old = profiling;
			int i;

			while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
				;

			/* start a new measurement period */
			HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
			HA_ATOMIC_STORE(&prof_task_stop_ns, 0);

			/* also flush current profiling stats */
			for (i = 0; i < SCHED_ACT_HASH_BUCKETS; i++) {
				HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
				HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
				HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
				HA_ATOMIC_STORE(&sched_activity[i].lkw_time, 0);
				HA_ATOMIC_STORE(&sched_activity[i].lkd_time, 0);
				HA_ATOMIC_STORE(&sched_activity[i].mem_time, 0);
				HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
				HA_ATOMIC_STORE(&sched_activity[i].caller, NULL);
			}
		}
		else if (strcmp(args[arg], "auto") == 0) {
			unsigned int old = profiling;
			unsigned int new;

			/* "auto" keeps the current on/off state: if currently at
			 * least auto-on, stay auto-on, otherwise go to auto-off.
			 */
			do {
				if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
					new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
				else
					new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
			} while (!_HA_ATOMIC_CAS(&profiling, &old, new));

			HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
			HA_ATOMIC_STORE(&prof_task_stop_ns, 0);
		}
		else if (strcmp(args[arg], "off") == 0) {
			unsigned int old = profiling;
			while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
				;

			/* record the stop date only if a period was running */
			if (HA_ATOMIC_LOAD(&prof_task_start_ns))
				HA_ATOMIC_STORE(&prof_task_stop_ns, now_ns);
		}
		else if (strcmp(args[arg], "lock") == 0)
			HA_ATOMIC_OR(&profiling, HA_PROF_TASKS_LOCK);
		else if (strcmp(args[arg], "no-lock") == 0)
			HA_ATOMIC_AND(&profiling, ~HA_PROF_TASKS_LOCK);
		else if (strcmp(args[arg], "memory") == 0)
			HA_ATOMIC_OR(&profiling, HA_PROF_TASKS_MEM);
		else if (strcmp(args[arg], "no-memory") == 0)
			HA_ATOMIC_AND(&profiling, ~HA_PROF_TASKS_MEM);
		else
			break; // unknown arg
	}

	/* either no arg or invalid one */
	if (arg == 3 || *args[arg])
		return cli_err(appctx, "Expects a combination of either 'on', 'auto', 'off', 'lock', 'no-lock', 'memory' or 'no-memory'.\n");

	return 1;
}
869 | | |
870 | | static int cmp_sched_activity_calls(const void *a, const void *b) |
871 | 0 | { |
872 | 0 | const struct sched_activity *l = (const struct sched_activity *)a; |
873 | 0 | const struct sched_activity *r = (const struct sched_activity *)b; |
874 | |
|
875 | 0 | if (l->calls > r->calls) |
876 | 0 | return -1; |
877 | 0 | else if (l->calls < r->calls) |
878 | 0 | return 1; |
879 | 0 | else |
880 | 0 | return 0; |
881 | 0 | } |
882 | | |
883 | | /* sort by address first, then by call count */ |
884 | | static int cmp_sched_activity_addr(const void *a, const void *b) |
885 | 0 | { |
886 | 0 | const struct sched_activity *l = (const struct sched_activity *)a; |
887 | 0 | const struct sched_activity *r = (const struct sched_activity *)b; |
888 | |
|
889 | 0 | if (l->func > r->func) |
890 | 0 | return -1; |
891 | 0 | else if (l->func < r->func) |
892 | 0 | return 1; |
893 | 0 | else if (l->calls > r->calls) |
894 | 0 | return -1; |
895 | 0 | else if (l->calls < r->calls) |
896 | 0 | return 1; |
897 | 0 | else |
898 | 0 | return 0; |
899 | 0 | } |
900 | | |
901 | | /* sort by cpu time first, then by inverse call count (to spot highest offenders) */ |
902 | | static int cmp_sched_activity_cpu(const void *a, const void *b) |
903 | 0 | { |
904 | 0 | const struct sched_activity *l = (const struct sched_activity *)a; |
905 | 0 | const struct sched_activity *r = (const struct sched_activity *)b; |
906 | |
|
907 | 0 | if (l->cpu_time > r->cpu_time) |
908 | 0 | return -1; |
909 | 0 | else if (l->cpu_time < r->cpu_time) |
910 | 0 | return 1; |
911 | 0 | else if (l->calls < r->calls) |
912 | 0 | return -1; |
913 | 0 | else if (l->calls > r->calls) |
914 | 0 | return 1; |
915 | 0 | else |
916 | 0 | return 0; |
917 | 0 | } |
918 | | |
919 | | #ifdef USE_MEMORY_PROFILING |
920 | | /* used by qsort below */ |
921 | | static int cmp_memprof_stats(const void *a, const void *b) |
922 | | { |
923 | | const struct memprof_stats *l = (const struct memprof_stats *)a; |
924 | | const struct memprof_stats *r = (const struct memprof_stats *)b; |
925 | | |
926 | | if (l->alloc_tot + l->free_tot > r->alloc_tot + r->free_tot) |
927 | | return -1; |
928 | | else if (l->alloc_tot + l->free_tot < r->alloc_tot + r->free_tot) |
929 | | return 1; |
930 | | else if (l->exec_ctx.type > r->exec_ctx.type) |
931 | | return -1; |
932 | | else if (l->exec_ctx.type < r->exec_ctx.type) |
933 | | return 1; |
934 | | else if (l->exec_ctx.pointer > r->exec_ctx.pointer) |
935 | | return -1; |
936 | | else if (l->exec_ctx.pointer < r->exec_ctx.pointer) |
937 | | return 1; |
938 | | else |
939 | | return 0; |
940 | | } |
941 | | |
942 | | static int cmp_memprof_addr(const void *a, const void *b) |
943 | | { |
944 | | const struct memprof_stats *l = (const struct memprof_stats *)a; |
945 | | const struct memprof_stats *r = (const struct memprof_stats *)b; |
946 | | |
947 | | if (l->caller > r->caller) |
948 | | return -1; |
949 | | else if (l->caller < r->caller) |
950 | | return 1; |
951 | | else if (l->exec_ctx.type > r->exec_ctx.type) |
952 | | return -1; |
953 | | else if (l->exec_ctx.type < r->exec_ctx.type) |
954 | | return 1; |
955 | | else if (l->exec_ctx.pointer > r->exec_ctx.pointer) |
956 | | return -1; |
957 | | else if (l->exec_ctx.pointer < r->exec_ctx.pointer) |
958 | | return 1; |
959 | | else |
960 | | return 0; |
961 | | } |
962 | | |
963 | | static int cmp_memprof_ctx(const void *a, const void *b) |
964 | | { |
965 | | const struct memprof_stats *l = (const struct memprof_stats *)a; |
966 | | const struct memprof_stats *r = (const struct memprof_stats *)b; |
967 | | const void *ptrl = l->exec_ctx.pointer; |
968 | | const void *ptrr = r->exec_ctx.pointer; |
969 | | |
970 | | /* in case of a mux, we'll use the always-present ->subscribe() |
971 | | * function as a sorting key so that mux-ops and other mux functions |
972 | | * appear grouped together. |
973 | | */ |
974 | | if (l->exec_ctx.type == TH_EX_CTX_MUX) |
975 | | ptrl = l->exec_ctx.mux_ops->subscribe; |
976 | | |
977 | | if (r->exec_ctx.type == TH_EX_CTX_MUX) |
978 | | ptrr = r->exec_ctx.mux_ops->subscribe; |
979 | | |
980 | | if (ptrl > ptrr) |
981 | | return -1; |
982 | | else if (ptrl < ptrr) |
983 | | return 1; |
984 | | else if (l->exec_ctx.type > r->exec_ctx.type) |
985 | | return -1; |
986 | | else if (l->exec_ctx.type < r->exec_ctx.type) |
987 | | return 1; |
988 | | else if (l->caller > r->caller) |
989 | | return -1; |
990 | | else if (l->caller < r->caller) |
991 | | return 1; |
992 | | else |
993 | | return 0; |
994 | | } |
995 | | #endif // USE_MEMORY_PROFILING |
996 | | |
/* Computes the index of function pointer <func> and caller <caller> for use
 * with sched_activity[] or any other similar array passed in <array>, and
 * returns a pointer to the entry after having atomically assigned it to this
 * function pointer and caller combination. The lookup is lock-free: a slot
 * is reserved by atomically CASing its ->func from NULL, after which the
 * owner alone writes ->caller. Note that in case of collision (all probed
 * slots taken by other keys), the first entry is returned instead ("other").
 */
struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller)
{
	uint32_t hash = ptr2_hash(func, caller, SCHED_ACT_HASH_BITS);
	struct sched_activity *ret;
	const void *old;
	int tries = 16;

	/* open addressing: linearly probe up to 16 consecutive slots.
	 * NOTE(review): hash+15 may index past SCHED_ACT_HASH_BUCKETS unless
	 * the array is oversized or ptr2_hash() leaves headroom -- verify
	 * bounds against the array's declaration.
	 */
	for (tries = 16; tries > 0; tries--, hash++) {
		ret = &array[hash];

		while (1) {
			if (likely(ret->func)) {
				/* slot owned: it's ours only if both keys match */
				if (likely(ret->func == func && ret->caller == caller))
					return ret;
				break;
			}

			/* try to create the new entry. Func is sufficient to
			 * reserve the node.
			 */
			old = NULL;
			if (HA_ATOMIC_CAS(&ret->func, &old, func)) {
				ret->caller = caller;
				return ret;
			}
			/* changed in parallel, check again */
		}
	}

	return array;
}
1034 | | |
/* This function dumps all profiling settings. It returns 0 if the output
 * buffer is full and it needs to be called again, otherwise non-zero.
 * It dumps some parts depending on the following states from show_prof_ctx:
 *   dump_step:
 *      0, 4: dump status, then jump to 1 if 0
 *      1, 5: dump tasks, then jump to 2 if 1
 *      2, 6: dump memory, then stop
 *   linenum:
 *      restart line for each step (starts at zero)
 *   maxcnt:
 *      may contain a configured max line count for each step (0=not set)
 *   by_what:
 *      0: sort by usage
 *      1: sort by address
 *      2: sort by total CPU time
 *      3: sort by caller context
 *   aggr:
 *      0: dump raw entries, 1: aggregate on the callee
 */
static int cli_io_handler_show_profiling(struct appctx *appctx)
{
	struct show_prof_ctx *ctx = appctx->svcctx;
	struct sched_activity *tmp_activity = ctx->tmp_activity;
#ifdef USE_MEMORY_PROFILING
	struct memprof_stats *tmp_memstats = ctx->tmp_memstats;
	unsigned long long tot_alloc_calls, tot_free_calls;
	unsigned long long tot_alloc_bytes, tot_free_bytes;
#endif
	struct buffer *name_buffer = get_trash_chunk();
	const struct ha_caller *caller;
	const char *str;
	int max_lines;
	int i, j, max;
	int dumped;

	chunk_reset(&trash);

	switch (profiling & HA_PROF_TASKS_MASK) {
	case HA_PROF_TASKS_AOFF: str="auto-off"; break;
	case HA_PROF_TASKS_AON: str="auto-on"; break;
	case HA_PROF_TASKS_ON: str="on"; break;
	default: str="off"; break;
	}

	/* ---- step 0/4: current profiling status ---- */
	if ((ctx->dump_step & 3) != 0)
		goto skip_status;

	chunk_printf(&trash,
	             "Per-task CPU profiling : %-8s # set profiling tasks {on|auto|off}\n"
	             "Memory usage profiling : %-8s # set profiling memory {on|off}\n",
	             str, (profiling & HA_PROF_MEMORY) ? "on" : "off");

	if (applet_putchk(appctx, &trash) == -1) {
		/* failed, try again */
		return 0;
	}

	ctx->linenum = 0; // reset first line to dump
	if ((ctx->dump_step & 4) == 0)
		ctx->dump_step++; // next step

 skip_status:
	/* ---- step 1/5: scheduler (tasks) activity ---- */
	if ((ctx->dump_step & 3) != 1)
		goto skip_tasks;

	/* a non-NULL tmp_activity means we're resuming an interrupted dump */
	if (tmp_activity)
		goto tasks_resume;

	/* first call for show profiling tasks: we have to allocate a tmp
	 * array for sorting and processing, and possibly perform some
	 * sorting and aggregation.
	 */
	tmp_activity = ha_aligned_alloc(__alignof__(*tmp_activity), sizeof(sched_activity));
	if (!tmp_activity)
		goto end_tasks;

	ctx->tmp_activity = tmp_activity;
	memcpy(tmp_activity, sched_activity, sizeof(sched_activity));

	/* for addr sort and for callee aggregation we have to first sort by address */
	if (ctx->aggr || ctx->by_what == 1) // sort by addr
		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_addr);

	if (ctx->aggr) {
		/* merge entries for the same callee and reset their count */
		for (i = j = 0; i < SCHED_ACT_HASH_BUCKETS; i = j) {
			for (j = i + 1; j < SCHED_ACT_HASH_BUCKETS && tmp_activity[j].func == tmp_activity[i].func; j++) {
				tmp_activity[i].calls += tmp_activity[j].calls;
				tmp_activity[i].cpu_time += tmp_activity[j].cpu_time;
				tmp_activity[i].lat_time += tmp_activity[j].lat_time;
				tmp_activity[i].lkw_time += tmp_activity[j].lkw_time;
				tmp_activity[i].lkd_time += tmp_activity[j].lkd_time;
				tmp_activity[i].mem_time += tmp_activity[j].mem_time;
				tmp_activity[j].calls = 0;
			}
		}
	}

	if (!ctx->by_what) // sort by usage
		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
	else if (ctx->by_what == 2) // by cpu_tot
		qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_cpu);

 tasks_resume:
	/* print the header only when starting from the first line */
	if (!ctx->linenum)
		chunk_appendf(&trash, "Tasks activity over %.3f sec till %.3f sec ago:\n"
		              " function calls cpu_tot cpu_avg lkw_avg lkd_avg mem_avg lat_avg\n",
		              (prof_task_start_ns ? (prof_task_stop_ns ? prof_task_stop_ns : now_ns) - prof_task_start_ns : 0) / 1000000000.0,
		              (prof_task_stop_ns ? now_ns - prof_task_stop_ns : 0) / 1000000000.0);

	max_lines = ctx->maxcnt;
	if (!max_lines)
		max_lines = SCHED_ACT_HASH_BUCKETS;

	dumped = 0;
	for (i = ctx->linenum; i < max_lines; i++) {
		if (!tmp_activity[i].calls)
			continue; // skip aggregated or empty entries

		ctx->linenum = i;

		/* resolve_sym_name() may be slow, better dump a few entries at a time */
		if (dumped >= 10)
			return 0;

		chunk_reset(name_buffer);
		caller = HA_ATOMIC_LOAD(&tmp_activity[i].caller);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
		chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);

		/* per-call averages are safe: entries with calls==0 were skipped above */
		print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
		print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
		print_time_short(&trash, " ", tmp_activity[i].lkw_time / tmp_activity[i].calls, "");
		print_time_short(&trash, " ", tmp_activity[i].lkd_time / tmp_activity[i].calls, "");
		print_time_short(&trash, " ", tmp_activity[i].mem_time / tmp_activity[i].calls, "");
		print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "");

		if (caller && !ctx->aggr && caller->what <= WAKEUP_TYPE_APPCTX_WAKEUP)
			chunk_appendf(&trash, " <- %s@%s:%d %s",
			              caller->func, caller->file, caller->line,
			              task_wakeup_type_str(caller->what));

		b_putchr(&trash, '\n');

		if (applet_putchk(appctx, &trash) == -1) {
			/* failed, try again */
			return 0;
		}
		dumped++;
	}

	if (applet_putchk(appctx, &trash) == -1) {
		/* failed, try again */
		return 0;
	}

 end_tasks:
	ha_free(&ctx->tmp_activity);
	ctx->linenum = 0; // reset first line to dump
	if ((ctx->dump_step & 4) == 0)
		ctx->dump_step++; // next step

 skip_tasks:

#ifdef USE_MEMORY_PROFILING
	/* ---- step 2/6: memory allocation statistics ---- */
	if ((ctx->dump_step & 3) != 2)
		goto skip_mem;

	/* a non-NULL tmp_memstats means we're resuming an interrupted dump */
	if (tmp_memstats)
		goto memstats_resume;

	/* first call for show profiling memory: we have to allocate a tmp
	 * array for sorting and processing, and possibly perform some sorting
	 * and aggregation.
	 */
	tmp_memstats = ha_aligned_alloc(__alignof__(*tmp_memstats), sizeof(memprof_stats));
	if (!tmp_memstats)
		goto end_memstats;

	ctx->tmp_memstats = tmp_memstats;
	memcpy(tmp_memstats, memprof_stats, sizeof(memprof_stats));

	if (ctx->by_what == 1)
		qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
	else if (ctx->by_what == 3)
		qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_ctx);
	else
		qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);

	if (ctx->aggr) {
		/* merge entries for the same caller and reset the exec_ctx */
		for (i = j = 0; i < MEMPROF_HASH_BUCKETS; i++) {
			if ((tmp_memstats[i].alloc_calls | tmp_memstats[i].free_calls) == 0)
				continue;
			for (j = i + 1; j < MEMPROF_HASH_BUCKETS; j++) {
				if ((tmp_memstats[j].alloc_calls | tmp_memstats[j].free_calls) == 0)
					continue;
				if (tmp_memstats[j].caller != tmp_memstats[i].caller ||
				    tmp_memstats[j].method != tmp_memstats[i].method ||
				    tmp_memstats[j].info != tmp_memstats[i].info)
					continue;
				tmp_memstats[i].locked_calls += tmp_memstats[j].locked_calls;
				tmp_memstats[i].alloc_calls += tmp_memstats[j].alloc_calls;
				tmp_memstats[i].free_calls += tmp_memstats[j].free_calls;
				tmp_memstats[i].alloc_tot += tmp_memstats[j].alloc_tot;
				tmp_memstats[i].free_tot += tmp_memstats[j].free_tot;
				/* don't dump the ctx */
				tmp_memstats[i].exec_ctx.type = 0;
				/* don't dump the merged entry */
				tmp_memstats[j].alloc_calls = tmp_memstats[j].free_calls = 0;
			}
		}
	}

 memstats_resume:
	/* print the header only when starting from the first line */
	if (!ctx->linenum)
		chunk_appendf(&trash,
		              "Alloc/Free statistics by call place over %.3f sec till %.3f sec ago:\n"
		              " Calls | Tot Bytes | Caller, method, extra info\n"
		              "<- alloc -> <- free ->|<-- alloc ---> <-- free ---->|\n",
		              (prof_mem_start_ns ? (prof_mem_stop_ns ? prof_mem_stop_ns : now_ns) - prof_mem_start_ns : 0) / 1000000000.0,
		              (prof_mem_stop_ns ? now_ns - prof_mem_stop_ns : 0) / 1000000000.0);

	max_lines = ctx->maxcnt;
	if (!max_lines)
		max_lines = MEMPROF_HASH_BUCKETS + 1;

	dumped = 0;
	for (i = ctx->linenum; i < max_lines; i++) {
		struct memprof_stats *entry = &tmp_memstats[i];

		ctx->linenum = i;
		if (!entry->alloc_calls && !entry->free_calls)
			continue;

		/* resolve_sym_name() may be slow, better dump a few entries at a time */
		if (dumped >= 10)
			return 0;

		chunk_appendf(&trash, "%11llu %11llu %14llu %14llu| %16p ",
		              entry->alloc_calls, entry->free_calls,
		              entry->alloc_tot, entry->free_tot,
		              entry->caller);

		if (entry->caller)
			resolve_sym_name(&trash, NULL, entry->caller);
		else
			chunk_appendf(&trash, "[other]");

		/* show the average size per call; for free-like methods (or pure
		 * frees) the net (alloc - free) is averaged over all calls.
		 */
		if (((1UL << tmp_memstats[i].method) & MEMPROF_FREE_MASK) || !entry->alloc_calls) {
			chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method],
			              (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls));
		} else
			chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method],
			              (long long)(entry->alloc_tot) / (long long)(entry->alloc_calls));

		if (entry->alloc_tot && entry->free_tot) {
			/* that's a realloc, show the total diff to help spot leaks */
			chunk_appendf(&trash," [delta=%lld]", (long long)(entry->alloc_tot - entry->free_tot));
		}

		if (entry->info) {
			/* that's a pool name */
			const struct pool_head *pool = entry->info;
			chunk_appendf(&trash," [pool=%s]", pool->name);
		}

		if (entry->locked_calls) {
			unsigned long long tot_calls = entry->alloc_calls + entry->free_calls;

			chunk_appendf(&trash," [locked=%llu (%d.%1d %%)]",
			              entry->locked_calls,
			              (int)(100ULL * entry->locked_calls / tot_calls),
			              (int)((1000ULL * entry->locked_calls / tot_calls) % 10));
		}

		chunk_append_thread_ctx(&trash, &entry->exec_ctx, " [via ", "]");
		chunk_appendf(&trash, "\n");

		if (applet_putchk(appctx, &trash) == -1)
			return 0;

		dumped++;
	}

	if (applet_putchk(appctx, &trash) == -1)
		return 0;

	/* compute the grand totals; free counters only count for free-capable methods */
	tot_alloc_calls = tot_free_calls = tot_alloc_bytes = tot_free_bytes = 0;
	for (i = 0; i < max_lines; i++) {
		tot_alloc_calls += tmp_memstats[i].alloc_calls;
		tot_alloc_bytes += tmp_memstats[i].alloc_tot;
		if ((1UL << tmp_memstats[i].method) & MEMPROF_FREE_MASK) {
			tot_free_calls += tmp_memstats[i].free_calls;
			tot_free_bytes += tmp_memstats[i].free_tot;
		}
	}

	/* last step: summarize by DSO. We create one entry per new DSO in
	 * tmp_memstats, which is thus destroyed. The DSO's name is allocated
	 * and stored into tmp_stats.info. Must be freed at the end. We store
	 * <max> dso entries total. There are very few so we do that in a single
	 * pass and append it after the total.
	 */
	for (i = max = 0; i < max_lines; i++) {
		struct memprof_stats *entry = &tmp_memstats[i];

		if (!entry->alloc_calls && !entry->free_calls)
			continue;

		chunk_reset(name_buffer);
		if (!entry->caller)
			chunk_printf(name_buffer, "other");
		else
			resolve_dso_name(name_buffer, "", entry->caller);

		/* look it up among known names (0..max) */
		for (j = 0; j < max; j++) {
			if (tmp_memstats[j].info && strcmp(name_buffer->area, tmp_memstats[j].info) == 0)
				break;
		}

		if (j == max) {
			/* not found, create a new entry at <j>. We need to be
			 * careful as it could be the same as <entry> (i)!
			 */
			max++;

			if (j != i) // set max to keep min caller's address
				tmp_memstats[j].caller = (void*)-1;

			tmp_memstats[j].info = strdup(name_buffer->area); // may fail, but checked when used
			tmp_memstats[j].alloc_calls = entry->alloc_calls;
			tmp_memstats[j].alloc_tot = entry->alloc_tot;
			if ((1UL << entry->method) & MEMPROF_FREE_MASK) {
				tmp_memstats[j].free_calls = entry->free_calls;
				tmp_memstats[j].free_tot = entry->free_tot;
			} else {
				tmp_memstats[j].free_calls = 0;
				tmp_memstats[j].free_tot = 0;
			}
		} else {
			tmp_memstats[j].alloc_calls += entry->alloc_calls;
			tmp_memstats[j].alloc_tot += entry->alloc_tot;
			if ((1UL << entry->method) & MEMPROF_FREE_MASK) {
				tmp_memstats[j].free_calls += entry->free_calls;
				tmp_memstats[j].free_tot += entry->free_tot;
			}
		}

		if (entry->caller &&
		    tmp_memstats[j].caller > entry->caller)
			tmp_memstats[j].caller = entry->caller; // keep lowest address
	}

	/* now we have entries 0..max-1 that are filled with per-DSO stats. This is
	 * compact enough to fit next to the total line in one buffer, hence no
	 * state kept.
	 */
	chunk_appendf(&trash,
	              "-----------------------|-----------------------------| "
	              " - min caller - | -- by DSO below --\n");

	for (i = 0; i < max; i++) {
		struct memprof_stats *entry = &tmp_memstats[i];

		chunk_appendf(&trash, "%11llu %11llu %14llu %14llu| %16p DSO:%s;",
		              entry->alloc_calls, entry->free_calls,
		              entry->alloc_tot, entry->free_tot,
		              entry->caller == (void*)-1 ? 0 : entry->caller, entry->info ? (const char*)entry->info : "other");

		if (entry->alloc_tot != entry->free_tot)
			chunk_appendf(&trash, " delta_calls=%lld; delta_bytes=%lld",
			              (long long)(entry->alloc_calls - entry->free_calls),
			              (long long)(entry->alloc_tot - entry->free_tot));
		chunk_appendf(&trash, "\n");
	}

	chunk_appendf(&trash,
	              "-----------------------|-----------------------------|\n"
	              "%11llu %11llu %14llu %14llu| <- Total; Delta_calls=%lld; Delta_bytes=%lld\n",
	              tot_alloc_calls, tot_free_calls,
	              tot_alloc_bytes, tot_free_bytes,
	              tot_alloc_calls - tot_free_calls,
	              tot_alloc_bytes - tot_free_bytes);

	/* release optional buffer name */
	for (i = 0; i < max; i++)
		ha_free(&tmp_memstats[i].info);

	if (applet_putchk(appctx, &trash) == -1)
		return 0;

 end_memstats:
	ha_free(&ctx->tmp_memstats);
	ctx->linenum = 0; // reset first line to dump
	if ((ctx->dump_step & 4) == 0)
		ctx->dump_step++; // next step

 skip_mem:
#endif // USE_MEMORY_PROFILING

	return 1;
}
1446 | | |
1447 | | /* release structs allocated by "show profiling" */ |
1448 | | static void cli_release_show_profiling(struct appctx *appctx) |
1449 | 0 | { |
1450 | 0 | struct show_prof_ctx *ctx = appctx->svcctx; |
1451 | |
|
1452 | 0 | ha_free(&ctx->tmp_activity); |
1453 | 0 | ha_free(&ctx->tmp_memstats); |
1454 | 0 | } |
1455 | | |
1456 | | /* parse a "show profiling" command. It returns 1 on failure, 0 if it starts to dump. |
1457 | | * - cli.i0 is set to the first state (0=all, 4=status, 5=tasks, 6=memory) |
1458 | | * - cli.o1 is set to 1 if the output must be sorted by addr instead of usage |
1459 | | * - cli.o0 is set to the number of lines of output |
1460 | | */ |
1461 | | static int cli_parse_show_profiling(char **args, char *payload, struct appctx *appctx, void *private) |
1462 | 0 | { |
1463 | 0 | struct show_prof_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); |
1464 | 0 | int arg; |
1465 | |
|
1466 | 0 | if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) |
1467 | 0 | return 1; |
1468 | | |
1469 | 0 | for (arg = 2; *args[arg]; arg++) { |
1470 | 0 | if (strcmp(args[arg], "all") == 0) { |
1471 | 0 | ctx->dump_step = 0; // will cycle through 0,1,2; default |
1472 | 0 | } |
1473 | 0 | else if (strcmp(args[arg], "status") == 0) { |
1474 | 0 | ctx->dump_step = 4; // will visit status only |
1475 | 0 | } |
1476 | 0 | else if (strcmp(args[arg], "tasks") == 0) { |
1477 | 0 | ctx->dump_step = 5; // will visit tasks only |
1478 | 0 | } |
1479 | 0 | else if (strcmp(args[arg], "memory") == 0) { |
1480 | 0 | ctx->dump_step = 6; // will visit memory only |
1481 | 0 | } |
1482 | 0 | else if (strcmp(args[arg], "byaddr") == 0) { |
1483 | 0 | ctx->by_what = 1; // sort output by address instead of usage |
1484 | 0 | } |
1485 | 0 | else if (strcmp(args[arg], "bytime") == 0) { |
1486 | 0 | ctx->by_what = 2; // sort output by total time instead of usage |
1487 | 0 | } |
1488 | 0 | else if (strcmp(args[arg], "byctx") == 0) { |
1489 | 0 | ctx->by_what = 3; // sort output by caller context instead of usage |
1490 | 0 | } |
1491 | 0 | else if (strcmp(args[arg], "aggr") == 0) { |
1492 | 0 | ctx->aggr = 1; // aggregate output by callee |
1493 | 0 | } |
1494 | 0 | else if (isdigit((unsigned char)*args[arg])) { |
1495 | 0 | ctx->maxcnt = atoi(args[arg]); // number of entries to dump |
1496 | 0 | } |
1497 | 0 | else |
1498 | 0 | return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'byctx', 'aggr' or a max number of output lines.\n"); |
1499 | 0 | } |
1500 | 0 | return 0; |
1501 | 0 | } |
1502 | | |
1503 | | /* This function scans all threads' run queues and collects statistics about |
1504 | | * running tasks. It returns 0 if the output buffer is full and it needs to be |
1505 | | * called again, otherwise non-zero. |
1506 | | */ |
static int cli_io_handler_show_tasks(struct appctx *appctx)
{
	struct sched_activity tmp_activity[SCHED_ACT_HASH_BUCKETS];
	struct buffer *name_buffer = get_trash_chunk();
	struct sched_activity *entry;
	const struct tasklet *tl;
	const struct task *t;
	uint64_t now_ns, lat;
	struct eb32_node *rqnode;
	uint64_t tot_calls, tot_cpu;
	int thr, queue;
	int i, max;

	/* It's not possible to scan queues in small chunks and yield in the
	 * middle of the dump and come back again. So what we're doing instead
	 * is to freeze all threads and inspect their queues at once as fast as
	 * possible, using a sched_activity array to collect metrics with
	 * limited collision, then we'll report statistics only. The tasks'
	 * #calls will reflect the number of occurrences, and the lat_time will
	 * reflect the latency when set. We prefer to take the time before
	 * calling thread_isolate() so that the wait time doesn't impact the
	 * measurement accuracy. However this requires to take care of negative
	 * times since tasks might be queued after we retrieve it. The cpu_time
	 * will store the total number of calls per task, allowing to sort out
	 * the most vs least busy ones.
	 */

	now_ns = now_mono_time();
	memset(tmp_activity, 0, sizeof(tmp_activity));

	thread_isolate();

	/* 1. global run queue */

#ifdef USE_THREAD
	for (thr = 0; thr < global.nbthread; thr++) {
		/* task run queue */
		rqnode = eb32_first(&ha_thread_ctx[thr].rqueue_shared);
		while (rqnode) {
			t = eb32_entry(rqnode, struct task, rq);
			entry = sched_activity_entry(tmp_activity, t->process, NULL);
			if (t->wake_date) {
				lat = (uint32_t)now_ns - t->wake_date; /* wake_date is 32-bit, hence the truncation */
				if ((int64_t)lat > 0)                  /* skip tasks woken after now_ns was sampled */
					entry->lat_time += lat;
			}
			entry->cpu_time += t->calls; /* see note above: cpu_time holds the total #calls */
			entry->calls++;              /* number of occurrences of this handler in the queues */
			rqnode = eb32_next(rqnode);
		}
	}
#endif
	/* 2. all threads's local run queues */
	for (thr = 0; thr < global.nbthread; thr++) {
		/* task run queue */
		rqnode = eb32_first(&ha_thread_ctx[thr].rqueue);
		while (rqnode) {
			t = eb32_entry(rqnode, struct task, rq);
			entry = sched_activity_entry(tmp_activity, t->process, NULL);
			if (t->wake_date) {
				lat = (uint32_t)now_ns - t->wake_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->cpu_time += t->calls;
			entry->calls++;
			rqnode = eb32_next(rqnode);
		}

		/* shared tasklet list */
		list_for_each_entry(tl, mt_list_to_list(&ha_thread_ctx[thr].shared_tasklet_list), list) {
			t = (const struct task *)tl;
			entry = sched_activity_entry(tmp_activity, t->process, NULL);
			if (!TASK_IS_TASKLET(t) && t->wake_date) { /* latency only accounted for plain tasks, not tasklets */
				lat = (uint32_t)now_ns - t->wake_date;
				if ((int64_t)lat > 0)
					entry->lat_time += lat;
			}
			entry->cpu_time += t->calls;
			entry->calls++;
		}

		/* classful tasklets */
		for (queue = 0; queue < TL_CLASSES; queue++) {
			list_for_each_entry(tl, &ha_thread_ctx[thr].tasklets[queue], list) {
				t = (const struct task *)tl;
				entry = sched_activity_entry(tmp_activity, t->process, NULL);
				if (!TASK_IS_TASKLET(t) && t->wake_date) { /* same as above: tasklets carry no latency */
					lat = (uint32_t)now_ns - t->wake_date;
					if ((int64_t)lat > 0)
						entry->lat_time += lat;
				}
				entry->cpu_time += t->calls;
				entry->calls++;
			}
		}
	}

	/* hopefully we're done */
	thread_release();

	chunk_reset(&trash);

	/* compute the grand totals used as denominators for the percentages below */
	tot_calls = tot_cpu = 0;
	for (i = 0; i < SCHED_ACT_HASH_BUCKETS; i++) {
		tot_calls += tmp_activity[i].calls;
		tot_cpu += tmp_activity[i].cpu_time;
	}
	tot_cpu = tot_cpu ? tot_cpu : 1; // prepare for the divide

	/* sort entries by decreasing occurrence count so the busiest come first */
	qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls);

	chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
	              " function places %% lat_tot lat_avg calls_tot calls_avg calls%%\n",
	              (int)tot_calls, global.nbthread);

	/* entries are sorted, so the first one with calls==0 ends the dump;
	 * the loop condition also guarantees calls != 0 for the averages below.
	 */
	for (i = 0; i < SCHED_ACT_HASH_BUCKETS && tmp_activity[i].calls; i++) {
		chunk_reset(name_buffer);

		if (!tmp_activity[i].func)
			chunk_printf(name_buffer, "other");
		else
			resolve_sym_name(name_buffer, "", tmp_activity[i].func);

		/* reserve 35 chars for name+' '+#calls, knowing that longer names
		 * are often used for less often called functions.
		 */
		max = 35 - name_buffer->data;
		if (max < 1)
			max = 1;
		chunk_appendf(&trash, " %s%*llu %3d.%1d",
		              name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
		              (int)(100ULL * tmp_activity[i].calls / tot_calls),
		              (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10));
		print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
		print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "");
		chunk_appendf(&trash, " %10llu %10llu %3d.%1d\n",
		              (ullong)tmp_activity[i].cpu_time, (ullong)tmp_activity[i].cpu_time / tmp_activity[i].calls,
		              (int)(100ULL * tmp_activity[i].cpu_time / tot_cpu),
		              (int)((1000ULL * tmp_activity[i].cpu_time / tot_cpu)%10));
	}

	if (applet_putchk(appctx, &trash) == -1) {
		/* failed, try again: returning 0 gets us called back, and the
		 * whole snapshot+dump is redone from scratch (see note above).
		 */
		return 0;
	}
	return 1;
}
1655 | | |
1656 | | /* This function dumps some activity counters used by developers and support to |
1657 | | * rule out some hypothesis during bug reports. It returns 0 if the output |
1658 | | * buffer is full and it needs to be called again, otherwise non-zero. It dumps |
1659 | | * everything at once in the buffer and is not designed to do it in multiple |
1660 | | * passes. |
1661 | | */ |
static int cli_io_handler_show_activity(struct appctx *appctx)
{
	struct show_activity_ctx *actctx = appctx->svcctx;
	int tgt = actctx->thr; // target thread, -1 for all, 0 for total only
	uint up_sec, up_usec;
	int base_line;
	ullong up;

	/* this macro is used below to dump values. The thread number is "thr",
	 * and runs from 0 to nbt-1 when values are printed using the formula.
	 * We normally try to dmup integral lines in order to keep counters
	 * consistent. If we fail once on a line, we'll detect it next time
	 * because we'll have committed actctx->col=1 thanks to the header
	 * always being dumped individually. We'll be called again thanks to
	 * the header being present, leaving some data in the buffer. In this
	 * case once we restart we'll proceed one column at a time to make sure
	 * we don't overflow the buffer again.
	 */
#undef SHOW_VAL
#define SHOW_VAL(header, x, formula)					\
	do {								\
		unsigned int _v[MAX_THREADS];				\
		unsigned int _tot;					\
		const int _nbt = global.nbthread;			\
		int restarted = actctx->col > 0;			\
		int thr;						\
		_tot = thr = 0;						\
		/* snapshot all per-thread values first */		\
		do {							\
			_tot += _v[thr] = (x);				\
		} while (++thr < _nbt);					\
		/* thr==-2: header, thr==-1: aggregate, 0..nbt-1: values, nbt: trailer */ \
		for (thr = actctx->col - 2; thr <= _nbt; thr++) {	\
			if (thr == -2) {				\
				/* line header */			\
				chunk_appendf(&trash, "%s", header);	\
			}						\
			else if (thr == -1) {				\
				/* aggregate value only for multi-thread: all & 0 */ \
				if (_nbt > 1 && tgt <= 0)		\
					chunk_appendf(&trash, " %u%s",	\
						      (formula),	\
						      (tgt < 0) ?	\
						      " [" : "");	\
			}						\
			else if (thr < _nbt) {				\
				/* individual value only for all or exact value */ \
				if (tgt == -1 || tgt == thr+1)		\
					chunk_appendf(&trash, " %u",	\
						      _v[thr]);		\
			}						\
			else /* thr == _nbt */ {			\
				chunk_appendf(&trash, "%s\n",		\
					      (_nbt > 1 && tgt < 0) ?	\
					      " ]" : "");		\
			}						\
			if (thr == -2 || restarted) {			\
				/* failed once, emit one column at a time */\
				if (applet_putchk(appctx, &trash) == -1)	\
					break; /* main loop handles it */	\
				chunk_reset(&trash);			\
				actctx->col = thr + 3;			\
			}						\
		}							\
		if (applet_putchk(appctx, &trash) == -1)		\
			break; /* main loop will handle it */		\
		/* OK dump done for this line */			\
		chunk_reset(&trash);					\
		if (thr > _nbt)						\
			actctx->col = 0;				\
	} while (0)

	/* retrieve uptime */
	up = now_ns - start_time_ns;
	up_sec = ns_to_sec(up);
	up_usec = (up / 1000U) % 1000000U;

	/* iterate over all dump lines. It happily skips over holes so it's
	 * not a problem not to have an exact match, we just need to have
	 * stable and consistent lines during a dump.
	 */
	/* NOTE: output lines are keyed by their source line number relative to
	 * base_line, and actctx->line is the offset of the next line to emit,
	 * which is how the dump resumes where it stopped on a full buffer.
	 * Inserting or removing lines between base_line and the end of the
	 * switch below renumbers the cases; holes are tolerated (they produce
	 * an empty iteration), but keep the switch compact and in order.
	 */
	base_line = __LINE__;
	do {
		chunk_reset(&trash);

		switch (actctx->line + base_line) {
		case __LINE__: chunk_appendf(&trash, "thread_id: %u (%u..%u)\n", tid + 1, 1, global.nbthread); break;
		case __LINE__: chunk_appendf(&trash, "date_now: %lu.%06lu\n", (ulong)date.tv_sec, (ulong)date.tv_usec); break;
		case __LINE__: chunk_appendf(&trash, "uptime_now: %u.%06u\n", up_sec, up_usec); break;
		case __LINE__: SHOW_VAL("ctxsw:", activity[thr].ctxsw, _tot); break;
		case __LINE__: SHOW_VAL("tasksw:", activity[thr].tasksw, _tot); break;
		case __LINE__: SHOW_VAL("empty_rq:", activity[thr].empty_rq, _tot); break;
		case __LINE__: SHOW_VAL("long_rq:", activity[thr].long_rq, _tot); break;
		case __LINE__: SHOW_VAL("curr_rq:", _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].rq_total), _tot); break;
		case __LINE__: SHOW_VAL("loops:", activity[thr].loops, _tot); break;
		case __LINE__: SHOW_VAL("wake_tasks:", activity[thr].wake_tasks, _tot); break;
		case __LINE__: SHOW_VAL("wake_signal:", activity[thr].wake_signal, _tot); break;
		case __LINE__: SHOW_VAL("poll_io:", activity[thr].poll_io, _tot); break;
		case __LINE__: SHOW_VAL("poll_exp:", activity[thr].poll_exp, _tot); break;
		case __LINE__: SHOW_VAL("poll_drop_fd:", activity[thr].poll_drop_fd, _tot); break;
		case __LINE__: SHOW_VAL("poll_skip_fd:", activity[thr].poll_skip_fd, _tot); break;
		case __LINE__: SHOW_VAL("conn_dead:", activity[thr].conn_dead, _tot); break;
		case __LINE__: SHOW_VAL("stream_calls:", activity[thr].stream_calls, _tot); break;
		case __LINE__: SHOW_VAL("pool_fail:", activity[thr].pool_fail, _tot); break;
		case __LINE__: SHOW_VAL("buf_wait:", activity[thr].buf_wait, _tot); break;
		case __LINE__: SHOW_VAL("cpust_ms_tot:", activity[thr].cpust_total / 2, _tot); break; /* /2: counters are in half-ms units */
		case __LINE__: SHOW_VAL("cpust_ms_1s:", read_freq_ctr(&activity[thr].cpust_1s) / 2, _tot); break;
		case __LINE__: SHOW_VAL("cpust_ms_15s:", read_freq_ctr_period(&activity[thr].cpust_15s, 15000) / 2, _tot); break;
		case __LINE__: SHOW_VAL("avg_cpu_pct:", (100 - ha_thread_ctx[thr].idle_pct), (_tot + _nbt/2) / _nbt); break; /* aggregate is a rounded mean, not a sum */
		case __LINE__: SHOW_VAL("avg_loop_us:", swrate_avg(activity[thr].avg_loop_us, TIME_STATS_SAMPLES), (_tot + _nbt/2) / _nbt); break;
		case __LINE__: SHOW_VAL("accepted:", activity[thr].accepted, _tot); break;
		case __LINE__: SHOW_VAL("accq_pushed:", activity[thr].accq_pushed, _tot); break;
		case __LINE__: SHOW_VAL("accq_full:", activity[thr].accq_full, _tot); break;
#ifdef USE_THREAD
		case __LINE__: SHOW_VAL("accq_ring:", accept_queue_ring_len(&accept_queue_rings[thr]), _tot); break;
		case __LINE__: SHOW_VAL("fd_takeover:", activity[thr].fd_takeover, _tot); break;
		case __LINE__: SHOW_VAL("check_adopted:",activity[thr].check_adopted, _tot); break;
#endif
		case __LINE__: SHOW_VAL("check_started:",activity[thr].check_started, _tot); break;
		case __LINE__: SHOW_VAL("check_active:", _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].active_checks), _tot); break;
		case __LINE__: SHOW_VAL("check_running:",_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].running_checks), _tot); break;

#if defined(DEBUG_DEV)
		/* keep these ones at the end */
		case __LINE__: SHOW_VAL("ctr0:", activity[thr].ctr0, _tot); break;
		case __LINE__: SHOW_VAL("ctr1:", activity[thr].ctr1, _tot); break;
		case __LINE__: SHOW_VAL("ctr2:", activity[thr].ctr2, _tot); break;
#endif
		}
#undef SHOW_VAL

		/* try to dump what was possibly not dumped yet */

		if (applet_putchk(appctx, &trash) == -1) {
			/* buffer full, retry later */
			return 0;
		}
		/* line was dumped, let's commit it */
		actctx->line++;
	} while (actctx->line + base_line < __LINE__);

	/* dump complete */
	return 1;
}
1804 | | |
1805 | | /* parse a "show activity" CLI request. Returns 0 if it needs to continue, 1 if it |
1806 | | * wants to stop here. It sets a show_activity_ctx context where, if a specific |
1807 | | * thread is requested, it puts the thread number into ->thr otherwise sets it to |
1808 | | * -1. |
1809 | | */ |
1810 | | static int cli_parse_show_activity(char **args, char *payload, struct appctx *appctx, void *private) |
1811 | 0 | { |
1812 | 0 | struct show_activity_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); |
1813 | |
|
1814 | 0 | if (!cli_has_level(appctx, ACCESS_LVL_OPER)) |
1815 | 0 | return 1; |
1816 | | |
1817 | 0 | ctx->thr = -1; // show all by default |
1818 | 0 | if (*args[2]) |
1819 | 0 | ctx->thr = atoi(args[2]); |
1820 | |
|
1821 | 0 | if (ctx->thr < -1 || ctx->thr > global.nbthread) |
1822 | 0 | return cli_err(appctx, "Thread ID number must be between -1 and nbthread\n"); |
1823 | | |
1824 | 0 | return 0; |
1825 | 0 | } |
1826 | | |
/* config keyword parsers: registers the global "profiling.*" settings with
 * the configuration parser; the list is terminated by the all-NULL entry.
 */
static struct cfg_kw_list cfg_kws = {ILH, {
#ifdef USE_MEMORY_PROFILING
	{ CFG_GLOBAL, "profiling.memory", cfg_parse_prof_memory },
#endif
	{ CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
	{ 0, NULL, NULL }
}};

/* hook the table above into the config parser during the STG_REGISTER init stage */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
1837 | | |
/* register cli keywords: each entry maps a command prefix to its usage string,
 * parser, I/O handler and optional release callback; terminated by an empty
 * entry.
 */
static struct cli_kw_list cli_kws = {{ },{
	{ { "set", "profiling", NULL }, "set profiling <what> {auto|on|off} : enable/disable resource profiling (tasks,memory)", cli_parse_set_profiling, NULL },
	{ { "show", "activity", NULL }, "show activity [-1|0|thread_num] : show per-thread activity stats (for support/developers)", cli_parse_show_activity, cli_io_handler_show_activity, NULL },
	{ { "show", "profiling", NULL }, "show profiling [<what>|<#lines>|<opts>]*: show profiling state (all,status,tasks,memory)", cli_parse_show_profiling, cli_io_handler_show_profiling, cli_release_show_profiling },
	{ { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
	{{},}
}};

/* hook the table above into the CLI during the STG_REGISTER init stage */
INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);