Line data Source code
1 : /* TODO: Layering violation */
2 : #include "../../../shared_dev/commands/bench/bench.h"
3 :
4 : #include "../../fd_config.h"
5 : #include "../../../platform/fd_cap_chk.h"
6 : #include "../../../../disco/topo/fd_topo.h"
7 : #include "../../../../disco/metrics/fd_metrics.h"
8 : #include "../../../../util/log/fd_log.h"
9 :
10 : #include "helper.h"
11 :
12 : #include <unistd.h>
13 : #include <errno.h>
14 : #include <stdio.h>
15 : #include <stdlib.h>
16 : #include <signal.h>
17 : #include <sys/syscall.h>
18 : #include <sys/resource.h>
19 : #include <linux/capability.h>
20 : #include <sys/ioctl.h>
21 : #include <termios.h>
22 : #include "generated/monitor_seccomp.h"
23 :
/* Table of registered actions, defined elsewhere.  reconstruct_topo
   below walks it until it hits a NULL entry, looking for an action
   whose name matches the --topo argument. */
extern action_t * ACTIONS[];
25 :
26 : void
27 : monitor_cmd_args( int * pargc,
28 : char *** pargv,
29 0 : args_t * args ) {
30 0 : args->monitor.drain_output_fd = -1; /* only accessible to development commands, not the command line */
31 0 : args->monitor.dt_min = fd_env_strip_cmdline_long( pargc, pargv, "--dt-min", NULL, 6666667. );
32 0 : args->monitor.dt_max = fd_env_strip_cmdline_long( pargc, pargv, "--dt-max", NULL, 133333333. );
33 0 : args->monitor.duration = fd_env_strip_cmdline_long( pargc, pargv, "--duration", NULL, 0. );
34 0 : args->monitor.seed = fd_env_strip_cmdline_uint( pargc, pargv, "--seed", NULL, (uint)fd_tickcount() );
35 :
36 0 : args->monitor.with_bench = fd_env_strip_cmdline_contains( pargc, pargv, "--bench" );
37 0 : args->monitor.with_sankey = fd_env_strip_cmdline_contains( pargc, pargv, "--sankey" );
38 :
39 0 : char const * topo_name = fd_env_strip_cmdline_cstr( pargc, pargv, "--topo", NULL, "" );
40 :
41 0 : ulong topo_name_len = strlen( topo_name );
42 0 : if( FD_UNLIKELY( topo_name_len > sizeof(args->monitor.topo)-1 ) ) FD_LOG_ERR(( "Unknown --topo %s", topo_name ));
43 0 : fd_cstr_fini( fd_cstr_append_text( fd_cstr_init( args->monitor.topo ), topo_name, topo_name_len ) );
44 :
45 0 : if( FD_UNLIKELY( args->monitor.dt_min<0L ) ) FD_LOG_ERR(( "--dt-min should be positive" ));
46 0 : if( FD_UNLIKELY( args->monitor.dt_max<args->monitor.dt_min ) ) FD_LOG_ERR(( "--dt-max should be at least --dt-min" ));
47 0 : if( FD_UNLIKELY( args->monitor.duration<0L ) ) FD_LOG_ERR(( "--duration should be non-negative" ));
48 0 : }
49 :
50 : void
51 : monitor_cmd_perm( args_t * args FD_PARAM_UNUSED,
52 : fd_cap_chk_t * chk,
53 0 : config_t const * config ) {
54 0 : ulong mlock_limit = fd_topo_mlock( &config->topo );
55 :
56 0 : fd_cap_chk_raise_rlimit( chk, "monitor", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
57 :
58 0 : if( fd_sandbox_requires_cap_sys_admin( config->uid, config->gid ) )
59 0 : fd_cap_chk_cap( chk, "monitor", CAP_SYS_ADMIN, "call `unshare(2)` with `CLONE_NEWUSER` to sandbox the process in a user namespace" );
60 0 : if( FD_LIKELY( getuid() != config->uid ) )
61 0 : fd_cap_chk_cap( chk, "monitor", CAP_SETUID, "call `setresuid(2)` to switch uid to the sanbox user" );
62 0 : if( FD_LIKELY( getgid() != config->gid ) )
63 0 : fd_cap_chk_cap( chk, "monitor", CAP_SETGID, "call `setresgid(2)` to switch gid to the sandbox user" );
64 0 : }
65 :
/* tile_snap_t holds one diagnostic snapshot of a single tile, as
   filled in by tile_snap from the tile's metrics. */
typedef struct {
  ulong pid;       /* TILE PID gauge */
  ulong heartbeat; /* TILE HEARTBEAT gauge */
  ulong status;    /* TILE STATUS gauge; run_monitor skips tiles whose status is 2 (stopped) */

  ulong in_backp;  /* TILE IN_BACKPRESSURE gauge */
  ulong backp_cnt; /* TILE BACKPRESSURE_COUNT counter */

  ulong nvcsw;     /* TILE CONTEXT_SWITCH_VOLUNTARY_COUNT counter */
  ulong nivcsw;    /* TILE CONTEXT_SWITCH_INVOLUNTARY_COUNT counter */

  /* The 9 consecutive TILE REGIME_DURATION_NANOS counters.
     run_monitor groups them as [0..2] hkeep, [3]+[6] wait,
     [5] backp, [4]+[7] finish; tile_total_ticks sums all 9. */
  ulong regime_ticks[9];
} tile_snap_t;
79 :
/* link_snap_t holds one diagnostic snapshot of a single (consumer
   tile, in link) pair, as filled in by link_snap. */
typedef struct {
  ulong mcache_seq;          /* result of fd_mcache_seq_query on the link's mcache */

  ulong fseq_seq;            /* result of fd_fseq_query on the consumer's fseq for this link */

  /* Per-in-link metrics counters.  All are zero when the consumer does
     not poll the link.  Note that link_snap adds the filtered values
     into the tot values, so tot = consumed + filtered. */
  ulong fseq_diag_tot_cnt;   /* LINK CONSUMED_COUNT + FILTERED_COUNT */
  ulong fseq_diag_tot_sz;    /* LINK CONSUMED_SIZE_BYTES + FILTERED_SIZE_BYTES */
  ulong fseq_diag_filt_cnt;  /* LINK FILTERED_COUNT */
  ulong fseq_diag_filt_sz;   /* LINK FILTERED_SIZE_BYTES */
  ulong fseq_diag_ovrnp_cnt; /* LINK OVERRUN_POLLING_COUNT */
  ulong fseq_diag_ovrnr_cnt; /* LINK OVERRUN_READING_COUNT */
  ulong fseq_diag_slow_cnt;  /* LINK SLOW_COUNT */
} link_snap_t;
93 :
94 : static ulong
95 0 : tile_total_ticks( tile_snap_t * snap ) {
96 0 : ulong total = 0UL;
97 0 : for( ulong i=0UL; i<9UL; i++ ) total += snap->regime_ticks[ i ];
98 0 : return total;
99 0 : }
100 :
101 : static void
102 : tile_snap( tile_snap_t * snap_cur, /* Snapshot for each tile, indexed [0,tile_cnt) */
103 0 : fd_topo_t const * topo ) {
104 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
105 0 : tile_snap_t * snap = &snap_cur[ tile_idx ];
106 :
107 0 : fd_topo_tile_t const * tile = &topo->tiles[ tile_idx ];
108 0 : snap->heartbeat = fd_metrics_tile( tile->metrics )[ FD_METRICS_GAUGE_TILE_HEARTBEAT_OFF ];
109 0 : snap->status = fd_metrics_tile( tile->metrics )[ FD_METRICS_GAUGE_TILE_STATUS_OFF ];
110 :
111 0 : fd_metrics_register( tile->metrics );
112 :
113 0 : FD_COMPILER_MFENCE();
114 0 : snap->pid = FD_MGAUGE_GET( TILE, PID );
115 0 : snap->nvcsw = FD_MCNT_GET( TILE, CONTEXT_SWITCH_VOLUNTARY_COUNT );
116 0 : snap->nivcsw = FD_MCNT_GET( TILE, CONTEXT_SWITCH_INVOLUNTARY_COUNT );
117 0 : snap->in_backp = FD_MGAUGE_GET( TILE, IN_BACKPRESSURE );
118 0 : snap->backp_cnt = FD_MCNT_GET( TILE, BACKPRESSURE_COUNT );
119 0 : for( ulong i=0UL; i<9UL; i++ ) {
120 0 : snap->regime_ticks[ i ] = fd_metrics_tl[ MIDX(COUNTER, TILE, REGIME_DURATION_NANOS)+i ];
121 0 : }
122 0 : FD_COMPILER_MFENCE();
123 0 : }
124 0 : }
125 :
126 : static void
127 : link_snap( link_snap_t * snap_cur,
128 0 : fd_topo_t const * topo ) {
129 0 : ulong link_idx = 0UL;
130 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
131 0 : for( ulong in_idx=0UL; in_idx<topo->tiles[ tile_idx ].in_cnt; in_idx++ ) {
132 0 : link_snap_t * snap = &snap_cur[ link_idx ];
133 0 : fd_frag_meta_t const * mcache = topo->links[ topo->tiles[ tile_idx ].in_link_id[ in_idx ] ].mcache;
134 0 : ulong const * seq = (ulong const *)fd_mcache_seq_laddr_const( mcache );
135 0 : snap->mcache_seq = fd_mcache_seq_query( seq );
136 :
137 0 : ulong const * fseq = topo->tiles[ tile_idx ].in_link_fseq[ in_idx ];
138 0 : snap->fseq_seq = fd_fseq_query( fseq );
139 :
140 0 : ulong const * in_metrics = NULL;
141 0 : if( FD_LIKELY( topo->tiles[ tile_idx ].in_link_poll[ in_idx ] ) ) {
142 0 : in_metrics = (ulong const *)fd_metrics_link_in( topo->tiles[ tile_idx ].metrics, in_idx );
143 0 : }
144 :
145 0 : FD_COMPILER_MFENCE();
146 0 : if( FD_LIKELY( in_metrics ) ) {
147 0 : snap->fseq_diag_tot_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_CONSUMED_COUNT_OFF ];
148 0 : snap->fseq_diag_tot_sz = in_metrics[ FD_METRICS_COUNTER_LINK_CONSUMED_SIZE_BYTES_OFF ];
149 0 : snap->fseq_diag_filt_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
150 0 : snap->fseq_diag_filt_sz = in_metrics[ FD_METRICS_COUNTER_LINK_FILTERED_SIZE_BYTES_OFF ];
151 0 : snap->fseq_diag_ovrnp_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_OVERRUN_POLLING_COUNT_OFF ];
152 0 : snap->fseq_diag_ovrnr_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_OVERRUN_READING_COUNT_OFF ];
153 0 : snap->fseq_diag_slow_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_SLOW_COUNT_OFF ];
154 0 : } else {
155 0 : snap->fseq_diag_tot_cnt = 0UL;
156 0 : snap->fseq_diag_tot_sz = 0UL;
157 0 : snap->fseq_diag_filt_cnt = 0UL;
158 0 : snap->fseq_diag_filt_sz = 0UL;
159 0 : snap->fseq_diag_ovrnp_cnt = 0UL;
160 0 : snap->fseq_diag_ovrnr_cnt = 0UL;
161 0 : snap->fseq_diag_slow_cnt = 0UL;
162 0 : }
163 0 : FD_COMPILER_MFENCE();
164 0 : snap->fseq_diag_tot_cnt += snap->fseq_diag_filt_cnt;
165 0 : snap->fseq_diag_tot_sz += snap->fseq_diag_filt_sz;
166 0 : link_idx++;
167 0 : }
168 0 : }
169 0 : }
170 :
171 : /**********************************************************************/
172 :
173 0 : static void write_stdout( char * buf, ulong buf_sz ) {
174 0 : ulong written = 0;
175 0 : ulong total = buf_sz;
176 0 : while( written < total ) {
177 0 : long n = write( STDOUT_FILENO, buf + written, total - written );
178 0 : if( FD_UNLIKELY( n < 0 ) ) {
179 0 : if( errno == EINTR ) continue;
180 0 : FD_LOG_ERR(( "error writing to stdout (%i-%s)", errno, fd_io_strerror( errno ) ));
181 0 : }
182 0 : written += (ulong)n;
183 0 : }
184 0 : }
185 :
/* Checked by run_monitor after each frame to decide whether to stop.
   NOTE(review): nothing visible in this file ever assigns it, so it
   stays 0 -- confirm whether it is still needed. */
static int stop1 = 0;

/* Size in bytes of each of the two text buffers below */
#define FD_MONITOR_TEXT_BUF_SZ 131072
/* Output buffer: a frame is assembled here and flushed by write_stdout */
static char buffer[ FD_MONITOR_TEXT_BUF_SZ ];
/* Scratch buffer: drain_to_buffer reads raw log output into here */
static char buffer2[ FD_MONITOR_TEXT_BUF_SZ ];
191 :
192 : static void
193 : drain_to_buffer( char ** buf,
194 : ulong * buf_sz,
195 0 : int fd ) {
196 0 : while(1) {
197 0 : long nread = read( fd, buffer2, *buf_sz );
198 0 : if( FD_LIKELY( nread == -1 && errno == EAGAIN ) ) break; /* no data available */
199 0 : else if( FD_UNLIKELY( nread == -1 ) ) FD_LOG_ERR(( "read() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
200 :
201 0 : char * ptr = buffer2;
202 0 : char * next;
203 0 : while(( next = memchr( ptr, '\n', (ulong)nread - (ulong)(ptr - buffer2) ))) {
204 0 : ulong len = (ulong)(next - ptr);
205 0 : if( FD_UNLIKELY( *buf_sz < len ) ) {
206 0 : write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - *buf_sz );
207 0 : *buf = buffer;
208 0 : *buf_sz = FD_MONITOR_TEXT_BUF_SZ;
209 0 : }
210 0 : fd_memcpy( *buf, ptr, len );
211 0 : *buf += len;
212 0 : *buf_sz -= len;
213 :
214 0 : if( FD_UNLIKELY( *buf_sz < sizeof(TEXT_NEWLINE)-1 ) ) {
215 0 : write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - *buf_sz );
216 0 : *buf = buffer;
217 0 : *buf_sz = FD_MONITOR_TEXT_BUF_SZ;
218 0 : }
219 0 : fd_memcpy( *buf, TEXT_NEWLINE, sizeof(TEXT_NEWLINE)-1 );
220 0 : *buf += sizeof(TEXT_NEWLINE)-1;
221 0 : *buf_sz -= sizeof(TEXT_NEWLINE)-1;
222 :
223 0 : ptr = next + 1;
224 0 : }
225 0 : }
226 0 : }
227 :
/* Terminal attributes of stdin as captured by run_monitor (TCGETS)
   before it changes them. */
static struct termios termios_backup;

/* restore_terminal applies termios_backup to stdin again.  Failures
   are deliberately ignored; run_monitor registers this with atexit. */
static void
restore_terminal( void ) {
  int rc = ioctl( STDIN_FILENO, TCSETS, &termios_backup );
  (void)rc;
}
234 :
235 : static void
236 : run_monitor( config_t const * config,
237 : int drain_output_fd,
238 : int with_sankey,
239 : long dt_min,
240 : long dt_max,
241 : long duration,
242 0 : uint seed ) {
243 0 : fd_topo_t const * topo = &config->topo;
244 :
245 : /* Setup local objects used by this app */
246 0 : fd_rng_t _rng[1];
247 0 : fd_rng_t * rng = fd_rng_join( fd_rng_new( _rng, seed, 0UL ) );
248 :
249 0 : tile_snap_t * tile_snap_prv = (tile_snap_t *)fd_alloca( alignof(tile_snap_t), sizeof(tile_snap_t)*2UL*topo->tile_cnt );
250 0 : if( FD_UNLIKELY( !tile_snap_prv ) ) FD_LOG_ERR(( "fd_alloca failed" )); /* Paranoia */
251 0 : tile_snap_t * tile_snap_cur = tile_snap_prv + topo->tile_cnt;
252 :
253 0 : ulong link_cnt = 0UL;
254 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) link_cnt += topo->tiles[ tile_idx ].in_cnt;
255 0 : link_snap_t * link_snap_prv = (link_snap_t *)fd_alloca( alignof(link_snap_t), sizeof(link_snap_t)*2UL*link_cnt );
256 0 : if( FD_UNLIKELY( !link_snap_prv ) ) FD_LOG_ERR(( "fd_alloca failed" )); /* Paranoia */
257 0 : link_snap_t * link_snap_cur = link_snap_prv + link_cnt;
258 :
259 : /* Get the initial reference diagnostic snapshot */
260 0 : tile_snap( tile_snap_prv, topo );
261 0 : link_snap( link_snap_prv, topo );
262 0 : long then = fd_log_wallclock();
263 :
264 : /* Monitor for duration ns. Note that for duration==0, this
265 : will still do exactly one pretty print. */
266 0 : FD_LOG_NOTICE(( "monitoring --dt-min %li ns, --dt-max %li ns, --duration %li ns, --seed %u", dt_min, dt_max, duration, seed ));
267 :
268 0 : long stop = then + duration;
269 0 : if( duration == 0 ) stop = LONG_MAX;
270 :
271 0 : #define PRINT( ... ) do { \
272 0 : int n = snprintf( buf, buf_sz, __VA_ARGS__ ); \
273 0 : if( FD_UNLIKELY( n<0 ) ) FD_LOG_ERR(( "snprintf failed" )); \
274 0 : if( FD_UNLIKELY( (ulong)n>=buf_sz ) ) FD_LOG_ERR(( "snprintf truncated" )); \
275 0 : buf += n; buf_sz -= (ulong)n; \
276 0 : } while(0)
277 0 : int monitor_pane = 0;
278 :
279 : /* Restore original terminal attributes at exit */
280 0 : atexit( restore_terminal );
281 0 : if( FD_UNLIKELY( ioctl( STDIN_FILENO, TCGETS, &termios_backup ) ) ) {
282 0 : FD_LOG_ERR(( "ioctl(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
283 0 : }
284 :
285 : /* Disable character echo and line buffering */
286 0 : struct termios term = termios_backup;
287 0 : term.c_lflag &= (tcflag_t)~(ICANON | ECHO);
288 0 : if( FD_UNLIKELY( ioctl( STDIN_FILENO, TCSETS, &term ) ) ) {
289 0 : FD_LOG_WARNING(( "ioctl(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
290 0 : }
291 :
292 0 : for(;;) {
293 : /* Wait a somewhat randomized amount and then make a diagnostic
294 : snapshot */
295 0 : fd_log_wait_until( then + dt_min + (long)fd_rng_ulong_roll( rng, 1UL+(ulong)(dt_max-dt_min) ) );
296 :
297 0 : tile_snap( tile_snap_cur, topo );
298 0 : link_snap( link_snap_cur, topo );
299 0 : long now = fd_log_wallclock();
300 :
301 : /* Pretty print a comparison between this diagnostic snapshot and
302 : the previous one. */
303 :
304 0 : char * buf = buffer;
305 0 : ulong buf_sz = FD_MONITOR_TEXT_BUF_SZ;
306 :
307 0 : PRINT( "\033[2J\033[H" );
308 :
309 : /* drain any firedancer log messages into the terminal */
310 0 : if( FD_UNLIKELY( drain_output_fd >= 0 ) ) drain_to_buffer( &buf, &buf_sz, drain_output_fd );
311 0 : if( FD_UNLIKELY( buf_sz < FD_MONITOR_TEXT_BUF_SZ / 2 ) ) {
312 : /* make sure there's enough space to print the whole monitor in one go */
313 0 : write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - buf_sz );
314 0 : buf = buffer;
315 0 : buf_sz = FD_MONITOR_TEXT_BUF_SZ;
316 0 : }
317 :
318 0 : if( FD_UNLIKELY( drain_output_fd >= 0 ) ) PRINT( TEXT_NEWLINE );
319 0 : int c = fd_getchar();
320 0 : if( FD_UNLIKELY( c=='\t' ) ) monitor_pane = !monitor_pane;
321 0 : if( FD_UNLIKELY( c=='\x04' ) ) break; /* Ctrl-D */
322 :
323 0 : long dt = now-then;
324 :
325 0 : char now_cstr[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
326 0 : if( !monitor_pane ) {
327 0 : PRINT( "snapshot for %s | Use TAB to switch panes" TEXT_NEWLINE, fd_log_wallclock_cstr( now, now_cstr ) );
328 0 : PRINT( " tile | pid | stale | heart | nivcsw | nvcsw | in backp | backp cnt | %% hkeep | %% wait | %% backp | %% finish" TEXT_NEWLINE );
329 0 : PRINT( "---------+---------+------------+-------+---------------------+---------------------+----------+---------------------+----------+----------+----------+----------" TEXT_NEWLINE );
330 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
331 0 : tile_snap_t * prv = &tile_snap_prv[ tile_idx ];
332 0 : tile_snap_t * cur = &tile_snap_cur[ tile_idx ];
333 0 : if( cur->status==2UL ) continue; /* stopped tile */
334 0 : PRINT( " %7s", topo->tiles[ tile_idx ].name );
335 0 : PRINT( " | %7lu", cur->pid );
336 0 : PRINT( " | " ); printf_stale ( &buf, &buf_sz, (long)(now - (long)cur->heartbeat), 1e8 /* 100 millis */ );
337 0 : PRINT( " | " ); printf_heart ( &buf, &buf_sz, (long)cur->heartbeat, (long)prv->heartbeat );
338 0 : PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->nivcsw, prv->nivcsw );
339 0 : PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->nvcsw, prv->nvcsw );
340 0 : PRINT( " | " ); printf_err_bool( &buf, &buf_sz, cur->in_backp, prv->in_backp );
341 0 : PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->backp_cnt, prv->backp_cnt );
342 :
343 0 : ulong cur_hkeep_ticks = cur->regime_ticks[0]+cur->regime_ticks[1]+cur->regime_ticks[2];
344 0 : ulong prv_hkeep_ticks = prv->regime_ticks[0]+prv->regime_ticks[1]+prv->regime_ticks[2];
345 :
346 0 : ulong cur_wait_ticks = cur->regime_ticks[3]+cur->regime_ticks[6];
347 0 : ulong prv_wait_ticks = prv->regime_ticks[3]+prv->regime_ticks[6];
348 :
349 0 : ulong cur_backp_ticks = cur->regime_ticks[5];
350 0 : ulong prv_backp_ticks = prv->regime_ticks[5];
351 :
352 0 : ulong cur_processing_ticks = cur->regime_ticks[4]+cur->regime_ticks[7];
353 0 : ulong prv_processing_ticks = prv->regime_ticks[4]+prv->regime_ticks[7];
354 :
355 0 : PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_hkeep_ticks, prv_hkeep_ticks, 0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
356 0 : PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_wait_ticks, prv_wait_ticks, 0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
357 0 : PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_backp_ticks, prv_backp_ticks, 0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
358 0 : PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_processing_ticks, prv_processing_ticks, 0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
359 0 : PRINT( TEXT_NEWLINE );
360 0 : }
361 0 : } else {
362 0 : PRINT( " link | tot TPS | tot bps | uniq TPS | uniq bps | ha tr%% | uniq bw%% | filt tr%% | filt bw%% | ovrnp cnt | ovrnr cnt | slow cnt | tx seq" TEXT_NEWLINE );
363 0 : PRINT( "------------------+----------+----------+----------+----------+----------+----------+----------+----------+---------------------+---------------------+---------------------+-------------------" TEXT_NEWLINE );
364 :
365 0 : ulong link_idx = 0UL;
366 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
367 0 : for( ulong in_idx=0UL; in_idx<topo->tiles[ tile_idx ].in_cnt; in_idx++ ) {
368 0 : link_snap_t * prv = &link_snap_prv[ link_idx ];
369 0 : link_snap_t * cur = &link_snap_cur[ link_idx ];
370 :
371 0 : fd_topo_link_t link = topo->links[ topo->tiles[ tile_idx ].in_link_id[ in_idx ] ];
372 0 : ulong producer_tile_id = fd_topo_find_link_producer( topo, &link );
373 0 : FD_TEST( producer_tile_id != ULONG_MAX );
374 0 : char const * producer = topo->tiles[ producer_tile_id ].name;
375 0 : PRINT( " %7s->%-7s", producer, topo->tiles[ tile_idx ].name );
376 0 : ulong cur_raw_cnt = /* cur->cnc_diag_ha_filt_cnt + */ cur->fseq_diag_tot_cnt;
377 0 : ulong cur_raw_sz = /* cur->cnc_diag_ha_filt_sz + */ cur->fseq_diag_tot_sz;
378 0 : ulong prv_raw_cnt = /* prv->cnc_diag_ha_filt_cnt + */ prv->fseq_diag_tot_cnt;
379 0 : ulong prv_raw_sz = /* prv->cnc_diag_ha_filt_sz + */ prv->fseq_diag_tot_sz;
380 :
381 0 : PRINT( " | " ); printf_rate( &buf, &buf_sz, 1e9, 0., cur_raw_cnt, prv_raw_cnt, dt );
382 0 : PRINT( " | " ); printf_rate( &buf, &buf_sz, 8e9, 0., cur_raw_sz, prv_raw_sz, dt ); /* Assumes sz incl framing */
383 0 : PRINT( " | " ); printf_rate( &buf, &buf_sz, 1e9, 0., cur->fseq_diag_tot_cnt, prv->fseq_diag_tot_cnt, dt );
384 0 : PRINT( " | " ); printf_rate( &buf, &buf_sz, 8e9, 0., cur->fseq_diag_tot_sz, prv->fseq_diag_tot_sz, dt ); /* Assumes sz incl framing */
385 :
386 0 : PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_tot_cnt, prv->fseq_diag_tot_cnt, 0.,
387 0 : cur_raw_cnt, prv_raw_cnt, DBL_MIN );
388 0 : PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_tot_sz, prv->fseq_diag_tot_sz, 0.,
389 0 : cur_raw_sz, prv_raw_sz, DBL_MIN ); /* Assumes sz incl framing */
390 0 : PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_filt_cnt, prv->fseq_diag_filt_cnt, 0.,
391 0 : cur->fseq_diag_tot_cnt, prv->fseq_diag_tot_cnt, DBL_MIN );
392 0 : PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_filt_sz, prv->fseq_diag_filt_sz, 0.,
393 0 : cur->fseq_diag_tot_sz, prv->fseq_diag_tot_sz, DBL_MIN ); /* Assumes sz incl framing */
394 :
395 0 : PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnp_cnt, prv->fseq_diag_ovrnp_cnt );
396 0 : PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnr_cnt, prv->fseq_diag_ovrnr_cnt );
397 0 : PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_slow_cnt, prv->fseq_diag_slow_cnt );
398 0 : PRINT( " | " ); printf_seq( &buf, &buf_sz, cur->mcache_seq, prv->mcache_seq );
399 0 : PRINT( TEXT_NEWLINE );
400 0 : link_idx++;
401 0 : }
402 0 : }
403 0 : }
404 0 : if( FD_UNLIKELY( with_sankey ) ) {
405 : /* We only need to count from one of the benchs, since they both receive
406 : all of the transactions. */
407 0 : fd_topo_tile_t const * benchs = &topo->tiles[ fd_topo_find_tile( topo, "benchs", 0UL ) ];
408 0 : ulong fseq_sum = 0UL;
409 0 : for( ulong i=0UL; i<benchs->in_cnt; i++ ) {
410 0 : ulong const * fseq = benchs->in_link_fseq[ i ];
411 0 : fseq_sum += fd_fseq_query( fseq );
412 0 : }
413 :
414 0 : ulong net_tile_idx = fd_topo_find_tile( topo, "net", 0UL );
415 0 : if( FD_UNLIKELY( net_tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "net tile not found" ));
416 :
417 0 : fd_topo_tile_t const * net = &topo->tiles[ net_tile_idx ];
418 0 : ulong net_sent = fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ net->out_link_id[ 0 ] ].mcache ) );
419 0 : net_sent += fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ net->out_link_id[ 1 ] ].mcache ) );
420 0 : net_sent = fseq_sum;
421 :
422 0 : ulong verify_failed = 0UL;
423 0 : ulong verify_sent = 0UL;
424 0 : ulong verify_overrun = 0UL;
425 0 : for( ulong i=0UL; i<config->layout.verify_tile_count; i++ ) {
426 0 : fd_topo_tile_t const * verify = &topo->tiles[ fd_topo_find_tile( topo, "verify", i ) ];
427 0 : verify_overrun += fd_metrics_link_in( verify->metrics, 0UL )[ FD_METRICS_COUNTER_LINK_OVERRUN_POLLING_FRAG_COUNT_OFF ] / config->layout.verify_tile_count;
428 0 : verify_failed += fd_metrics_link_in( verify->metrics, 0UL )[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
429 0 : verify_sent += fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ verify->out_link_id[ 0 ] ].mcache ) );
430 0 : }
431 :
432 0 : fd_topo_tile_t const * dedup = &topo->tiles[ fd_topo_find_tile( topo, "dedup", 0UL ) ];
433 0 : ulong dedup_failed = 0UL;
434 0 : for( ulong i=0UL; i<config->layout.verify_tile_count; i++) {
435 0 : dedup_failed += fd_metrics_link_in( dedup->metrics, i )[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
436 0 : }
437 0 : ulong dedup_sent = fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ dedup->out_link_id[ 0 ] ].mcache ) );
438 :
439 0 : fd_topo_tile_t const * pack = &topo->tiles[ fd_topo_find_tile( topo, "pack", 0UL ) ];
440 0 : volatile ulong * pack_metrics = fd_metrics_tile( pack->metrics );
441 0 : ulong pack_invalid = pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_INSTR_ACCT_CNT_OFF ] +
442 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_WRITE_SYSVAR_OFF ] +
443 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_ESTIMATION_FAIL_OFF ] +
444 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_TOO_LARGE_OFF ] +
445 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_EXPIRED_OFF ] +
446 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_ADDR_LUT_OFF ] +
447 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_UNAFFORDABLE_OFF ] +
448 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_DUPLICATE_OFF ] +
449 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_PRIORITY_OFF ] +
450 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_NONVOTE_REPLACE_OFF ] +
451 0 : pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_VOTE_REPLACE_OFF ];
452 0 : ulong pack_overrun = pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_DROPPED_FROM_EXTRA_OFF ];
453 0 : ulong pack_sent = pack_metrics[ FD_METRICS_HISTOGRAM_PACK_TOTAL_TRANSACTIONS_PER_MICROBLOCK_COUNT_OFF + FD_HISTF_BUCKET_CNT ];
454 :
455 0 : static ulong last_fseq_sum;
456 0 : static ulong last_net_sent;
457 0 : static ulong last_verify_overrun;
458 0 : static ulong last_verify_failed;
459 0 : static ulong last_verify_sent;
460 0 : static ulong last_dedup_failed;
461 0 : static ulong last_dedup_sent;
462 0 : static ulong last_pack_overrun;
463 0 : static ulong last_pack_invalid;
464 0 : static ulong last_pack_sent;
465 :
466 0 : PRINT( "TXNS SENT: %-10lu" TEXT_NEWLINE, fseq_sum );
467 0 : PRINT( "NET TXNS SENT: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, net_sent, 100.0 * (double)net_sent/(double)fseq_sum, 100.0 * (double)(net_sent - last_net_sent)/(double)(fseq_sum - last_fseq_sum) );
468 0 : PRINT( "VERIFY OVERRUN: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, verify_overrun, 100.0 * (double)verify_overrun/(double)net_sent, 100.0 * (double)(verify_overrun - last_verify_overrun)/(double)(net_sent - last_net_sent) );
469 0 : PRINT( "VERIFY FAILED: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, verify_failed, 100.0 * (double)verify_failed/(double)net_sent, 100.0 * (double)(verify_failed - last_verify_failed)/(double)(net_sent - last_net_sent) );
470 0 : PRINT( "VERIFY SENT: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, verify_sent, 100.0 * (double)verify_sent/(double)net_sent, 100.0 * (double)(verify_sent - last_verify_sent)/(double)(net_sent - last_net_sent) );
471 0 : PRINT( "DEDUP FAILED: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, dedup_failed, 100.0 * (double)dedup_failed/(double)verify_sent, 100.0 * (double)(dedup_failed - last_dedup_failed)/(double)(verify_sent - last_verify_sent) );
472 0 : PRINT( "DEDUP SENT: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, dedup_sent, 100.0 * (double)dedup_sent/(double)verify_sent, 100.0 * (double)(dedup_sent - last_dedup_sent)/(double)(verify_sent - last_verify_sent) );
473 0 : PRINT( "PACK OVERRUN: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, pack_overrun, 100.0 * (double)pack_overrun/(double)dedup_sent, 100.0 * (double)(pack_overrun - last_pack_overrun)/(double)(dedup_sent - last_dedup_sent) );
474 0 : PRINT( "PACK INVALID: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, pack_invalid, 100.0 * (double)pack_invalid/(double)dedup_sent, 100.0 * (double)(pack_invalid - last_pack_invalid)/(double)(dedup_sent - last_dedup_sent) );
475 0 : PRINT( "PACK SENT: %-10lu %-5.2lf%% %-5.2lf%%" TEXT_NEWLINE, pack_sent, 100.0 * (double)pack_sent/(double)dedup_sent, 100.0 * (double)(pack_sent - last_pack_sent)/(double)(dedup_sent - last_dedup_sent) );
476 :
477 0 : last_fseq_sum = fseq_sum;
478 0 : last_net_sent = net_sent;
479 0 : last_verify_overrun = verify_overrun;
480 0 : last_verify_failed = verify_failed;
481 0 : last_verify_sent = verify_sent;
482 0 : last_dedup_failed = dedup_failed;
483 0 : last_dedup_sent = dedup_sent;
484 0 : last_pack_overrun = pack_overrun;
485 0 : last_pack_invalid = pack_invalid;
486 0 : last_pack_sent = pack_sent;
487 0 : }
488 :
489 : /* write entire monitor output buffer */
490 0 : write_stdout( buffer, sizeof(buffer) - buf_sz );
491 :
492 0 : if( FD_UNLIKELY( stop1 || (now-stop)>=0L ) ) {
493 : /* Stop once we've been monitoring for duration ns */
494 0 : break;
495 0 : }
496 :
497 0 : then = now;
498 0 : tile_snap_t * tmp = tile_snap_prv; tile_snap_prv = tile_snap_cur; tile_snap_cur = tmp;
499 0 : link_snap_t * tmp2 = link_snap_prv; link_snap_prv = link_snap_cur; link_snap_cur = tmp2;
500 0 : }
501 0 : }
502 :
503 : static void
504 0 : signal1( int sig ) {
505 0 : (void)sig;
506 0 : exit( 0 ); /* gracefully exit */
507 0 : }
508 :
509 : void
510 : reconstruct_topo( config_t * config,
511 0 : char const * topo_name ) {
512 0 : if( !topo_name[0] ) return; /* keep default action topo */
513 :
514 0 : action_t const * selected = NULL;
515 0 : for( action_t ** a=ACTIONS; *a; a++ ) {
516 0 : action_t const * action = *a;
517 0 : if( 0==strcmp( action->name, topo_name ) ) {
518 0 : selected = action;
519 0 : break;
520 0 : }
521 0 : }
522 :
523 0 : if( !selected ) FD_LOG_ERR(( "Unknown --topo %s", topo_name ));
524 0 : if( !selected->topo ) FD_LOG_ERR(( "Cannot recover topology for --topo %s", topo_name ));
525 :
526 0 : selected->topo( config );
527 0 : }
528 :
/* monitor_cmd_fn is the entry point of the monitor command.  In
   order, it

     1. rebuilds the topology selected by --topo (if any) and, with
        --bench, adds the bench topology on top,
     2. installs signal1 as handler for SIGTERM and SIGINT,
     3. collects the file descriptors that must stay open,
     4. joins the topology workspaces read-only,
     5. builds the seccomp filter and closes the log lock fd,
     6. enters the sandbox (or, when sandboxing is disabled in the
        config, only switches uid / gid),
     7. fills the topology and runs the monitor loop.

   Does not return: the process exits with status 0 once run_monitor
   returns. */
void
monitor_cmd_fn( args_t *   args,
                config_t * config ) {
  reconstruct_topo( config, args->monitor.topo );

  if( FD_UNLIKELY( args->monitor.with_bench ) ) {
    add_bench_topo( &config->topo,
                    config->development.bench.affinity,
                    config->development.bench.benchg_tile_count,
                    config->development.bench.benchs_tile_count,
                    0UL,
                    0,
                    0.0f,
                    0.0f,
                    0UL,
                    0,
                    0U,
                    0,
                    0U,
                    1,
                    !config->is_firedancer );
  }

  /* Both termination signals end the process through signal1 */
  struct sigaction sa = {
    .sa_handler = signal1,
    .sa_flags   = 0,
  };
  if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
    FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
  if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
    FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));

  /* At most 5 entries: the three standard streams plus the optional
     logfile and drain fds */
  int allow_fds[ 5 ];
  ulong allow_fds_cnt = 0;
  allow_fds[ allow_fds_cnt++ ] = 0; /* stdin */
  allow_fds[ allow_fds_cnt++ ] = 1; /* stdout */
  allow_fds[ allow_fds_cnt++ ] = 2; /* stderr */
  if( FD_LIKELY( fd_log_private_logfile_fd()!=-1 ) )
    allow_fds[ allow_fds_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
  if( FD_UNLIKELY( args->monitor.drain_output_fd!=-1 ) )
    allow_fds[ allow_fds_cnt++ ] = args->monitor.drain_output_fd; /* maybe we are interposing firedancer log output with the monitor */

  fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY, FD_TOPO_CORE_DUMP_LEVEL_DISABLED );

  /* The filter is given the logfile fd and the drain fd; a missing
     drain fd is passed as (uint)-1 */
  struct sock_filter seccomp_filter[ 128UL ];
  uint drain_output_fd = args->monitor.drain_output_fd >= 0 ? (uint)args->monitor.drain_output_fd : (uint)-1;
  populate_sock_filter_policy_monitor( 128UL, seccomp_filter, (uint)fd_log_private_logfile_fd(), drain_output_fd );

  if( FD_UNLIKELY( close( config->log.lock_fd ) ) ) FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) ));

  if( FD_LIKELY( config->development.sandbox ) ) {
    fd_sandbox_enter( config->uid,
                      config->gid,
                      0,
                      0,
                      0,
                      1, /* Keep controlling terminal for main so it can receive Ctrl+C */
                      0,
                      0UL,
                      0UL,
                      0UL,
                      0UL,
                      allow_fds_cnt,
                      allow_fds,
                      sock_filter_policy_monitor_instr_cnt,
                      seccomp_filter );
  } else {
    /* Sandbox disabled in the config: only drop to the configured
       uid / gid */
    fd_sandbox_switch_uid_gid( config->uid, config->gid );
  }

  fd_topo_fill( &config->topo );

  run_monitor( config,
               args->monitor.drain_output_fd,
               args->monitor.with_sankey,
               args->monitor.dt_min,
               args->monitor.dt_max,
               args->monitor.duration,
               args->monitor.seed );

  exit( 0 ); /* gracefully exit */
}
611 :
/* Action descriptor of the "monitor" command, tying its name and
   description to the argument parser, permission check and entry
   point defined above. */
action_t fd_action_monitor = {
  .name           = "monitor",
  .args           = monitor_cmd_args,  /* command line parsing */
  .fn             = monitor_cmd_fn,    /* command entry point */
  .require_config = 1,
  .perm           = monitor_cmd_perm,  /* required privileges */
  .description    = "Monitor a locally running Firedancer instance with a terminal GUI",
};
|