LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 1756 0.0 %
Date: 2026-03-19 18:19:27 Functions: 0 69 0.0 %

          Line data    Source code
       1             : #include <stdio.h> /* for vsnprintf */
       2             : #include <stdarg.h> /* for va_list */
       3             : 
       4             : #include "fd_sched.h"
       5             : #include "fd_execrp.h" /* for poh hash value */
       6             : #include "../../util/math/fd_stat.h" /* for sorted search */
       7             : #include "../../disco/fd_disco_base.h" /* for FD_MAX_TXN_PER_SLOT */
       8             : #include "../../disco/metrics/fd_metrics.h" /* for fd_metrics_convert_seconds_to_ticks, etc. */
       9             : #include "../../discof/poh/fd_poh.h" /* for MAX_SKIPPED_TICKS */
      10             : #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */
      11             : #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */
      12             : 
      13           0 : #define FD_SCHED_MAX_STAGING_LANES_LOG     (2) /* log2 of the staging lane count */
      14           0 : #define FD_SCHED_MAX_STAGING_LANES         (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG) /* ==4; sizes staged_head_bank_idx */
      15             : #define FD_SCHED_MAX_EXEC_TILE_CNT         (64UL) /* sizes tile_to_bank_idx */
      16           0 : #define FD_SCHED_MAX_PRINT_BUF_SZ          (2UL<<20) /* 2 MiB; sizes print_buf */
      17             : 
      18             : #define FD_SCHED_MAX_MBLK_PER_SLOT             (MAX_SKIPPED_TICKS)
      19           0 : #define FD_SCHED_MAX_POH_HASHES_PER_TASK       (4096UL) /* This seems to be the sweet spot. */
      20             : 
      21             : /* 64 ticks per slot, and a single gigantic microblock containing min
      22             :    size transactions. */
      23             : FD_STATIC_ASSERT( FD_MAX_TXN_PER_SLOT_SHRED==((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT-65UL*sizeof(fd_microblock_hdr_t))/FD_TXN_MIN_SERIALIZED_SZ), max_txn_per_slot_shred );
      24             : 
      25             : /* We size the buffer to be able to hold residual data from the previous
      26             :    FEC set that only becomes parseable after the next FEC set is
      27             :    ingested, as well as the incoming FEC set.  The largest minimally
      28             :    parseable unit of data is a transaction.  So that much data may
      29             :    straddle FEC set boundaries.  Other minimally parseable units of data
      30             :    include the microblock header and the microblock count within a
      31             :    batch. */
      32           0 : #define FD_SCHED_MAX_PAYLOAD_PER_FEC       (FD_STORE_DATA_MAX)
      33             : #define FD_SCHED_MAX_FEC_BUF_SZ            (FD_SCHED_MAX_PAYLOAD_PER_FEC+FD_TXN_MTU) /* one FEC set plus one txn of residual; sizes fec_buf */
      34             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(fd_microblock_hdr_t), resize buffer for residual data );
      35             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(ulong),               resize buffer for residual data );
      36             : 
                       /* Upper bounds on what a single FEC set can contain, computed as
                          ceil( max payload / smallest unit size ). */
      37           0 : #define FD_SCHED_MAX_TXN_PER_FEC           ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */
      38           0 : #define FD_SCHED_MAX_MBLK_PER_FEC          ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/sizeof(fd_microblock_hdr_t)+1UL) /* 1334 */
      39             : 
      40             : FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits );
      41             : FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH,       limits );
      42             : 
      43           0 : #define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */
      44             : 
                       /* Internal status codes.  NOTE(review): presumably the int return
                          values of the FD_WARN_UNUSED helpers declared below (fd_sched_parse,
                          fd_sched_parse_txn, maybe_mixin) -- confirm against their bodies. */
      45           0 : #define FD_SCHED_OK          (0)
      46           0 : #define FD_SCHED_AGAIN_LATER (1)
      47           0 : #define FD_SCHED_BAD_BLOCK   (2)
      48             : 
      49             : 
      50             : /* Structs. */
      51             : 
      52             : struct fd_sched_mblk {
                         /* One parsed microblock tracked for PoH verification.  Elements
                            live in sched->mblk_pool and are chained by pool index through
                            next, either on one of a block's mblk_slist_t queues or on the
                            pool free list (see free_mblk). */
      53             :   ulong start_txn_idx; /* inclusive parse idx */
      54             :   ulong end_txn_idx;   /* non-inclusive parse idx */
      55             :   ulong curr_txn_idx;  /* next txn to mixin, parse idx */
      56             :   ulong hashcnt;       /* number of pure hashes, excluding final mixin */
      57             :   ulong curr_hashcnt;  /* presumably pure hashes completed so far, out of hashcnt -- TODO confirm */
      58             :   fd_hash_t end_hash[ 1 ];  /* presumably the expected PoH hash at the end of this microblock -- TODO confirm */
      59             :   fd_hash_t curr_hash[ 1 ]; /* presumably the running PoH hash while hashing/mixin is in progress -- TODO confirm */
      60             :   uint curr_sig_cnt;
      61             :   uint next;   /* pool index of the next element in whichever list currently holds this one */
      62             :   int is_tick; /* presumably nonzero for a tick microblock -- TODO confirm */
      63             : };
      64             : typedef struct fd_sched_mblk fd_sched_mblk_t;
      65             : 
      66             : #define SLIST_NAME  mblk_slist      /* generates mblk_slist_t and the mblk_slist_* API used below */
      67             : #define SLIST_ELE_T fd_sched_mblk_t /* list element type */
      68           0 : #define SLIST_IDX_T uint            /* type of the index stored in the link field */
      69           0 : #define SLIST_NEXT  next            /* element field used as the link */
      70             : #include "../../util/tmpl/fd_slist.c"
      71             : 
      72             : #define SET_NAME txn_bitset         /* generates txn_bitset_t and txn_bitset_word_cnt used in fd_sched_t */
      73             : #define SET_MAX  FD_SCHED_MAX_DEPTH /* presumably the number of bits in the set -- TODO confirm against fd_set.c */
      74             : #include "../../util/tmpl/fd_set.c"
      75             : 
      76             : struct fd_sched_block {
                         /* Per-block scheduler state.  Holds the block's position in the
                            fork tree (parent/child/sibling indices into sched->block_pool),
                            progress counters for exec/sigverify/PoH work, PoH verification
                            state, FEC parser state, and lifecycle flags.  The helper
                            predicates block_is_*() / block_should_*() below are pure
                            functions of these fields. */
      77             :   ulong               slot;
      78             :   ulong               parent_slot;
      79             :   ulong               parent_idx;  /* Index of the parent in the pool. */
      80             :   ulong               child_idx;   /* Index of the left-child in the pool. */
      81             :   ulong               sibling_idx; /* Index of the right-sibling in the pool. */
      82             : 
      83             :   /* Counters. */
      84             :   uint                txn_parsed_cnt;
      85             :   /*                  txn_queued_cnt = txn_parsed_cnt-txn_in_flight_cnt-txn_done_cnt */
      86             :   uint                txn_exec_in_flight_cnt;
      87             :   uint                txn_exec_done_cnt;
      88             :   uint                txn_sigverify_in_flight_cnt;
      89             :   uint                txn_sigverify_done_cnt;
      90             :   uint                poh_hashing_in_flight_cnt;
      91             :   uint                poh_hashing_done_cnt;
      92             :   uint                poh_hash_cmp_done_cnt; /* poh_hashing_done_cnt==poh_hash_cmp_done_cnt+len(mixin_in_progress) */
      93             :   uint                txn_done_cnt; /* A transaction is considered done when all types of tasks associated with it are done. */
      94             :   uint                shred_cnt;
      95             :   uint                mblk_cnt;          /* Total number of microblocks, including ticks and non ticks.
      96             :                                             mblk_cnt==len(unhashed)+len(hashing_in_progress)+hashing_in_flight_cnt+len(mixin_in_progress)+hash_cmp_done_cnt */
      97             :   uint                mblk_tick_cnt;     /* Total number of tick microblocks. */
      98             :   uint                mblk_freed_cnt;    /* This is ==hash_cmp_done_cnt in most cases, except for aborted
      99             :                                             blocks, where the freed cnt will catch up to mblk_cnt and surpass
     100             :                                             hash_cmp_done_cnt when the block is reaped. */
     101             :   uint                mblk_unhashed_cnt; /* ==len(unhashed) */
     102             :   ulong               hashcnt; /* How many hashes this block wants replay to do.  A mixin/record counts as one hash. */
     103             :   ulong               txn_pool_max_popcnt;   /* Peak transaction pool occupancy during the time this block was replaying. */
     104             :   ulong               mblk_pool_max_popcnt;  /* Peak mblk pool occupancy. */
     105             :   ulong               block_pool_max_popcnt; /* Peak block pool occupancy. */
     106             :   ulong               txn_idx[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
     107             : 
     108             :   /* PoH verify. */
     109             :   fd_hash_t    poh_hash[ 1 ]; /* running end_hash of last parsed mblk */
     110             :   int          last_mblk_is_tick;
     111             :   mblk_slist_t mblks_unhashed[ 1 ]; /* A microblock, once parsed out, is in one of these queues.  It
     112             :                                        generally progresses from unhashed to hashing to mixin.  When a
     113             :                                        microblock is being hashed/in-flight, it'll be transiently out of
     114             :                                        any of the queues.  Once a microblock progresses through all stages
     115             :                                        of work, it'll be immediately freed. */
     116             :   mblk_slist_t mblks_hashing_in_progress[ 1 ];
     117             :   mblk_slist_t mblks_mixin_in_progress[ 1 ];
                         /* NOTE(review): bmtree_mem is in-struct backing storage aligned for a
                            fd_bmtree_commit_t, and bmtree presumably points into it once
                            initialized -- confirm where bmtree is set. */
     118             :   uchar bmtree_mem[ FD_BMTREE_COMMIT_FOOTPRINT(0) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN)));
     119             :   fd_bmtree_commit_t * bmtree;
     120             :   ulong max_tick_hashcnt;
     121             :   ulong curr_tick_hashcnt; /* Starts at 0, accumulates hashcnt, resets to 0 on the next tick. */
     122             :   ulong tick_height;       /* Block is built off of a parent block with this many ticks. */
     123             :   ulong max_tick_height;   /* Block should end with precisely this many ticks. */
     124             :   ulong hashes_per_tick;   /* Fixed per block, feature gated, known after bank clone. */
     125             :   int inconsistent_hashes_per_tick;
     126             : 
     127             :   /* Parser state. */
     128             :   uchar               txn[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
     129             :   ulong               mblks_rem;    /* Number of microblocks remaining in the current batch. */
     130             :   ulong               txns_rem;     /* Number of transactions remaining in the current microblock. */
     131             :   fd_acct_addr_t      aluts[ 256 ]; /* Resolve ALUT accounts into this buffer for more parallelism. */
     132             :   uint                fec_buf_sz;   /* Size of the fec_buf in bytes. */
     133             :   uint                fec_buf_soff; /* Starting offset into fec_buf for unparsed transactions. */
     134             :   uint                fec_buf_boff; /* Byte offset into raw block data of the first byte currently in fec_buf */
     135             :   uint                fec_eob:1;    /* FEC end-of-batch: set if the last FEC set in the batch is being
     136             :                                        ingested. */
     137             :   uint                fec_sob:1;    /* FEC start-of-batch: set if the parser expects to be receiving a new
     138             :                                        batch. */
     139             : 
     140             :   /* Block state. */
     141             :   uint                fec_eos:1;                          /* FEC end-of-stream: set if the last FEC set in the block has been
     142             :                                                              ingested. */
     143             :   uint                rooted:1;                           /* Set if the block is rooted. */
     144             :   uint                dying:1;                            /* Set if the block has been abandoned and no transactions should be
     145             :                                                              scheduled from it. */
     146             :   uint                refcnt:1;                           /* Starts at 1 when the block is added, set to 0 if caller has been
     147             :                                                              informed to decrement refcnt for sched. */
     148             :   uint                in_sched:1;                         /* Set if the block is being tracked by the scheduler. */
     149             :   uint                in_rdisp:1;                         /* Set if the block is being tracked by the dispatcher, either as staged
     150             :                                                              or unstaged. */
     151             :   uint                block_start_signaled:1;             /* Set if the start-of-block sentinel has been dispatched. */
     152             :   uint                block_end_signaled:1;               /* Set if the end-of-block sentinel has been dispatched. */
     153             :   uint                block_start_done:1;                 /* Set if the start-of-block processing has been completed. */
     154             :   uint                block_end_done:1;                   /* Set if the end-of-block processing has been completed. */
     155             :   uint                staged:1;                           /* Set if the block is in a dispatcher staging lane; a staged block is
     156             :                                                              tracked by the dispatcher. */
     157             :   ulong               staging_lane;                       /* Ignored if staged==0. */
     158             :   ulong               luf_depth;                          /* Depth of longest unstaged fork starting from this node; only
     159             :                                                              stageable unstaged descendants are counted. */
     160             :   uchar               fec_buf[ FD_SCHED_MAX_FEC_BUF_SZ ]; /* The previous FEC set could have some residual data that only becomes
     161             :                                                              parseable after the next FEC set is ingested. */
     162             :   uint                shred_blk_offs[ FD_SHRED_BLK_MAX ]; /* The byte offsets into block data of ingested shreds */
     163             : };
     164             : typedef struct fd_sched_block fd_sched_block_t;
     165             : 
     166             : FD_STATIC_ASSERT( sizeof(fd_hash_t)==sizeof(((fd_microblock_hdr_t *)0)->hash), unexpected poh hash size );
     167             : 
     168             : 
     169             : struct fd_sched_metrics {
                         /* Scheduler-wide counters, embedded in fd_sched_t as metrics[ 1 ].
                            Event counters are uint; the tickcount accumulators and the
                            txn/mblk/byte/FEC totals are ulong.  NOTE(review): the grouping by
                            name prefix (block_, deactivate_, lane_, alut_, txn_, mblk_,
                            bytes_) is inferred from the field names only -- the update sites
                            are not visible in this part of the file. */
     170             :   uint  block_added_cnt;
     171             :   uint  block_added_staged_cnt;
     172             :   uint  block_added_unstaged_cnt;
     173             :   uint  block_added_dead_ood_cnt;
     174             :   uint  block_removed_cnt;
     175             :   uint  block_abandoned_cnt;
     176             :   uint  block_bad_cnt;
     177             :   uint  block_promoted_cnt;
     178             :   uint  block_demoted_cnt;
     179             :   uint  deactivate_no_child_cnt;
     180             :   uint  deactivate_no_txn_cnt;
     181             :   uint  deactivate_pruned_cnt;
     182             :   uint  deactivate_abandoned_cnt;
     183             :   uint  lane_switch_cnt;
     184             :   uint  lane_promoted_cnt;
     185             :   uint  lane_demoted_cnt;
     186             :   uint  fork_observed_cnt;
     187             :   uint  alut_success_cnt;
     188             :   uint  alut_serializing_cnt;
     189             :   uint  txn_abandoned_parsed_cnt;
     190             :   uint  txn_abandoned_exec_done_cnt;
     191             :   uint  txn_abandoned_done_cnt;
     192             :   uint  txn_max_in_flight_cnt;
     193             :   ulong txn_weighted_in_flight_cnt;
     194             :   ulong txn_weighted_in_flight_tickcount;
     195             :   ulong txn_none_in_flight_tickcount;
     196             :   ulong txn_parsed_cnt;
     197             :   ulong txn_exec_done_cnt;
     198             :   ulong txn_sigverify_done_cnt;
     199             :   ulong txn_mixin_done_cnt;
     200             :   ulong txn_done_cnt;
     201             :   ulong mblk_parsed_cnt;
     202             :   ulong mblk_poh_hashed_cnt;
     203             :   ulong mblk_poh_done_cnt;
     204             :   ulong bytes_ingested_cnt;
     205             :   ulong bytes_ingested_unparsed_cnt;
     206             :   ulong bytes_dropped_cnt;
     207             :   ulong fec_cnt;
     208             : };
     209             : typedef struct fd_sched_metrics fd_sched_metrics_t;
     210             : 
     211             : #define DEQUE_NAME ref_q /* generates the ref_q_* API; fd_sched_t keeps one such deque in its ref_q member */
     212           0 : #define DEQUE_T    ulong /* element type */
     213             : #include "../../util/tmpl/fd_deque_dynamic.c"
     214             : 
     215             : struct fd_sched {
                         /* Top-level scheduler object.  Owns the debug print buffer, the
                            metrics, the flat txn/mblk/block pools and the staging-lane
                            bookkeeping.  Blocks are referred to throughout by their index
                            into block_pool ("bank_idx"); ULONG_MAX means "none" (see
                            block_pool_ele). */
     216             :   char                  print_buf[ FD_SCHED_MAX_PRINT_BUF_SZ ];
     217             :   ulong                 print_buf_sz; /* Bytes of print_buf in use, excluding the terminating NUL (see fd_sched_printf). */
     218             :   fd_sched_metrics_t    metrics[ 1 ];
     219             :   ulong                 canary; /* == FD_SCHED_MAGIC */
     220             :   ulong                 depth;         /* Immutable. */
     221             :   ulong                 block_cnt_max; /* Immutable. */
     222             :   ulong                 exec_cnt;      /* Immutable. */
     223             :   long                  txn_in_flight_last_tick;
     224             :   ulong                 root_idx;
     225             :   fd_rdisp_t *          rdisp;
     226             :   ulong                 txn_exec_ready_bitset[ 1 ];
     227             :   ulong                 sigverify_ready_bitset[ 1 ];
     228             :   ulong                 poh_ready_bitset[ 1 ];
     229             :   ulong                 active_bank_idx; /* Index of the actively replayed block, or ULONG_MAX if no block is
     230             :                                             actively replayed; has to have a transaction to dispatch; staged
     231             :                                             blocks that have no transactions to dispatch are not eligible for
     232             :                                             being active. */
     233             :   ulong                 last_active_bank_idx;
     234             :   ulong                 staged_bitset;    /* Bit i set if staging lane i is occupied. */
     235             :   ulong                 staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is
     236             :                                                                                not set in the bitset. */
     237             :   ulong                 staged_popcnt_wmk;
     238             :   ulong                 txn_pool_free_cnt;
     239             :   fd_txn_p_t *          txn_pool;      /* Just a flat array. */
     240             :   fd_sched_txn_info_t * txn_info_pool; /* Just a flat array. */
     241             :   fd_sched_mblk_t *     mblk_pool;     /* Just a flat array. */
     242             :   ulong                 mblk_pool_free_cnt;  /* Number of elements on the mblk free list (incremented by free_mblk). */
     243             :   uint                  mblk_pool_free_head; /* Pool index of the first free mblk; the list is threaded through fd_sched_mblk_t::next. */
     244             :   ulong                 tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */
     245             :   txn_bitset_t          exec_done_set[ txn_bitset_word_cnt ];      /* Indexed by txn_idx. */
     246             :   txn_bitset_t          sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
     247             :   txn_bitset_t          poh_mixin_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
     248             :   fd_sched_block_t *    block_pool; /* Just a flat array. */
     249             :   ulong                 block_pool_popcnt;
     250             :   ulong *               ref_q;
     251             : };
     252             : typedef struct fd_sched fd_sched_t;
     253             : 
     254             : 
     255             : /* Internal helpers. */
     256             : 
     257             : static int
     258             : verify_ticks_eager( fd_sched_block_t * block );
     259             : 
     260             : static int
     261             : verify_ticks_final( fd_sched_block_t * block );
     262             : 
     263             : static void
     264             : add_block( fd_sched_t * sched,
     265             :            ulong        bank_idx,
     266             :            ulong        parent_bank_idx );
     267             : 
     268             : FD_WARN_UNUSED static int
     269             : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     270             : 
     271             : FD_WARN_UNUSED static int
     272             : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     273             : 
     274             : static void
     275             : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     276             : 
     277             : static void
     278             : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     279             : 
     280             : FD_WARN_UNUSED static int
     281             : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block );
     282             : 
     283             : static void
     284           0 : free_mblk( fd_sched_t * sched, fd_sched_block_t * block, uint mblk_idx ) {
                         /* Return microblock mblk_idx to sched's mblk pool by pushing it onto
                            the head of the free list (linked through next), and account for
                            it in both the pool free count and block's freed count.  No check
                            is made here that mblk_idx is in range or not already free. */
     285           0 :   sched->mblk_pool[ mblk_idx ].next = sched->mblk_pool_free_head;
     286           0 :   sched->mblk_pool_free_head = mblk_idx;
     287           0 :   sched->mblk_pool_free_cnt++;
     288           0 :   block->mblk_freed_cnt++;
     289           0 : }
     290             : 
     291             : static void
     292           0 : free_mblk_slist( fd_sched_t * sched, fd_sched_block_t * block, mblk_slist_t * list ) {
                         /* Drain list: pop every microblock off its head and hand it to
                            free_mblk, so block->mblk_freed_cnt grows by the number of
                            elements that were on the list.  list is empty on return. */
     293           0 :   while( !mblk_slist_is_empty( list, sched->mblk_pool ) ) {
     294           0 :     uint idx = (uint)mblk_slist_idx_pop_head( list, sched->mblk_pool );
     295           0 :     free_mblk( sched, block, idx );
     296           0 :   }
     297           0 : }
     298             : 
     299             : static void
     300             : try_activate_block( fd_sched_t * sched );
     301             : 
     302             : static void
     303             : check_or_set_active_block( fd_sched_t * sched );
     304             : 
     305             : static void
     306             : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block );
     307             : 
     308             : static void
     309             : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx );
     310             : 
     311             : static void
     312             : maybe_switch_block( fd_sched_t * sched, ulong bank_idx );
     313             : 
     314             : FD_FN_UNUSED static ulong
     315             : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx );
     316             : 
     317             : static ulong
     318             : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx );
     319             : 
     320             : static ulong
     321             : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx );
     322             : 
     323             : static int
     324             : lane_is_demotable( fd_sched_t * sched, int lane_idx );
     325             : 
     326             : static ulong
     327             : demote_lane( fd_sched_t * sched, int lane_idx );
     328             : 
     329             : static inline fd_sched_block_t *
     330           0 : block_pool_ele( fd_sched_t * sched, ulong idx ) {
                         /* Map a block pool index to a pointer.  ULONG_MAX is the "no block"
                            sentinel and maps to NULL; any other idx must be below
                            block_cnt_max or the FD_TEST fails. */
     331           0 :   FD_TEST( idx<sched->block_cnt_max || idx==ULONG_MAX );
     332           0 :   return idx==ULONG_MAX ? NULL : sched->block_pool+idx;
     333           0 : }
     334             : 
     335             : FD_FN_UNUSED static inline int
     336           0 : block_is_void( fd_sched_block_t * block ) {
     337           0 :   /* We've seen everything in the block and no transaction got parsed
     338           0 :      out.  Returns 1 if so, 0 otherwise. */
     339           0 :   return block->fec_eos && block->txn_parsed_cnt==0;
     340           0 : }
     341             : 
     342             : static inline int
     343           0 : block_should_signal_end( fd_sched_block_t * block ) {
     344             :   /* Under the current policy of eager synchronous PoH mixin, hashing
     345             :      done plus fec_eos imply that all mixins have been done. */
                         /* The check below is that implication spelled out: it logs CRIT iff
                            fec_eos is set, every microblock has been hashed
                            (mblk_cnt==poh_hashing_done_cnt), and yet not every microblock has
                            passed hash comparison (mblk_cnt!=poh_hash_cmp_done_cnt). */
     346           0 :   if( FD_UNLIKELY( !( !block->fec_eos || ((block->mblk_cnt==block->poh_hashing_done_cnt&&block->mblk_cnt==block->poh_hash_cmp_done_cnt)||block->mblk_cnt!=block->poh_hashing_done_cnt) ) ) ) FD_LOG_CRIT(( "invariant violation: slot %lu fec_eos %d mblk_cnt %u poh_hashing_done_cnt %u poh_hash_cmp_done_cnt %u", block->slot, block->fec_eos, block->mblk_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt ));
                         /* Time to dispatch the end-of-block sentinel: the whole block has
                            been ingested, every parsed transaction is done, every microblock
                            is hashed, start-of-block processing completed, and the sentinel
                            has not been dispatched yet. */
     347           0 :   return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hashing_done_cnt && block->block_start_done && !block->block_end_signaled;
     348           0 : }
     349             : 
     350             : static inline int
     351           0 : block_will_signal_end( fd_sched_block_t * block ) {
                         /* Weaker than block_should_signal_end: the block has been fully
                            ingested and its end-of-block sentinel has not been dispatched
                            yet, regardless of whether the remaining work has finished. */
     352           0 :   return block->fec_eos && !block->block_end_signaled;
     353           0 : }
     354             : 
     355             : /* Is there something known to be dispatchable in the block?  This is an
     356             :    important liveness property.  A block that doesn't contain any known
     357             :    dispatchable tasks will be deactivated or demoted. */
     358             : static inline int
     359           0 : block_is_dispatchable( fd_sched_block_t * block ) {
                         /* Queued == parsed minus in-flight minus done, per task type.  The
                            counters are uint, so each difference is computed in uint
                            arithmetic and then widened to ulong. */
     360           0 :   ulong exec_queued_cnt      = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
     361           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
     362           0 :   ulong poh_queued_cnt       = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
                         /* Besides queued tasks, an undispatched start-of-block sentinel or a
                            still-owed end-of-block sentinel also counts as dispatchable. */
     363           0 :   return exec_queued_cnt>0UL ||
     364           0 :          sigverify_queued_cnt>0UL ||
     365           0 :          poh_queued_cnt>0UL ||
     366           0 :          !block->block_start_signaled ||
     367           0 :          block_will_signal_end( block );
     368           0 : }
     369             : 
     370             : static inline int
     371           0 : block_is_in_flight( fd_sched_block_t * block ) {
                         /* Nonzero if any exec, sigverify or PoH hashing task is in flight,
                            or the end-of-block sentinel has been dispatched but not
                            completed.  block_start_signaled/block_start_done are not
                            consulted here. */
     372           0 :   return block->txn_exec_in_flight_cnt || block->txn_sigverify_in_flight_cnt || block->poh_hashing_in_flight_cnt || (block->block_end_signaled && !block->block_end_done);
     373           0 : }
     374             : 
     375             : static inline int
     376           0 : block_is_done( fd_sched_block_t * block ) {
                         /* A block is done once it has been fully ingested, every parsed
                            transaction is done, every microblock has passed PoH hash
                            comparison, and both start- and end-of-block processing have
                            completed. */
     377           0 :   return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hash_cmp_done_cnt && block->block_start_done && block->block_end_done;
     378           0 : }
     379             : 
     380             : static inline int
     381           0 : block_is_stageable( fd_sched_block_t * block ) {
                         /* Returns 1 iff the block is neither done nor dying.  As a side
                            effect, logs CRIT if such a block is not tracked by the
                            dispatcher. */
     382           0 :   int rv = !block_is_done( block ) && !block->dying;
     383           0 :   if( FD_UNLIKELY( rv && !block->in_rdisp ) ) {
     384             :     /* Invariant: stageable blocks may be currently staged or unstaged,
     385             :        but must be in the dispatcher either way.  When a block
     386             :        transitions to DONE, it will be immediately removed from the
     387             :        dispatcher.  When a block transitions to DYING, it will be
     388             :        eventually abandoned from the dispatcher. */
     389           0 :     FD_LOG_CRIT(( "invariant violation: stageable block->in_rdisp==0, txn_parsed_cnt %u, txn_done_cnt %u, fec_eos %u,, slot %lu, parent slot %lu",
     390           0 :                   block->txn_parsed_cnt, block->txn_done_cnt, (uint)block->fec_eos, block->slot, block->parent_slot ));
     391           0 :   }
     392           0 :   return rv;
     393           0 : }
     394             : 
     395             : static inline int
     396           0 : block_is_promotable( fd_sched_block_t * block ) {
                         /* Promotable: stageable, has something to dispatch, and is not
                            already in a staging lane.  Differs from block_is_activatable
                            only in the sense of the staged test. */
     397           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && !block->staged;
     398           0 : }
     399             : 
     400             : static inline int
     401           0 : block_is_demotable( fd_sched_block_t * block ) {
     402             :   /* A block can only be demoted from rdisp if it is empty, meaning no
     403             :      PENDING, READY, or DISPATCHED transactions.  This is equivalent to
     404             :      having no in-flight transactions (DISPATCHED) and no queued
     405             :      transactions (PENDING or READY).  This function actually implements
     406             :      a stronger requirement.  We consider a block demotable only if
     407             :      there are no in-flight or queued tasks of any kind. */
                         /* Only a currently staged block can be demoted, hence the final
                            staged test. */
     408           0 :   return !block_is_in_flight( block ) && !block_is_dispatchable( block ) && block->staged;
     409           0 : }
     410             : 
     411             : static inline int
     412           0 : block_is_activatable( fd_sched_block_t * block ) {
                         /* Activatable: stageable, has something to dispatch, and is
                            currently in a staging lane. */
     413           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && block->staged;
     414           0 : }
     415             : 
     416             : static inline int
     417           0 : block_should_deactivate( fd_sched_block_t * block ) {
     418             :   /* We allow a grace period, during which a block has nothing to
     419             :      dispatch, but has something in-flight.  The block is allowed to
     420             :      stay activated and ingest FEC sets during this time.  The block
     421             :      will be deactivated if there's still nothing to dispatch by the
     422             :      time all in-flight tasks are completed. */
                         /* Note that !block_is_activatable also covers blocks that are done,
                            dying or no longer staged, not only blocks with nothing to
                            dispatch. */
     423           0 :   return !block_is_activatable( block ) && !block_is_in_flight( block );
     424           0 : }
     425             : 
     426             : static inline int
     427           0 : block_is_prunable( fd_sched_block_t * block ) {
                         /* Prunable: the dispatcher no longer tracks the block and nothing
                            from it is in flight. */
     428           0 :   return !block->in_rdisp && !block_is_in_flight( block );
     429           0 : }
     430             : 
     431             : static inline ulong
     432           0 : block_to_idx( fd_sched_t * sched, fd_sched_block_t * block ) { return (ulong)(block-sched->block_pool); } /* Inverse of block_pool_ele for non-NULL block: pointer -> pool index. */
     433             : 
     434             : __attribute__((format(printf,2,3)))
     435             : static void
     436             : fd_sched_printf( fd_sched_t * sched,
     437             :                  char const * fmt,
     438           0 :                  ... ) {
                         /* Append printf-style formatted text to sched->print_buf, starting
                            at offset print_buf_sz, and advance print_buf_sz by the number of
                            characters actually stored (the terminating NUL is not counted).
                            Output that does not fit in the remaining space is silently
                            truncated. */
     439           0 :   va_list ap;
     440           0 :   ulong len;
     441           0 :   va_start( ap, fmt );
     442           0 :   int ret = vsnprintf( sched->print_buf+sched->print_buf_sz,
     443           0 :                        FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz,
     444           0 :                        fmt, ap );
     445           0 :   va_end( ap );
                         /* vsnprintf reports the length the full output would have had, so
                            clamp it to what fit (remaining space minus one for the NUL).  A
                            negative return is an error and appends nothing.  Because len
                            never exceeds remaining-1, print_buf_sz stays strictly below
                            FD_SCHED_MAX_PRINT_BUF_SZ. */
     446           0 :   len = fd_ulong_if( ret<0, 0UL, fd_ulong_min( (ulong)ret, FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz-1UL ) );
                         /* Terminate explicitly so the buffer stays a valid cstr even on the
                            error path. */
     447           0 :   sched->print_buf[ sched->print_buf_sz+len ] = '\0';
     448           0 :   sched->print_buf_sz += len;
     449           0 : }
     450             : 
     451             : FD_FN_UNUSED static void
     452           0 : print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * title ) {
     453           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     454           0 :   fd_sched_printf( sched, " | %-19s |                      | Count        |\n", title );
     455           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     456           0 : 
     457           0 :   ulong total_count = 0;
     458           0 :   for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
     459           0 :     total_count += fd_histf_cnt( hist, i );
     460           0 :   }
     461           0 : 
     462           0 :   for( ulong i=0UL; i< fd_histf_bucket_cnt( hist ); i++ ) {
     463           0 :     ulong bucket_count = fd_histf_cnt( hist, i );
     464           0 : 
     465           0 :     char * lt_str;
     466           0 :     char lt_buf[ 64 ];
     467           0 :     if( FD_UNLIKELY( i==fd_histf_bucket_cnt( hist )-1UL ) ) {
     468           0 :       lt_str = "+Inf";
     469           0 :     } else {
     470           0 :       ulong edge = fd_histf_right( hist, i );
     471           0 :       if( converter==FD_METRICS_CONVERTER_NANOSECONDS ) {
     472           0 :         edge = fd_metrics_convert_ticks_to_nanoseconds( edge-1UL );
     473           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu nanos", edge ) );
     474           0 :       } else if( converter==FD_METRICS_CONVERTER_NONE ) {
     475           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu", edge-1UL ) );
     476           0 :       }
     477           0 :       lt_str = lt_buf;
     478           0 :     }
     479           0 : 
     480           0 :     /* Create visual bar - scale to max 20 characters. */
     481           0 :     char bar_buf[ 22 ];
     482           0 :     if( bucket_count>0UL && total_count>0UL ) {
     483           0 :       ulong bar_length = (bucket_count*20UL)/total_count;
     484           0 :       if( !bar_length ) bar_length = 1;
     485           0 :       for( ulong j=0UL; j<bar_length; j++ ) { bar_buf[ j ] = '*'; }
     486           0 :       bar_buf[ bar_length ] = '\0';
     487           0 :     } else {
     488           0 :       bar_buf[ 0 ] = '\0';
     489           0 :     }
     490           0 : 
     491           0 :     fd_sched_printf( sched, " | %19s | %-20s | %12lu |\n", lt_str, bar_buf, bucket_count );
     492           0 :   }
     493           0 : }
     494             : 
     495             : FD_FN_UNUSED static void
     496           0 : print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) {
     497           0 :   fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
     498           0 :                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
     499           0 : }
     500             : 
     501             : FD_FN_UNUSED static void
     502           0 : print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) {
     503           0 :   fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, max_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
     504           0 :                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
     505           0 : }
     506             : 
     507             : FD_FN_UNUSED static void
     508           0 : print_block_and_parent( fd_sched_t * sched, fd_sched_block_t * block ) {
     509           0 :   print_block_debug( sched, block );
     510           0 :   fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
     511           0 :   if( FD_LIKELY( parent ) ) print_block_debug( sched, parent );
     512           0 : }
     513             : 
     514             : FD_FN_UNUSED static void
     515           0 : print_metrics( fd_sched_t * sched ) {
     516           0 :     fd_sched_printf( sched, "metrics: block_added_cnt %u, block_added_staged_cnt %u, block_added_unstaged_cnt %u, block_added_dead_ood_cnt %u, block_removed_cnt %u, block_abandoned_cnt %u, block_bad_cnt %u, block_promoted_cnt %u, block_demoted_cnt %u, deactivate_no_child_cnt %u, deactivate_no_txn_cnt %u, deactivate_pruned_cnt %u, deactivate_abandoned_cnt %u, lane_switch_cnt %u, lane_promoted_cnt %u, lane_demoted_cnt %u, fork_observed_cnt %u, alut_success_cnt %u, alut_serializing_cnt %u, txn_abandoned_parsed_cnt %u, txn_abandoned_exec_done_cnt %u, txn_abandoned_done_cnt %u, txn_max_in_flight_cnt %u, txn_weighted_in_flight_cnt %lu, txn_weighted_in_flight_tickcount %lu, txn_none_in_flight_tickcount %lu, txn_parsed_cnt %lu, txn_exec_done_cnt %lu, txn_sigverify_done_cnt %lu, txn_mixin_done_cnt %lu, txn_done_cnt %lu, mblk_parsed_cnt %lu, mblk_poh_hashed_cnt %lu, mblk_poh_done_cnt %lu, bytes_ingested_cnt %lu, bytes_ingested_unparsed_cnt %lu, bytes_dropped_cnt %lu, fec_cnt %lu\n",
     517           0 :                      sched->metrics->block_added_cnt, sched->metrics->block_added_staged_cnt, sched->metrics->block_added_unstaged_cnt, sched->metrics->block_added_dead_ood_cnt, sched->metrics->block_removed_cnt, sched->metrics->block_abandoned_cnt, sched->metrics->block_bad_cnt, sched->metrics->block_promoted_cnt, sched->metrics->block_demoted_cnt, sched->metrics->deactivate_no_child_cnt, sched->metrics->deactivate_no_txn_cnt, sched->metrics->deactivate_pruned_cnt, sched->metrics->deactivate_abandoned_cnt, sched->metrics->lane_switch_cnt, sched->metrics->lane_promoted_cnt, sched->metrics->lane_demoted_cnt, sched->metrics->fork_observed_cnt, sched->metrics->alut_success_cnt, sched->metrics->alut_serializing_cnt, sched->metrics->txn_abandoned_parsed_cnt, sched->metrics->txn_abandoned_exec_done_cnt, sched->metrics->txn_abandoned_done_cnt, sched->metrics->txn_max_in_flight_cnt, sched->metrics->txn_weighted_in_flight_cnt, sched->metrics->txn_weighted_in_flight_tickcount, sched->metrics->txn_none_in_flight_tickcount, sched->metrics->txn_parsed_cnt, sched->metrics->txn_exec_done_cnt, sched->metrics->txn_sigverify_done_cnt, sched->metrics->txn_mixin_done_cnt, sched->metrics->txn_done_cnt, sched->metrics->mblk_parsed_cnt, sched->metrics->mblk_poh_hashed_cnt, sched->metrics->mblk_poh_done_cnt, sched->metrics->bytes_ingested_cnt, sched->metrics->bytes_ingested_unparsed_cnt, sched->metrics->bytes_dropped_cnt, sched->metrics->fec_cnt );
     518             : 
     519           0 : }
     520             : 
     521             : FD_FN_UNUSED static void
     522           0 : print_sched( fd_sched_t * sched ) {
     523           0 :   fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, staged_popcnt_wmk %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n",
     524           0 :                    sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->staged_popcnt_wmk, sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max );
     525           0 :   fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
     526           0 :   if( active_block ) print_block_debug( sched, active_block );
     527           0 :   for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
     528           0 :     if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
     529           0 :       fd_sched_block_t * block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
     530           0 :       print_block_debug( sched, block );
     531           0 :     }
     532           0 :   }
     533           0 : }
     534             : 
     535             : FD_FN_UNUSED static void
     536           0 : print_all( fd_sched_t * sched, fd_sched_block_t * block ) {
     537           0 :   print_metrics( sched );
     538           0 :   print_sched( sched );
     539           0 :   print_block_and_parent( sched, block );
     540           0 : }
     541             : 
     542             : static void
     543           0 : handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) {
     544           0 :   sched->print_buf_sz = 0UL;
     545           0 :   print_all( sched, block );
     546           0 :   FD_LOG_DEBUG(( "%s", sched->print_buf ));
     547           0 :   subtree_abandon( sched, block );
     548           0 :   sched->metrics->block_bad_cnt++;
     549           0 :   check_or_set_active_block( sched );
     550           0 : }
     551             : 
     552             : 
     553             : /* Public functions. */
     554             : 
     555             : ulong
     556           0 : fd_sched_align( void ) {
     557           0 :   return fd_ulong_max( alignof(fd_sched_t),
     558           0 :          fd_ulong_max( fd_rdisp_align(),
     559           0 :          fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */
     560           0 : }
     561             : 
     562             : ulong
     563             : fd_sched_footprint( ulong depth,
     564           0 :                     ulong block_cnt_max ) {
     565           0 :   if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */
     566           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */
     567           0 :   if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) return 0UL; /* mblk_pool use uint as pointers */
     568             : 
     569           0 :   ulong l = FD_LAYOUT_INIT;
     570           0 :   l = FD_LAYOUT_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     571           0 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */
     572           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     ); /* block pool */
     573           0 :   l = FD_LAYOUT_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     574           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   ); /* txn_pool */
     575           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          ); /* txn_info_pool */
     576           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              ); /* mblk_pool */
     577           0 :   return FD_LAYOUT_FINI( l, fd_sched_align() );
     578           0 : }
     579             : 
     580             : void *
     581             : fd_sched_new( void * mem,
     582             :               ulong  depth,
     583             :               ulong  block_cnt_max,
     584           0 :               ulong  exec_cnt ) {
     585             : 
     586           0 :   if( FD_UNLIKELY( !mem ) ) {
     587           0 :     FD_LOG_WARNING(( "NULL mem" ));
     588           0 :     return NULL;
     589           0 :   }
     590             : 
     591           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) {
     592           0 :     FD_LOG_WARNING(( "misaligned mem (%p)", mem ));
     593           0 :     return NULL;
     594           0 :   }
     595             : 
     596           0 :   if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) {
     597           0 :     FD_LOG_WARNING(( "bad depth (%lu)", depth ));
     598           0 :     return NULL;
     599           0 :   }
     600             : 
     601           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) {
     602           0 :     FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max ));
     603           0 :     return NULL;
     604           0 :   }
     605             : 
     606           0 :   if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) {
     607           0 :     FD_LOG_WARNING(( "bad depth (%lu)", depth ));
     608           0 :     return NULL;
     609           0 :   }
     610             : 
     611           0 :   if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) {
     612           0 :     FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt ));
     613           0 :     return NULL;
     614           0 :   }
     615             : 
     616           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     617           0 :   fd_sched_t *          sched          = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     618           0 :   void *                _rdisp         = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) );
     619           0 :   void *                _bpool         = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     );
     620           0 :   void *                _ref_q         = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     621           0 :   fd_txn_p_t *          _txn_pool      = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   );
     622           0 :   fd_sched_txn_info_t * _txn_info_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          );
     623           0 :   fd_sched_mblk_t *     _mblk_pool     = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              );
     624           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     625             : 
     626           0 :   sched->txn_pool      = _txn_pool;
     627           0 :   sched->txn_info_pool = _txn_info_pool;
     628           0 :   sched->mblk_pool     = _mblk_pool;
     629             : 
     630           0 :   ulong seed = ((ulong)fd_tickcount()) ^ FD_SCHED_MAGIC;
     631           0 :   fd_rdisp_new( _rdisp, depth, block_cnt_max, seed );
     632             : 
     633           0 :   fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool;
     634           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     635           0 :     bpool[ i ].in_sched = 0;
     636           0 :     mblk_slist_new( bpool[ i ].mblks_unhashed );
     637           0 :     mblk_slist_new( bpool[ i ].mblks_hashing_in_progress );
     638           0 :     mblk_slist_new( bpool[ i ].mblks_mixin_in_progress );
     639           0 :   }
     640             : 
     641           0 :   fd_memset( sched->metrics, 0, sizeof(fd_sched_metrics_t) );
     642           0 :   sched->txn_in_flight_last_tick = LONG_MAX;
     643             : 
     644           0 :   sched->canary               = FD_SCHED_MAGIC;
     645           0 :   sched->depth                = depth;
     646           0 :   sched->block_cnt_max        = block_cnt_max;
     647           0 :   sched->exec_cnt             = exec_cnt;
     648           0 :   sched->root_idx             = ULONG_MAX;
     649           0 :   sched->active_bank_idx      = ULONG_MAX;
     650           0 :   sched->last_active_bank_idx = ULONG_MAX;
     651           0 :   sched->staged_bitset        = 0UL;
     652           0 :   sched->staged_popcnt_wmk    = 0UL;
     653             : 
     654           0 :   sched->txn_exec_ready_bitset[ 0 ]  = fd_ulong_mask_lsb( (int)exec_cnt );
     655           0 :   sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
     656           0 :   sched->poh_ready_bitset[ 0 ]       = fd_ulong_mask_lsb( (int)exec_cnt );
     657             : 
     658           0 :   sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */
     659             : 
     660           0 :   for( ulong i=0UL; i<depth-1UL; i++ ) sched->mblk_pool[ i ].next = (uint)(i+1UL);
     661           0 :   sched->mblk_pool[ depth-1UL ].next = UINT_MAX;
     662           0 :   sched->mblk_pool_free_head = 0U;
     663           0 :   sched->mblk_pool_free_cnt  = depth;
     664             : 
     665           0 :   txn_bitset_new( sched->exec_done_set );
     666           0 :   txn_bitset_new( sched->sigverify_done_set );
     667           0 :   txn_bitset_new( sched->poh_mixin_done_set );
     668             : 
     669           0 :   sched->block_pool_popcnt = 0UL;
     670             : 
     671           0 :   ref_q_new( _ref_q, block_cnt_max );
     672             : 
     673           0 :   return sched;
     674           0 : }
     675             : 
     676             : fd_sched_t *
     677           0 : fd_sched_join( void * mem ) {
     678             : 
     679           0 :   if( FD_UNLIKELY( !mem ) ) {
     680           0 :     FD_LOG_WARNING(( "NULL mem" ));
     681           0 :     return NULL;
     682           0 :   }
     683             : 
     684           0 :   fd_sched_t * sched         = (fd_sched_t *)mem;
     685           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     686           0 :   ulong        depth         = sched->depth;
     687           0 :   ulong        block_cnt_max = sched->block_cnt_max;
     688             : 
     689           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     690           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     691           0 :   void *           _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) );
     692           0 :   void *           _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     );
     693           0 :   void *           _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     694           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   );
     695           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          );
     696           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              );
     697           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     698             : 
     699           0 :   sched->rdisp      = fd_rdisp_join( _rdisp );
     700           0 :   sched->ref_q      = ref_q_join( _ref_q );
     701           0 :   sched->block_pool = _bpool;
     702             : 
     703           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     704           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_unhashed );
     705           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_hashing_in_progress );
     706           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_mixin_in_progress );
     707           0 :   }
     708             : 
     709           0 :   txn_bitset_join( sched->exec_done_set );
     710           0 :   txn_bitset_join( sched->sigverify_done_set );
     711           0 :   txn_bitset_join( sched->poh_mixin_done_set );
     712             : 
     713           0 :   return sched;
     714           0 : }
     715             : 
     716             : int
     717           0 : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ) {
     718           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     719           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     720           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     721             : 
     722           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     723           0 :     sched->print_buf_sz = 0UL;
     724           0 :     print_metrics( sched );
     725           0 :     print_sched( sched );
     726           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     727           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     728           0 :   }
     729             : 
     730           0 :   ulong fec_buf_sz = 0UL;
     731           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     732           0 :   if( FD_LIKELY( !fec->is_first_in_block ) ) {
     733           0 :     fec_buf_sz += block->fec_buf_sz-block->fec_buf_soff;
     734           0 :   } else {
     735             :     /* No residual data as this is a fresh new block. */
     736           0 :   }
     737             :   /* Addition is safe and won't overflow because we checked the FEC set
     738             :      size above. */
     739           0 :   fec_buf_sz += fec->fec->data_sz;
     740             :   /* Assuming every transaction is min size, do we have enough free
     741             :      entries in the txn pool?  For a more precise txn count, we would
     742             :      have to do some parsing. */
     743           0 :   return sched->txn_pool_free_cnt>=fec_buf_sz/FD_TXN_MIN_SERIALIZED_SZ && sched->mblk_pool_free_cnt>=fec_buf_sz/sizeof(fd_microblock_hdr_t);
     744           0 : }
     745             : 
     746             : ulong
     747           0 : fd_sched_can_ingest_cnt( fd_sched_t * sched ) {
     748           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     749             :   /* Worst case, we need one byte from the incoming data to extract a
     750             :      transaction out of the residual data, and the rest of the incoming
     751             :      data contributes toward min sized transactions. */
     752           0 :   return fd_ulong_min( sched->txn_pool_free_cnt/FD_SCHED_MAX_TXN_PER_FEC, sched->mblk_pool_free_cnt/FD_SCHED_MAX_MBLK_PER_FEC );
     753           0 : }
     754             : 
     755             : int
     756           0 : fd_sched_is_drained( fd_sched_t * sched ) {
     757           0 :   int nothing_inflight = sched->exec_cnt==(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
     758           0 :   int nothing_queued = sched->active_bank_idx==ULONG_MAX;
     759           0 :   return nothing_inflight && nothing_queued;
     760           0 : }
     761             : 
     762             : FD_WARN_UNUSED int
     763             : fd_sched_fec_ingest( fd_sched_t *     sched,
     764           0 :                      fd_sched_fec_t * fec ) {
     765           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     766           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     767           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     768           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
     769             : 
     770           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     771             : 
     772           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     773           0 :     sched->print_buf_sz = 0UL;
     774           0 :     print_all( sched, block );
     775           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     776           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     777           0 :   }
     778             : 
     779           0 :   sched->metrics->fec_cnt++;
     780             : 
     781           0 :   if( FD_UNLIKELY( fec->is_first_in_block ) ) {
     782             :     /* This is a new block. */
     783           0 :     add_block( sched, fec->bank_idx, fec->parent_bank_idx );
     784           0 :     block->slot        = fec->slot;
     785           0 :     block->parent_slot = fec->parent_slot;
     786             : 
     787           0 :     if( FD_UNLIKELY( block->dying ) ) {
     788             :       /* The child of a dead block is also dead.  We added it to our
     789             :          fork tree just so we could track an entire lineage of dead
     790             :          children and propagate the dead property to the entire lineage,
     791             :          in case there were frags for more than one dead children
     792             :          in-flight at the time the parent was abandoned.  That being
     793             :          said, we shouldn't need to add the dead child to the
     794             :          dispatcher. */
     795           0 :       sched->metrics->block_added_dead_ood_cnt++;
     796             : 
     797             :       /* Ignore the FEC set for a dead block. */
     798           0 :       sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     799           0 :       return 1;
     800           0 :     }
     801             : 
     802             :     /* Try to find a staging lane for this block. */
     803           0 :     int alloc_lane = 0;
     804           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, fec->parent_bank_idx );
     805           0 :     if( FD_LIKELY( parent_block->staged ) ) {
     806             :       /* Parent is staged.  So see if we can continue down the same
     807             :          staging lane. */
     808           0 :       ulong staging_lane = parent_block->staging_lane;
     809           0 :       ulong child_idx    = parent_block->child_idx;
     810           0 :       while( child_idx!=ULONG_MAX ) {
     811           0 :         fd_sched_block_t * child = block_pool_ele( sched, child_idx );
     812           0 :         if( child->staged && child->staging_lane==staging_lane ) {
     813             :           /* Found a child on the same lane.  So we're done. */
     814           0 :           staging_lane = FD_RDISP_UNSTAGED;
     815           0 :           break;
     816           0 :         }
     817           0 :         child_idx = child->sibling_idx;
     818           0 :       }
     819             :       /* No child is staged on the same lane as the parent.  So stage
     820             :          this block.  This is the common case. */
     821           0 :       if( FD_LIKELY( staging_lane!=FD_RDISP_UNSTAGED ) ) {
     822           0 :         block->in_rdisp     = 1;
     823           0 :         block->staged       = 1;
     824           0 :         block->staging_lane = staging_lane;
     825           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, staging_lane );
     826           0 :         sched->metrics->block_added_cnt++;
     827           0 :         sched->metrics->block_added_staged_cnt++;
     828           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, staging_lane ));
     829           0 :       } else {
     830           0 :         alloc_lane = 1;
     831           0 :       }
     832           0 :     } else {
     833           0 :       if( block_is_stageable( parent_block ) ) {
     834             :         /* Parent is unstaged but stageable.  So let's be unstaged too.
     835             :            This is not only a policy decision to be lazy and not promote
     836             :            the parent at the moment, but also an important invariant
     837             :            that we maintain for deadlock freeness in the face of staging
     838             :            lane shortage.  See the comments in lane eviction for how
     839             :            this invariant is relevant. */
     840           0 :         block->in_rdisp = 1;
     841           0 :         block->staged   = 0;
     842           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     843           0 :         sched->metrics->block_added_cnt++;
     844           0 :         sched->metrics->block_added_unstaged_cnt++;
     845           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     846           0 :       } else {
     847           0 :         alloc_lane = 1;
     848           0 :       }
     849           0 :     }
     850           0 :     if( FD_UNLIKELY( alloc_lane ) ) {
     851             :       /* We weren't able to inherit the parent's staging lane.  So try
     852             :          to find a new staging lane. */
     853           0 :       if( FD_LIKELY( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) { /* Optimize for lane available. */
     854           0 :         int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
     855           0 :         if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
     856           0 :           FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
     857           0 :                         lane_idx, sched->staged_bitset ));
     858           0 :         }
     859           0 :         sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
     860           0 :         sched->staged_head_bank_idx[ lane_idx ] = fec->bank_idx;
     861           0 :         sched->staged_popcnt_wmk = fd_ulong_max( sched->staged_popcnt_wmk, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
     862           0 :         block->in_rdisp     = 1;
     863           0 :         block->staged       = 1;
     864           0 :         block->staging_lane = (ulong)lane_idx;
     865           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, block->staging_lane );
     866           0 :         sched->metrics->block_added_cnt++;
     867           0 :         sched->metrics->block_added_staged_cnt++;
     868           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, block->staging_lane ));
     869           0 :       } else {
     870             :         /* No lanes available. */
     871           0 :         block->in_rdisp = 1;
     872           0 :         block->staged   = 0;
     873           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     874           0 :         sched->metrics->block_added_cnt++;
     875           0 :         sched->metrics->block_added_unstaged_cnt++;
     876           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     877           0 :       }
     878           0 :     }
     879           0 :   }
     880             : 
     881           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
     882           0 :   block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
     883           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
     884             : 
     885           0 :   if( FD_UNLIKELY( block->dying ) ) {
     886             :     /* Ignore the FEC set for a dead block. */
     887           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     888           0 :     return 1;
     889           0 :   }
     890             : 
     891           0 :   if( FD_UNLIKELY( !block->in_rdisp ) ) {
     892             :     /* Invariant: block must be in the dispatcher at this point. */
     893           0 :     sched->print_buf_sz = 0UL;
     894           0 :     print_all( sched, block );
     895           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     896           0 :     FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
     897           0 :                   block->slot, block->parent_slot ));
     898           0 :   }
     899             : 
     900           0 :   if( FD_UNLIKELY( block->fec_eos ) ) {
     901             :     /* This means something is wrong upstream.  We're getting more FEC
     902             :        sets for a block that has already ended, or so we were told. */
     903           0 :     sched->print_buf_sz = 0UL;
     904           0 :     print_all( sched, block );
     905           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     906           0 :     FD_LOG_CRIT(( "invariant violation: block->fec_eos set but getting more FEC sets, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     907           0 :   }
     908           0 :   if( FD_UNLIKELY( block->fec_eob && fec->is_last_in_batch ) ) {
     909             :     /* If the previous FEC set ingestion and parse was successful,
     910             :        block->fec_eob should be cleared.  The fact that fec_eob is set
     911             :        means that the previous batch didn't parse properly.  So this is
     912             :        a bad block.  We should refuse to replay down the fork. */
     913           0 :     FD_LOG_INFO(( "bad block: failed to parse, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     914           0 :     handle_bad_block( sched, block );
     915           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     916           0 :     return 0;
     917           0 :   }
     918           0 :   if( FD_UNLIKELY( block->child_idx!=ULONG_MAX ) ) {
     919             :     /* This means something is wrong upstream.  FEC sets are not being
     920             :        delivered in replay order.  We got a child block FEC set before
     921             :        this block was completely delivered. */
     922           0 :     sched->print_buf_sz = 0UL;
     923           0 :     print_all( sched, block );
     924           0 :     fd_sched_block_t * child_block = block_pool_ele( sched, block->child_idx );
     925           0 :     print_block_debug( sched, child_block );
     926           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     927           0 :     FD_LOG_CRIT(( "invariant violation: block->child_idx %lu, slot %lu, parent slot %lu", block->child_idx, fec->slot, fec->parent_slot ));
     928           0 :   }
     929             : 
     930           0 :   FD_TEST( block->fec_buf_sz>=block->fec_buf_soff );
     931           0 :   if( FD_LIKELY( block->fec_buf_sz>block->fec_buf_soff ) ) {
     932             :     /* If there is residual data from the previous FEC set within the
     933             :        same batch, we move it to the beginning of the buffer and append
     934             :        the new FEC set. */
     935           0 :     memmove( block->fec_buf, block->fec_buf+block->fec_buf_soff, block->fec_buf_sz-block->fec_buf_soff );
     936           0 :   }
     937           0 :   block->fec_buf_boff += block->fec_buf_soff;
     938           0 :   block->fec_buf_sz   -= block->fec_buf_soff;
     939           0 :   block->fec_buf_soff  = 0;
     940             :   /* Addition is safe and won't overflow because we checked the FEC
     941             :      set size above. */
     942           0 :   if( FD_UNLIKELY( block->fec_buf_sz+fec->fec->data_sz>FD_SCHED_MAX_FEC_BUF_SZ ) ) {
     943             :     /* In a conformant block, there shouldn't be more than a
     944             :        transaction's worth of residual data left over from the previous
     945             :        FEC set within the same batch.  So if this condition doesn't
     946             :        hold, it's a bad block.  Instead of crashing, we should refuse to
     947             :        replay down the fork. */
     948           0 :     FD_LOG_INFO(( "bad block: fec_buf_sz %u, fec->data_sz %lu, slot %lu, parent slot %lu", block->fec_buf_sz, fec->fec->data_sz, fec->slot, fec->parent_slot ));
     949           0 :     handle_bad_block( sched, block );
     950           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     951           0 :     return 0;
     952           0 :   }
     953             : 
     954             :   /* Append the new FEC set to the end of the buffer. */
     955           0 :   fd_memcpy( block->fec_buf+block->fec_buf_sz, fec->fec->data, fec->fec->data_sz );
     956           0 :   block->fec_buf_sz += (uint)fec->fec->data_sz;
     957           0 :   sched->metrics->bytes_ingested_cnt += fec->fec->data_sz;
     958             : 
     959           0 :   block->fec_eob = fec->is_last_in_batch;
     960           0 :   block->fec_eos = fec->is_last_in_block;
     961             : 
     962           0 :   ulong block_sz = block->shred_cnt>0 ? block->shred_blk_offs[ block->shred_cnt-1 ] : 0UL;
     963           0 :   for( ulong i=0; i<fec->shred_cnt; i++ ) {
     964           0 :     if( FD_LIKELY( i<32UL ) ) {
     965           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ i ];
     966           0 :     } else if( FD_UNLIKELY( i!=fec->shred_cnt-1UL ) ) {
     967             :       /* We don't track shred boundaries after 32 shreds, assume they're
     968             :          sized uniformly */
     969           0 :       ulong num_overflow_shreds = fec->shred_cnt-32UL;
     970           0 :       ulong overflow_idx        = i-32UL;
     971           0 :       ulong overflow_data_sz    = fec->fec->data_sz-fec->fec->block_offs[ 31 ];
     972           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ 31 ] + (uint)(overflow_data_sz / num_overflow_shreds * (overflow_idx + 1UL));
     973           0 :     } else {
     974           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + (uint)fec->fec->data_sz;
     975           0 :     }
     976           0 :   }
     977             : 
     978           0 :   int err = fd_sched_parse( sched, block, fec->alut_ctx );
     979             : 
     980           0 :   if( FD_UNLIKELY( err==FD_SCHED_BAD_BLOCK ) ) {
     981           0 :     handle_bad_block( sched, block );
     982           0 :     sched->metrics->bytes_dropped_cnt += block->fec_buf_sz-block->fec_buf_soff;
     983           0 :     return 0;
     984           0 :   }
     985             : 
     986           0 :   if( FD_UNLIKELY( block->fec_eos && (block->txns_rem||block->mblks_rem) ) ) {
     987             :     /* A malformed block that fails to parse out exactly as many
     988             :        transactions and microblocks as it should. */
     989           0 :     FD_LOG_INFO(( "bad block: bytes_rem %u, txns_rem %lu, mblks_rem %lu, slot %lu, parent slot %lu", block->fec_buf_sz-block->fec_buf_soff, block->txns_rem, block->mblks_rem, block->slot, block->parent_slot ));
     990           0 :     handle_bad_block( sched, block );
     991           0 :     return 0;
     992           0 :   }
     993             : 
     994           0 :   if( FD_UNLIKELY( block->fec_eos && !block->last_mblk_is_tick ) ) {
     995             :     /* The last microblock should be a tick.
     996             : 
     997             :        Note that this early parse-time detection could cause us to throw
     998             :        a slightly different error from Agave, in the case that there are
     999             :        too few ticks, since the tick count check precedes the trailing
    1000             :        entry check in Agave.  That being said, ultimately a
    1001             :        TRAILING_ENTRY renders a block invalid, regardless of anything
    1002             :        else. */
    1003           0 :     FD_LOG_INFO(( "bad block: TRAILING_ENTRY, slot %lu, parent slot %lu, mblk_cnt %u", block->slot, block->parent_slot, block->mblk_cnt ));
    1004           0 :     handle_bad_block( sched, block );
    1005           0 :     return 0;
    1006           0 :   }
    1007             : 
    1008             :   /* We just received a FEC set, which may have made all transactions in
    1009             :      a partially parsed microblock available.  If this were a malformed
    1010             :      block that ends in a non-tick microblock, there's not going to be a
    1011             :      hashing task from the missing ending tick to drain the mixin queue.
    1012             :      So we try to drain the mixin queue right here.  Another option is
    1013             :      to drain it at dispatch time, when we are about to dispatch the end
    1014             :      of block signal, right before the check for whether block should
    1015             :      end. */
    1016           0 :   int mixin_res;
    1017           0 :   while( (mixin_res=maybe_mixin( sched, block )) ) {
    1018           0 :     if( FD_UNLIKELY( mixin_res==-1 ) ) {
    1019           0 :       handle_bad_block( sched, block );
    1020           0 :       return 0;
    1021           0 :     }
    1022           0 :     FD_TEST( mixin_res==1||mixin_res==2 );
    1023           0 :   }
    1024             : 
    1025             :   /* Check if we need to set the active block. */
    1026           0 :   check_or_set_active_block( sched );
    1027             : 
    1028           0 :   return 1;
    1029           0 : }
    1030             : 
    1031             : ulong /* Returns 1UL if a task was selected for dispatch, 0UL if nothing can be dispatched right now. */
    1032           0 : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) { /* Selects the next task to dispatch from the active block (sched->active_bank_idx). */
    1033           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1034           0 :   FD_TEST( ref_q_empty( sched->ref_q ) ); /* ref_q is pushed to on the block end path below and must be empty on entry. */
    1035             : 
    1036           0 :   ulong exec_ready_bitset0 = sched->txn_exec_ready_bitset[ 0 ];
    1037           0 :   ulong exec_fully_ready_bitset = sched->sigverify_ready_bitset[ 0 ] & sched->poh_ready_bitset[ 0 ] & exec_ready_bitset0;
    1038           0 :   if( FD_UNLIKELY( !exec_fully_ready_bitset ) ) {
    1039             :     /* Early exit if no exec tile has its bit set in all three ready bitsets. */
    1040           0 :     return 0UL;
    1041           0 :   }
    1042             : 
    1043           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    1044             :     /* No need to try activating a block.  If we're in this state,
    1045             :        there's truly nothing to execute.  We will activate something
    1046             :        when we ingest a FEC set with transactions. */
    1047           0 :     return 0UL;
    1048           0 :   }
    1049             : 
    1050           0 :   out->task_type = FD_SCHED_TT_NULL; /* Default until a task is chosen below. */
    1051             : 
    1052             :   /* We could in theory reevaluate staging lane allocation here and do
    1053             :      promotion/demotion as needed.  It's a policy decision to minimize
    1054             :      fork churn for now and just execute down the same active fork. */
    1055             : 
    1056           0 :   ulong bank_idx = sched->active_bank_idx;
    1057           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1058           0 :   if( FD_UNLIKELY( block_should_deactivate( block ) ) ) {
    1059           0 :     sched->print_buf_sz = 0UL;
    1060           0 :     print_all( sched, block );
    1061           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1062           0 :     FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx ));
    1063           0 :   }
    1064             : 
    1065           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
    1066           0 :   block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
    1067           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
    1068             : 
    1069           0 :   if( FD_UNLIKELY( !block->block_start_signaled ) ) {
    1070           0 :     out->task_type = FD_SCHED_TT_BLOCK_START;
    1071           0 :     out->block_start->bank_idx        = bank_idx;
    1072           0 :     out->block_start->parent_bank_idx = block->parent_idx;
    1073           0 :     out->block_start->slot            = block->slot;
    1074           0 :     block->block_start_signaled = 1;
    1075           0 :     return 1UL;
    1076           0 :   }
    1077             : 
    1078           0 :   ulong exec_tile_idx0 = fd_ulong_if( !!exec_fully_ready_bitset, (ulong)fd_ulong_find_lsb( exec_fully_ready_bitset ), ULONG_MAX ); /* Bitset is nonzero here because of the early exit above, so the ULONG_MAX arm is not expected to be taken. */
    1079           0 :   ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
    1080           0 :   if( FD_LIKELY( exec_queued_cnt>0UL && fd_ulong_popcnt( exec_fully_ready_bitset ) ) ) { /* Optimize for no fork switching. */
    1081             :     /* Transaction execution has the highest priority.  Current mainnet
    1082             :        block times are very much dominated by critical path transaction
    1083             :        execution.  To achieve the fastest block replay speed, we can't
    1084             :        afford to make any mistake in critical path dispatching.  Any
    1085             :        deviation from perfect critical path dispatching is basically
    1086             :        irrecoverable.  As such, we try to keep all the exec tiles busy
    1087             :        with transaction execution, but we allow at most one transaction
    1088             :        to be in-flight per exec tile.  This is to ensure that whenever a
    1089             :        critical path transaction completes, we have at least one exec
    1090             :        tile, e.g. the one that just completed said transaction, readily
    1091             :        available to continue executing down the critical path. */
    1092           0 :     out->txn_exec->txn_idx = fd_rdisp_get_next_ready( sched->rdisp, bank_idx );
    1093           0 :     if( FD_UNLIKELY( out->txn_exec->txn_idx==0UL ) ) {
    1094             :       /* There are transactions queued but none ready for execution.
    1095             :          This implies that there must be in-flight transactions on whose
    1096             :          completion the queued transactions depend. So we return and
    1097             :          wait for those in-flight transactions to retire.  This is a
    1098             :          policy decision to execute as much as we can down the current
    1099             :          fork. */
    1100           0 :       if( FD_UNLIKELY( !block->txn_exec_in_flight_cnt ) ) {
    1101           0 :         sched->print_buf_sz = 0UL;
    1102           0 :         print_all( sched, block );
    1103           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1104           0 :         FD_LOG_CRIT(( "invariant violation: no ready transaction found but block->txn_exec_in_flight_cnt==0" ));
    1105           0 :       }
    1106             : 
    1107             :       /* Next up are PoH tasks.  Same dispatching policy as sigverify
    1108             :          tasks. */
    1109           0 :       ulong poh_ready_bitset = exec_fully_ready_bitset;
    1110           0 :       ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1111           0 :       if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1112           0 :         dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1113           0 :         return 1UL;
    1114           0 :       }
    1115             : 
    1116             :       /* Dispatch more sigverify tasks only if at least one exec tile is
    1117             :          executing transactions or completely idle.  Allow at most one
    1118             :          sigverify task in-flight per tile, and only dispatch to
    1119             :          completely idle tiles. */
    1120           0 :       ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1121           0 :       ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1122           0 :       if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1123           0 :         dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1124           0 :         sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
    1125           0 :         return 1UL;
    1126           0 :       }
    1127           0 :       return 0UL;
    1128           0 :     }
    1129           0 :     out->task_type = FD_SCHED_TT_TXN_EXEC;
    1130           0 :     out->txn_exec->bank_idx = bank_idx;
    1131           0 :     out->txn_exec->slot     = block->slot;
    1132           0 :     out->txn_exec->exec_idx = exec_tile_idx0;
    1133           0 :     FD_TEST( out->txn_exec->exec_idx!=ULONG_MAX );
    1134             : 
    1135           0 :     long now = fd_tickcount();
    1136           0 :     ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    1137           0 :     ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( exec_ready_bitset0 ); /* Uses the snapshot taken on entry, so the tile picked here is not yet counted as busy. */
    1138           0 :     sched->metrics->txn_none_in_flight_tickcount     += fd_ulong_if( txn_exec_busy_cnt==0UL && sched->txn_in_flight_last_tick!=LONG_MAX, delta, 0UL );
    1139           0 :     sched->metrics->txn_weighted_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt!=0UL, delta, 0UL );
    1140           0 :     sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
    1141           0 :     sched->txn_in_flight_last_tick = now;
    1142             : 
    1143           0 :     sched->txn_info_pool[ out->txn_exec->txn_idx ].tick_exec_disp = now;
    1144             : 
    1145           0 :     sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_clear_bit( exec_ready_bitset0, (int)exec_tile_idx0);
    1146           0 :     sched->tile_to_bank_idx[ exec_tile_idx0 ] = bank_idx;
    1147             : 
    1148           0 :     block->txn_exec_in_flight_cnt++;
    1149           0 :     sched->metrics->txn_max_in_flight_cnt = fd_uint_max( sched->metrics->txn_max_in_flight_cnt, block->txn_exec_in_flight_cnt );
    1150             : 
    1151           0 :     ulong total_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
    1152           0 :     if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    1153           0 :     if( FD_UNLIKELY( block->txn_exec_in_flight_cnt+block->txn_sigverify_in_flight_cnt+block->poh_hashing_in_flight_cnt!=total_exec_busy_cnt ) ) {
    1154             :       /* Ideally we'd simply assert that the two sides of the equation
    1155             :          are equal.  But abandoned blocks throw a wrench into this.  We
    1156             :          allow abandoned blocks to have in-flight transactions that are
    1157             :          naturally drained while we try to dispatch from another block.
    1158             :          In such cases, the total number of in-flight transactions
    1159             :          should include the abandoned blocks too.  The contract is that
    1160             :          blocks with in-flight transactions cannot be abandoned or
    1161             :          demoted from rdisp.  So a dying block has to be the head of one
    1162             :          of the staging lanes. */
    1163             :       // FIXME This contract no longer true if we implement immediate
    1164             :       // demotion of abandoned blocks.
    1165           0 :       ulong total_in_flight = 0UL;
    1166           0 :       for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    1167           0 :         if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
    1168           0 :           fd_sched_block_t * staged_block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
    1169           0 :           if( FD_UNLIKELY( block_is_in_flight( staged_block )&&!(staged_block==block||staged_block->dying) ) ) {
    1170           0 :             sched->print_buf_sz = 0UL;
    1171           0 :             print_all( sched, staged_block );
    1172           0 :             FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1173           0 :             FD_LOG_CRIT(( "invariant violation: in-flight block is neither active nor dying" ));
    1174           0 :           }
    1175           0 :           total_in_flight += staged_block->txn_exec_in_flight_cnt;
    1176           0 :           total_in_flight += staged_block->txn_sigverify_in_flight_cnt;
    1177           0 :           total_in_flight += staged_block->poh_hashing_in_flight_cnt;
    1178           0 :         }
    1179           0 :       }
    1180           0 :       if( FD_UNLIKELY( total_in_flight!=total_exec_busy_cnt ) ) {
    1181           0 :         sched->print_buf_sz = 0UL;
    1182           0 :         print_all( sched, block );
    1183           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1184           0 :         FD_LOG_CRIT(( "invariant violation: total_in_flight %lu != total_exec_busy_cnt %lu", total_in_flight, total_exec_busy_cnt ));
    1185           0 :       }
    1186           0 :       FD_LOG_DEBUG(( "exec_busy_cnt %lu checks out", total_exec_busy_cnt ));
    1187           0 :     }
    1188           0 :     return 1UL;
    1189           0 :   }
    1190             : 
    1191             :   /* At this point exec_queued_cnt==0, since every path through the branch above returns. */
    1192             : 
    1193             :   /* Next up are PoH tasks.  Same dispatching policy as sigverify. */
    1194           0 :   ulong poh_ready_bitset = exec_fully_ready_bitset;
    1195           0 :   ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1196           0 :   if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1197           0 :     dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1198           0 :     return 1UL;
    1199           0 :   }
    1200             : 
    1201             :   /* Try to dispatch a sigverify task, but leave one exec tile idle for
    1202             :      critical path execution, unless there's not going to be any more
    1203             :      real transactions for the critical path.  In the degenerate case of
    1204             :      only one exec tile, keep it busy. */
    1205           0 :   ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1206           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1207           0 :   if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1208           0 :     dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1209           0 :     sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
    1210           0 :     return 1UL;
    1211           0 :   }
    1212             : 
    1213           0 :   if( FD_UNLIKELY( block_should_signal_end( block ) ) ) {
    1214           0 :     FD_TEST( block->block_start_signaled );
    1215           0 :     if( FD_UNLIKELY( verify_ticks_final( block ) ) ) {
    1216             :       /* Tick verification can't be done at parse time (except for
    1217             :          TRAILING_ENTRY), because we may not know the expected number of
    1218             :          hashes yet.  It can't be driven by transaction dispatch or
    1219             :          completion, because the block may be empty.  Similarly, it can't
    1220             :          be driven by PoH hashing, because a bad block may simply not
    1221             :          have any microblocks. */
    1222           0 :       handle_bad_block( sched, block );
    1223           0 :       out->task_type = FD_SCHED_TT_MARK_DEAD;
    1224           0 :       out->mark_dead->bank_idx = bank_idx;
    1225           0 :       return 1UL;
    1226           0 :     }
    1227           0 :     out->task_type = FD_SCHED_TT_BLOCK_END;
    1228           0 :     out->block_end->bank_idx = bank_idx;
    1229           0 :     block->block_end_signaled = 1;
    1230           0 :     FD_TEST( block->refcnt );
    1231           0 :     block->refcnt = 0;
    1232           0 :     if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
    1233           0 :     ref_q_push_tail( sched->ref_q, bank_idx );
    1234           0 :     return 1UL;
    1235           0 :   }
    1236             : 
    1237             :   /* Nothing queued for the active block.  If we haven't received all
    1238             :      the FEC sets for it, then return and wait for more FEC sets, while
    1239             :      there are in-flight transactions.  This is a policy decision to
    1240             :      minimize fork churn and allow for executing down the current fork
    1241             :      as much as we can.  If we have received all the FEC sets for it,
    1242             :      then we'd still like to return and wait for the in-flight
    1243             :      transactions to retire, before switching to a different block.
    1244             : 
    1245             :      Either way, there should be in-flight transactions.  We deactivate
    1246             :      the active block the moment we exhausted transactions from it. */
    1247           0 :   if( FD_UNLIKELY( !block_is_in_flight( block ) ) ) {
    1248           0 :     sched->print_buf_sz = 0UL;
    1249           0 :     print_all( sched, block );
    1250           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1251           0 :     FD_LOG_CRIT(( "invariant violation: expected in-flight transactions but none" ));
    1252           0 :   }
    1253             : 
    1254           0 :   return 0UL;
    1255           0 : }
    1256             : 
    1257             : int
    1258           0 : fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data ) {
                         /* Records the completion of one task of the given task_type and
                            updates the per-block and scheduler-wide bookkeeping.

                            txn_idx is only read for FD_SCHED_TT_TXN_EXEC and
                            FD_SCHED_TT_TXN_SIGVERIFY.  data is only read for
                            FD_SCHED_TT_POH_HASH, where it is punned to a
                            fd_execrp_poh_hash_done_msg_t.  exec_idx selects the tile whose
                            ready bit is set again and, for all but the block start/end
                            tasks, the block via sched->tile_to_bank_idx.

                            Returns -1 after calling handle_bad_block() when a PoH check
                            fails (tick hash mismatch, maybe_mixin()==-1, or a non-zero
                            verify_ticks_eager()).  Returns 0 otherwise. */
    1259           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1260             : 
                         /* Resolve which block this completion belongs to. */
    1261           0 :   ulong bank_idx = ULONG_MAX;
    1262           0 :   switch( task_type ) {
    1263           0 :     case FD_SCHED_TT_BLOCK_START:
    1264           0 :     case FD_SCHED_TT_BLOCK_END: {
    1265           0 :       (void)txn_idx;
    1266           0 :       (void)data;
                             /* Block start/end completions are attributed to the currently
                                active block. */
    1267           0 :       bank_idx = sched->active_bank_idx;
    1268           0 :       break;
    1269           0 :     }
    1270           0 :     case FD_SCHED_TT_TXN_EXEC:
    1271           0 :     case FD_SCHED_TT_TXN_SIGVERIFY: {
    1272           0 :       (void)data;
    1273           0 :       FD_TEST( txn_idx < sched->depth );
                             /* NOTE(review): exec_idx is used as an array index here and as
                                a bit index further down without a visible range check --
                                confirm callers guarantee it is a valid tile index. */
    1274           0 :       bank_idx = sched->tile_to_bank_idx[ exec_idx ];
    1275           0 :       break;
    1276           0 :     }
    1277           0 :     case FD_SCHED_TT_POH_HASH: {
    1278           0 :       (void)txn_idx;
    1279           0 :       bank_idx = sched->tile_to_bank_idx[ exec_idx ];
    1280           0 :       break;
    1281           0 :     }
    1282           0 :     default: FD_LOG_CRIT(( "unsupported task_type %lu", task_type ));
    1283           0 :   }
    1284           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1285             : 
    1286           0 :   if( FD_UNLIKELY( !block->in_sched ) ) {
    1287           0 :     FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
    1288           0 :                   block->slot, block->parent_slot, bank_idx ));
    1289           0 :   }
    1290           0 :   if( FD_UNLIKELY( !block->staged ) ) {
    1291             :     /* Invariant: only staged blocks can have in-flight transactions. */
    1292           0 :     FD_LOG_CRIT(( "invariant violation: block->staged==0, slot %lu, parent slot %lu",
    1293           0 :                   block->slot, block->parent_slot ));
    1294           0 :   }
    1295           0 :   if( FD_UNLIKELY( !block->in_rdisp ) ) {
    1296             :     /* Invariant: staged blocks must be in the dispatcher. */
    1297           0 :     FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
    1298           0 :                   block->slot, block->parent_slot ));
    1299           0 :   }
    1300             : 
                         /* Refresh the per-block high-water marks of pool occupancy, using
                            the same occupancy expressions that fd_sched_metrics_write()
                            publishes as gauges. */
    1301           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
    1302           0 :   block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
    1303           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
    1304             : 
    1305           0 :   int exec_tile_idx = (int)exec_idx;
    1306             : 
    1307           0 :   switch( task_type ) {
    1308           0 :     case FD_SCHED_TT_BLOCK_START: {
                             /* Block start may complete at most once per block. */
    1309           0 :       FD_TEST( !block->block_start_done );
    1310           0 :       block->block_start_done = 1;
    1311           0 :       break;
    1312           0 :     }
    1313           0 :     case FD_SCHED_TT_BLOCK_END: {
    1314             :       /* It may seem redundant to be invoking task_done() on these
    1315             :          somewhat fake tasks.  But these are necessary to drive state
    1316             :          transition for empty blocks or slow blocks. */
    1317           0 :       FD_TEST( !block->block_end_done );
    1318           0 :       block->block_end_done = 1;
                             /* Render the block metrics into the shared print buffer (reset
                                first) for the debug log line below. */
    1319           0 :       sched->print_buf_sz = 0UL;
    1320           0 :       print_block_metrics( sched, block );
    1321           0 :       FD_LOG_DEBUG(( "block %lu:%lu replayed fully: %s", block->slot, bank_idx, sched->print_buf ));
    1322           0 :       break;
    1323           0 :     }
    1324           0 :     case FD_SCHED_TT_TXN_EXEC: {
                             /* Accumulate the tick-weighted in-flight metrics: delta is the
                                tick count since the previous sample, and the busy count is
                                exec_cnt minus the number of set bits in the exec ready
                                bitset.  This is sampled before this tile's ready bit is set
                                again below. */
    1325           0 :       long now = fd_tickcount();
    1326           0 :       ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    1327           0 :       ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ] );
    1328           0 :       sched->metrics->txn_weighted_in_flight_tickcount += delta;
    1329           0 :       sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
    1330           0 :       sched->txn_in_flight_last_tick = now;
    1331             : 
    1332           0 :       sched->txn_info_pool[ txn_idx ].tick_exec_done = now;
    1333             : 
    1334           0 :       block->txn_exec_done_cnt++;
    1335           0 :       block->txn_exec_in_flight_cnt--;
                             /* The tile's ready bit must have been clear; set it again. */
    1336           0 :       FD_TEST( !fd_ulong_extract_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx ) );
    1337           0 :       sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx );
    1338           0 :       sched->metrics->txn_exec_done_cnt++;
    1339           0 :       txn_bitset_insert( sched->exec_done_set, txn_idx );
    1340           0 :       sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_EXEC_DONE;
    1341           0 :       if( txn_bitset_test( sched->sigverify_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
    1342             :         /* Release the txn_idx if all tasks on it are done.  This is
    1343             :            guaranteed to only happen once per transaction because
    1344             :            whichever one completed first would not release. */
    1345           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
    1346           0 :         sched->txn_pool_free_cnt++;
    1347           0 :         block->txn_done_cnt++;
    1348           0 :         sched->metrics->txn_done_cnt++;
    1349           0 :       } else {
                               /* Sigverify and/or PoH mixin still pending: complete in the
                                  dispatcher with a last argument of 0 and do not count the
                                  txn as done.  NOTE(review): presumably 0 means "do not
                                  release txn_idx" -- confirm against fd_rdisp_complete_txn. */
    1350           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 0 );
    1351           0 :       }
    1352           0 :       break;
    1353           0 :     }
    1354           0 :     case FD_SCHED_TT_TXN_SIGVERIFY: {
    1355           0 :       sched->txn_info_pool[ txn_idx ].tick_sigverify_done = fd_tickcount();
    1356           0 :       block->txn_sigverify_done_cnt++;
    1357           0 :       block->txn_sigverify_in_flight_cnt--;
                             /* The tile's sigverify ready bit must have been clear; set it
                                again. */
    1358           0 :       FD_TEST( !fd_ulong_extract_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ) );
    1359           0 :       sched->sigverify_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
    1360           0 :       sched->metrics->txn_sigverify_done_cnt++;
    1361           0 :       txn_bitset_insert( sched->sigverify_done_set, txn_idx );
    1362           0 :       sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_SIGVERIFY_DONE;
                             /* Unlike the exec case above, the dispatcher is only called
                                here when this completion is the one that releases the
                                txn_idx. */
    1363           0 :       if( txn_bitset_test( sched->exec_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
    1364             :         /* Release the txn_idx if all tasks on it are done.  This is
    1365             :            guaranteed to only happen once per transaction because
    1366             :            whichever one completed first would not release. */
    1367           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
    1368           0 :         sched->txn_pool_free_cnt++;
    1369           0 :         block->txn_done_cnt++;
    1370           0 :         sched->metrics->txn_done_cnt++;
    1371           0 :       }
    1372           0 :       break;
    1373           0 :     }
    1374           0 :     case FD_SCHED_TT_POH_HASH: {
    1375           0 :       block->poh_hashing_in_flight_cnt--;
                             /* The tile's PoH ready bit must have been clear; set it
                                again. */
    1376           0 :       FD_TEST( !fd_ulong_extract_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx ) );
    1377           0 :       sched->poh_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
                             /* Fold the reported progress into the microblock: add the
                                hashes performed and take over the resulting hash. */
    1378           0 :       fd_execrp_poh_hash_done_msg_t * msg = fd_type_pun( data );
    1379           0 :       fd_sched_mblk_t * mblk = sched->mblk_pool+msg->mblk_idx;
    1380           0 :       mblk->curr_hashcnt += msg->hashcnt;
    1381           0 :       memcpy( mblk->curr_hash, msg->hash, sizeof(fd_hash_t) );
    1382           0 :       ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt;
    1383           0 :       if( !hashcnt_todo ) {
                               /* All hashes for this microblock have been performed. */
    1384           0 :         block->poh_hashing_done_cnt++;
    1385           0 :         sched->metrics->mblk_poh_hashed_cnt++;
    1386           0 :         if( FD_LIKELY( !mblk->is_tick ) ) {
    1387             :           /* This is not a tick.  Enqueue for mixin. */
    1388           0 :           mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, msg->mblk_idx, sched->mblk_pool );
    1389           0 :         } else {
    1390             :           /* This is a tick.  No need to mixin.  Check the hash value
    1391             :              right away. */
    1392           0 :           block->poh_hash_cmp_done_cnt++;
    1393           0 :           sched->metrics->mblk_poh_done_cnt++;
                                 /* NOTE(review): mblk->curr_hash, end_hash and hashcnt are
                                    read below, after free_mblk() -- presumably free_mblk
                                    leaves the element contents intact; confirm. */
    1394           0 :           free_mblk( sched, block, (uint)msg->mblk_idx );
    1395           0 :           if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
    1396           0 :             FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
    1397           0 :             FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
    1398           0 :             FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, is_tick, slot %lu, parent slot %lu", msg->mblk_idx, our_str, ref_str, mblk->hashcnt, block->slot, block->parent_slot ));
    1399           0 :             handle_bad_block( sched, block );
    1400           0 :             return -1;
    1401           0 :           }
    1402           0 :         }
    1403             :         /* Try to drain the mixin queue. */
                               /* Loop while maybe_mixin() returns non-zero: -1 marks the
                                  block bad, and the only other values accepted are 1 and 2. */
    1404           0 :         int mixin_res;
    1405           0 :         while( (mixin_res=maybe_mixin( sched, block )) ) {
    1406           0 :           if( FD_UNLIKELY( mixin_res==-1 ) ) {
    1407           0 :             handle_bad_block( sched, block );
    1408           0 :             return -1;
    1409           0 :           }
    1410           0 :           FD_TEST( mixin_res==1||mixin_res==2 );
    1411           0 :         }
    1412           0 :       } else {
                               /* Hashes remain: put the microblock back on the
                                  hashing-in-progress list. */
    1413           0 :         mblk_slist_idx_push_tail( block->mblks_hashing_in_progress, msg->mblk_idx, sched->mblk_pool );
    1414           0 :       }
                             /* A non-zero verify_ticks_eager() result marks the block bad. */
    1415           0 :       if( FD_UNLIKELY( verify_ticks_eager( block ) ) ) {
    1416           0 :         handle_bad_block( sched, block );
    1417           0 :         return -1;
    1418           0 :       }
    1419           0 :       break;
    1420           0 :     }
    1421           0 :   }
    1422             : 
                         /* A dying block with nothing left in flight is abandoned together
                            with its subtree.  It must not be the active block. */
    1423           0 :   if( FD_UNLIKELY( block->dying && !block_is_in_flight( block ) ) ) {
    1424           0 :     if( FD_UNLIKELY( sched->active_bank_idx==bank_idx ) ) {
    1425           0 :       FD_LOG_CRIT(( "invariant violation: active block shouldn't be dying, bank_idx %lu, slot %lu, parent slot %lu",
    1426           0 :                     bank_idx, block->slot, block->parent_slot ));
    1427           0 :     }
    1428           0 :     FD_LOG_DEBUG(( "dying block %lu drained", block->slot ));
    1429           0 :     subtree_abandon( sched, block );
    1430           0 :     return 0;
    1431           0 :   }
    1432             : 
    1433           0 :   if( FD_UNLIKELY( !block->dying && sched->active_bank_idx!=bank_idx ) ) {
    1434             :     /* Block is not dead.  So we should be actively replaying it. */
    1435           0 :     fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    1436           0 :     FD_LOG_CRIT(( "invariant violation: sched->active_bank_idx %lu, slot %lu, parent slot %lu, bank_idx %lu, slot %lu, parent slot %lu",
    1437           0 :                   sched->active_bank_idx, active_block->slot, active_block->parent_slot,
    1438           0 :                   bank_idx, block->slot, block->parent_slot ));
    1439           0 :   }
    1440             : 
    1441           0 :   maybe_switch_block( sched, bank_idx );
    1442             : 
    1443           0 :   return 0;
    1444           0 : }
    1445             : 
    1446             : void
    1447           0 : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx ) {
                         /* Abandons the block at bank_idx via subtree_abandon() and then
                            calls try_activate_block().  bank_idx must be below
                            sched->block_cnt_max and the block must have in_sched set. */
    1448           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1449           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1450             : 
    1451           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1452           0 :   if( FD_UNLIKELY( !block->in_sched ) ) {
    1453           0 :     FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
    1454           0 :                   block->slot, block->parent_slot, bank_idx ));
    1455           0 :   }
    1456             : 
    1457           0 :   FD_LOG_INFO(( "abandoning block %lu slot %lu", bank_idx, block->slot ));
                         /* Dump state into the shared print buffer (reset first) and log it
                            at debug level before anything is torn down. */
    1458           0 :   sched->print_buf_sz = 0UL;
    1459           0 :   print_all( sched, block );
    1460           0 :   FD_LOG_DEBUG(( "%s", sched->print_buf ));
    1461             : 
    1462           0 :   subtree_abandon( sched, block );
    1463           0 :   try_activate_block( sched );
    1464           0 : }
    1465             : 
    1466             : void
    1467           0 : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot ) {
                         /* Adds the block at bank_idx under parent_bank_idx via add_block()
                            and immediately marks it as finished: end-of-slot seen, block
                            start/end both signaled and done, refcnt 0.

                            A parent_bank_idx of ULONG_MAX means there is no parent; the
                            block is then marked rooted and becomes sched->root_idx. */
    1468           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1469           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1470             : 
    1471           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1472           0 :   add_block( sched, bank_idx, parent_bank_idx );
                         /* These assignments follow add_block() and set the final values of
                            the fields below. */
    1473           0 :   block->slot                   = slot;
    1474           0 :   block->fec_eos                = 1;
    1475           0 :   block->block_start_signaled   = 1;
    1476           0 :   block->block_end_signaled     = 1;
    1477           0 :   block->block_start_done       = 1;
    1478           0 :   block->block_end_done         = 1;
    1479           0 :   block->refcnt                 = 0;
                         /* The two conditions below are complementary: parent_slot comes
                            from the parent block when there is one, else it is ULONG_MAX. */
    1480           0 :   if( FD_LIKELY( parent_bank_idx!=ULONG_MAX ) ) {
    1481           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
    1482           0 :     block->parent_slot = parent_block->slot;
    1483           0 :   }
    1484           0 :   if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    1485             :     /* Assumes that a NULL parent implies the snapshot slot. */
    1486           0 :     block->parent_slot = ULONG_MAX;
    1487           0 :     block->rooted      = 1;
    1488           0 :     sched->root_idx    = bank_idx;
    1489           0 :   }
    1490           0 : }
    1491             : 
    1492             : void
    1493           0 : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx ) {
                         /* Moves the scheduler root from sched->root_idx to root_idx.  Both
                            indices must be below sched->block_cnt_max, sched->ref_q must be
                            empty on entry, and the old root must be marked rooted.  When
                            root_idx equals the current root this only logs and returns. */
    1494           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1495           0 :   FD_TEST( root_idx<sched->block_cnt_max );
    1496           0 :   FD_TEST( sched->root_idx<sched->block_cnt_max );
    1497           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1498             : 
    1499           0 :   fd_sched_block_t * new_root = block_pool_ele( sched, root_idx );
    1500           0 :   fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
    1501           0 :   if( FD_UNLIKELY( !old_root->rooted ) ) {
    1502           0 :     FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
    1503           0 :                   old_root->slot, old_root->parent_slot ));
    1504           0 :   }
    1505             : 
    1506             :   /* Early exit if the new root is the same as the old root. */
    1507           0 :   if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    1508           0 :     FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
    1509           0 :                   new_root->slot, new_root->parent_slot ));
    1510           0 :     return;
    1511           0 :   }
    1512             : 
                         /* Prune starting at the old root, passing the new root along.
                            NOTE(review): presumably subtree_prune() spares the subtree of
                            the new root -- confirm against its definition. */
    1513           0 :   subtree_prune( sched, sched->root_idx, root_idx );
    1514             : 
                         /* The new root no longer has a parent. */
    1515           0 :   new_root->parent_idx = ULONG_MAX;
    1516           0 :   sched->root_idx = root_idx;
    1517           0 : }
    1518             : 
    1519             : void
    1520           0 : fd_sched_root_notify( fd_sched_t * sched, ulong root_idx ) {
                         /* Marks the chain of blocks from root_idx up through its ancestors
                            as rooted, then abandons every non-rooted child subtree hanging
                            off that chain between the old root and the new root.
                            sched->root_idx itself is not changed here.

                            Both indices must be below sched->block_cnt_max, sched->ref_q
                            must be empty on entry, and the old root must be marked rooted.
                            When root_idx equals the current root this only logs and
                            returns. */
    1521           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1522           0 :   FD_TEST( root_idx<sched->block_cnt_max );
    1523           0 :   FD_TEST( sched->root_idx<sched->block_cnt_max );
    1524           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1525             : 
    1526           0 :   fd_sched_block_t * block    = block_pool_ele( sched, root_idx );
    1527           0 :   fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
    1528           0 :   if( FD_UNLIKELY( !old_root->rooted ) ) {
    1529           0 :     FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
    1530           0 :                   old_root->slot, old_root->parent_slot ));
    1531           0 :   }
    1532             : 
    1533             :   /* Early exit if the new root is the same as the old root. */
    1534           0 :   if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    1535           0 :     FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
    1536           0 :                   block->slot, block->parent_slot ));
    1537           0 :     return;
    1538           0 :   }
    1539             : 
    1540             :   /* Mark every node from the new root up through its parents to the
    1541             :      old root as being rooted. */
                         /* Each visited block must be done, not dying, not staged and not
                            in the dispatcher.  The walk ends when block_pool_ele() yields
                            NULL; prev then holds the last block visited.
                            NOTE(review): presumably block_pool_ele() returns NULL for a
                            parent_idx of ULONG_MAX -- confirm against its definition. */
    1542           0 :   fd_sched_block_t * curr = block;
    1543           0 :   fd_sched_block_t * prev = NULL;
    1544           0 :   while( curr ) {
    1545           0 :     if( FD_UNLIKELY( !block_is_done( curr ) ) ) {
    1546           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is not done, slot %lu, parent slot %lu",
    1547           0 :                     curr->slot, curr->parent_slot ));
    1548           0 :     }
    1549           0 :     if( FD_UNLIKELY( curr->dying ) ) {
    1550           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is dying, slot %lu, parent slot %lu",
    1551           0 :                     curr->slot, curr->parent_slot ));
    1552           0 :     }
    1553           0 :     if( FD_UNLIKELY( curr->staged ) ) {
    1554           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is staged, slot %lu, parent slot %lu",
    1555           0 :                     curr->slot, curr->parent_slot ));
    1556           0 :     }
    1557           0 :     if( FD_UNLIKELY( curr->in_rdisp ) ) {
    1558           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is in the dispatcher, slot %lu, parent slot %lu",
    1559           0 :                     curr->slot, curr->parent_slot ));
    1560           0 :     }
    1561           0 :     curr->rooted = 1;
    1562           0 :     prev = curr;
    1563           0 :     curr = block_pool_ele( sched, curr->parent_idx );
    1564           0 :   }
    1565             : 
    1566             :   /* If we didn't reach the old root, the new root is not a descendant. */
    1567           0 :   if( FD_UNLIKELY( prev!=old_root ) ) {
    1568           0 :     FD_LOG_CRIT(( "invariant violation: new root is not a descendant of old root, new root slot %lu, parent slot %lu, old root slot %lu, parent slot %lu",
    1569           0 :                   block->slot, block->parent_slot, old_root->slot, old_root->parent_slot ));
    1570           0 :   }
    1571             : 
                         /* Remember the active block before abandoning anything, so the
                            metric below is only bumped when an active block existed. */
    1572           0 :   ulong old_active_bank_idx = sched->active_bank_idx;
    1573             : 
    1574             :   /* Now traverse from old root towards new root, and abandon all
    1575             :      minority forks. */
                         /* At each rooted node, iterate the children through child_idx and
                            sibling_idx: the rooted child is the next node to visit, and
                            every other child subtree is abandoned. */
    1576           0 :   curr = old_root;
    1577           0 :   while( curr && curr->rooted && curr!=block ) { /* curr!=block to avoid abandoning good forks. */
    1578           0 :     fd_sched_block_t * rooted_child_block = NULL;
    1579           0 :     ulong              child_idx          = curr->child_idx;
    1580           0 :     while( child_idx!=ULONG_MAX ) {
    1581           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    1582           0 :       if( child->rooted ) {
    1583           0 :         rooted_child_block = child;
    1584           0 :       } else {
    1585             :         /* This is a minority fork. */
                               /* The change in block_abandoned_cnt across the call gives
                                  the number of blocks abandoned, for logging. */
    1586           0 :         ulong abandoned_cnt = sched->metrics->block_abandoned_cnt;
    1587           0 :         subtree_abandon( sched, child );
    1588           0 :         abandoned_cnt = sched->metrics->block_abandoned_cnt-abandoned_cnt;
    1589           0 :         if( FD_UNLIKELY( abandoned_cnt ) ) FD_LOG_DEBUG(( "abandoned %lu blocks on minority fork starting at block %lu:%lu", abandoned_cnt, child->slot, child_idx ));
    1590           0 :       }
    1591           0 :       child_idx = child->sibling_idx;
    1592           0 :     }
    1593           0 :     curr = rooted_child_block;
    1594           0 :   }
    1595             : 
    1596             :   /* If the active block got abandoned, we need to reset it. */
                         /* This branch is also taken when there was no active block on
                            entry; the counter is then left unchanged but
                            try_activate_block() still runs. */
    1597           0 :   if( sched->active_bank_idx==ULONG_MAX ) {
    1598           0 :     sched->metrics->deactivate_pruned_cnt += fd_uint_if( old_active_bank_idx!=ULONG_MAX, 1U, 0U );
    1599           0 :     try_activate_block( sched );
    1600           0 :   }
    1601           0 : }
    1602             : 
    1603             : ulong
    1604           0 : fd_sched_pruned_block_next( fd_sched_t * sched ) {
                         /* Pops and returns the bank index at the head of sched->ref_q, or
                            returns ULONG_MAX when the queue is empty. */
    1605           0 :   if( !ref_q_empty( sched->ref_q ) ) {
    1606           0 :     ulong bank_idx = ref_q_pop_head( sched->ref_q );
    1607           0 :     return bank_idx;
    1608           0 :   }
    1609           0 :   return ULONG_MAX;
    1610           0 : }
    1611             : 
    1612             : void
    1613           0 : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh ) {
                         /* Stores the PoH parameters on the block at bank_idx and, unless
                            FD_SCHED_SKIP_POH is set, seeds the starting hash start_poh.
                            bank_idx must be below sched->block_cnt_max and max_tick_height
                            must be strictly greater than tick_height. */
    1614           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1615           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1616           0 :   FD_TEST( max_tick_height>tick_height );
    1617           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1618           0 :   block->tick_height = tick_height;
    1619           0 :   block->max_tick_height = max_tick_height;
    1620           0 :   block->hashes_per_tick = hashes_per_tick;
    1621             :   #if FD_SCHED_SKIP_POH
    1622             :   /* No-op. */
    1623             :   #else
                         /* If microblocks were already ingested, the start hash goes into
                            the head of the unhashed list; that requires a non-empty
                            unhashed list and that no microblock has been freed yet.
                            Otherwise it is stored in block->poh_hash. */
    1624           0 :   if( FD_LIKELY( block->mblk_cnt ) ) {
    1625             :     /* Fix up the first mblk's curr_hash. */
    1626           0 :     FD_TEST( block->mblk_unhashed_cnt );
    1627           0 :     FD_TEST( !mblk_slist_is_empty( block->mblks_unhashed, sched->mblk_pool ) );
    1628           0 :     FD_TEST( !block->mblk_freed_cnt );
    1629           0 :     fd_sched_mblk_t * first_mblk = sched->mblk_pool + mblk_slist_idx_peek_head( block->mblks_unhashed, sched->mblk_pool );
    1630           0 :     memcpy( first_mblk->curr_hash, start_poh, sizeof(fd_hash_t) );
    1631           0 :   } else {
    1632           0 :     memcpy( block->poh_hash, start_poh, sizeof(fd_hash_t) );
    1633           0 :   }
    1634           0 :   #endif
    1635           0 : }
    1636             : 
    1637             : fd_txn_p_t *
    1638           0 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx ) {
                         /* Returns a pointer to element txn_idx of sched->txn_pool, or NULL
                            when txn_idx is not below sched->depth. */
    1639           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1640           0 :   if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
    1641           0 :     return NULL;
    1642           0 :   }
    1643           0 :   return sched->txn_pool+txn_idx;
    1644           0 : }
    1645             : 
    1646             : fd_sched_txn_info_t *
    1647           0 : fd_sched_get_txn_info( fd_sched_t * sched, ulong txn_idx ) {
                         /* Returns a pointer to element txn_idx of sched->txn_info_pool, or
                            NULL when txn_idx is not below sched->depth. */
    1648           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1649           0 :   if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
    1650           0 :     return NULL;
    1651           0 :   }
    1652           0 :   return sched->txn_info_pool+txn_idx;
    1653           0 : }
    1654             : 
    1655             : fd_hash_t *
    1656           0 : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx ) {
                         /* Returns block->poh_hash for the block at bank_idx.  bank_idx
                            must be below sched->block_cnt_max, and the block must have
                            fec_eos set and a non-zero mblk_cnt. */
    1657           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1658           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1659           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1660           0 :   FD_TEST( block->fec_eos );
    1661           0 :   FD_TEST( block->mblk_cnt );
    1662           0 :   return block->poh_hash;
    1663           0 : }
    1664             : 
    1665             : uint
    1666           0 : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx ) {
                         /* Returns the shred_cnt field of the block at bank_idx.  bank_idx
                            must be below sched->block_cnt_max. */
    1667           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1668           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1669           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1670           0 :   return block->shred_cnt;
    1671           0 : }
    1672             : 
    1673             : void
    1674           0 : fd_sched_metrics_write( fd_sched_t * sched ) {
                         /* Publishes the scheduler state through the FD_MGAUGE_SET and
                            FD_MCNT_SET macros under the REPLAY group: gauges first (current
                            state and pool occupancy), then the counters accumulated in
                            sched->metrics. */

                         /* Gauges.  Pool occupancy is computed as depth minus the free
                            count.  NOTE(review): the txn pool occupancy and size both
                            subtract an extra 1UL -- presumably one txn pool element is
                            reserved; confirm. */
    1675           0 :   FD_MGAUGE_SET( REPLAY, SCHED_ACTIVE_BANK_IDX, sched->active_bank_idx );
    1676           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
    1677           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT_WMK, sched->staged_popcnt_wmk );
    1678           0 :   FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_POPCNT, sched->depth-sched->txn_pool_free_cnt-1UL );
    1679           0 :   FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_SIZE, sched->depth-1UL );
    1680           0 :   FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_POPCNT, sched->depth-sched->mblk_pool_free_cnt );
    1681           0 :   FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_SIZE, sched->depth );
    1682           0 :   FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_POPCNT, sched->block_pool_popcnt );
    1683           0 :   FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_SIZE, sched->block_cnt_max );
    1684             : 
                         /* Counters.  NOTE(review): several metric names differ from the
                            field that backs them (BLOCK_REPLAYED <- block_removed_cnt,
                            DEACTIVATE_NO_WORK <- deactivate_no_txn_cnt,
                            DEACTIVATE_MINORITY <- deactivate_pruned_cnt,
                            ALUT_FAILURE <- alut_serializing_cnt,
                            BYTES_INGESTED_PADDING <- bytes_ingested_unparsed_cnt) --
                            presumably intentional renames; confirm against the metric
                            definitions. */
    1685           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_STAGED, sched->metrics->block_added_staged_cnt );
    1686           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_UNSTAGED, sched->metrics->block_added_unstaged_cnt );
    1687           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_REPLAYED, sched->metrics->block_removed_cnt );
    1688           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ABANDONED, sched->metrics->block_abandoned_cnt );
    1689           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_BAD, sched->metrics->block_bad_cnt );
    1690           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_PROMOTED, sched->metrics->block_promoted_cnt );
    1691           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_DEMOTED, sched->metrics->block_demoted_cnt );
    1692           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_CHILD, sched->metrics->deactivate_no_child_cnt );
    1693           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_WORK, sched->metrics->deactivate_no_txn_cnt );
    1694           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_ABANDONED, sched->metrics->deactivate_abandoned_cnt );
    1695           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_MINORITY, sched->metrics->deactivate_pruned_cnt );
    1696           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_SWITCH, sched->metrics->lane_switch_cnt );
    1697           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_PROMOTE, sched->metrics->lane_promoted_cnt );
    1698           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_DEMOTE, sched->metrics->lane_demoted_cnt );
    1699           0 :   FD_MCNT_SET( REPLAY, SCHED_FORK_OBSERVED, sched->metrics->fork_observed_cnt );
    1700           0 :   FD_MCNT_SET( REPLAY, SCHED_ALUT_SUCCESS, sched->metrics->alut_success_cnt );
    1701           0 :   FD_MCNT_SET( REPLAY, SCHED_ALUT_FAILURE, sched->metrics->alut_serializing_cnt );
    1702           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_PARSED, sched->metrics->txn_abandoned_parsed_cnt );
    1703           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_EXEC, sched->metrics->txn_abandoned_exec_done_cnt );
    1704           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_DONE, sched->metrics->txn_abandoned_done_cnt );
    1705           0 :   FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT, sched->metrics->txn_weighted_in_flight_cnt );
    1706           0 :   FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT_DURATION, sched->metrics->txn_weighted_in_flight_tickcount );
    1707           0 :   FD_MCNT_SET( REPLAY, SCHED_NONE_IN_FLIGHT_DURATION, sched->metrics->txn_none_in_flight_tickcount );
    1708           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_PARSED, sched->metrics->txn_parsed_cnt );
    1709           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_EXEC, sched->metrics->txn_exec_done_cnt );
    1710           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_SIGVERIFY, sched->metrics->txn_sigverify_done_cnt );
    1711           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_MIXIN, sched->metrics->txn_mixin_done_cnt );
    1712           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_DONE, sched->metrics->txn_done_cnt );
    1713           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_PARSED, sched->metrics->mblk_parsed_cnt );
    1714           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_HASHED, sched->metrics->mblk_poh_hashed_cnt );
    1715           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_DONE, sched->metrics->mblk_poh_done_cnt );
    1716           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED, sched->metrics->bytes_ingested_cnt );
    1717           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED_PADDING, sched->metrics->bytes_ingested_unparsed_cnt );
    1718           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_DROPPED, sched->metrics->bytes_dropped_cnt );
    1719           0 :   FD_MCNT_SET( REPLAY, FEC, sched->metrics->fec_cnt );
    1720           0 : }
    1721             : 
    1722             : char *
    1723           0 : fd_sched_get_state_cstr( fd_sched_t * sched ) {
                         /* Resets the scheduler's print buffer, renders print_metrics()
                            followed by print_sched() into it, and returns sched->print_buf.
                            The returned pointer is the scheduler's own buffer, which other
                            functions in this file also reset and reuse, so the contents
                            only stay valid until the next such use. */
    1724           0 :   sched->print_buf_sz = 0UL;
    1725           0 :   print_metrics( sched );
    1726           0 :   print_sched( sched );
    1727           0 :   return sched->print_buf;
    1728           0 : }
    1729             : 
    1730           0 : void * fd_sched_leave ( fd_sched_t * sched ) { return sched; } /* Returns sched unchanged; no other work is done here. */
    1731           0 : void * fd_sched_delete( void * mem         ) { return   mem; } /* Returns mem unchanged; no other work is done here. */
    1732             : 
    1733             : 
    1734             : /* Internal helpers. */
    1735             : /* add_block resets the block pool element at bank_idx to a freshly added block and, unless parent_bank_idx is ULONG_MAX, appends it to the end of the parent's child list.  The element must not already be in_sched.  A block added under a dying parent starts out dying. */
    1736             : static void
    1737             : add_block( fd_sched_t * sched,
    1738             :            ulong        bank_idx,
    1739           0 :            ulong        parent_bank_idx ) {
    1740           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1741           0 :   FD_TEST( !block->in_sched ); /* Adding the same pool element twice is a caller bug. */
    1742           0 :   sched->block_pool_popcnt++;
    1743             :   /* Zero the per-block progress counters and snapshot current pool occupancy. */
    1744           0 :   block->txn_parsed_cnt              = 0U;
    1745           0 :   block->txn_exec_in_flight_cnt      = 0U;
    1746           0 :   block->txn_exec_done_cnt           = 0U;
    1747           0 :   block->txn_sigverify_in_flight_cnt = 0U;
    1748           0 :   block->txn_sigverify_done_cnt      = 0U;
    1749           0 :   block->poh_hashing_in_flight_cnt   = 0U;
    1750           0 :   block->poh_hashing_done_cnt        = 0U;
    1751           0 :   block->poh_hash_cmp_done_cnt       = 0U;
    1752           0 :   block->txn_done_cnt                = 0U;
    1753           0 :   block->shred_cnt                   = 0U;
    1754           0 :   block->mblk_cnt                    = 0U;
    1755           0 :   block->mblk_freed_cnt              = 0U;
    1756           0 :   block->mblk_tick_cnt               = 0U;
    1757           0 :   block->mblk_unhashed_cnt           = 0U;
    1758           0 :   block->hashcnt                     = 0UL;
    1759           0 :   block->txn_pool_max_popcnt         = sched->depth - sched->txn_pool_free_cnt - 1UL; /* NOTE(review): the extra -1UL presumably excludes a reserved txn pool entry (fd_sched_parse_txn rejects txn_idx 0) -- confirm. */
    1760           0 :   block->mblk_pool_max_popcnt        = sched->depth - sched->mblk_pool_free_cnt;
    1761           0 :   block->block_pool_max_popcnt       = sched->block_pool_popcnt;
    1762             :   /* Empty the three per-block microblock lists and reset tick tracking. */
    1763           0 :   mblk_slist_remove_all( block->mblks_unhashed, sched->mblk_pool );
    1764           0 :   mblk_slist_remove_all( block->mblks_hashing_in_progress, sched->mblk_pool );
    1765           0 :   mblk_slist_remove_all( block->mblks_mixin_in_progress, sched->mblk_pool );
    1766           0 :   block->last_mblk_is_tick = 0;
    1767           0 :   block->max_tick_hashcnt  = 0UL;
    1768           0 :   block->curr_tick_hashcnt = 0UL;
    1769           0 :   block->tick_height       = ULONG_MAX; /* ULONG_MAX here and below: PoH params not set yet (verify_ticks_eager tests hashes_per_tick against it). */
    1770           0 :   block->max_tick_height   = ULONG_MAX;
    1771           0 :   block->hashes_per_tick   = ULONG_MAX;
    1772           0 :   block->inconsistent_hashes_per_tick = 0;
    1773             :   /* Reset the parser state for the FEC buffer. */
    1774           0 :   block->mblks_rem    = 0UL;
    1775           0 :   block->txns_rem     = 0UL;
    1776           0 :   block->fec_buf_sz   = 0U;
    1777           0 :   block->fec_buf_boff = 0U;
    1778           0 :   block->fec_buf_soff = 0U;
    1779           0 :   block->fec_eob      = 0;
    1780           0 :   block->fec_sob      = 1; /* With fec_sob set, fd_sched_parse first reads a ulong microblock count at buffer offset 0. */
    1781             :   /* Lifecycle flags: only refcnt and in_sched start nonzero. */
    1782           0 :   block->fec_eos              = 0;
    1783           0 :   block->rooted               = 0;
    1784           0 :   block->dying                = 0;
    1785           0 :   block->refcnt               = 1;
    1786           0 :   block->in_sched             = 1;
    1787           0 :   block->in_rdisp             = 0;
    1788           0 :   block->block_start_signaled = 0;
    1789           0 :   block->block_end_signaled   = 0;
    1790           0 :   block->block_start_done     = 0;
    1791           0 :   block->block_end_done       = 0;
    1792           0 :   block->staged               = 0;
    1793             : 
    1794           0 :   block->luf_depth = 0UL;
    1795             : 
    1796             :   /* New leaf node, no child, no sibling. */
    1797           0 :   block->child_idx   = ULONG_MAX;
    1798           0 :   block->sibling_idx = ULONG_MAX;
    1799           0 :   block->parent_idx  = ULONG_MAX;
    1800             : 
    1801           0 :   if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) { /* No parent given: leave the block unlinked. */
    1802           0 :     return;
    1803           0 :   }
    1804             : 
    1805             :   /* node->parent link */
    1806           0 :   fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
    1807           0 :   block->parent_idx = parent_bank_idx;
    1808             : 
    1809             :   /* parent->node and sibling->node links */
    1810           0 :   ulong child_idx = bank_idx;
    1811           0 :   if( FD_LIKELY( parent_block->child_idx==ULONG_MAX ) ) { /* Optimize for no forking. */
    1812           0 :     parent_block->child_idx = child_idx;
    1813           0 :   } else {
    1814           0 :     fd_sched_block_t * curr_block = block_pool_ele( sched, parent_block->child_idx );
    1815           0 :     while( curr_block->sibling_idx!=ULONG_MAX ) { /* Walk to the last existing child. */
    1816           0 :       curr_block = block_pool_ele( sched, curr_block->sibling_idx );
    1817           0 :     }
    1818           0 :     curr_block->sibling_idx = child_idx;
    1819           0 :     sched->metrics->fork_observed_cnt++; /* The parent already had a child, so this is a fork. */
    1820           0 :   }
    1821             : 
    1822           0 :   if( FD_UNLIKELY( parent_block->dying ) ) { /* A child of a dying block is itself dying. */
    1823           0 :     block->dying = 1;
    1824           0 :   }
    1825           0 : }
    1826             : 
    1827             : /* Agave invokes verify_ticks() anywhere between once per slot and once
    1828             :    per entry batch, before transactions are parsed or dispatched for
    1829             :    execution.  We can't do quite the same thing due to out-of-order
    1830             :    scheduling and the fact that we allow parsing to run well ahead of
    1831             :    block boundaries.  Out-of-order scheduling is good, so is overlapping
    1832             :    parsing with execution.  The easiest thing for us would be to just
    1833             :    delay verify_ticks() wholesale till the end of a slot, except that
    1834             :    this opens us up to bogus tick and hash counts, potentially causing
    1835             :    runaway consumption of compute cycles.  Of all the checks that are
    1836             :    performed in verify_ticks(), two types are relevant to mitigating
    1837             :    this risk.  One is constraining the number of ticks, and the other is
    1838             :    constraining the number of hashes per tick.  So we implement these
    1839             :    checks here, and perform them on the fly as eagerly as possible.
    1840             : 
    1841             :    Returns 0 on success, -1 (after logging the reason) if any check fails. */
    1842             : static int
    1843           0 : verify_ticks_eager( fd_sched_block_t * block ) {
    1844           0 :   FD_TEST( block->hashes_per_tick!=ULONG_MAX ); /* PoH params initialized. */
    1845             :   /* Check 1: ticks seen so far must not push the tick height past the slot's maximum. */
    1846           0 :   if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height>block->max_tick_height ) ) {
    1847           0 :     FD_LOG_INFO(( "bad block: TOO_MANY_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    1848           0 :     return -1;
    1849           0 :   }
    1850           0 :   if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->mblk_tick_cnt && (block->hashes_per_tick!=block->max_tick_hashcnt||block->inconsistent_hashes_per_tick) ) ) { /* Check 2: once a tick has been seen, completed ticks must all carry exactly hashes_per_tick hashes. */
    1851           0 :     FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, slot %lu, parent slot %lu, expected %lu, got %lu", block->slot, block->parent_slot, block->hashes_per_tick, block->max_tick_hashcnt ));
    1852           0 :     return -1;
    1853           0 :   }
    1854           0 :   if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->curr_tick_hashcnt>block->hashes_per_tick ) ) { /* >1 to ignore low power hashing or no hashing cases */
    1855             :     /* We couldn't really check this at parse time because we may not
    1856             :        have the expected hashes per tick value yet.  We couldn't delay
    1857             :        this till after all PoH hashing is done, because this would be a
    1858             :        DoS vector.  This can't be merged with the above check, because a
    1859             :        malformed block might not end with a tick.  As in, a block might
    1860             :        end with a non-tick microblock with a high hashcnt.  Note that
    1861             :        checking the hashcnt between ticks transitively places an upper
    1862             :        bound on the hashcnt of individual microblocks, thus mitigating
    1863             :        the DoS vector. */
    1864           0 :     FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, observed cumulative tick_hashcnt %lu, expected %lu, slot %lu, parent slot %lu", block->curr_tick_hashcnt, block->hashes_per_tick, block->slot, block->parent_slot ));
    1865           0 :     return -1;
    1866           0 :   }
    1867             : 
    1868           0 :   return 0;
    1869           0 : }
    1870             : 
    1871             : /* https://github.com/anza-xyz/agave/blob/v3.0.6/ledger/src/blockstore_processor.rs#L1057
    1872             : 
    1873             :    The only check we don't do here is TRAILING_ENTRY, which can be done
    1874             :    independently when we parse the final FEC set of a block.
    1875             : 
    1876             :    Returns 0 on success, -1 if the block has too few ticks or if verify_ticks_eager fails. */
    1877             : static int
    1878           0 : verify_ticks_final( fd_sched_block_t * block ) {
    1879           0 :   FD_TEST( block->fec_eos ); /* Callers must only run the final check once fec_eos is set. */
    1880             :   /* The too-few-ticks check belongs only here: before fec_eos a short tick count is not yet an error. */
    1881           0 :   if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height<block->max_tick_height ) ) {
    1882           0 :     FD_LOG_INFO(( "bad block: TOO_FEW_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    1883           0 :     return -1;
    1884           0 :   }
    1885             : 
    1886           0 :   return verify_ticks_eager( block ); /* Re-run the eager checks against the final counts. */
    1887           0 : }
    1888             : /* CHECK( cond ) makes the enclosing function return FD_SCHED_AGAIN_LATER when cond is false. */
    1889           0 : #define CHECK( cond )  do {             \
    1890           0 :   if( FD_UNLIKELY( !(cond) ) ) {        \
    1891           0 :     return FD_SCHED_AGAIN_LATER;        \
    1892           0 :   }                                     \
    1893           0 : } while( 0 )
    1894             : 
    1895             : /* CHECK that it is safe to read at least n more bytes.  Expects a variable named block in scope; the bytes left are fec_buf_sz-fec_buf_soff. */
    1896           0 : #define CHECK_LEFT( n ) CHECK( (n)<=(block->fec_buf_sz-block->fec_buf_soff) )
    1897             : 
    1898             : /* Consume as much as possible from the buffer.  By the end of this
    1899             :    function, we will either have residual data that is unparseable only
    1900             :    because it is a batch that straddles FEC set boundaries, or we will
    1901             :    have reached the end of a batch.  In the former case, any remaining
    1902             :    bytes should be concatenated with the next FEC set for further
    1903             :    parsing.  In the latter case, any remaining bytes should be thrown
    1904             :    away.  Returns FD_SCHED_OK when the current batch has nothing left to parse, FD_SCHED_AGAIN_LATER when more bytes are needed, and FD_SCHED_BAD_BLOCK when the block is invalid. */
    1905             : FD_WARN_UNUSED static int
    1906           0 : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
    1907           0 :   while( 1 ) {
    1908           0 :     while( block->txns_rem>0UL ) { /* Finish the transactions of the current microblock first. */
    1909           0 :       int err;
    1910           0 :       if( FD_UNLIKELY( (err=fd_sched_parse_txn( sched, block, alut_ctx ))!=FD_SCHED_OK ) ) {
    1911           0 :         return err;
    1912           0 :       }
    1913           0 :     }
    1914           0 :     if( block->txns_rem==0UL && block->mblks_rem>0UL ) { /* Next up is a microblock header. */
    1915           0 :       if( FD_UNLIKELY( block->mblk_cnt>=FD_SCHED_MAX_MBLK_PER_SLOT ) ) {
    1916             :         /* A valid block shouldn't contain more than this amount of
    1917             :            microblocks. */
    1918           0 :         FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks) >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, FD_SCHED_MAX_MBLK_PER_SLOT ));
    1919           0 :         return FD_SCHED_BAD_BLOCK;
    1920           0 :       }
    1921             : 
    1922           0 :       CHECK_LEFT( sizeof(fd_microblock_hdr_t) );
    1923           0 :       fd_microblock_hdr_t * hdr = (fd_microblock_hdr_t *)fd_type_pun( block->fec_buf+block->fec_buf_soff );
    1924           0 :       block->fec_buf_soff      += (uint)sizeof(fd_microblock_hdr_t);
    1925             : 
    1926           0 :       block->mblks_rem--;
    1927           0 :       block->txns_rem = hdr->txn_cnt;
    1928             :       /* Take a microblock tracking entry off the head of the free list. */
    1929           0 :       FD_TEST( sched->mblk_pool_free_cnt ); /* can_ingest should have guaranteed sufficient free capacity. */
    1930           0 :       uint mblk_idx = sched->mblk_pool_free_head;
    1931           0 :       sched->mblk_pool_free_head = sched->mblk_pool[ mblk_idx ].next;
    1932           0 :       sched->mblk_pool_free_cnt--;
    1933             : 
    1934           0 :       fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;
    1935           0 :       mblk->start_txn_idx = block->txn_parsed_cnt;
    1936           0 :       mblk->end_txn_idx   = mblk->start_txn_idx+hdr->txn_cnt;
    1937             :       /* One might think that every microblock needs to have at least
    1938             :          one hash, otherwise the block should be considered invalid.  A
    1939             :          vanilla validator certainly produces microblocks that conform
    1940             :          to this.  But a modded validator could in theory produce zero
    1941             :          hash microblocks.  Agave's replay stage will happily take those
    1942             :          microblocks.  The Agave implementation-defined way of doing PoH
    1943             :          verify is as follows:
    1944             : 
    1945             :          For a tick microblock, do the same number of hashes as
    1946             :          specified by the microblock.  Zero hashes are allowed, in which
    1947             :          case this tick would have the same ending hash value as the
    1948             :          previous microblock.
    1949             : 
    1950             :          For a transaction microblock, if the number of hashes specified
    1951             :          by the microblock is <= 1, then do zero pure hashes, and simply
    1952             :          do a mixin/record.  Otherwise, do (number of hashes-1) amount
    1953             :          of pure hashing, and then do a mixin.  However, note that for
    1954             :          the purposes of tick_verify, the number of hashes specified by
    1955             :          the microblock is taken verbatim.
    1956             : 
    1957             :          https://github.com/anza-xyz/agave/blob/v3.0.6/entry/src/entry.rs#L232
    1958             : 
    1959             :          We implement the above for consensus. */
    1960           0 :       mblk->hashcnt = fd_ulong_sat_sub( hdr->hash_cnt, fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL ) ); /* For pure hashing, implement the above. */
    1961           0 :       memcpy( mblk->end_hash, hdr->hash, sizeof(fd_hash_t) );
    1962           0 :       memcpy( mblk->curr_hash, block->poh_hash, sizeof(fd_hash_t) ); /* Hashing starts from the previous microblock's end hash. */
    1963           0 :       mblk->curr_txn_idx = mblk->start_txn_idx;
    1964           0 :       mblk->curr_hashcnt = 0UL;
    1965           0 :       mblk->curr_sig_cnt = 0U;
    1966           0 :       mblk->is_tick      = !hdr->txn_cnt;
    1967             : 
    1968             :       /* Update block tracking. */
    1969           0 :       block->curr_tick_hashcnt = fd_ulong_sat_add( hdr->hash_cnt, block->curr_tick_hashcnt ); /* For tick_verify, take the number of hashes verbatim. */
    1970           0 :       block->hashcnt += mblk->hashcnt+fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL );
    1971           0 :       memcpy( block->poh_hash, hdr->hash, sizeof(fd_hash_t) );
    1972           0 :       block->last_mblk_is_tick = mblk->is_tick;
    1973           0 :       block->mblk_cnt++;
    1974           0 :       sched->metrics->mblk_parsed_cnt++;
    1975           0 :       if( FD_UNLIKELY( !hdr->txn_cnt ) ) {
    1976             :         /* This is a tick microblock. */
    1977           0 :         if( FD_UNLIKELY( block->mblk_tick_cnt && block->max_tick_hashcnt!=block->curr_tick_hashcnt ) ) {
    1978           0 :           block->inconsistent_hashes_per_tick = 1;
    1979           0 :           if( FD_LIKELY( block->hashes_per_tick!=ULONG_MAX && block->hashes_per_tick>1UL ) ) {
    1980             :             /* >1 to ignore low power hashing or hashing disabled */
    1981           0 :             FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, tick idx %u, max hashcnt %lu, curr hashcnt %lu, hashes_per_tick %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->hashes_per_tick ));
    1982           0 :             return FD_SCHED_BAD_BLOCK; /* NOTE(review): mblk_idx was taken off the free list above but is not yet on any per-block list here -- confirm the bad-block teardown path reclaims it. */
    1983           0 :           }
    1984           0 :         }
    1985           0 :         block->max_tick_hashcnt  = fd_ulong_max( block->curr_tick_hashcnt, block->max_tick_hashcnt );
    1986           0 :         block->curr_tick_hashcnt = 0UL;
    1987           0 :         block->mblk_tick_cnt++;
    1988           0 :       }
    1989             :       #if FD_SCHED_SKIP_POH
    1990             :       block->poh_hashing_done_cnt++;
    1991             :       block->poh_hash_cmp_done_cnt++;
    1992             :       free_mblk( sched, block, mblk_idx );
    1993             :       #else
    1994           0 :       mblk_slist_idx_push_tail( block->mblks_unhashed, mblk_idx, sched->mblk_pool );
    1995           0 :       block->mblk_unhashed_cnt++;
    1996           0 :       #endif
    1997           0 :       continue;
    1998           0 :     }
    1999           0 :     if( block->txns_rem==0UL && block->mblks_rem==0UL && block->fec_sob ) { /* At the start of a batch: read its ulong microblock count. */
    2000           0 :       CHECK_LEFT( sizeof(ulong) );
    2001           0 :       FD_TEST( block->fec_buf_soff==0U );
    2002           0 :       block->mblks_rem     = FD_LOAD( ulong, block->fec_buf );
    2003           0 :       block->fec_buf_soff += (uint)sizeof(ulong);
    2004             : 
    2005           0 :       block->fec_sob = 0;
    2006           0 :       continue;
    2007           0 :     }
    2008           0 :     if( block->txns_rem==0UL && block->mblks_rem==0UL ) { /* Every announced microblock and transaction has been parsed. */
    2009           0 :       break;
    2010           0 :     }
    2011           0 :   }
    2012           0 :   if( block->fec_eob ) {
    2013             :     /* Ignore trailing bytes at the end of a batch. */
    2014           0 :     sched->metrics->bytes_ingested_unparsed_cnt += block->fec_buf_sz-block->fec_buf_soff;
    2015           0 :     block->fec_buf_boff += block->fec_buf_sz; /* Keep a running byte offset of the buffer start across batches. */
    2016           0 :     block->fec_buf_soff = 0U;
    2017           0 :     block->fec_buf_sz   = 0U;
    2018           0 :     block->fec_sob      = 1;
    2019           0 :     block->fec_eob      = 0;
    2020           0 :   }
    2021           0 :   return FD_SCHED_OK;
    2022           0 : }
    2023             : /* fd_sched_parse_txn parses one transaction at block->fec_buf+block->fec_buf_soff, adds it to the dispatcher with fd_rdisp_add_txn, copies its payload and descriptor into the txn pool, and advances fec_buf_soff by the payload size.  Returns FD_SCHED_OK, FD_SCHED_AGAIN_LATER if no full transaction can be parsed from the bytes available, or FD_SCHED_BAD_BLOCK if the block already has FD_MAX_TXN_PER_SLOT parsed transactions. */
    2024             : FD_WARN_UNUSED static int
    2025           0 : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
    2026           0 :   fd_txn_t * txn = fd_type_pun( block->txn );
    2027             : 
    2028           0 :   ulong pay_sz = 0UL;
    2029           0 :   ulong txn_sz = fd_txn_parse_core( block->fec_buf+block->fec_buf_soff,
    2030           0 :                                     fd_ulong_min( FD_TXN_MTU, block->fec_buf_sz-block->fec_buf_soff ), /* Offer at most FD_TXN_MTU bytes, and never past the end of the buffer. */
    2031           0 :                                     txn,
    2032           0 :                                     NULL,
    2033           0 :                                     &pay_sz );
    2034             : 
    2035           0 :   if( FD_UNLIKELY( !pay_sz || !txn_sz ) ) {
    2036             :     /* Can't parse out a full transaction. */
    2037           0 :     return FD_SCHED_AGAIN_LATER;
    2038           0 :   }
    2039             : 
    2040           0 :   if( FD_UNLIKELY( block->txn_parsed_cnt>=FD_MAX_TXN_PER_SLOT ) ) {
    2041             :     /* The block contains more transactions than a valid block would.
    2042             :        Mark the block dead instead of keep processing it. */
    2043           0 :     FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    2044           0 :     return FD_SCHED_BAD_BLOCK;
    2045           0 :   }
    2046             : 
    2047             :   /* Try to expand ALUTs. */
    2048           0 :   int has_aluts   = txn->transaction_version==FD_TXN_V0 && txn->addr_table_adtl_cnt>0;
    2049           0 :   int serializing = 0;
    2050           0 :   if( has_aluts ) {
    2051           0 :     uchar __attribute__((aligned(FD_SLOT_HASHES_GLOBAL_ALIGN))) slot_hashes_mem[ FD_SYSVAR_SLOT_HASHES_FOOTPRINT ];
    2052           0 :     fd_slot_hashes_global_t const * slot_hashes_global = fd_sysvar_slot_hashes_read( alut_ctx->accdb, alut_ctx->xid, slot_hashes_mem );
    2053           0 :     if( FD_LIKELY( slot_hashes_global ) ) {
    2054           0 :       fd_slot_hash_t * slot_hash = deq_fd_slot_hash_t_join( (uchar *)slot_hashes_global + slot_hashes_global->hashes_offset );
    2055           0 :       serializing = !!fd_runtime_load_txn_address_lookup_tables( txn, block->fec_buf+block->fec_buf_soff, alut_ctx->accdb, alut_ctx->xid, alut_ctx->els, slot_hash, block->aluts ); /* A nonzero result from the ALUT load makes the txn serializing. */
    2056           0 :       sched->metrics->alut_success_cnt += (uint)!serializing;
    2057           0 :     } else {
    2058           0 :       serializing = 1; /* The slot hashes sysvar could not be read, so the ALUTs cannot be expanded. */
    2059           0 :     }
    2060           0 :   }
    2061             : 
    2062           0 :   ulong bank_idx = (ulong)(block-sched->block_pool);
    2063           0 :   ulong txn_idx   = fd_rdisp_add_txn( sched->rdisp, bank_idx, txn, block->fec_buf+block->fec_buf_soff, serializing ? NULL : block->aluts, serializing );
    2064           0 :   FD_TEST( txn_idx!=0UL ); /* Index 0 is never a valid result here; presumably it signals failure to add -- confirm against fd_rdisp_add_txn. */
    2065           0 :   sched->metrics->txn_parsed_cnt++;
    2066           0 :   sched->metrics->alut_serializing_cnt += (uint)serializing;
    2067           0 :   sched->txn_pool_free_cnt--;
    2068           0 :   fd_txn_p_t * txn_p = sched->txn_pool + txn_idx;
    2069           0 :   txn_p->payload_sz  = pay_sz;
    2070             :   /* Derive shred indices from the txn's byte range in the block (fec_buf_boff+fec_buf_soff is the offset of its first byte). */
    2071           0 :   txn_p->start_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff );
    2072           0 :   txn_p->start_shred_idx = fd_ushort_if( txn_p->start_shred_idx>0U, (ushort)(txn_p->start_shred_idx-1U), txn_p->start_shred_idx ); /* NOTE(review): a nonzero search result is stepped back by one, presumably to name the shred holding the first payload byte -- confirm against fd_sort_up_uint_split. */
    2073           0 :   txn_p->end_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff+(uint)pay_sz );
    2074             : 
    2075           0 :   fd_memcpy( txn_p->payload, block->fec_buf+block->fec_buf_soff, pay_sz );
    2076           0 :   fd_memcpy( TXN(txn_p),     txn,                                txn_sz );
    2077           0 :   txn_bitset_remove( sched->exec_done_set, txn_idx ); /* A pool entry is being reused: clear its bit in each of the three done sets. */
    2078           0 :   txn_bitset_remove( sched->sigverify_done_set, txn_idx );
    2079           0 :   txn_bitset_remove( sched->poh_mixin_done_set, txn_idx );
    2080           0 :   sched->txn_info_pool[ txn_idx ].flags = 0UL;
    2081           0 :   sched->txn_info_pool[ txn_idx ].txn_err = 0;
    2082           0 :   sched->txn_info_pool[ txn_idx ].tick_parsed = fd_tickcount();
    2083           0 :   sched->txn_info_pool[ txn_idx ].tick_sigverify_disp = LONG_MAX; /* LONG_MAX here and below presumably means "has not happened yet". */
    2084           0 :   sched->txn_info_pool[ txn_idx ].tick_sigverify_done = LONG_MAX;
    2085           0 :   sched->txn_info_pool[ txn_idx ].tick_exec_disp = LONG_MAX;
    2086           0 :   sched->txn_info_pool[ txn_idx ].tick_exec_done = LONG_MAX;
    2087           0 :   block->txn_idx[ block->txn_parsed_cnt ] = txn_idx; /* Record the pool index in parse order. */
    2088           0 :   block->fec_buf_soff += (uint)pay_sz;
    2089           0 :   block->txn_parsed_cnt++;
    2090             : #if FD_SCHED_SKIP_SIGVERIFY
    2091             :   txn_bitset_insert( sched->sigverify_done_set, txn_idx );
    2092             :   block->txn_sigverify_done_cnt++;
    2093             : #endif
    2094             : #if FD_SCHED_SKIP_POH
    2095             :   txn_bitset_insert( sched->poh_mixin_done_set, txn_idx );
    2096             : #endif
    2097           0 :   block->txns_rem--;
    2098           0 :   return FD_SCHED_OK;
    2099           0 : }
    2100             : /* CHECK and CHECK_LEFT are local to the parsing code above; undefine them so they do not leak into the rest of the file. */
    2101             : #undef CHECK
    2102             : #undef CHECK_LEFT
    2103             : /* dispatch_sigverify fills out with a sigverify task for the next transaction of block that is neither verified nor in flight, clears exec_tile_idx in sigverify_ready_bitset, records bank_idx for that tile, and bumps the in-flight count.  Logs FD_LOG_CRIT if any of the first exec_cnt tiles is clear in all three ready bitsets at once. */
    2104             : static void
    2105           0 : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
    2106             :   /* Dispatch transactions for sigverify in parse order. */
    2107           0 :   out->task_type = FD_SCHED_TT_TXN_SIGVERIFY;
    2108           0 :   out->txn_sigverify->bank_idx = bank_idx;
    2109           0 :   out->txn_sigverify->txn_idx  = block->txn_idx[ block->txn_sigverify_done_cnt+block->txn_sigverify_in_flight_cnt ]; /* done+in_flight is the position of the first not-yet-dispatched txn. */
    2110           0 :   out->txn_sigverify->exec_idx = (ulong)exec_tile_idx;
    2111           0 :   sched->sigverify_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ); /* The tile is no longer ready for sigverify work. */
    2112           0 :   sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
    2113           0 :   block->txn_sigverify_in_flight_cnt++;
    2114           0 :   if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    2115           0 : }
    2116             : 
    2117             : /* Assumes there is a PoH task available for dispatching.  Resumes the microblock at the head of mblks_hashing_in_progress if there is one, otherwise starts the head of mblks_unhashed, and fills out with a hashing task of at most FD_SCHED_MAX_POH_HASHES_PER_TASK hashes. */
    2118             : static void
    2119           0 : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
    2120           0 :   fd_sched_mblk_t * mblk = NULL;
    2121           0 :   uint mblk_idx;
    2122           0 :   if( FD_LIKELY( !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ) ) {
    2123             :     /* There's a PoH task in progress, just continue working on that. */
    2124           0 :     mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_hashing_in_progress, sched->mblk_pool );
    2125           0 :     mblk = sched->mblk_pool+mblk_idx;
    2126           0 :   } else {
    2127             :     /* No in progress PoH task, so start a new one. */
    2128           0 :     FD_TEST( block->mblk_unhashed_cnt );
    2129           0 :     mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_unhashed, sched->mblk_pool );
    2130           0 :     mblk = sched->mblk_pool+mblk_idx;
    2131           0 :     block->mblk_unhashed_cnt--;
    2132           0 :   }
    2133           0 :   out->task_type = FD_SCHED_TT_POH_HASH;
    2134           0 :   out->poh_hash->bank_idx = bank_idx;
    2135           0 :   out->poh_hash->mblk_idx = mblk_idx;
    2136           0 :   out->poh_hash->exec_idx = (ulong)exec_tile_idx;
    2137           0 :   ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt; /* Hashes still owed for this microblock. */
    2138           0 :   out->poh_hash->hashcnt  = fd_ulong_min( hashcnt_todo, FD_SCHED_MAX_POH_HASHES_PER_TASK ); /* Cap the work in a single task. */
    2139           0 :   memcpy( out->poh_hash->hash, mblk->curr_hash, sizeof(fd_hash_t) );
    2140           0 :   sched->poh_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
    2141           0 :   sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
    2142           0 :   block->poh_hashing_in_flight_cnt++;
    2143           0 :   if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    2144           0 : }
    2145             : 
    2146             : /* Does up to one transaction mixin.  Returns 1 if one mixin was done, 2
    2147             :    if that mixin also completed a microblock, 0 if no transaction mixin
    2148             :    was available, -1 if there is a PoH verify error. */
    2149             : FD_WARN_UNUSED static int
    2150           0 : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block ) {
    2151           0 :   if( FD_UNLIKELY( mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ) ) ) return 0;
    2152           0 :   FD_TEST( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt>0 );
    2153             : 
    2154             :   /* The microblock we would like to do mixin on is at the head of the
    2155             :      queue.  It may have had some mixin, it may have never had any
    2156             :      mixin.  In the case of the former, we should continue to mixin the
    2157             :      same head microblock until it's done, lest the per-block bmtree
    2158             :      gets clobbered when we start a new one. */
    2159           0 :   ulong mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
    2160           0 :   fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;
    2161             : 
    2162           0 :   if( FD_UNLIKELY( mblk->end_txn_idx>block->txn_parsed_cnt ) ) {
    2163             :     /* A partially parsed microblock is by definition at the end of the
    2164             :        FEC stream.  If such a microblock is in progress, there should be
    2165             :        no other microblock in this block so far that hasn't been
    2166             :        dispatched, because microblocks are dispatched in parse order. */
    2167           0 :     if( FD_UNLIKELY( block->mblk_unhashed_cnt ) ) {
    2168           0 :       sched->print_buf_sz = 0UL;
    2169           0 :       print_all( sched, block );
    2170           0 :       FD_LOG_CRIT(( "invariant violation end_txn_idx %lu: %s", mblk->end_txn_idx, sched->print_buf ));
    2171           0 :     }
    2172             : 
    2173             :     /* If we've decided to start mixin on a partially parsed microblock,
    2174             :        there better be nothing else in-progress.  Otherwise, they might
    2175             :        clobber the per-block bmtree for mixin. */
    2176           0 :     if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx && (block->poh_hashing_in_flight_cnt||!mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool )||!mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool )) ) ) {
    2177           0 :       sched->print_buf_sz = 0UL;
    2178           0 :       print_all( sched, block );
    2179           0 :       FD_LOG_CRIT(( "invariant violation end_txn_idx %lu start_txn_idx %lu curr_txn_idx %lu: %s", mblk->end_txn_idx, mblk->start_txn_idx, mblk->curr_txn_idx, sched->print_buf ));
    2180           0 :     }
    2181           0 :   }
    2182             : 
    2183             :   /* Very rarely, we've finished hashing, but not all transactions in
    2184             :      the microblock have been parsed out.  This can happen if we haven't
    2185             :      received all the FEC sets for this microblock.  We can't yet fully
    2186             :      mixin the microblock.  So we'll stick it back into the end of the
    2187             :      queue, and try to see if there's a fully parsed microblock.
    2188             :      Unless, there's truly nothing else to mixin.  Then we would start
    2189             :      mixin with the partially parsed microblock.  We do this because the
    2190             :      txn pool is meant to be an OOO scheduling window not tied to
    2191             :      max_live_slots sizing requirements, so there shouldn't be a way for
    2192             :      external input to tie up txn pool entries for longer than
    2193             :      necessary. */
    2194           0 :   if( FD_UNLIKELY( mblk->curr_txn_idx>=block->txn_parsed_cnt || /* Nothing more to mixin for this microblock. */
    2195           0 :                    (mblk->end_txn_idx>block->txn_parsed_cnt &&  /* There is something to mixin, but the microblock isn't fully parsed yet ... */
    2196           0 :                     mblk->curr_txn_idx==mblk->start_txn_idx &&  /* ... and we haven't started mixin on it yet ... */
    2197           0 :                     (block->poh_hashing_in_flight_cnt ||        /* ... and another microblock is in-progress and might preempt this microblock and clobber the bmtree, so we shouldn't start the partial microblock just yet. */
    2198           0 :                      !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ||
    2199           0 :                      !mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ))) ) ) {
    2200           0 :     mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );
    2201             : 
    2202             :     /* No other microblock in the mixin queue. */
    2203           0 :     if( FD_UNLIKELY( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt==1 ) ) return 0;
    2204             : 
    2205             :     /* At this point, there's at least one more microblock in the mixin
    2206             :        queue we could try.  It's a predecessor (in parse order) that
    2207             :        finished hashing later than the partially parsed microblock at
    2208             :        the head of the mixin queue. */
    2209             : 
    2210             :     /* It should never clobber the bmtree for a microblock that has had some mixin done on it. */
    2211           0 :     if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx ) ) {
    2212           0 :       sched->print_buf_sz = 0UL;
    2213           0 :       print_all( sched, block );
    2214           0 :       FD_LOG_CRIT(( "invariant violation curr_txn_idx %lu start_txn_idx %lu: %s", mblk->curr_txn_idx, mblk->start_txn_idx, sched->print_buf ));
    2215           0 :     }
    2216             : 
    2217           0 :     mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
    2218           0 :     mblk = sched->mblk_pool+mblk_idx;
    2219             : 
    2220             :     /* It should be a fresh microblock for mixin. */
    2221           0 :     FD_TEST( mblk->curr_txn_idx==mblk->start_txn_idx );
    2222             :     /* Invariant: at any given point in time, there can be at most one
    2223             :        microblock that hasn't been fully parsed yet, due to the nature
    2224             :        of sequential parsing.  So this microblock has to be fully
    2225             :        parsed. */
    2226           0 :     FD_TEST( mblk->end_txn_idx<=block->txn_parsed_cnt );
    2227           0 :   }
    2228             : 
    2229           0 :   FD_TEST( mblk->curr_txn_idx<mblk->end_txn_idx );
    2230             : 
    2231             :   /* Now mixin. */
    2232           0 :   if( FD_LIKELY( mblk->curr_txn_idx==mblk->start_txn_idx ) ) block->bmtree = fd_bmtree_commit_init( block->bmtree_mem, 32UL, 1UL, 0UL ); /* Optimize for single-transaction microblocks, which are the majority. */
    2233             : 
    2234           0 :   ulong txn_gidx = block->txn_idx[ mblk->curr_txn_idx ];
    2235           0 :   fd_txn_p_t * _txn = sched->txn_pool+txn_gidx;
    2236           0 :   fd_txn_t * txn = TXN(_txn);
    2237           0 :   for( ulong j=0; j<txn->signature_cnt; j++ ) {
    2238           0 :     fd_bmtree_node_t node[ 1 ];
    2239           0 :     fd_bmtree_hash_leaf( node, _txn->payload+txn->signature_off+FD_TXN_SIGNATURE_SZ*j, 64UL, 1UL );
    2240           0 :     fd_bmtree_commit_append( block->bmtree, node, 1UL );
    2241           0 :     mblk->curr_sig_cnt++;
    2242           0 :   }
    2243             : 
    2244             :   /* Release the txn_idx. */
    2245           0 :   txn_bitset_insert( sched->poh_mixin_done_set, txn_gidx );
    2246           0 :   sched->metrics->txn_mixin_done_cnt++;
    2247           0 :   if( txn_bitset_test( sched->exec_done_set, txn_gidx ) && txn_bitset_test( sched->sigverify_done_set, txn_gidx ) ) {
    2248           0 :     fd_rdisp_complete_txn( sched->rdisp, txn_gidx, 1 );
    2249           0 :     sched->txn_pool_free_cnt++;
    2250           0 :     block->txn_done_cnt++;
    2251           0 :     sched->metrics->txn_done_cnt++;
    2252           0 :   }
    2253             : 
    2254           0 :   mblk->curr_txn_idx++;
    2255           0 :   int rv = 2;
    2256           0 :   if( FD_LIKELY( mblk->curr_txn_idx==mblk->end_txn_idx ) ) {
    2257             :     /* Ready to compute the final hash for this microblock. */
    2258           0 :     block->poh_hash_cmp_done_cnt++;
    2259           0 :     sched->metrics->mblk_poh_done_cnt++;
    2260           0 :     uchar * root = fd_bmtree_commit_fini( block->bmtree );
    2261           0 :     uchar mixin_buf[ 64 ];
    2262           0 :     fd_memcpy( mixin_buf, mblk->curr_hash, 32UL );
    2263           0 :     fd_memcpy( mixin_buf+32UL, root, 32UL );
    2264           0 :     fd_sha256_hash( mixin_buf, 64UL, mblk->curr_hash );
    2265           0 :     free_mblk( sched, block, (uint)mblk_idx );
    2266           0 :     if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
    2267           0 :       FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
    2268           0 :       FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
    2269           0 :       FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, txns [%lu,%lu), %u sigs, slot %lu, parent slot %lu", mblk_idx, our_str, ref_str, mblk->hashcnt, mblk->start_txn_idx, mblk->end_txn_idx, mblk->curr_sig_cnt, block->slot, block->parent_slot ));
    2270           0 :       return -1;
    2271           0 :     }
    2272           0 :   } else {
    2273             :     /* There are more transactions to mixin in this microblock. */
    2274           0 :     mblk_slist_idx_push_head( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );
    2275           0 :     rv = 1;
    2276           0 :   }
    2277             : 
    2278           0 :   return rv;
    2279           0 : }
    2280             : /* try_activate_block tries to pick a block to make active, recording
                     :    the choice in sched->active_bank_idx.  Two strategies are tried in
                     :    order:
                     :
                     :    1. Walk the occupied staging lanes (the set bits of
                     :       sched->staged_bitset, taken least significant bit first) and
                     :       activate the first lane head whose parent block is done and
                     :       which is itself activatable.
                     :    2. Failing that, stage the longest stageable unstaged fork under
                     :       sched->root_idx onto an unoccupied lane (demoting every lane
                     :       first if all lanes are occupied) and activate the head of that
                     :       fork.
                     :
                     :    If neither strategy finds a candidate, sched->active_bank_idx is
                     :    left untouched.  Broken invariants are reported through
                     :    FD_LOG_CRIT / FD_TEST. */
    2281             : static void
    2282           0 : try_activate_block( fd_sched_t * sched ) {
    2283             : 
    2284             :   /* See if there are any allocated staging lanes that we can activate
    2285             :      for scheduling ... */
    2286           0 :   ulong staged_bitset = sched->staged_bitset;
    2287           0 :   while( staged_bitset ) {
    2288           0 :     int lane_idx  = fd_ulong_find_lsb( staged_bitset );
    2289           0 :     staged_bitset = fd_ulong_pop_lsb( staged_bitset );
    2290             : 
    2291           0 :     ulong              head_idx     = sched->staged_head_bank_idx[ lane_idx ];
    2292           0 :     fd_sched_block_t * head_block   = block_pool_ele( sched, head_idx );
    2293           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, head_block->parent_idx );
    2294           0 :     if( FD_UNLIKELY( parent_block->dying ) ) {
    2295             :       /* Invariant: no child of a dying block should be staged. */
    2296           0 :       FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, parent slot %lu on lane %d has parent_block->dying set, slot %lu, parent slot %lu",
    2297           0 :                     head_idx, head_block->slot, head_block->parent_slot, lane_idx, parent_block->slot, parent_block->parent_slot ));
    2298           0 :     }
    2299             :     //FIXME: restore this invariant check when we have immediate demotion of dying blocks
    2300             :     // if( FD_UNLIKELY( head_block->dying ) ) {
    2301             :     //   /* Invariant: no dying block should be staged. */
    2302             :     //   FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, prime %lu on lane %u has head_block->dying set",
    2303             :     //                 head_idx, (ulong)head_block->block_id.slot, (ulong)head_block->block_id.prime, lane_idx ));
    2304             :     // }
    2305           0 :     if( block_is_done( parent_block ) && block_is_activatable( head_block ) ) {
    2306             :       /* ... Yes, on this staging lane the parent block is done.  So we
    2307             :          can activate the staged child. */
    2308           0 :       if( FD_UNLIKELY( head_idx!=sched->last_active_bank_idx ) ) { /* Unlikely because only possible under forking or on slot boundary. */
    2309           0 :         if( FD_UNLIKELY( sched->last_active_bank_idx!=head_block->parent_idx ) ) { /* Forking is rare. */
    2310           0 :           FD_LOG_DEBUG(( "activating block %lu:%lu: lane switch to %d", head_block->slot, head_idx, lane_idx ));
    2311           0 :           sched->metrics->lane_switch_cnt++;
    2312           0 :         } else {
    2313           0 :           FD_LOG_DEBUG(( "activating block %lu:%lu: lane %d waking up on slot boundary", head_block->slot, head_idx, lane_idx ));
    2314           0 :         }
    2315           0 :       }
    2316           0 :       sched->active_bank_idx = head_idx;
    2317           0 :       return;
    2318           0 :     }
    2319           0 :   }
    2320             : 
    2321             :   /* ... No, promote unstaged blocks. */
    2322           0 :   ulong root_idx = sched->root_idx;
    2323           0 :   if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    2324           0 :     FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
    2325           0 :   }
    2326             :   /* Find and stage the longest stageable unstaged fork.  This is a
    2327             :      policy decision. */
    2328           0 :   ulong depth = compute_longest_unstaged_fork( sched, root_idx );
    2329           0 :   if( FD_LIKELY( depth>0UL ) ) {
    2330           0 :     if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) {
    2331             :       /* No more staging lanes available.  All of them are occupied by
    2332             :          slow squatters.  Only empty blocks can be demoted, and so
    2333             :          blocks with in-flight transactions, including dying in-flight
    2334             :          blocks, shouldn't be demoted.  We demote all demotable lanes.
    2335             :          Demotion isn't all that expensive, since demotable blocks have
    2336             :          no transactions in them.  If a demoted block proves to be
    2337             :          active still, it'll naturally promote back into a staging lane.
    2338             : 
    2339             :          In fact, all lanes should be demotable at this point.  None of
    2340             :          the lanes have anything dispatchable, otherwise we would have
    2341             :          simply activated one of the dispatchable lanes.  None of the
    2342             :          lanes have anything in-flight either, as we allow for a grace
    2343             :          period while something is in-flight, before we deactivate any
    2344             :          block.  In principle, we could get rid of the grace period and
    2345             :          deactivate right away.  In that case, it's okay if nothing is
    2346             :          demotable at the moment, as that simply implies that all lanes
    2347             :          have in-flight tasks.  We would get another chance to try to
    2348             :          demote when the last in-flight task on any lane completes.
    2349             : 
    2350             :          Another interesting side effect of the current dispatching and
    2351             :          lane switching policy is that each lane should have exactly one
    2352             :          block in it at this point.  A parent block by definition can't
    2353             :          be partially ingested.  Any parent block that is fully ingested
    2354             :          and dispatchable would have made the lane dispatchable, and we
    2355             :          wouldn't be here.  Any parent that is fully ingested and fully
    2356             :          dispatched would be fully done after the grace period.  So
    2357             :          there could only be one block per lane, and it is
    2358             :          simultaneously the head and the tail of the lane.
    2359             : 
    2360             :          A note on why this whole thing does not deadlock:
    2361             : 
    2362             :          One might reasonably wonder what happens if all the lanes are
    2363             :          non-empty, non-dead, but for some reason couldn't be activated
    2364             :          for dispatching.  We would deadlock in this case, as no lane
    2365             :          dispatches to the point of being demotable, and no unstaged
    2366             :          block can be promoted.  Such is not in fact possible.  The only
    2367             :          way a dispatchable lane can be ineligible for activation is if
    2368             :          it has a parent block that isn't done yet.  So a deadlock
    2369             :          happens when this parent block, or any of its dispatchable
    2370             :          ancestors, is unstaged.  An important invariant we maintain is
    2371             :          that a staged block can't have an unstaged stageable parent.
    2372             :          This invariant, by induction, gives us the guarantee that at
    2373             :          least one of the lanes can be activated. */
    2374           0 :       for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    2375           0 :         if( FD_UNLIKELY( !lane_is_demotable( sched, l ) ) ) {
    2376           0 :           FD_LOG_CRIT(( "invariant violation: lane %d is not demotable", l ));
    2377           0 :         }
    2378           0 :         ulong demoted_cnt = demote_lane( sched, l );
    2379           0 :         if( FD_UNLIKELY( demoted_cnt!=1UL ) ) {
    2380           0 :           FD_LOG_CRIT(( "invariant violation: %lu blocks demoted from lane %d, expected 1 demotion", demoted_cnt, l ));
    2381           0 :         }
    2382           0 :         sched->metrics->lane_demoted_cnt++;
    2383           0 :       }
    2384           0 :     }
    2385           0 :     FD_TEST( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) );
    2386           0 :     int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset ); /* First lane whose bit is clear in staged_bitset, i.e. an unoccupied lane. */
    2387           0 :     if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
    2388           0 :       FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
    2389           0 :                     lane_idx, sched->staged_bitset ));
    2390           0 :     }
    2391           0 :     ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    2392           0 :     if( FD_UNLIKELY( head_bank_idx==ULONG_MAX ) ) {
    2393             :       /* compute_longest_unstaged_fork reported a promotable fork
                     :          (depth>0) above, so staging it should never come back without
                     :          a head block.  Getting ULONG_MAX here should not happen. */
    2394           0 :       FD_LOG_CRIT(( "invariant violation: head_bank_idx==ULONG_MAX" ));
    2395           0 :     }
    2396             :     /* We don't bother with promotion unless the block is immediately
    2397             :        dispatchable.  So it's okay to set the active block here.  This
    2398             :        doesn't cause out-of-order block replay because any parent block
    2399             :        must be fully done.  If the parent block were dead, this fork
    2400             :        would be marked dead too and ineligible for promotion.  If the
    2401             :        parent block were not dead and not done and staged, we wouldn't
    2402             :        be trying to promote an unstaged fork.  If the parent block were
    2403             :        not dead and not done and unstaged, it would've been part of this
    2404             :        unstaged fork. */
    2405           0 :     fd_sched_block_t * head_block = block_pool_ele( sched, head_bank_idx );
    2406           0 :     FD_LOG_DEBUG(( "activating block %lu:%lu: unstaged promotion to lane %d", head_block->slot, head_bank_idx, lane_idx ));
    2407           0 :     sched->active_bank_idx = head_bank_idx;
    2408           0 :     return;
    2409           0 :   }
    2410             :   /* No unstaged blocks to promote.  So we're done.  Yay. */
    2411           0 : }
    2412             : /* check_or_set_active_block makes sure the scheduler's notion of the
                     :    active block is sane.  If no block is active
                     :    (sched->active_bank_idx==ULONG_MAX), it attempts to activate one
                     :    via try_activate_block, which may still leave nothing active.
                     :    Otherwise it checks the currently active block: if
                     :    block_should_deactivate says it should have been deactivated
                     :    already, the state produced by print_all (sched->print_buf) is
                     :    logged and an invariant violation is raised via FD_LOG_CRIT. */
    2413             : static void
    2414           0 : check_or_set_active_block( fd_sched_t * sched ) {
    2415           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    2416           0 :     try_activate_block( sched );
    2417           0 :   } else {
    2418           0 :     fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    2419           0 :     if( FD_UNLIKELY( block_should_deactivate( active_block ) ) ) {
    2420           0 :       sched->print_buf_sz = 0UL; /* Start the print buffer from scratch before dumping state. */
    2421           0 :       print_all( sched, active_block );
    2422           0 :       FD_LOG_NOTICE(( "%s", sched->print_buf ));
    2423           0 :       FD_LOG_CRIT(( "invariant violation: should have been deactivated" ));
    2424           0 :     }
    2425           0 :   }
    2426           0 : }
    2427             : 
    2428             : /* This function has two main jobs:
    2429             :    - Mark everything on the fork tree rooted at block as dying.
    2430             :    - Take blocks out of rdisp if possible.
                     :
                     :    A block is taken out of rdisp only when doing so respects the
                     :    per-lane abandon order (see in_order below) and the block has
                     :    nothing in flight.  When that happens, the count of its parsed
                     :    but not yet done transactions is added back to
                     :    sched->txn_pool_free_cnt, the abandon metrics are updated, and, if
                     :    the block was staged, its staging lane is released.  Separately,
                     :    if block is staged, in order, and the active block is the head of
                     :    its lane, sched->active_bank_idx is reset to ULONG_MAX (the old
                     :    value is remembered in sched->last_active_bank_idx).
                     :
                     :    The function recurses into every child of block, so the whole
                     :    subtree is visited.  Calling it on a rooted block is reported as
                     :    an invariant violation via FD_LOG_CRIT. */
    2431             : static void
    2432           0 : subtree_mark_and_maybe_prune_rdisp( fd_sched_t * sched, fd_sched_block_t * block ) {
    2433           0 :   if( FD_UNLIKELY( block->rooted ) ) {
    2434           0 :     FD_LOG_CRIT(( "invariant violation: rooted block should not be abandoned, slot %lu, parent slot %lu",
    2435           0 :                   block->slot, block->parent_slot ));
    2436           0 :   }
    2437             :   /* All minority fork nodes pass through this function eventually.  So
    2438             :      this is a good point to check per-node invariants for minority
    2439             :      forks. */
    2440           0 :   if( FD_UNLIKELY( block->staged && !block->in_rdisp ) ) {
    2441           0 :     FD_LOG_CRIT(( "invariant violation: staged block is not in the dispatcher, slot %lu, parent slot %lu",
    2442           0 :                   block->slot, block->parent_slot ));
    2443           0 :   }
    2444             : 
    2445             :   /* Setting the flag is non-optional and can happen more than once. */
    2446           0 :   block->dying = 1;
    2447             : 
    2448             :   /* Removal from dispatcher should only happen once. */
    2449           0 :   if( block->in_rdisp ) {
    2450           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2451           0 :     if( FD_UNLIKELY( !parent ) ) {
    2452             :       /* Only the root has no parent.  Abandon should never be called on
    2453             :          the root.  So any block we are trying to abandon should have a
    2454             :          parent. */
    2455           0 :       FD_LOG_CRIT(( "invariant violation: parent not found slot %lu, parent slot %lu",
    2456           0 :                     block->slot, block->parent_slot ));
    2457           0 :     }
    2458             : 
    2459             :     /* The dispatcher expects blocks to be abandoned in the same order
    2460             :        that they were added on each lane.  There are no requirements on
    2461             :        the order of abandoning if two blocks are not on the same lane,
    2462             :        or if a block is unstaged.  This means that in general we
    2463             :        shouldn't abandon a child block if the parent hasn't been
    2464             :        abandoned yet, if and only if they are on the same lane.  So wait
    2465             :        until we can abandon the parent, and then descend down the fork
    2466             :        tree to ensure orderly abandoning. */
    2467           0 :     int in_order = !parent->in_rdisp || /* parent is not in the dispatcher */
    2468           0 :                    !parent->staged   || /* parent is in the dispatcher but not staged */
    2469           0 :                    !block->staged    || /* parent is in the dispatcher and staged but this block is unstaged */
    2470           0 :                    block->staging_lane!=parent->staging_lane; /* this block is on a different staging lane than its parent */
    2471             : 
    2472           0 :     if( FD_UNLIKELY( in_order && block->staged && sched->active_bank_idx==sched->staged_head_bank_idx[ block->staging_lane ] && sched->active_bank_idx!=ULONG_MAX ) ) {
    2473           0 :       FD_TEST( block_pool_ele( sched, sched->active_bank_idx )==block );
    2474           0 :       FD_LOG_DEBUG(( "reset active_bank_idx %lu: abandon", sched->active_bank_idx ));
    2475           0 :       sched->last_active_bank_idx = sched->active_bank_idx;
    2476           0 :       sched->active_bank_idx = ULONG_MAX;
    2477           0 :       sched->metrics->deactivate_abandoned_cnt++;
    2478           0 :     }
    2479             : 
    2480             :     /* We inform the dispatcher of an abandon only when there are no
    2481             :        more in-flight transactions.  Otherwise, the dispatcher might
    2482             :        recycle a txn_id that was just abandoned, and we could then
    2483             :        receive completion of an in-flight transaction whose txn_id was
    2484             :        just recycled. */
    2485             :     // FIXME The recycling might be fine now that we no longer use
    2486             :     // txn_id to index into anything.  We might be able to just drop
    2487             :     // txn_id on abandoned blocks.  Though would this leak transaction
    2488             :     // content if the txn_id is recycled?
    2489             :     // Note that subtree pruning from sched isn't dependent on the
    2490             :     // in-flight check being present here, as is_prunable already checks
    2491             :     // for in-flight==0.
    2492           0 :     int abandon = in_order && !block_is_in_flight( block );
    2493             : 
    2494           0 :     if( abandon ) {
    2495           0 :       block->in_rdisp = 0;
    2496           0 :       fd_rdisp_abandon_block( sched->rdisp, (ulong)(block-sched->block_pool) );
    2497           0 :       sched->txn_pool_free_cnt += block->txn_parsed_cnt-block->txn_done_cnt; /* in_flight_cnt==0 */
    2498             : 
    2499           0 :       sched->metrics->block_abandoned_cnt++;
    2500           0 :       sched->metrics->txn_abandoned_parsed_cnt    += block->txn_parsed_cnt;
    2501           0 :       sched->metrics->txn_abandoned_exec_done_cnt += block->txn_exec_done_cnt;
    2502           0 :       sched->metrics->txn_abandoned_done_cnt      += block->txn_done_cnt;
    2503             : 
    2504             :       //FIXME when demote supports non-empty blocks, we should demote
    2505             :       //the block from the lane unconditionally and immediately,
    2506             :       //regardless of whether it's safe to abandon or not.  So a block
    2507             :       //would go immediately from staged to unstaged and eventually to
    2508             :       //abandoned.
    2509           0 :       if( FD_LIKELY( block->staged ) ) {
    2510           0 :         FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: abandon", block->slot, block_to_idx( sched, block ), block->staging_lane ));
    2511           0 :         block->staged = 0;
    2512             :         /* Now release the staging lane.  This will release the lane as
    2513             :            soon as we abandon the head block on a lane.  Technically a
    2514             :            release should only happen when we remove the tail block on a
    2515             :            lane.  This is fine though.  The way we abandon guarantees by
    2516             :            induction that an entire lane will be abandoned.  Only the
    2517             :            head block on a lane can possibly have in-flight
    2518             :            transactions, and so once a head block becomes eligible for
    2519             :            abandoning, the entire lane all the way to the tail block,
    2520             :            will be eligible. */
    2521           0 :         sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    2522           0 :         sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    2523           0 :       }
    2524           0 :     }
    2525           0 :   }
    2526             : 
    2527             :   /* Abandon the entire fork chaining off of this block.  Children are
                     :      reached through child_idx and then each child's sibling_idx,
                     :      with ULONG_MAX terminating the list. */
    2528           0 :   ulong child_idx = block->child_idx;
    2529           0 :   while( child_idx!=ULONG_MAX ) {
    2530           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2531           0 :     subtree_mark_and_maybe_prune_rdisp( sched, child );
    2532           0 :     child_idx = child->sibling_idx;
    2533           0 :   }
    2534           0 : }
    2535             : 
    2536             : /* subtree_abandon marks block and all of its descendants as dying
                     :    (taking them out of rdisp where possible, see
                     :    subtree_mark_and_maybe_prune_rdisp) and then, if
                     :    block_is_prunable( block ) holds, unlinks block from its parent's
                     :    child list and prunes the whole subtree from sched via
                     :    subtree_prune.
                     :
                     :    It's safe to call this function more than once on the same block.
    2537             :    The final call is when there are no more in-flight tasks for this
    2538             :    block, at which point the block will be pruned from sched. */
    2539             : static void
    2540           0 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block ) {
    2541           0 :   subtree_mark_and_maybe_prune_rdisp( sched, block );
    2542           0 :   if( block_is_prunable( block ) ) {
    2543           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2544           0 :     if( FD_LIKELY( parent ) ) {
    2545             :       /* Splice the block out of its parent's children list.  idx_p
                     :          points at the link (first the parent's child_idx, then
                     :          successive sibling_idx fields) that refers to the entry
                     :          under inspection, so the unlink below is a single store.
                     :          NOTE(review): the walk has no end-of-list check, so it
                     :          relies on block actually being in the parent's children
                     :          list. */
    2546           0 :       ulong block_idx = block_to_idx( sched, block );
    2547           0 :       ulong * idx_p = &parent->child_idx;
    2548           0 :       while( *idx_p!=block_idx ) {
    2549           0 :         idx_p = &(block_pool_ele( sched, *idx_p )->sibling_idx);
    2550           0 :       }
    2551           0 :       *idx_p = block->sibling_idx;
    2552           0 :     }
    2553           0 :     subtree_prune( sched, block_to_idx( sched, block ), ULONG_MAX ); /* ULONG_MAX: no child is exempted from pruning. */
    2554           0 :   }
    2555           0 : }
    2556             : /* subtree_prune removes the block at bank_idx and its descendants
                     :    from sched.  A child whose index equals except_idx is not queued
                     :    for visiting, so that child and everything below it is left
                     :    alone; passing ULONG_MAX exempts nothing, since ULONG_MAX
                     :    terminates the child list and is never a child index.
                     :
                     :    The traversal is first-in first-out (breadth first): head is the
                     :    block being processed, tail is the last block queued, and the
                     :    queue is threaded through the parent_idx fields of the blocks
                     :    themselves.  For every visited block this function:
                     :    - requires in_sched to be set and clears it,
                     :    - if refcnt is nonzero, zeroes it and pushes the block index onto
                     :      sched->ref_q (a full ref_q is reported via FD_LOG_CRIT),
                     :    - reports an invariant violation if the block is still in flight
                     :      or still in the dispatcher,
                     :    - frees its three mblk lists via free_mblk_slist,
                     :    - decrements sched->block_pool_popcnt, and
                     :    - resets parent_idx, child_idx and sibling_idx to ULONG_MAX.
                     :
                     :    NOTE(review): head is dereferenced before the loop's null check,
                     :    so bank_idx must refer to a valid block. */
    2557             : static void
    2558           0 : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx ) {
    2559           0 :   fd_sched_block_t * head = block_pool_ele( sched, bank_idx );
    2560           0 :   head->parent_idx        = ULONG_MAX; /* Empty queue link: nothing is queued behind the starting block yet. */
    2561           0 :   fd_sched_block_t * tail = head;
    2562             : 
    2563           0 :   while( head ) {
    2564           0 :     FD_TEST( head->in_sched );
    2565           0 :     head->in_sched = 0;
    2566           0 :     if( head->refcnt ) { /* Block is still referenced: drop the count here and queue the block index on ref_q.  NOTE(review): presumably whoever drains ref_q releases the reference -- confirm. */
    2567           0 :       FD_TEST( !head->block_end_done );
    2568           0 :       head->refcnt = 0;
    2569           0 :       if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
    2570           0 :       ref_q_push_tail( sched->ref_q, block_to_idx( sched, head ) );
    2571           0 :     }
    2572             : 
    2573           0 :     ulong child_idx = head->child_idx;
    2574           0 :     while( child_idx!=ULONG_MAX ) {
    2575           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2576             :       /* Add children to be visited.  We abuse the parent_idx field to
    2577             :          link up the next block to visit. */
    2578           0 :       if( child_idx!=except_idx ) {
    2579           0 :         tail->parent_idx = child_idx;
    2580           0 :         tail             = child;
    2581           0 :         tail->parent_idx = ULONG_MAX;
    2582           0 :       }
    2583           0 :       child_idx = child->sibling_idx;
    2584           0 :     }
    2585             : 
    2586             :     /* Prune the current block.  We will never publish halfway into a
    2587             :        staging lane, because anything on the rooted fork should have
    2588             :        finished replaying gracefully and be out of the dispatcher.  In
    2589             :        fact, anything that we are publishing away should be out of the
    2590             :        dispatcher at this point.  And there should be no more in-flight
    2591             :        transactions. */
    2592           0 :     if( FD_UNLIKELY( block_is_in_flight( head ) ) ) {
    2593           0 :       FD_LOG_CRIT(( "invariant violation: block has transactions in flight (%u exec %u sigverify %u poh), slot %lu, parent slot %lu",
    2594           0 :                     head->txn_exec_in_flight_cnt, head->txn_sigverify_in_flight_cnt, head->poh_hashing_in_flight_cnt, head->slot, head->parent_slot ));
    2595           0 :     }
    2596           0 :     if( FD_UNLIKELY( head->in_rdisp ) ) {
    2597             :       /* We should have removed it from the dispatcher when we were
    2598             :          notified of the new root, or when in-flight transactions were
    2599             :          drained. */
    2600           0 :       FD_LOG_CRIT(( "invariant violation: block is in the dispatcher, slot %lu, parent slot %lu", head->slot, head->parent_slot ));
    2601           0 :     }
    2602             : 
    2603             :     /* Return remaining mblk descriptors to the shared pool. */
    2604           0 :     free_mblk_slist( sched, head, head->mblks_unhashed );
    2605           0 :     free_mblk_slist( sched, head, head->mblks_hashing_in_progress );
    2606           0 :     free_mblk_slist( sched, head, head->mblks_mixin_in_progress );
    2607             : 
    2608           0 :     if( FD_UNLIKELY( !head->block_end_done ) ) {
    2609           0 :       sched->print_buf_sz = 0UL;
    2610           0 :       print_block_metrics( sched, head );
    2611           0 :       if( FD_LIKELY( head->block_start_done ) ) FD_LOG_DEBUG(( "block %lu:%lu replayed partially, pruning without full replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    2612           0 :       else FD_LOG_DEBUG(( "block %lu:%lu replayed nothing, pruning without any replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    2613           0 :     }
    2614             : 
    2615           0 :     sched->block_pool_popcnt--;
    2616             : 
    2617           0 :     fd_sched_block_t * next = block_pool_ele( sched, head->parent_idx ); /* Follow the queue link; must be read before the indices are reset below. */
    2618             : 
    2619             :     /* We don't have to clear the indices here since no one should be
    2620             :        accessing them.  Defensive programming. */
    2621           0 :     head->parent_idx  = ULONG_MAX;
    2622           0 :     head->child_idx   = ULONG_MAX;
    2623           0 :     head->sibling_idx = ULONG_MAX;
    2624             : 
    2625           0 :     head = next;
    2626           0 :   }
    2627           0 : }
    2628             : 
    2629             : static void
    2630           0 : maybe_switch_block( fd_sched_t * sched, ulong bank_idx ) {
                         /* Re-evaluates which block should be active, given the block at
                            bank_idx.  Does nothing unless bank_idx is the currently
                            active block.  Two cases are handled:

                            - The active block is done (block_is_done): it is removed
                              from the dispatcher, the active block is reset, and the
                              block's children are scanned for one staged on the same
                              lane.  A live, activatable child becomes the new active
                              block and the new head of the lane.  A live child that is
                              not yet activatable leaves the lane bookkeeping untouched
                              and defers to try_activate_block.  A dying child is passed
                              to subtree_abandon, after which the lane is released.  If
                              no child is staged on the lane, the lane is released and
                              try_activate_block is invoked.

                            - The active block is not done but block_should_deactivate
                              returns true: the active block is reset and
                              try_activate_block is invoked.

                            In every path that resets the active block, the previous
                            value is saved in sched->last_active_bank_idx. */
    2631             :   /* This only happens rarely when there are dying in-flight blocks.
    2632             :      Early exit and don't let dying blocks affect replay. */
    2633           0 :   if( FD_UNLIKELY( bank_idx!=sched->active_bank_idx ) ) return;
    2634             : 
    2635           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2636           0 :   if( FD_UNLIKELY( block_is_done( block ) ) ) {
    2637           0 :     fd_rdisp_remove_block( sched->rdisp, bank_idx );
    2638           0 :     FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: remove", block->slot, bank_idx, block->staging_lane ));
    2639           0 :     block->in_rdisp = 0;
    2640           0 :     block->staged   = 0;
    2641           0 :     sched->metrics->block_removed_cnt++;
    2642           0 :     FD_LOG_DEBUG(( "reset active_bank_idx %lu: remove", sched->active_bank_idx ));
    2643           0 :     sched->last_active_bank_idx = sched->active_bank_idx;
    2644           0 :     sched->active_bank_idx = ULONG_MAX;
    2645             : 
    2646             :     /* See if there is a child block down the same staging lane.  This
    2647             :        is a policy decision to minimize fork churn.  We could in theory
    2648             :        reevaluate staging lane allocation here and do promotion/demotion
    2649             :        as needed. */
    2650           0 :     ulong child_idx = block->child_idx;
    2651           0 :     while( child_idx!=ULONG_MAX ) {
    2652           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2653           0 :       if( FD_LIKELY( child->staged && child->staging_lane==block->staging_lane ) ) {
    2654             :         /* There is a child block down the same staging lane ... */
    2655           0 :         if( FD_LIKELY( !child->dying ) ) {
    2656             :           /* ... and the child isn't dead */
    2657           0 :           if( FD_UNLIKELY( !block_is_activatable( child ) ) ) {
    2658             :             /* ... but the child is not activatable, likely because
    2659             :                there are no transactions available yet. */
    2660           0 :             sched->metrics->deactivate_no_txn_cnt++;
    2661           0 :             try_activate_block( sched );
    2662           0 :             return;
    2663           0 :           }
    2664             :           /* ... and it's immediately dispatchable, so switch the active
    2665             :              block to it, and have the child inherit the head status of
    2666             :              the lane.  This is the common case. */
    2667           0 :           FD_LOG_DEBUG(( "activating block %lu:%lu: child inheritance on lane %lu", child->slot, child_idx, child->staging_lane ));
    2668           0 :           sched->active_bank_idx = child_idx;
    2669           0 :           sched->staged_head_bank_idx[ block->staging_lane ] = child_idx;
                                 /* Sanity check: the lane being inherited must still be
                                    marked as occupied in staged_bitset. */
    2670           0 :           if( FD_UNLIKELY( !fd_ulong_extract_bit( sched->staged_bitset, (int)block->staging_lane ) ) ) {
    2671           0 :             FD_LOG_CRIT(( "invariant violation: staged_bitset 0x%lx bit %lu is not set, slot %lu, parent slot %lu, child slot %lu, parent slot %lu",
    2672           0 :                           sched->staged_bitset, block->staging_lane, block->slot, block->parent_slot, child->slot, child->parent_slot ));
    2673           0 :           }
    2674           0 :           return;
    2675           0 :         } else {
    2676             :           /* ... but the child block is considered dead, likely because
    2677             :              the parser considers it invalid. */
    2678           0 :           FD_LOG_INFO(( "child block %lu is already dead", child->slot ));
    2679           0 :           subtree_abandon( sched, child );
    2680           0 :           break;
    2681           0 :         }
    2682           0 :       }
    2683           0 :       child_idx = child->sibling_idx;
    2684           0 :     }
    2685             :     /* There isn't a child block down the same staging lane.  This is
    2686             :        the last block in the staging lane.  Release the staging lane. */
                           /* Note: this point is also reached through the break above,
                              i.e. after a dying child on this lane was handed to
                              subtree_abandon. */
    2687           0 :     sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    2688           0 :     sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    2689           0 :     sched->metrics->deactivate_no_child_cnt++;
    2690           0 :     try_activate_block( sched );
    2691           0 :   } else if( block_should_deactivate( block ) ) {
    2692             :     /* We exhausted the active block, but it's not fully done yet.  We
    2693             :        are just not getting FEC sets for it fast enough.  This could
    2694             :        happen when the network path is congested, or when the leader
    2695             :        simply went down.  Reset the active block. */
    2696           0 :     sched->last_active_bank_idx = sched->active_bank_idx;
    2697           0 :     sched->active_bank_idx = ULONG_MAX;
    2698           0 :     sched->metrics->deactivate_no_txn_cnt++;
    2699           0 :     try_activate_block( sched );
    2700           0 :   }
    2701           0 : }
    2702             : 
    2703             : FD_FN_UNUSED static ulong
    2704           0 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx ) {
                         /* Stages the longest unstaged fork found under sched->root_idx
                            onto staging lane lane_idx.  Works in two passes over the fork
                            tree: compute_longest_unstaged_fork, then
                            stage_longest_unstaged_fork.  Returns the value produced by
                            stage_longest_unstaged_fork (ULONG_MAX when nothing was
                            staged).  Raises FD_LOG_CRIT if root_idx is ULONG_MAX, or if
                            the two passes disagree (a non-zero depth with no head staged,
                            or a zero depth with a head staged). */
    2705           0 :   ulong root_idx = sched->root_idx;
    2706           0 : 
    2707           0 :   if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    2708           0 :     FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
    2709           0 :   }
    2710           0 : 
    2711           0 :   /* First pass: compute the longest unstaged fork depth for each node
    2712           0 :      in the fork tree. */
    2713           0 :   ulong depth = compute_longest_unstaged_fork( sched, root_idx );
    2714           0 : 
    2715           0 :   /* Second pass: stage blocks on the longest unstaged fork. */
    2716           0 :   ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    2717           0 : 
    2718           0 :   if( FD_UNLIKELY( (depth>0UL && head_bank_idx==ULONG_MAX) || (depth==0UL && head_bank_idx!=ULONG_MAX) ) ) {
    2719           0 :     FD_LOG_CRIT(( "invariant violation: depth %lu, head_bank_idx %lu",
    2720           0 :                   depth, head_bank_idx ));
    2721           0 :   }
    2722           0 : 
    2723           0 :   return head_bank_idx;
    2724           0 : }
    2725             : 
    2726             : /* Returns length of the longest stageable unstaged fork, if there is
    2727             :    one, and 0 otherwise. */
                       /* Recursive walk over the subtree rooted at bank_idx.  Children are
                          visited first; the block's luf_depth is then set to the largest
                          luf_depth among its children, plus 1 if block_is_promotable holds
                          for the block itself.  As a side effect, luf_depth is overwritten
                          on every block in the subtree.  Raises FD_LOG_CRIT if bank_idx is
                          ULONG_MAX. */
    2728             : static ulong
    2729           0 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx ) {
    2730           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2731           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2732           0 :   }
    2733             : 
    2734           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2735             : 
                         /* Take the max depth over all children (siblings are chained
                            through sibling_idx). */
    2736           0 :   ulong max_child_depth = 0UL;
    2737           0 :   ulong child_idx       = block->child_idx;
    2738           0 :   while( child_idx!=ULONG_MAX ) {
    2739           0 :     ulong child_depth = compute_longest_unstaged_fork( sched, child_idx );
    2740           0 :     if( child_depth > max_child_depth ) {
    2741           0 :       max_child_depth = child_depth;
    2742           0 :     }
    2743           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2744           0 :     child_idx = child->sibling_idx;
    2745           0 :   }
    2746             : 
                         /* A block that is not promotable contributes 0 and simply passes
                            its best child's depth upward. */
    2747           0 :   block->luf_depth = max_child_depth + fd_ulong_if( block_is_promotable( block ), 1UL, 0UL );
    2748           0 :   return block->luf_depth;
    2749           0 : }
    2750             : 
    2751             : static ulong
    2752           0 : stage_longest_unstaged_fork_helper( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
                         /* Walks down the fork tree from bank_idx.  At each level it
                            follows the child with the largest luf_depth and promotes every
                            promotable block on that path onto lane_idx.  Reads luf_depth,
                            so it relies on that field having been populated beforehand
                            (compute_longest_unstaged_fork writes it).  Returns the index
                            of the first block promoted along the path (the one closest to
                            bank_idx), or ULONG_MAX if no block was promoted.  Raises
                            FD_LOG_CRIT if bank_idx is ULONG_MAX. */
    2753           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2754           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2755           0 :   }
    2756             : 
    2757           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2758             : 
    2759           0 :   int   stage_it = fd_int_if( block_is_promotable( block ), 1, 0 );
    2760           0 :   ulong rv       = fd_ulong_if( stage_it, bank_idx, ULONG_MAX );
    2761           0 :   if( FD_LIKELY( stage_it ) ) {
    2762           0 :     block->staged = 1;
    2763           0 :     block->staging_lane = (ulong)lane_idx;
    2764           0 :     fd_rdisp_promote_block( sched->rdisp, bank_idx, block->staging_lane );
    2765           0 :     sched->metrics->block_promoted_cnt++;
    2766           0 :     FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: promote", block->slot, bank_idx, block->staging_lane ));
    2767           0 :   }
    2768             : 
    2769             :   /* Base case: leaf node. */
    2770           0 :   if( block->child_idx==ULONG_MAX ) return rv;
    2771             : 
                         /* Pick the child with the largest luf_depth.  The comparison is
                            strict and max_depth starts at 0, so children with luf_depth 0
                            are never selected and ties go to the child seen first in
                            sibling order. */
    2772           0 :   ulong max_depth      = 0UL;
    2773           0 :   ulong best_child_idx = ULONG_MAX;
    2774           0 :   ulong child_idx      = block->child_idx;
    2775           0 :   while( child_idx!=ULONG_MAX ) {
    2776           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2777           0 :     if( child->luf_depth>max_depth ) {
    2778           0 :       max_depth      = child->luf_depth;
    2779           0 :       best_child_idx = child_idx;
    2780           0 :     }
    2781           0 :     child_idx = child->sibling_idx;
    2782           0 :   }
    2783             : 
    2784             :   /* Recursively stage descendants. */
    2785           0 :   if( best_child_idx!=ULONG_MAX ) {
    2786           0 :     ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, best_child_idx, lane_idx );
                           /* Keep this block as the head if it was promoted; otherwise
                              adopt whatever head the descendants produced. */
    2787           0 :     rv = fd_ulong_if( rv!=ULONG_MAX, rv, head_bank_idx );
    2788           0 :   }
    2789             : 
    2790           0 :   return rv;
    2791           0 : }
    2792             : 
    2793             : /* Returns idx of head block of staged lane on success, idx_null
    2794             :    otherwise. */
                       /* In this function the "idx_null" value is ULONG_MAX.  The actual
                          promotion is delegated to stage_longest_unstaged_fork_helper,
                          starting at bank_idx.  On success this function also bumps the
                          lane_promoted_cnt metric, sets bit lane_idx in staged_bitset, and
                          records the head in staged_head_bank_idx[ lane_idx ]. */
    2795             : static ulong
    2796           0 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
    2797           0 :   ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, bank_idx, lane_idx );
    2798           0 :   if( FD_LIKELY( head_bank_idx!=ULONG_MAX ) ) {
    2799           0 :     sched->metrics->lane_promoted_cnt++;
    2800           0 :     sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
    2801             :     /* No need to update staged_popcnt_wmk because the fact that there
    2802             :        are unstaged blocks implies we already maxed out lanes at one
    2803             :        point. */
    2804           0 :     sched->staged_head_bank_idx[ lane_idx ] = head_bank_idx;
    2805           0 :   }
    2806           0 :   return head_bank_idx;
    2807           0 : }
    2808             : 
    2809             : /* Check if an entire staging lane can be demoted.  Returns 1 if all
    2810             :    blocks in the lane are demotable, 0 otherwise. */
                       /* The lane is traversed from staged_head_bank_idx[ lane_idx ], moving
                          at each step to the first child (in sibling order) that is staged
                          on the same lane.  Each visited block is checked with FD_TEST to
                          be staged on lane_idx.  A lane whose head is ULONG_MAX has no
                          blocks to visit and yields 1.  Nothing in sched is modified. */
    2811             : static int
    2812           0 : lane_is_demotable( fd_sched_t * sched, int lane_idx ) {
    2813           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2814             : 
    2815           0 :   while( bank_idx!=ULONG_MAX ) {
    2816           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2817           0 :     FD_TEST( block->staged );
    2818           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2819             : 
    2820           0 :     if( FD_UNLIKELY( !block_is_demotable( block ) ) ) {
    2821             :       /* Found a non-demotable block.  Early exit. */
    2822           0 :       return 0;
    2823           0 :     }
    2824             : 
    2825             :     /* Find the child in the same staging lane. */
    2826           0 :     ulong child_idx = block->child_idx;
    2827           0 :     ulong next_bank_idx = ULONG_MAX;
    2828           0 :     while( child_idx!=ULONG_MAX ) {
    2829           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2830           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2831           0 :         next_bank_idx = child_idx;
    2832           0 :         break;
    2833           0 :       }
    2834           0 :       child_idx = child->sibling_idx;
    2835           0 :     }
                           /* ULONG_MAX here means no child continues the lane, which ends
                              the outer loop. */
    2836           0 :     bank_idx = next_bank_idx;
    2837           0 :   }
    2838             : 
    2839           0 :   return 1;
    2840           0 : }
    2841             : 
    2842             : /* Demote all blocks in a staging lane.  Assumes that all blocks in the
    2843             :    lane are demotable.  Returns the number of blocks demoted. */
                       /* The lane is traversed from staged_head_bank_idx[ lane_idx ], moving
                          at each step to the first child (in sibling order) that is staged
                          on the same lane.  Each block is passed to fd_rdisp_demote_block
                          (a non-zero return raises FD_LOG_CRIT) and has its staged flag
                          cleared; staging_lane is not reset.  Afterwards bit lane_idx of
                          staged_bitset is cleared, staged_head_bank_idx[ lane_idx ] is set
                          to ULONG_MAX, and the block_demoted_cnt metric is increased by the
                          number of blocks demoted. */
    2844             : static ulong
    2845           0 : demote_lane( fd_sched_t * sched, int lane_idx ) {
    2846           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2847           0 :   uint  demoted_cnt = 0U;
    2848             : 
    2849           0 :   while( bank_idx!=ULONG_MAX ) {
    2850           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2851           0 :     FD_TEST( block->staged );
    2852           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2853             : 
    2854           0 :     int ret = fd_rdisp_demote_block( sched->rdisp, bank_idx );
    2855           0 :     if( FD_UNLIKELY( ret!=0 ) ) {
    2856           0 :       FD_LOG_CRIT(( "fd_rdisp_demote_block failed for slot %lu, bank_idx %lu, lane %d", block->slot, bank_idx, lane_idx ));
    2857           0 :     }
    2858           0 :     FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: demote", block->slot, bank_idx, block->staging_lane ));
    2859           0 :     block->staged = 0;
    2860           0 :     demoted_cnt++;
    2861             : 
    2862             :     /* Find the child in the same staging lane. */
                           /* The search looks at the children's staged flags, so clearing
                              this block's own flag above does not affect it. */
    2863           0 :     ulong child_idx = block->child_idx;
    2864           0 :     ulong next_bank_idx = ULONG_MAX;
    2865           0 :     while( child_idx!=ULONG_MAX ) {
    2866           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2867           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2868           0 :         next_bank_idx = child_idx;
    2869           0 :         break;
    2870           0 :       }
    2871           0 :       child_idx = child->sibling_idx;
    2872           0 :     }
    2873           0 :     bank_idx = next_bank_idx;
    2874           0 :   }
    2875             : 
    2876             :   /* Clear the lane. */
    2877           0 :   sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, lane_idx );
    2878           0 :   sched->staged_head_bank_idx[ lane_idx ] = ULONG_MAX;
    2879             : 
    2880           0 :   sched->metrics->block_demoted_cnt += demoted_cnt;
    2881           0 :   FD_LOG_DEBUG(( "demoted %u blocks in lane %d", demoted_cnt, lane_idx ));
    2882           0 :   return demoted_cnt;
    2883           0 : }

Generated by: LCOV version 1.14