LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 11 0.0 %
Date: 2026-03-19 18:19:27 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_replay_fd_sched_h
       2             : #define HEADER_fd_src_discof_replay_fd_sched_h
       3             : 
       4             : #include "fd_rdisp.h"
       5             : #include "../../disco/store/fd_store.h" /* for fd_store_fec_t */
       6             : #include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */
       7             : 
       8             : #include "../../flamenco/accdb/fd_accdb_user.h"
       9             : #include "../../util/spad/fd_spad.h" /* for ALUTs */
      10             : 
      11             : /* fd_sched wraps all the smarts and mechanical chores around scheduling
      12             :    transactions for replay execution.  It is built on top of the
      13             :    dispatcher fd_rdisp.  The dispatcher is responsible for high
      14             :    performance lane-based scheduling of transactions.  On top of that,
      15             :    we add fork-aware management of lanes, and policies regarding which
      16             :    lanes to prioritize for execution.
      17             : 
      18             :    Conceptually, transactions in a block form a DAG.  We would like to
      19             :    make our way through a block with a sufficient degree of parallelism,
      20             :    such that the execution time of the critical path of the DAG is the
      21             :    limiting factor.  The dispatcher does a good job of emerging the
      22             :    critical path of the DAG on the fly.  Blocks are tracked by the
      23             :    dispatcher either as a block staged on a lane, or as an unstaged
      24             :    block.  When a block is staged, it will enjoy the most intelligent
      25             :    online scheduling that the dispatcher has to offer.  Lanes have to
      26             :    consist of linear chains of blocks down a fork.  So to map a fork
      27             :    tree to lanes, we will need multiple lanes.  Ideally, every branch in
      28             :    the fork tree sits on some lane.  However, memory footprint limits us
      29             :    to a few number of lanes.
      30             : 
      31             :    This module implements a state machine for ensuring that blocks enter
      32             :    into and exit ouf of lanes in an orderly fashion.  The public APIs of
      33             :    this module are invoked to drive state transitions on a small number
      34             :    of events, such as new transactions arriving, or transactions
      35             :    completing, or a block being aborted/abandoned.  We also implement
      36             :    policies for deciding which blocks get staged onto lanes, or evicted
      37             :    from lanes, as well as which lanes to prioritize for execution.
      38             : 
      39             : 
      40             :    The general order in which calls happen under the normal case is:
      41             : 
      42             :    fd_sched_fec_ingest()* ... fd_sched_txn_next_ready()* ... fd_sched_txn_done()* ...
      43             :    more ingest, more ready, more done ...
      44             :    ...
      45             :    fd_sched_txn_next_ready() indicates that the last transaction in the block is being scheduled
      46             :    fd_sched_txn_done()*
      47             :    fd_sched_block_is_done()
      48             :    end-of-block processing in caller
      49             :    fd_sched_txn_next_ready() starts returning transactions from the next block
      50             :    more ingest, more ready, more done ...
      51             :    ... */
      52             : 
      53             : #define FD_SCHED_MIN_DEPTH 478
      54             : #define FD_SCHED_MAX_DEPTH FD_RDISP_MAX_DEPTH
      55             : 
      56             : struct fd_sched;
      57             : typedef struct fd_sched fd_sched_t;
      58             : 
      59             : struct fd_sched_alut_ctx {
      60             :   fd_accdb_user_t   accdb[1];
      61             :   fd_funk_txn_xid_t xid[1];
      62             :   ulong             els; /* Effective lookup slot. */
      63             : };
      64             : typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t;
      65             : 
      66             : struct fd_sched_fec {
      67             :   ulong            bank_idx;            /* Index of the block.  Assumed to be in [0, block_cnt_max).  Caller
      68             :                                            is responsible for ensuring that bank idx is in bounds and unique
      69             :                                            across equivocated blocks. */
      70             :   ulong            parent_bank_idx;     /* Index of the parent block.  Assumed to be in [0, block_cnt_max).
      71             :                                            Caller is responsible for ensuring that parent bank idx is in
      72             :                                            bounds and unique across equivocated blocks. */
      73             :   ulong            slot;                /* Slot number of the block. */
      74             :   ulong            parent_slot;         /* Slot number of the parent block. */
      75             :   fd_store_fec_t * fec;                 /* FEC set data. */
      76             :   uint             shred_cnt;           /* Number of shreds in the FEC set. */
      77             :   uint             is_last_in_batch:1;  /* Set if this is the last FEC set in the batch; relevant because the
      78             :                                            parser should ignore trailing bytes at the end of a batch. */
      79             :   uint             is_last_in_block:1;  /* Set if this is the last FEC set in the block. */
      80             :   uint             is_first_in_block:1; /* Set if this is the first FEC set in the block.  Bank should increment refcnt for sched if such a FEC set has been ingested by sched. */
      81             : 
      82             :   fd_sched_alut_ctx_t alut_ctx[ 1 ];
      83             : };
      84             : typedef struct fd_sched_fec fd_sched_fec_t;
      85             : 
      86             : /* The state of a transaction.  Non mutually exclusive. */
      87           0 : #define FD_SCHED_TXN_EXEC_DONE      (0x0001UL)
      88           0 : #define FD_SCHED_TXN_SIGVERIFY_DONE (0x0002UL)
      89           0 : #define FD_SCHED_TXN_IS_COMMITTABLE (0x0004UL)
      90           0 : #define FD_SCHED_TXN_IS_FEES_ONLY   (0x0008UL)
      91             : #define FD_SCHED_TXN_REPLAY_DONE    (FD_SCHED_TXN_EXEC_DONE|FD_SCHED_TXN_SIGVERIFY_DONE)
      92             : 
      93             : struct fd_sched_txn_info {
      94             :    ulong flags;
      95             :    int   txn_err;
      96             :    long  tick_parsed;
      97             :    long  tick_sigverify_disp;
      98             :    long  tick_sigverify_done;
      99             :    long  tick_exec_disp;
     100             :    long  tick_exec_done;
     101             : };
     102             : typedef struct fd_sched_txn_info fd_sched_txn_info_t;
     103             : 
     104             : /* The scheduler may return one of the following types of tasks for the
     105             :    replay tile.
     106             : 
     107             :    e - passed down to exec tiles.
     108             :    i - replay completes the task immediately.
     109             :    q - replay may either do it immediately or queue the task up. */
     110           0 : #define FD_SCHED_TT_NULL          (0UL)
     111           0 : #define FD_SCHED_TT_BLOCK_START   (1UL) /* (i) Start-of-block processing. */
     112           0 : #define FD_SCHED_TT_BLOCK_END     (2UL) /* (q) End-of-block processing. */
     113           0 : #define FD_SCHED_TT_TXN_EXEC      (3UL) /* (e) Transaction execution. */
     114           0 : #define FD_SCHED_TT_TXN_SIGVERIFY (4UL) /* (e) Transaction sigverify. */
     115             : #define FD_SCHED_TT_LTHASH        (5UL) /* (e) Account lthash. */
     116           0 : #define FD_SCHED_TT_POH_HASH      (6UL) /* (e) PoH hashing. */
     117           0 : #define FD_SCHED_TT_MARK_DEAD     (7UL) /* (i) Mark the block dead. */
     118             : 
     119             : struct fd_sched_block_start {
     120             :   ulong bank_idx;        /* Same as in fd_sched_fec_t. */
     121             :   ulong parent_bank_idx; /* Same as in fd_sched_fec_t. */
     122             :   ulong slot;            /* Slot number of the block. */
     123             : };
     124             : typedef struct fd_sched_block_start fd_sched_block_start_t;
     125             : 
     126             : struct fd_sched_block_end {
     127             :   ulong bank_idx;
     128             : };
     129             : typedef struct fd_sched_block_end fd_sched_block_end_t;
     130             : 
     131             : struct fd_sched_txn_exec {
     132             :   ulong bank_idx;
     133             :   ulong slot;
     134             :   ulong txn_idx;
     135             :   ulong exec_idx;
     136             : };
     137             : typedef struct fd_sched_txn_exec fd_sched_txn_exec_t;
     138             : 
     139             : struct fd_sched_txn_sigverify {
     140             :   ulong bank_idx;
     141             :   ulong txn_idx;
     142             :   ulong exec_idx;
     143             : };
     144             : typedef struct fd_sched_txn_sigverify fd_sched_txn_sigverify_t;
     145             : 
     146             : struct fd_sched_poh_hash {
     147             :   ulong     bank_idx;
     148             :   ulong     mblk_idx;
     149             :   ulong     exec_idx;
     150             :   ulong     hashcnt;
     151             :   fd_hash_t hash[ 1 ];
     152             : };
     153             : typedef struct fd_sched_poh_hash fd_sched_poh_hash_t;
     154             : 
     155             : struct fd_sched_mark_dead {
     156             :   ulong     bank_idx;
     157             : };
     158             : typedef struct fd_sched_mark_dead fd_sched_mark_dead_t;
     159             : 
     160             : struct fd_sched_task {
     161             :   ulong task_type; /* Set to one of the task types defined above. */
     162             :   union {
     163             :     fd_sched_block_start_t   block_start[ 1 ];
     164             :     fd_sched_block_end_t     block_end[ 1 ];
     165             :     fd_sched_txn_exec_t      txn_exec[ 1 ];
     166             :     fd_sched_txn_sigverify_t txn_sigverify[ 1 ];
     167             :     fd_sched_poh_hash_t      poh_hash[ 1 ];
     168             :     fd_sched_mark_dead_t     mark_dead[ 1 ];
     169             :   };
     170             : };
     171             : typedef struct fd_sched_task fd_sched_task_t;
     172             : 
     173             : FD_PROTOTYPES_BEGIN
     174             : 
     175             : /* fd_sched_{align,footprint} return the required alignment and
     176             :    footprint in bytes for a region of memory to be used as a scheduler.
     177             :    footprint silently returns 0 if params are invalid (thus convenient
     178             :    to validate params).
     179             : 
     180             :    depth controls the reorder buffer transaction count (~1 million
     181             :    recommended for live replay, ~10k recommended for async replay).
     182             :    block_cnt_max is the maximum number of blocks that will be tracked by
     183             :    the scheduler. */
     184             : 
     185             : ulong
     186             : fd_sched_align( void );
     187             : 
     188             : ulong
     189             : fd_sched_footprint( ulong depth,           /* in [FD_SCHED_MIN_DEPTH,FD_SCHED_MAX_DEPTH] */
     190             :                     ulong block_cnt_max ); /* >= 1 */
     191             : 
     192             : /* fd_sched_new creates a sched object backed by the given memory region
     193             :    (conforming to align() and footprint()).  Returns NULL if any
     194             :    parameter is invalid. */
     195             : 
     196             : void *
     197             : fd_sched_new( void * mem,
     198             :               ulong  depth,
     199             :               ulong  block_cnt_max,
     200             :               ulong  exec_cnt );
     201             : 
     202             : fd_sched_t *
     203             : fd_sched_join( void * mem );
     204             : 
     205             : /* Add the data in the FEC set to the scheduler.  If is_last_fec is 1,
     206             :    then this is the last FEC set in the block.  Transactions may span
     207             :    FEC set boundaries.  The scheduler is responsible for incrementally
     208             :    parsing transactions from concatenated FEC set data.  Assumes that
     209             :    FEC sets are delivered in replay order.  That is, forks form a
     210             :    partial ordering over FEC sets: in-order per fork, but arbitrary
     211             :    ordering across forks.  The fork tree is implied by the stream of
     212             :    parent-child relationships delivered in FEC sets.  Also assumes that
     213             :    there is enough space in the scheduler to ingest the FEC set.  The
     214             :    caller should generally call fd_sched_fec_can_ingest() first.
     215             : 
     216             :    Returns 1 on success, 0 if the block is bad and should be marked
     217             :    dead. */
     218             : FD_WARN_UNUSED int
     219             : fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     220             : 
     221             : /* Check if there is enough space in the scheduler to ingest the data in
     222             :    the FEC set.  Returns 1 if there is, 0 otherwise.  This is a cheap
     223             :    and conservative check. */
     224             : int
     225             : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     226             : 
     227             : /* Returns the number of worst-case FEC sets sched can ingest. This is a
     228             :    cheap and conservative check. */
     229             : ulong
     230             : fd_sched_can_ingest_cnt( fd_sched_t * sched );
     231             : 
     232             : /* Returns 1 if sched is drained, 0 otherwise.  A drained scheduler will
     233             :    not return more work.  Otherwise, next_ready will return more work,
     234             :    so long as there are exec tiles available. */
     235             : int
     236             : fd_sched_is_drained( fd_sched_t * sched );
     237             : 
     238             : /* Obtain a transaction eligible for execution.  This implies that all
     239             :    prior transactions with w-r or w-w conflicts have completed.
     240             :    Information regarding the scheduled transaction is written to the out
     241             :    pointer.  Returns 1 on success, 0 on failure.  Failures are generally
     242             :    transient and non-fatal, and are simply an indication that no
     243             :    transaction is ready for execution yet.  When in-flight transactions
     244             :    retire or when more FEC sets are ingested, more transactions may
     245             :    become ready for execution.
     246             : 
     247             :    Transactions on the same fork will be returned in a way that
     248             :    maintains the serial fiction.  That is, reordering can happen, but
     249             :    only within the constraint that transactions appear to be ready in
     250             :    the order in which they occur in the block.  Transactions from
     251             :    different forks may interleave, and the caller should be prepared to
     252             :    switch execution context in response to interleavings.  The scheduler
     253             :    will barrier on block boundaries, in the sense that transactions from
     254             :    a subsequent block will not be returned for execution until all
     255             :    transactions from the previous block have completed.  This gives the
     256             :    caller a chance to perform end-of-block processing before
     257             :    transactions from a subsequent block start executing.  In general,
     258             :    the caller should check if the last transaction in the current block
     259             :    is done, and if so, do end-of-block processing before calling this
     260             :    function to start the next block.
     261             : 
     262             :    In addition to returning transactions for execution, this function
     263             :    may also return a sigverify task.  Sigverify can be completed
     264             :    aynschronously outside the critical path of transaction execution, as
     265             :    long as every transaction in a block passes sigverify before we
     266             :    commit the block.  The scheduler prioritizes actual execution of
     267             :    transactions over sigverify, and in general sigverify tasks are only
     268             :    returned when no real transaction can be dispatched.  In other words,
     269             :    the scheduler tries to exploit idle cycles in the exec tiles during
     270             :    times of low parallelism critical path progression.
     271             : 
     272             :    This function may also return a PoH hashing task.  These tasks are
     273             :    lower priority than transaction execution, but higher priority than
     274             :    sigverify.  This is because sigverify tasks are generally bite-sized,
     275             :    whereas PoH hashing can be longer, so we would like to get started on
     276             :    hashing sooner rather than later. */
     277             : ulong
     278             : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out );
     279             : 
     280             : /* Mark a task as complete.  For transaction execution, this means that
     281             :    the effects of the execution are now visible on any core that could
     282             :    execute a subsequent transaction.  Returns 0 on success, -1 if given
     283             :    the result of the task, the block turns out to be bad.  -1 is only
     284             :    returned from PoH tasks.
     285             : 
     286             :    If a block has been abandoned or marked dead for any reason, it'll be
     287             :    pruned the moment in-flight task count hits 0 due to the last task
     288             :    completing.  Then, in the immediate ensuing stem run loop,
     289             :    sched_pruned_next() will return the index for the corresponding bank
     290             :    so the refcnt can be decremented for sched.
     291             : 
     292             :    The transaction at the given index may be freed upon return from this
     293             :    function.  Nonetheless, as long as there is no intervening FEC
     294             :    ingestion, it would still be safe to query the transaction using
     295             :    get_txn(). */
     296             : int
     297             : fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data );
     298             : 
     299             : /* Abandon a block.  This means that we are no longer interested in
     300             :    executing the block.  This also implies that any block which chains
     301             :    off of the provided block shall be abandoned.  This is mainly used
     302             :    when a block is aborted because we decided that it would be a
     303             :    dead/invalid block, and so there's no point in spending resources
     304             :    executing it.  The scheduler will no longer return transactions from
     305             :    abandoned blocks for execution.  This should only be invoked on an
     306             :    actively replayed block, and should only be invoked once on it.
     307             : 
     308             :    An abandoned block will be pruned from sched as soon as, and only if,
     309             :    the block has no more in-flight tasks associated with it.  No sooner,
     310             :    no later.  In the immediate ensuing stem run loop,
     311             :    sched_pruned_next() will return the index for the corresponding bank
     312             :    so the refcnt can be decremented for sched.  After that point, the
     313             :    bank_idx may be recycled for another block. */
     314             : void
     315             : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx );
     316             : 
     317             : /* Add a block as immediately done to the scheduler.  This is useful for
     318             :    installing the snapshot slot, or for informing the scheduler of a
     319             :    packed leader block.  Parent block should be ULONG_MAX for the
     320             :    snapshot slot, and otherwise a block that hasn't been pruned. */
     321             : void
     322             : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot );
     323             : 
     324             : /* Advance the root, pruning all blocks across forks that do not descend
     325             :    from the new root.  Assumes the new root is in the fork tree and
     326             :    connected to the current root.  Also assumes that there are no more
     327             :    in-flight transactions from the soon-to-be-pruned blocks.  This
     328             :    should be called after root_notify() and the caller is responsible
     329             :    for figuring out the new root to safely prune to. */
     330             : void
     331             : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx );
     332             : 
     333             : /* Notify the scheduler of a new root.  This has the effect of calling
     334             :    abandon() on all minority forks that do not descend from the new
     335             :    root.  Shortly after a call to this function, in-flight transactions
     336             :    from these abandoned blocks should retire from the execution
     337             :    pipeline, and the new root will be safe for pruning. */
     338             : void
     339             : fd_sched_root_notify( fd_sched_t * sched, ulong root_idx );
     340             : 
     341             : /* Returns the index of a bank whose refcnt should be decremented for
     342             :    sched.  This function should be called in a loop to drain all
     343             :    outstanding refcnt decrements before any other sched API is called in
     344             :    a stem run loop.  Returns ULONG_MAX when there are no more
     345             :    outstanding refrences from sched and the loop should break. */
     346             : ulong
     347             : fd_sched_pruned_block_next( fd_sched_t * sched );
     348             : 
     349             : void
     350             : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh );
     351             : 
     352             : fd_txn_p_t *
     353             : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx );
     354             : 
     355             : fd_sched_txn_info_t *
     356             : fd_sched_get_txn_info( fd_sched_t * sched, ulong txn_idx );
     357             : 
     358             : fd_hash_t *
     359             : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx );
     360             : 
     361             : uint
     362             : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx );
     363             : 
     364             : void
     365             : fd_sched_metrics_write( fd_sched_t * sched );
     366             : 
     367             : /* Serialize the current state as a cstr to the returned buffer.  Caller
     368             :    may read from the buffer until the next invocation of any fd_sched
     369             :    function. */
     370             : char *
     371             : fd_sched_get_state_cstr( fd_sched_t * sched );
     372             : 
     373             : void *
     374             : fd_sched_leave( fd_sched_t * sched );
     375             : 
     376             : void *
     377             : fd_sched_delete( void * mem );
     378             : 
     379             : FD_PROTOTYPES_END
     380             : 
     381             : #endif /* HEADER_fd_src_discof_replay_fd_sched_h */

Generated by: LCOV version 1.14