LCOV - code coverage report
Current view: top level - flamenco/stakes - fd_vote_stakes.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 1 1 100.0 %
Date: 2026-03-19 18:19:27 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_flamenco_stakes_fd_vote_stakes_h
       2             : #define HEADER_fd_src_flamenco_stakes_fd_vote_stakes_h
       3             : 
       4             : #include "../../util/fd_util_base.h"
       5             : #include "../types/fd_types_custom.h"
       6             : 
       7             : /* fd_vote_stakes_t is a data structure that stores vote account stake
       8             :    updates across epoch boundaries.  It offers a mapping from vote
       9             :    account pubkeys to their t_1 and t_2 stakes.  The structure is
      10             :    designed to work with a large amount of vote accounts (in the order
      11             :    of 10s of millions) along with a relatively small number of forks
      12             :    across epoch boundaries.
      13             : 
      14             :    The structure is designed around these operations:
      15             :    1. Inserting updated vote account stakes into a given fork.
      16             :    2. Querying the stake for a given vote account with a given fork.
      17             : 
      18             :    Concurrent queries are allowed but concurrent inserts are not.  This
      19             :    is fine because the structure is only modified during boot and during
      20             :    the epoch boundary.
      21             : 
      22             :    Given a large number of vote accounts (e.g. 2^25 = 33,554,432), we
      23             :    need to store the pubkey, t_1 stake, and t_2 stake for each vote
      24             :    account.  We also need to store potentially ~32 forks across each
      25             :    epoch boundary.  If done naively, this would require
      26             :    2^25 * (32 + 8 + 8) * 32 = 51GB of memory not including any overhead
      27             :    for maintaining lookups for accounts.
      28             : 
      29             :    To avoid this, we can use some important runtime protocol properties.
      30             :    The most notable is that across forks, we will only have a small
      31             :    number of differences in vote account stakes: this is because
      32             :    realistically very few vote accounts will have differences in stakes
      33             :    that are caused by forks right before the epoch boundary.  So we can
      34             :    set our bound on elements as the total number of vote accounts +
      35             :    stakes across forks.  Let's say this is 2^26 if the max number of
      36             :    vote accounts we want to support is 2^25.  Now to store our index we
      37             :    only need:
      38             :    2^26 * (32 + 8 + 8) = 3GB of memory (not including overhead).
      39             :    Our index structure will look like this:
      40             :    pool<pubkey, stake_t_1, stake_t_2>
      41             : 
      42             :    What is described above the index of all vote accounts.  We need to
      43             :    account for the stakes across forks.  We have to maintain a list of
      44             :    all of the index entries used by each fork.  It is sufficient to use
      45             :    a uint list of indices into the index.  So each fork is just:
      46             :    pool<uint>.
      47             : 
      48             :    4 (uint pool idx) * 2^25 (# vote accounts) * 32 (# forks) = 4GiB
      49             : 
      50             :    For a given fork, when we insert a vote account we check if:
      51             :    1. The vote account + stake pair is already in the index.  If it is,
      52             :       we just increment a reference to the pair.  Add it to the list of
      53             :       pool indices that the fork maintains.
      54             :    2. The vote account + stake pair is not in the index.  If it is not,
      55             :       we need to insert it into the index and assume a reference of 1.
      56             :       Add it to the local list of pool indices that the fork maintains.
      57             :    In order to make both of these cases performant we need a mapping
      58             :    from pubkey + stake to the index pool element.  This is simply
      59             :    represented as:
      60             :    map<(pubkey+stake), index_pool_idx>.
      61             : 
      62             :    Now for queries, we need a way to query the t_1 and t_2 stake for a
      63             :    pubkey on a given fork.  The problem is the above map requires the
      64             :    t_1 stake as part of the key and there are potentially multiple
      65             :    entries for a given pubkey.  This is solved with a
      66             :    multi_map<pubkey, index_pool_idx>.  We query for a given pubkey and
      67             :    iterate through all of the entries: this is an acceptable trade-off
      68             :    since there will almost always be one element for a given pubkey
      69             :    (except around the epoch boundary).  However, for each index entry
      70             :    we need a way to make sure that it's the one that is in our fork's
      71             :    list of indices.  This is solved with a map for each fork that is
      72             :    keyed by the index pool idx.
      73             :    map<index_pool_idx, fork list entry>.
      74             : 
      75             :    Now, we can quickly insert entries for a given fork and also do fast
      76             :    queries for a given pubkey.
      77             : 
      78             :    The only remaining operation is updating the root fork.  If we are
      79             :    updating which fork is the root, we can safely assume that all other
      80             :    forks are no longer valid:
      81             :    1. either the fork was a competing fork that executed the epoch
      82             :       boundary and is no longer needed
      83             :    2. the fork corresponds to a fork for the previous epoch boundary.
      84             : 
      85             :    For any fork that's being removed, we need to reset its fork's pool
      86             :    and remove any references to the index pool entries.  If an index
      87             :    entry has a reference count of 0, we can remove it from the index
      88             :    entirely.  Under the hood, the forks in use are stored in a deque;
      89             :    when a root is being advanced, all entries from the deque are removed
      90             :    and each removed fork's entries are released.
      91             : 
      92             :    The memory footprint of what is actually described above is larger
      93             :    because each key of the index needs to be a compound of
      94             :    the pubkey, stake_t_1, node_account_t_1, and epoch.
      95             : 
      96             :    As an important note, the vote stakes object can be used globally
      97             :    across different threads, but it is not safe to access concurrently.
      98             :    The caller is responsible for ensuring that reads and writes are
      99             :    properly synchronized. */
     100             : 
     101             : FD_PROTOTYPES_BEGIN
     102             : 
     103       60595 : #define FD_VOTE_STAKES_ALIGN (128UL)
     104             : 
     105             : struct fd_vote_stakes;
     106             : typedef struct fd_vote_stakes fd_vote_stakes_t;
     107             : 
     108             : /* fd_vote_stakes_align returns the minimum alignment required for the
     109             :    fd_vote_stakes_t struct. */
     110             : 
     111             : ulong
     112             : fd_vote_stakes_align( void );
     113             : 
     114             : /* fd_vote_stakes_footprint returns the minimum footprint required for
     115             :    the fd_vote_stakes_t object given the max number of vote accounts,
     116             :    the max fork width (number of forks that can cross the epoch
     117             :    boundary), and the max number of map chains.  The map chains should
     118             :    be a power of 2 that is roughly equal to the expected number of vote
     119             :    accounts and not the maximum. */
     120             : 
     121             : ulong
     122             : fd_vote_stakes_footprint( ulong max_vote_accounts,
     123             :                           ulong expected_vote_accounts,
     124             :                           ulong max_fork_width );
     125             : 
     126             : 
     127             : /* fd_vote_stakes_new creates a new fd_vote_stakes_t object given a
     128             :    region of memory sized out according to fd_vote_stakes_footprint. */
     129             : 
     130             : void *
     131             : fd_vote_stakes_new( void * shmem,
     132             :                     ulong  max_vote_accounts,
     133             :                     ulong  expected_vote_accounts,
     134             :                     ulong  max_fork_width,
     135             :                     ulong  seed );
     136             : 
     137             : 
     138             : /* fd_vote_stakes_join joins a valid fd_vote_stakes_t object from a
     139             :    region of memory. */
     140             : 
     141             : fd_vote_stakes_t *
     142             : fd_vote_stakes_join( void * shmem );
     143             : 
     144             : /* fd_vote_stakes_root_{insert, update}_key are APIs for
     145             :    inserting and updating keys for the root fork.  These
     146             :    operations are split out in order to support the snapshot loading
     147             :    process.  The set of stakes from the T-1 epoch are inserted into
     148             :    the root fork with a call to fd_vote_stakes_root_insert_key.  The
     149             :    set of stakes from the T-2 epoch are updated with a call to
     150             :    fd_vote_stakes_root_update_meta.  The caller is responsible for
     151             :    ensuring that for a given pubkey, insert_key is called before
     152             :    update_meta.  It is important that these APIs should only be called
     153             :    while the root fork is the only and current fork in use.
     154             : 
     155             :    If update_meta is called on a key that has not had a corresponding
     156             :    insert_key call, a key is created into the root fork with a t-1 stake
     157             :    of 0.  This usually means the vote account has been deleted, but it
     158             :    can be possible in the case where the only staker of a vote account
     159             :    has been marked delinquent in epoch T-1 and needs to be counted
     160             :    towards clock calculation for the rest of the epoch. */
     161             : 
     162             : void
     163             : fd_vote_stakes_root_insert_key( fd_vote_stakes_t *  vote_stakes,
     164             :                                 fd_pubkey_t const * pubkey,
     165             :                                 fd_pubkey_t const * node_account_t_1,
     166             :                                 ulong               stake_t_1,
     167             :                                 ulong               epoch );
     168             : 
     169             : void
     170             : fd_vote_stakes_root_update_meta( fd_vote_stakes_t *  vote_stakes,
     171             :                                  fd_pubkey_t const * pubkey,
     172             :                                  fd_pubkey_t const * node_account_t_2,
     173             :                                  ulong               stake_t_2,
     174             :                                  ulong               epoch );
     175             : 
     176             : /* fd_vote_stakes_insert_{key, update, fini} is API for inserting
     177             :    entries into a given fork.  It reflects the access pattern during
     178             :    epoch rewards, where the current stake for a vote account is
     179             :    accumulated by iterating over the set of vote accounts.  The caller
     180             :    is responsible for ensuring that fd_vote_stakes_insert_key is only
     181             :    called once for each vote account.  It is unsafe to call any
     182             :    other vote_stakes API between calls to insert_key and insert_fini
     183             :    except other insert_* APIs.
     184             : 
     185             :    The calling pattern is as follows:
     186             : 
     187             :    for each vote account: call fd_vote_stakes_insert_key() once
     188             :    for each stake delegation: call fd_vote_stakes_insert_update()
     189             : 
     190             :    after all entries are inserted, call fd_vote_stakes_insert_fini()
     191             : 
     192             :    Under the hood, insert_key inserts an entry into the fork's map and
     193             :    into the index.  Each call to insert_update increments the stake for
     194             :    the given vote account.  insert_fini will either dedup the entry if
     195             :    one already exists, or insert a new map entry.  */
     196             : 
     197             : void
     198             : fd_vote_stakes_insert_key( fd_vote_stakes_t *  vote_stakes,
     199             :                            ushort              fork_idx,
     200             :                            fd_pubkey_t const * pubkey,
     201             :                            fd_pubkey_t const * node_account_t_1,
     202             :                            fd_pubkey_t const * node_account_t_2,
     203             :                            ulong               stake_t_2,
     204             :                            ulong               epoch,
     205             :                            uchar               exists_curr );
     206             : 
     207             : void
     208             : fd_vote_stakes_insert_update( fd_vote_stakes_t *  vote_stakes,
     209             :                               ushort              fork_idx,
     210             :                               fd_pubkey_t const * pubkey,
     211             :                               ulong               stake );
     212             : 
     213             : void
     214             : fd_vote_stakes_insert_fini( fd_vote_stakes_t * vote_stakes,
     215             :                             ushort             fork_idx );
     216             : 
     217             : /* fd_vote_stakes_genesis_fini finalizes the vote stakes on the genesis
     218             :    block.  Any vote stakes that have been inserted will be updated to
     219             :    have identical T-1/T-2 stakes and node accounts.  This function
     220             :    assumes that all vote accounts have already been inserted into the
     221             :    genesis fork. */
     222             : 
     223             : void
     224             : fd_vote_stakes_genesis_fini( fd_vote_stakes_t * vote_stakes );
     225             : 
     226             : /* fd_vote_stakes_new_child creates a new child fork and returns the
     227             :    index identifier for the new fork. */
     228             : 
     229             : ushort
     230             : fd_vote_stakes_new_child( fd_vote_stakes_t * vote_stakes );
     231             : 
     232             : /* fd_vote_stakes_advance_root will move the root fork to the new
     233             :    candidate root fork.  If the root_idx is equal to the root, this
     234             :    function is a no-op.  However, if the root is different, all other
     235             :    child nodes will be removed from the structure. */
     236             : 
     237             : void
     238             : fd_vote_stakes_advance_root( fd_vote_stakes_t * vote_stakes,
     239             :                              ushort             root_idx );
     240             : 
     241             : /* fd_vote_stakes_query_stake queries the stake for a given vote account
     242             :    in the given fork.  If the element is found returns 1, otherwise
     243             :    returns 0.  If any of the optional fields are set to NULL, then their
     244             :    corresponding value will not be set.  If the stake_t_{1,2}_out_opt is
     245             :    set to 0UL and the record is found, that means the vote account
     246             :    either did not exist at the end of the t-{1,2} epoch boundary or had
     247             :    zero stake: they are treated as the same thing. */
     248             : 
     249             : int
     250             : fd_vote_stakes_query( fd_vote_stakes_t const * vote_stakes,
     251             :                       ushort                   fork_idx,
     252             :                       fd_pubkey_t const *      pubkey,
     253             :                       ulong *                  stake_t_1_out_opt,
     254             :                       ulong *                  stake_t_2_out_opt,
     255             :                       fd_pubkey_t *            node_account_t_1_out_opt,
     256             :                       fd_pubkey_t *            node_account_t_2_out_opt );
     257             : 
     258             : int
     259             : fd_vote_stakes_query_pubkey( fd_vote_stakes_t const * vote_stakes,
     260             :                              ushort                   fork_idx,
     261             :                              fd_pubkey_t const *      pubkey );
     262             : 
     263             : /* fd_vote_stakes_query_t_1 and fd_vote_stakes_query_t_2 are shortcuts
     264             :    for querying the t_1 and t_2 stake for a given vote account in the
     265             :    given fork.  0 is returned if the vote account does not exist for the
     266             :    epoch or if it has zero stake.  If the account is found, stake_out
     267             :    and node_account_out will be set. */
     268             : 
     269             : int
     270             : fd_vote_stakes_query_t_1( fd_vote_stakes_t const * vote_stakes,
     271             :                           ushort                   fork_idx,
     272             :                           fd_pubkey_t const *      pubkey,
     273             :                           ulong *                  stake_out,
     274             :                           fd_pubkey_t *            node_account_out );
     275             : 
     276             : int
     277             : fd_vote_stakes_query_t_2( fd_vote_stakes_t const * vote_stakes,
     278             :                           ushort                   fork_idx,
     279             :                           fd_pubkey_t const *      pubkey,
     280             :                           ulong *                  stake_out,
     281             :                           fd_pubkey_t *            node_account_out );
     282             : 
     283             : /* fd_vote_stakes_ele_cnt returns the number of entries for a given
     284             :    fork. */
     285             : 
     286             : uint
     287             : fd_vote_stakes_ele_cnt( fd_vote_stakes_t * vote_stakes,
     288             :                         ushort             fork_idx );
     289             : 
     290             : /* fd_vote_stakes_get_root_idx returns the index of the root fork. */
     291             : 
     292             : ushort
     293             : fd_vote_stakes_get_root_idx( fd_vote_stakes_t * vote_stakes );
     294             : 
     295             : /* fd_vote_stakes_reset resets the vote stakes object to the initial
     296             :    state.  This is useful for resetting vote stakes if a new snapshot
     297             :    manifest is being loaded. */
     298             : 
     299             : void
     300             : fd_vote_stakes_reset( fd_vote_stakes_t * vote_stakes );
     301             : 
     302             : #define FD_VOTE_STAKES_ITER_FOOTPRINT (16UL)
     303             : #define FD_VOTE_STAKES_ITER_ALIGN     (8UL)
     304             : struct stakes_map_iter_t;
     305             : typedef struct stakes_map_iter_t fd_vote_stakes_iter_t;
     306             : 
     307             : /* A caller can iterate through the entries for a given fork.  The
     308             :    iterator is initialized by a call to fd_vote_stakes_fork_iter_init.
     309             :    The caller is responsible for managing the memory for the iterator.
     310             :    It is safe to call fd_vote_stakes_fork_iter_next if the result of
     311             :    fd_vote_stakes_fork_iter_done() == 0.  It is safe to call
     312             :    fd_vote_stakes_fork_iter_ele() to get the current entry if there is
     313             :    a valid initialized iterator.  fd_vote_stakes_fork_iter_next is
     314             :    called to advance the iterator.
     315             : 
     316             :    It is not safe to call any other vote stakes apis while an iteration
     317             :    is in progress.
     318             : 
     319             :    Example use:
     320             :    uchar __attribute__((aligned(FD_VOTE_STAKES_ITER_ALIGN))) iter_mem[ FD_VOTE_STAKES_ITER_FOOTPRINT ];
     321             :    for( fd_vote_stakes_iter_t * iter = fd_vote_stakes_fork_iter_init( vote_stakes, fork_idx, iter_mem );
     322             :         !fd_vote_stakes_fork_iter_done( vote_stakes, fork_idx, iter );
     323             :         fd_vote_stakes_fork_iter_next( vote_stakes, fork_idx, iter ) ) {
     324             :      fd_vote_stakes_fork_iter_ele( vote_stakes, fork_idx, iter, &pubkey, &stake_t_1, &stake_t_2, &node_account_t_1, &node_account_t_2 );
     325             :    }
     326             : 
     327             :    Under the hood, the vote stakes iterator is a wrapper of the map
     328             :    chain iterator.
     329             : 
     330             :    TODO: fork_idx can probably get absorbed into the iterator. */
     331             : 
     332             : fd_vote_stakes_iter_t *
     333             : fd_vote_stakes_fork_iter_init( fd_vote_stakes_t * vote_stakes,
     334             :                                ushort             fork_idx,
     335             :                                uchar              iter_mem[ static FD_VOTE_STAKES_ITER_FOOTPRINT ] );
     336             : 
     337             : int
     338             : fd_vote_stakes_fork_iter_done( fd_vote_stakes_t *      vote_stakes,
     339             :                                ushort                  fork_idx,
     340             :                                fd_vote_stakes_iter_t * iter );
     341             : 
     342             : void
     343             : fd_vote_stakes_fork_iter_next( fd_vote_stakes_t *      vote_stakes,
     344             :                                ushort                  fork_idx,
     345             :                                fd_vote_stakes_iter_t * iter );
     346             : 
     347             : void
     348             : fd_vote_stakes_fork_iter_ele( fd_vote_stakes_t *      vote_stakes,
     349             :                               ushort                  fork_idx,
     350             :                               fd_vote_stakes_iter_t * iter,
     351             :                               fd_pubkey_t *           pubkey_out,
     352             :                               ulong *                 stake_t_1_out_opt,
     353             :                               ulong *                 stake_t_2_out_opt,
     354             :                               fd_pubkey_t *           node_account_t_1_out_opt,
     355             :                               fd_pubkey_t *           node_account_t_2_out_opt );
     356             : 
     357             : FD_PROTOTYPES_END
     358             : 
     359             : #endif

Generated by: LCOV version 1.14