LCOV - code coverage report
Current view: top level - discof/restore/utils - fd_ssctrl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 63 0.0 %
Date: 2026-03-19 18:19:27 Functions: 0 57 0.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       2             : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       3             : 
       4             : #include "../../../util/net/fd_net_headers.h"
       5             : #include "../../../flamenco/runtime/fd_runtime_const.h"
       6             : #include "../../../ballet/lthash/fd_lthash.h"
       7             : 
       8             : /* The snapshot tiles have a somewhat involved state machine, which is
       9             :    controlled by snapct.  Imagine first the following sequence:
      10             : 
      11             :     1. snapct is reading a full snapshot from the network and sends some
      12             :        data to snapdc to be decompressed.
      13             :     2. snapct hits a network error, and resets the connection to a new
      14             :        peer.
      15             :     3. The decompressor fails on data from the old peer, and sends a
      16             :        malformed message to snapct.
      17             :     4. snapct receives the malformed message, and abandons the new
      18             :        connection, even though it was not malformed.
      19             : 
      20             :    There are basically two ways to prevent this.  Option A is the tiles
      21             :    can pass not just control messages to one another, but also tag them
      22             :    with some xid indicating which "attempt" the control message is for.
      23             : 
      24             :    This is pretty hard to reason about, and the state machine can grow
      25             :    quite complicated.
      26             : 
      27             :    There's an easier way: the tiles just are fully synchronized with
      28             :    snapct.  Whatever "attempt" snapct is on, we ensure all other tiles
      29             :    are on it too.  This means when any tile fails a snapshot, all tiles
      30             :    must fail it and fully flush all frags in the pipeline before snapct
      31             :    can proceed with a new attempt.
      32             : 
      33             :    The control flow then is basically,
      34             : 
      35             :      1. All tiles start in the IDLE state.
      36             :      2. snapct initializes the pipeline by sending an INIT message.
      37             :         Each tile enters the PROCESSING state and then forwards the INIT
      38             :         message down the pipeline.  When snapct receives this INIT
      39             :         message, the entire pipeline is in PROCESSING state.
      40             :      3. Tiles continue to process data / frags as applicable.  If an
      41             :         error occurs, the tile enters the ERROR state and also sends an
      42             :         ERROR message downstream.  All downstream tiles also enter the
      43             :         ERROR state and forward the message.  Note that upstream tiles
      44             :         will not be in an ERROR state and will continue producing frags.
      45             :         When snapct receives the ERROR message, it will send a FAIL
      46             :         message.  snapct then waits for this FAIL message to be
      47             :         propagated through the pipeline and received back.  It then
      48             :         knows that all tiles are synchronized back in an IDLE state and
      49             :         it can try again with a new INIT.
      50             :      4. Once snapct detects that the processing is finished, it sends
      51             :         a DONE message through the pipeline and waits for it to be
      52             :         received back.  We then either move on to the incremental
      53             :         snapshot, or shut down the whole pipeline.
      54             : 
      55             :    This keeps the tiles in lockstep, and simplifies the state machine to
      56             :    a manageable level.
      57             : 
      58             :    It is a strict requirement that all tiles in the pipeline eventually
      59             :    forward all control messages they receive.  Each control message is
      60             :    only generated once in snapct and will not be re-sent.  The pipeline
      61             :    will be locked on flushing that control message until all tiles
      62             :    forward it on. If a control message is dropped, the pipeline will
      63             :    deadlock.  Note that a tile can choose to hold onto a control message
      64             :    and forward it later after performing some asynchronous routine.  */
      65             : 
      66           0 : #define FD_SNAPSHOT_STATE_IDLE                 (0UL) /* Performing no work and should receive no data frags */
      67           0 : #define FD_SNAPSHOT_STATE_PROCESSING           (1UL) /* Performing usual work, no errors / EoF condition encountered */
      68           0 : #define FD_SNAPSHOT_STATE_FINISHING            (2UL) /* Tile has observed EoF, expects no additional data frags */
      69           0 : #define FD_SNAPSHOT_STATE_ERROR                (3UL) /* Some error occurred, will wait for a FAIL command to reset */
      70           0 : #define FD_SNAPSHOT_STATE_SHUTDOWN             (4UL) /* All work finished, tile can perform final cleanup and exit */
      71             : 
      72           0 : #define FD_SNAPSHOT_MSG_DATA                   (0UL) /* Fragment represents some snapshot data */
      73           0 : #define FD_SNAPSHOT_MSG_META                   (1UL) /* Fragment represents a fd_ssctrl_meta_t message */
      74             : 
      75           0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_FULL         (2UL) /* Pipeline should start processing a full snapshot */
      76           0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_INCR         (3UL) /* Pipeline should start processing an incremental snapshot */
      77           0 : #define FD_SNAPSHOT_MSG_CTRL_FAIL              (4UL) /* Current snapshot failed, undo work and reset to idle state */
      78           0 : #define FD_SNAPSHOT_MSG_CTRL_NEXT              (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */
      79           0 : #define FD_SNAPSHOT_MSG_CTRL_DONE              (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */
      80           0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN          (7UL) /* Snapshot load successful, no work left to do, perform final cleanup and shut down */
      81           0 : #define FD_SNAPSHOT_MSG_CTRL_ERROR             (8UL) /* Some tile encountered an error with the current stream */
      82           0 : #define FD_SNAPSHOT_MSG_CTRL_FINI              (9UL) /* Current snapshot has been fully loaded, finish processing */
      83             : 
      84             : /* snapin -> snapls */
      85             : /* snapin -> snapwm -> snaplv */
      86           0 : #define FD_SNAPSHOT_HASH_MSG_EXPECTED         (10UL) /* Hash result sent from snapin to snapls or from snapin to snapwm to snaplv */
      87           0 : #define FD_SNAPSHOT_MSG_EXP_CAPITALIZATION    (11UL) /* Capitalization sent from snapin to snapwm in vinyl mode to verify capitalization */
      88             : 
      89             : /* snapin -> snapls */
      90           0 : #define FD_SNAPSHOT_HASH_MSG_SUB              (12UL) /* Duplicate account sent from snapin to snapls, includes account header and data */
      91           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_HDR          (13UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */
      92           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_DATA         (14UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */
      93             : /* snapwm -> snaplv */
      94           0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_SUB       (15UL) /* Duplicate partial hash result sent from snapwm to snaplv (to subtract) */
      95             : /* snapwm -> snaplv -> snaplh */
      96           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_META_BATCH   (16UL) /* Duplicate account(s) meta batch sent from snapwm to snaplv */
      97             : 
      98             : /* snapla -> snapls */
      99             : /* snaplh -> snaplv */
     100           0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_ADD       (17UL) /* Hash result sent from snapla (snaplh) to snapls (snaplv) */
     101             : 
     102             : 
     103             : /* Sent by snapct to tell snapld whether to load a local file or
     104             :    download from a particular external peer. */
     105             : typedef struct fd_ssctrl_init {
     106             :   int           file;
     107             :   int           zstd;
     108             :   ulong         slot; /* slot advertised by the snapshot peer */
     109             :   fd_ip4_port_t addr;
     110             :   uchar         snapshot_hash[ FD_HASH_FOOTPRINT ]; /* advertised snapshot hash from snapshot file name */
     111             :   char          hostname[ 256UL ];
     112             :   char          path[ PATH_MAX ];
     113             :   ulong         path_len;
     114             :   int           is_https;
     115             : } fd_ssctrl_init_t;
     116             : 
     117             : /* Sent by snapld to tell snapct metadata about a downloaded snapshot. */
     118             : typedef struct fd_ssctrl_meta {
     119             :   ulong total_sz;
     120             : } fd_ssctrl_meta_t;
     121             : 
     122             : typedef struct fd_ssctrl_capitalization {
     123             :   ulong capitalization;
     124             : } fd_ssctrl_capitalization_t;
     125             : 
     126             : typedef struct fd_ssctrl_hash_result {
     127             :   fd_lthash_value_t lthash;
     128             :   long              capitalization;
     129             : } fd_ssctrl_hash_result_t;
     130             : 
     131             : struct fd_snapshot_account_hdr {
     132             :   uchar   pubkey[ FD_PUBKEY_FOOTPRINT ];
     133             :   uchar   owner[ FD_PUBKEY_FOOTPRINT ];
     134             :   ulong   lamports;
     135             :   uchar   executable;
     136             :   ulong   data_len;
     137             : };
     138             : typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t;
     139             : 
     140             : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct
     141             :    with the appropriate account metadata fields. */
     142             : static inline void
     143             : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account,
     144             :                            uchar const                    pubkey[ FD_PUBKEY_FOOTPRINT ],
     145             :                            uchar const                    owner[ FD_PUBKEY_FOOTPRINT ],
     146             :                            ulong                          lamports,
     147             :                            uchar                          executable,
     148           0 :                            ulong                          data_len ) {
     149           0 :   fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT );
     150           0 :   fd_memcpy( account->owner,  owner,  FD_PUBKEY_FOOTPRINT );
     151           0 :   account->lamports   = lamports;
     152           0 :   account->executable = executable;
     153           0 :   account->data_len   = data_len;
     154           0 : }
     155             : 
     156             : /* fd_snapshot_full_account is the contents of the
     157             :    FD_SNAPSHOT_HASH_MSG_SUB message.  It contains a fd_snapshot_account_hdr_t
     158             :    header and the corresponding account data in a single message.
     159             : 
     160             :    For simplicity and conformance to burst limitations in snapin, the
     161             :    entire duplicate account is sent in one message (one frag).  Consider
     162             :    caching the lthash of the duplicate account so we do not have to
     163             :    send the entire account over. */
     164             : struct fd_snapshot_full_account {
     165             :   fd_snapshot_account_hdr_t hdr;
     166             :   uchar                     data[ FD_RUNTIME_ACC_SZ_MAX ];
     167             : };
     168             : typedef struct fd_snapshot_full_account fd_snapshot_full_account_t;
     169             : 
     170             : #define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL)
     171             : #define FD_SNAPSHOT_MAX_SNAPLH_TILES (8UL)
     172             : 
     173             : static inline const char *
     174           0 : fd_ssctrl_state_str( ulong state ) {
     175           0 :   switch( state ) {
     176           0 :     case FD_SNAPSHOT_STATE_IDLE:        return "idle";
     177           0 :     case FD_SNAPSHOT_STATE_PROCESSING:  return "processing";
     178           0 :     case FD_SNAPSHOT_STATE_FINISHING:   return "finishing";
     179           0 :     case FD_SNAPSHOT_STATE_ERROR:       return "error";
     180           0 :     case FD_SNAPSHOT_STATE_SHUTDOWN:    return "shutdown";
     181           0 :     default:                            return "unknown";
     182           0 :   }
     183           0 : }
     184             : 
     185             : static inline const char *
     186           0 : fd_ssctrl_msg_ctrl_str( ulong sig ) {
     187           0 :   switch( sig ) {
     188           0 :     case FD_SNAPSHOT_MSG_DATA:                return "data";
     189           0 :     case FD_SNAPSHOT_MSG_META:                return "meta";
     190           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:      return "init_full";
     191           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_INCR:      return "init_incr";
     192           0 :     case FD_SNAPSHOT_MSG_CTRL_FAIL:           return "fail";
     193           0 :     case FD_SNAPSHOT_MSG_CTRL_NEXT:           return "next";
     194           0 :     case FD_SNAPSHOT_MSG_CTRL_DONE:           return "done";
     195           0 :     case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN:       return "shutdown";
     196           0 :     case FD_SNAPSHOT_MSG_CTRL_ERROR:          return "error";
     197           0 :     case FD_SNAPSHOT_MSG_CTRL_FINI:           return "fini";
     198           0 :     case FD_SNAPSHOT_HASH_MSG_EXPECTED:       return "hash_expected";
     199           0 :     case FD_SNAPSHOT_MSG_EXP_CAPITALIZATION:  return "exp_capitalization";
     200           0 :     case FD_SNAPSHOT_HASH_MSG_SUB:            return "hash_sub";
     201           0 :     case FD_SNAPSHOT_HASH_MSG_SUB_HDR:        return "hash_sub_hdr";
     202           0 :     case FD_SNAPSHOT_HASH_MSG_SUB_DATA:       return "hash_sub_data";
     203           0 :     case FD_SNAPSHOT_HASH_MSG_RESULT_SUB:     return "hash_result_sub";
     204           0 :     case FD_SNAPSHOT_HASH_MSG_SUB_META_BATCH: return "hash_sub_meta_batch";
     205           0 :     case FD_SNAPSHOT_HASH_MSG_RESULT_ADD:     return "hash_result_add";
     206           0 :     default:                                  return "unknown";
     207           0 :   }
     208           0 : }
     209             : 
     210             : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */

Generated by: LCOV version 1.14