Line data Source code
1 : #ifndef HEADER_fd_src_discof_replay_fd_replay_tile_h 2 : #define HEADER_fd_src_discof_replay_fd_replay_tile_h 3 : 4 : /* Banks and Reasm 5 : ================= 6 : 7 : OVERVIEW 8 : 9 : Reasm and banks are tightly coupled. Reasm maintains a tree of FEC 10 : sets organized as a main tree (rooted at the published root) plus 11 : orphan trees. Each FEC set in the connected tree may be associated 12 : with a bank via bank_idx, or be still unreplayed. In general, reasm 13 : tries to approximate the state of banks as closely as possible. It's 14 : inexact, because reasm is stores at the FEC unit, while banks are 15 : stored at the slot unit. 16 : 17 : When reasm delivers a FEC set (via fd_reasm_pop), the replay tile 18 : processes it by assigning it a bank. If it's the first FEC in a 19 : slot (fec_set_idx==0), a new bank is provisioned from the parent's 20 : bank. Subsequent FECs in the same slot inherit the bank_idx from 21 : the preceding FEC. This means all FEC sets within a single slot 22 : share the same bank_idx, with the exception of equivocating FECs. 23 : 24 : PUBLISHING (ROOT ADVANCEMENT) 25 : 26 : When tower sends a new consensus root, replay advances the 27 : published root along the rooted fork as far as possible. A block 28 : on the rooted fork is safe to prune when it and all minority fork 29 : subtrees branching from it have refcnt 0. Publishing calls 30 : fd_reasm_publish to prune the reasm tree (and the store) of any 31 : FEC sets that do not descend from the new root. 32 : 33 : REASM EVICTION (POOL-PRESSURE EVICTION) 34 : 35 : When the reasm pool is nearly full (1 free element remaining) and a 36 : new FEC needs to be inserted, reasm runs its eviction policy to free 37 : space. The eviction in general prioritizes orphans first, and then 38 : frontier slots that are incomplete. Evicted orphans has no effect on 39 : the banks; if they were orphans, then banks had no knowledge of them. 40 : 41 : If eviction succeeds, the evicted chain is returned as a linked 42 : list of pool elements (removed from maps but still acquired in 43 : the pool). The replay tile is responsible for: 44 : 1. If the evicted chain had a valid bank_idx, marking that bank 45 : dead and abandoning it in the scheduler. 46 : 2. Publishing each evicted FEC to repair (REPLAY_SIG_REASM_EVICTED) 47 : so repair can re-request the data. 48 : 3. Releasing each evicted element back to the reasm pool before 49 : the next insert. 50 : 51 : BANKS-DRIVEN EVICTION 52 : 53 : Separately from reasm pool pressure, when banks are full (no free 54 : bank slots) and the scheduler is drained, replay itself evicts 55 : frontier banks to make room. This works by: 56 : 1. Iterating over frontier (leaf) banks. 57 : 2. Marking each as dead and abandoning it in the scheduler. 58 : 3. Calling fd_reasm_remove on the corresponding FEC chain in 59 : reasm, which walks up the tree to the bank boundary (slot 60 : boundary or equivocation point) and removes the chain. 61 : 4. Same process happens as above where evicted FECs are published 62 : to repair. 63 : 64 : By evicting and publishing evicted FECs to repair, replay is 65 : attempting a "go-around" strategy to ensure progress is made even 66 : when memory pressure is high. An evicted FEC - if valid - will be 67 : requested by repair and eventually re-delivered to replay, where 68 : hopefully by then there will be pool capacity to insert and replay 69 : the FEC. */ 70 : 71 : #include "../poh/fd_poh_tile.h" 72 : #include "../../disco/tiles.h" 73 : #include "../reasm/fd_reasm.h" 74 : #include "../../flamenco/types/fd_types_custom.h" 75 : 76 0 : #define REPLAY_SIG_SLOT_COMPLETED (0) 77 0 : #define REPLAY_SIG_SLOT_DEAD (1) 78 0 : #define REPLAY_SIG_ROOT_ADVANCED (2) 79 0 : #define REPLAY_SIG_RESET (3) 80 0 : #define REPLAY_SIG_BECAME_LEADER (4) 81 0 : #define REPLAY_SIG_OC_ADVANCED (5) 82 0 : #define REPLAY_SIG_TXN_EXECUTED (6) 83 0 : #define REPLAY_SIG_REASM_EVICTED (7) 84 : 85 : /* fd_replay_slot_completed promises that it will deliver at most 2 86 : frags for a given slot (at most 2 equivocating blocks). The first 87 : block is the first one we replay to completion. The second version 88 : (if there is) is always the confirmed equivocating block. This 89 : guarantee is provided by fd_reasm. */ 90 : 91 : struct fd_replay_slot_completed { 92 : ulong slot; 93 : ulong root_slot; 94 : ulong storage_slot; 95 : ulong epoch; 96 : ulong slot_in_epoch; 97 : ulong slots_per_epoch; 98 : ulong block_height; 99 : ulong parent_slot; 100 : 101 : fd_hash_t block_id; /* block id (last FEC set's merkle root) of the slot received from replay */ 102 : fd_hash_t parent_block_id; /* parent block id of the slot received from replay */ 103 : fd_hash_t bank_hash; /* bank hash of the slot received from replay */ 104 : fd_hash_t block_hash; /* last microblock header hash of slot received from replay */ 105 : ulong transaction_count; /* since genesis */ 106 : 107 : struct { 108 : double initial; 109 : double terminal; 110 : double taper; 111 : double foundation; 112 : double foundation_term; 113 : } inflation; 114 : 115 : struct { 116 : ulong lamports_per_uint8_year; 117 : double exemption_threshold; 118 : uchar burn_percent; 119 : } rent; 120 : 121 : /* Reference to the bank for this completed slot. TODO: We can 122 : eliminate non-timestamp fields and have consumers just use 123 : bank_idx. */ 124 : ulong bank_idx; 125 : 126 : long first_fec_set_received_nanos; /* timestamp when replay received the first fec of the slot from turbine or repair */ 127 : long preparation_begin_nanos; /* timestamp when replay began preparing the state to begin execution of the slot */ 128 : long first_transaction_scheduled_nanos; /* timestamp when replay first sent a transaction to be executed */ 129 : long last_transaction_finished_nanos; /* timestamp when replay received the last execution completion */ 130 : long completion_time_nanos; /* timestamp when replay completed finalizing the slot and notified tower */ 131 : 132 : int is_leader; /* whether we were leader for this slot */ 133 : ulong identity_balance; 134 : 135 : /* since slot start, default ULONG_MAX */ 136 : ulong vote_success; 137 : ulong vote_failed; 138 : ulong nonvote_success; 139 : ulong nonvote_failed; 140 : 141 : ulong transaction_fee; 142 : ulong priority_fee; 143 : ulong tips; 144 : ulong shred_cnt; 145 : 146 : struct { 147 : ulong block_cost; 148 : ulong vote_cost; 149 : ulong allocated_accounts_data_size; 150 : ulong block_cost_limit; 151 : ulong vote_cost_limit; 152 : ulong account_cost_limit; 153 : } cost_tracker; 154 : }; 155 : 156 : typedef struct fd_replay_slot_completed fd_replay_slot_completed_t; 157 : 158 : struct fd_replay_slot_dead { 159 : ulong slot; 160 : fd_hash_t block_id; 161 : }; 162 : typedef struct fd_replay_slot_dead fd_replay_slot_dead_t; 163 : 164 : struct fd_replay_oc_advanced { 165 : ulong slot; 166 : ulong bank_idx; 167 : }; 168 : typedef struct fd_replay_oc_advanced fd_replay_oc_advanced_t; 169 : 170 : struct fd_replay_root_advanced { 171 : ulong bank_idx; 172 : }; 173 : typedef struct fd_replay_root_advanced fd_replay_root_advanced_t; 174 : 175 : struct fd_replay_txn_executed { 176 : fd_txn_p_t txn[ 1 ]; 177 : int is_committable; 178 : int is_fees_only; 179 : int txn_err; 180 : long tick_parsed; 181 : long tick_sigverify_disp; 182 : long tick_sigverify_done; 183 : long tick_exec_disp; 184 : long tick_exec_done; 185 : }; 186 : typedef struct fd_replay_txn_executed fd_replay_txn_executed_t; 187 : 188 : struct fd_replay_fec_evicted { 189 : fd_hash_t mr; 190 : ulong slot; 191 : uint fec_set_idx; 192 : ulong bank_idx; 193 : }; 194 : typedef struct fd_replay_fec_evicted fd_replay_fec_evicted_t; 195 : 196 : 197 : union fd_replay_message { 198 : fd_replay_slot_completed_t slot_completed; 199 : fd_replay_root_advanced_t root_advanced; 200 : fd_replay_oc_advanced_t oc_advanced; 201 : fd_poh_reset_t reset; 202 : fd_became_leader_t became_leader; 203 : fd_replay_txn_executed_t txn_executed; 204 : fd_replay_fec_evicted_t reasm_evicted; 205 : }; 206 : 207 : typedef union fd_replay_message fd_replay_message_t; 208 : 209 : #endif /* HEADER_fd_src_discof_replay_fd_replay_tile_h */