Line data Source code
1 : #include "fd_reasm.h"
2 : #include "fd_reasm_private.h"
3 : #include "../../disco/shred/fd_fec_set.h"
4 :
5 : #define LOGGING 0
6 :
7 : FD_FN_CONST ulong
8 0 : fd_reasm_align( void ) {
9 0 : return alignof(fd_reasm_t);
10 0 : }
11 :
12 : FD_FN_CONST ulong
13 0 : fd_reasm_footprint( ulong fec_max ) {
14 0 : ulong max_slots = fd_ulong_max(fec_max / FD_FEC_BLK_MAX, 1UL); /* add capacity for a block id per slot */
15 0 : int lgf_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max + max_slots ) ); /* capacity for fec_max fecs + (fec_max / 1024) more block ids */
16 0 : return FD_LAYOUT_FINI(
17 0 : FD_LAYOUT_APPEND(
18 0 : FD_LAYOUT_APPEND(
19 0 : FD_LAYOUT_APPEND(
20 0 : FD_LAYOUT_APPEND(
21 0 : FD_LAYOUT_APPEND(
22 0 : FD_LAYOUT_APPEND(
23 0 : FD_LAYOUT_APPEND(
24 0 : FD_LAYOUT_APPEND(
25 0 : FD_LAYOUT_INIT,
26 0 : alignof(fd_reasm_t), sizeof(fd_reasm_t) ),
27 0 : pool_align(), pool_footprint ( fec_max ) ),
28 0 : ancestry_align(), ancestry_footprint( fec_max ) ),
29 0 : frontier_align(), frontier_footprint( fec_max ) ),
30 0 : orphaned_align(), orphaned_footprint( fec_max ) ),
31 0 : subtrees_align(), subtrees_footprint( fec_max ) ),
32 0 : bfs_align(), bfs_footprint ( fec_max ) ),
33 0 : xid_align(), xid_footprint ( lgf_max ) ),
34 0 : fd_reasm_align() );
35 0 : }
36 :
37 : void *
38 : fd_reasm_new( void * shmem,
39 : ulong fec_max,
40 0 : ulong seed ) {
41 :
42 0 : if( FD_UNLIKELY( !shmem ) ) {
43 0 : FD_LOG_WARNING(( "NULL mem" ));
44 0 : return NULL;
45 0 : }
46 :
47 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_reasm_align() ) ) ) {
48 0 : FD_LOG_WARNING(( "misaligned mem" ));
49 0 : return NULL;
50 0 : }
51 :
52 0 : ulong footprint = fd_reasm_footprint( fec_max );
53 0 : if( FD_UNLIKELY( !footprint ) ) {
54 0 : FD_LOG_WARNING(( "bad fec_max (%lu)", fec_max ));
55 0 : return NULL;
56 0 : }
57 :
58 0 : fd_wksp_t * wksp = fd_wksp_containing( shmem );
59 0 : if( FD_UNLIKELY( !wksp ) ) {
60 0 : FD_LOG_WARNING(( "shmem must be part of a workspace" ));
61 0 : return NULL;
62 0 : }
63 :
64 0 : fd_memset( shmem, 0, footprint );
65 :
66 0 : ulong max_slots = fd_ulong_max(fec_max / FD_FEC_BLK_MAX, 1UL); /* add capacity for a block id per slot */
67 0 : int lgf_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max + max_slots ) ); /* capacity for fec_max fecs + (fec_max / 1024) more block ids */
68 :
69 0 : fd_reasm_t * reasm;
70 0 : FD_SCRATCH_ALLOC_INIT( l, shmem );
71 0 : reasm = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_reasm_t), sizeof(fd_reasm_t) );
72 0 : void * pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( fec_max ) );
73 0 : void * ancestry = FD_SCRATCH_ALLOC_APPEND( l, ancestry_align(), ancestry_footprint( fec_max ) );
74 0 : void * frontier = FD_SCRATCH_ALLOC_APPEND( l, frontier_align(), frontier_footprint( fec_max ) );
75 0 : void * orphaned = FD_SCRATCH_ALLOC_APPEND( l, orphaned_align(), orphaned_footprint( fec_max ) );
76 0 : void * subtrees = FD_SCRATCH_ALLOC_APPEND( l, subtrees_align(), subtrees_footprint( fec_max ) );
77 0 : void * bfs = FD_SCRATCH_ALLOC_APPEND( l, bfs_align(), bfs_footprint ( fec_max ) );
78 0 : void * xid = FD_SCRATCH_ALLOC_APPEND( l, xid_align(), xid_footprint ( lgf_max ) );
79 0 : FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_reasm_align() ) == (ulong)shmem + footprint );
80 :
81 0 : reasm->slot0 = ULONG_MAX;
82 0 : reasm->root = pool_idx_null( pool );
83 0 : reasm->pool_gaddr = fd_wksp_gaddr_fast( wksp, pool_join( pool_new( pool, fec_max ) ) );
84 0 : reasm->ancestry = ancestry_new( ancestry, fec_max, seed );
85 0 : reasm->frontier = frontier_new( frontier, fec_max, seed );
86 0 : reasm->orphaned = orphaned_new( orphaned, fec_max, seed );
87 0 : reasm->subtrees = subtrees_new( subtrees, fec_max, seed );
88 0 : /* */ subtreel_new( reasm->_subtrlf );
89 0 : /* */ out_new ( reasm->_out );
90 0 : reasm->bfs = bfs_new ( bfs, fec_max );
91 0 : reasm->xid = xid_new ( xid, lgf_max, seed );
92 :
93 0 : return shmem;
94 0 : }
95 :
96 : fd_reasm_t *
97 0 : fd_reasm_join( void * shreasm ) {
98 0 : fd_reasm_t * reasm = (fd_reasm_t *)shreasm;
99 :
100 0 : if( FD_UNLIKELY( !reasm ) ) {
101 0 : FD_LOG_WARNING(( "NULL reasm" ));
102 0 : return NULL;
103 0 : }
104 : /* pool join handled in fd_reasm_new */
105 0 : reasm->ancestry = ancestry_join( reasm->ancestry );
106 0 : reasm->frontier = frontier_join( reasm->frontier );
107 0 : reasm->orphaned = orphaned_join( reasm->orphaned );
108 0 : reasm->subtrees = subtrees_join( reasm->subtrees );
109 0 : reasm->subtreel = subtreel_join( reasm->_subtrlf );
110 0 : reasm->out = out_join ( reasm->_out );
111 0 : reasm->bfs = bfs_join ( reasm->bfs );
112 0 : reasm->xid = xid_join ( reasm->xid );
113 :
114 0 : return reasm;
115 0 : }
116 :
117 : void *
118 0 : fd_reasm_leave( fd_reasm_t * reasm ) {
119 :
120 0 : if( FD_UNLIKELY( !reasm ) ) {
121 0 : FD_LOG_WARNING(( "NULL reasm" ));
122 0 : return NULL;
123 0 : }
124 :
125 0 : return (void *)reasm;
126 0 : }
127 :
128 : void *
129 0 : fd_reasm_delete( void * shreasm ) {
130 0 : fd_reasm_t * reasm = (fd_reasm_t *)shreasm;
131 :
132 0 : if( FD_UNLIKELY( !reasm ) ) {
133 0 : FD_LOG_WARNING(( "NULL reasm" ));
134 0 : return NULL;
135 0 : }
136 :
137 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)reasm, fd_reasm_align() ) ) ) {
138 0 : FD_LOG_WARNING(( "misaligned reasm" ));
139 0 : return NULL;
140 0 : }
141 :
142 0 : return reasm;
143 0 : }
144 :
145 0 : fd_reasm_fec_t * fd_reasm_root ( fd_reasm_t * reasm ) { return pool_ele ( reasm_pool ( reasm ), reasm->root ); }
146 0 : fd_reasm_fec_t const * fd_reasm_root_const ( fd_reasm_t const * reasm ) { return pool_ele_const( reasm_pool_const( reasm ), reasm->root ); }
147 0 : fd_reasm_fec_t * fd_reasm_parent ( fd_reasm_t * reasm, fd_reasm_fec_t * child ) { return pool_ele ( reasm_pool ( reasm ), child->parent ); }
148 0 : fd_reasm_fec_t const * fd_reasm_parent_const ( fd_reasm_t const * reasm, fd_reasm_fec_t const * child ) { return pool_ele_const( reasm_pool_const( reasm ), child->parent ); }
149 0 : fd_reasm_fec_t * fd_reasm_child ( fd_reasm_t * reasm, fd_reasm_fec_t * parent ) { return pool_ele ( reasm_pool ( reasm ), parent->child ); }
150 0 : fd_reasm_fec_t const * fd_reasm_child_const ( fd_reasm_t const * reasm, fd_reasm_fec_t const * parent ) { return pool_ele_const( reasm_pool_const( reasm ), parent->child ); }
151 0 : fd_reasm_fec_t * fd_reasm_sibling ( fd_reasm_t * reasm, fd_reasm_fec_t * sibling ) { return pool_ele ( reasm_pool ( reasm ), sibling->sibling ); }
152 0 : fd_reasm_fec_t const * fd_reasm_sibling_const( fd_reasm_t const * reasm, fd_reasm_fec_t const * sibling ) { return pool_ele_const( reasm_pool_const( reasm ), sibling->sibling ); }
153 :
154 : ulong
155 0 : fd_reasm_slot0( fd_reasm_t * reasm ) {
156 0 : return reasm->slot0;
157 0 : }
158 :
159 : ulong
160 0 : fd_reasm_free( fd_reasm_t * reasm ) {
161 0 : return pool_free( reasm_pool( reasm ) );
162 0 : }
163 :
164 : fd_reasm_fec_t *
165 0 : fd_reasm_peek( fd_reasm_t * reasm ) {
166 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
167 0 : for( out_iter_t iter = out_iter_fwd_init( reasm->out, pool );
168 0 : !out_iter_done ( iter, reasm->out, pool );
169 0 : iter = out_iter_fwd_next( iter, reasm->out, pool ) ) {
170 0 : fd_reasm_fec_t * fec = out_iter_ele( iter, reasm->out, pool );
171 0 : if( FD_LIKELY( fec && !fec->popped && ( !fec->eqvoc || fec->confirmed ) ) ) return fec;
172 0 : }
173 0 : return NULL;
174 0 : }
175 :
176 : fd_reasm_fec_t *
177 0 : fd_reasm_pop( fd_reasm_t * reasm ) {
178 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
179 0 : while( FD_LIKELY( !out_is_empty( reasm->out, pool ) ) ) {
180 0 : fd_reasm_fec_t * fec = out_ele_pop_head( reasm->out, pool );
181 0 : fec->in_out = 0;
182 0 : if( FD_LIKELY( !fec->popped && ( !fec->eqvoc || fec->confirmed ) ) ) {
183 0 : fec->popped = 1;
184 0 : return fec;
185 0 : }
186 0 : }
187 0 : return NULL;
188 0 : }
189 :
190 : fd_reasm_fec_t *
191 : fd_reasm_query( fd_reasm_t * reasm,
192 0 : fd_hash_t const * merkle_root ) {
193 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
194 0 : fd_reasm_fec_t * fec = NULL;
195 0 : fec = ancestry_ele_query( reasm->ancestry, merkle_root, NULL, pool );
196 0 : fec = fd_ptr_if( !fec, frontier_ele_query( reasm->frontier, merkle_root, NULL, pool ), fec );
197 0 : fec = fd_ptr_if( !fec, orphaned_ele_query( reasm->orphaned, merkle_root, NULL, pool ), fec );
198 0 : fec = fd_ptr_if( !fec, subtrees_ele_query( reasm->subtrees, merkle_root, NULL, pool ), fec );
199 0 : return fec;
200 0 : }
201 :
202 : void
203 : fd_reasm_confirm( fd_reasm_t * reasm,
204 0 : fd_hash_t const * block_id ) {
205 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
206 0 : fd_reasm_fec_t * fec = ancestry_ele_query( reasm->ancestry, block_id, NULL, pool );
207 0 : fec = fd_ptr_if( !fec, frontier_ele_query( reasm->frontier, block_id, NULL, pool ), fec );
208 :
209 : /* TODO there is a potential optimization where we don't actually need
210 : to confirm every FEC and instead just confirm at the slot-level.
211 : Given roughly ~1k shreds per slot at 32 shreds per FEC, this would
212 : save ~32 loop iterations. Punting given the additional complexity
213 : of bookkeeping and logic this would require. */
214 :
215 0 : fd_reasm_fec_t * last_inserted = NULL;
216 0 : while( FD_LIKELY( fec && !fec->confirmed ) ) {
217 0 : fec->confirmed = 1;
218 :
219 0 : xid_t * xid = xid_query( reasm->xid, (fec->slot << 32) | fec->fec_set_idx, NULL );
220 0 : xid->idx = pool_idx( pool, fec );
221 0 : if( FD_UNLIKELY( fec->slot_complete ) ) {
222 0 : xid_t * bid = xid_query( reasm->xid, ( fec->slot << 32 ) | UINT_MAX, NULL );
223 0 : bid->idx = pool_idx( pool, fec );
224 0 : }
225 :
226 0 : if( FD_LIKELY( !fec->popped && !fec->in_out ) ) {
227 : /* Let's say that the delivery queue already contains A, and
228 : we confirm A - B - C. We walk upwards from C, but we need to
229 : make sure B and C are inserted after A, and in that order. */
230 0 : if( FD_UNLIKELY( !last_inserted ) ) out_ele_push_tail( reasm->out, fec, pool );
231 0 : else out_ele_insert_before( reasm->out, fec, last_inserted, pool );
232 0 : fec->in_out = 1;
233 0 : last_inserted = fec;
234 0 : }
235 0 : fec = fd_reasm_parent( reasm, fec );
236 0 : }
237 0 : }
238 :
239 : /* This is a gross case reasm needs to handle because Agave currently
240 : does not validate chained merkle roots across slots ie. if a leader
241 : sends a bad chained merkle root on a slot boundary, the cluster
242 : might converge on the leader's block anyways. So we overwrite the
243 : chained merkle root based on the slot and parent_off metadata.
244 : There are two cases: 1. we receive the parent before the child. In
245 : this case we just overwrite the child's CMR. 2. we receive the
246 : child before the parent. In this case every time we receive a new
247 : FEC set we need to check the orphan roots for whether we can link
248 : the orphan to the new FEC via slot metadata, since the chained
249 : merkle root metadata on that orphan root might be wrong. */
250 :
251 : static void
252 : overwrite_invalid_cmr( fd_reasm_t * reasm,
253 0 : fd_reasm_fec_t * child ) {
254 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
255 0 : if( FD_UNLIKELY( child->fec_set_idx==0 && !fd_reasm_query( reasm, &child->cmr ) ) ) {
256 0 : xid_t * parent_bid = xid_query( reasm->xid, (child->slot - child->parent_off) << 32 | UINT_MAX, NULL );
257 0 : if( FD_LIKELY( parent_bid ) ) {
258 0 : fd_reasm_fec_t * parent = pool_ele( pool, parent_bid->idx );
259 0 : if( FD_LIKELY( parent ) ) {
260 0 : FD_BASE58_ENCODE_32_BYTES( child->cmr.key, cmr_b58 );
261 0 : FD_BASE58_ENCODE_32_BYTES( parent->key.key, parent_key_b58 );
262 0 : FD_LOG_INFO(( "overwriting invalid cmr for FEC slot: %lu fec_set_idx: %u from %s (CMR) to %s (parent's block id)", child->slot, child->fec_set_idx, cmr_b58, parent_key_b58 ));
263 0 : child->cmr = parent->key; /* use the parent's merkle root */
264 0 : }
265 0 : }
266 0 : }
267 0 : }
268 :
269 : /* Mark the entire subtree beginning from root as equivocating. This is
270 : linear in the number of descendants in the subtree, but amortizes
271 : because we can short-circuit the BFS at nodes that are already marked
272 : equivocating, so each node is visited at most once. */
273 :
274 : static void
275 : eqvoc( fd_reasm_t * reasm,
276 0 : fd_reasm_fec_t * root ) {
277 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
278 0 : ulong * bfs = reasm->bfs;
279 0 : bfs_push_tail( bfs, pool_idx( pool, root ) );
280 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
281 0 : fd_reasm_fec_t * descendant = pool_ele( pool, bfs_pop_head( bfs ) );
282 0 : if( FD_LIKELY( descendant->eqvoc ) ) continue;
283 0 : descendant->eqvoc = 1;
284 0 : fd_reasm_fec_t * child = fd_reasm_child( reasm, descendant );
285 0 : while( FD_LIKELY( child ) ) {
286 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
287 0 : child = fd_reasm_sibling( reasm, child );
288 0 : }
289 0 : }
290 0 : }
291 :
292 : static void
293 : link( fd_reasm_t * reasm,
294 : fd_reasm_fec_t * parent,
295 0 : fd_reasm_fec_t * child ) {
296 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
297 0 : child->parent = pool_idx( pool, parent );
298 0 : if( FD_LIKELY( parent->child == pool_idx_null( pool ) ) ) {
299 0 : parent->child = pool_idx( pool, child ); /* set as left-child. */
300 0 : } else {
301 0 : fd_reasm_fec_t * sibling = pool_ele( pool, parent->child );
302 0 : while( FD_LIKELY( sibling->sibling != pool_idx_null( pool ) ) ) sibling = pool_ele( pool, sibling->sibling );
303 0 : sibling->sibling = pool_idx( pool, child ); /* set as right-sibling. */
304 0 : }
305 0 : }
306 :
307 : /* Assumes caller is de-duplicating FEC sets of the same merkle root. */
308 : static xid_t *
309 0 : xid_update( fd_reasm_t * reasm, ulong slot, uint fec_set_idx, ulong pool_idx ) {
310 0 : xid_t * xid = xid_query( reasm->xid, (slot << 32) | fec_set_idx, NULL );
311 0 : if( FD_UNLIKELY( xid ) ) {
312 0 : xid->cnt++;
313 0 : } else {
314 0 : xid = xid_insert( reasm->xid, (slot << 32) | fec_set_idx );
315 0 : if( FD_UNLIKELY( !xid ) ) FD_LOG_CRIT(( "xid map full, slot=%lu fec_set_idx=%u", slot, fec_set_idx )); // TODO remove after reasm eviction is implemented
316 0 : xid->idx = pool_idx;
317 0 : xid->cnt = 1;
318 0 : }
319 0 : return xid;
320 0 : }
321 :
322 : static fd_reasm_fec_t *
323 : clear_slot_metadata( fd_reasm_t * reasm,
324 0 : fd_reasm_fec_t * fec ) {
325 : /* remove from bid and xid */
326 0 : if( FD_UNLIKELY( fec->slot_complete ) ) {
327 0 : xid_t * bid = xid_query( reasm->xid, (fec->slot << 32)|UINT_MAX, NULL );
328 0 : bid->cnt--;
329 0 : if( FD_LIKELY( !bid->cnt ) ) xid_remove( reasm->xid, bid );
330 0 : }
331 0 : xid_t * xid = xid_query( reasm->xid, ( fec->slot << 32 ) | fec->fec_set_idx, NULL );
332 0 : xid->cnt--;
333 0 : if( FD_LIKELY( !xid->cnt ) ) xid_remove( reasm->xid, xid );
334 :
335 0 : return fec;
336 0 : }
337 :
338 : void
339 : fd_reasm_pool_release( fd_reasm_t * reasm,
340 0 : fd_reasm_fec_t * ele ){
341 0 : pool_ele_release( reasm_pool( reasm ), ele );
342 0 : }
343 :
344 : ulong
345 0 : fd_reasm_pool_idx( fd_reasm_t * reasm, fd_reasm_fec_t * ele ) {
346 0 : return pool_idx( reasm_pool( reasm ), ele );
347 0 : }
348 :
349 : fd_reasm_fec_t *
350 : fd_reasm_remove( fd_reasm_t * reasm,
351 : fd_reasm_fec_t * head,
352 0 : fd_store_t * opt_store ) {
353 : /* see fd_forest.c clear_leaf */
354 :
355 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
356 0 : orphaned_t * orphaned = reasm->orphaned;
357 0 : frontier_t * frontier = reasm->frontier;
358 0 : ancestry_t * ancestry = reasm->ancestry;
359 0 : subtrees_t * subtrees = reasm->subtrees;
360 0 : subtreel_t * subtreel = reasm->subtreel;
361 :
362 0 : FD_TEST( head );
363 :
364 0 : fd_reasm_fec_t * tail = head;
365 :
366 :
367 :
368 0 : if( FD_LIKELY( orphaned_ele_query( orphaned, &head->key, NULL, pool ) ||
369 0 : subtrees_ele_query( subtrees, &head->key, NULL, pool ) ) ) {
370 0 : FD_TEST( head->child == ULONG_MAX ); /* must be a leaf node */
371 0 : } else {
372 : /* Node is in frontier or ancestry. If the leaf is in the frontier,
373 : we could be removing something that has been executed. Move the
374 : head pointer up to where we begin evicting.
375 :
376 : We search up the tree until the theoretical boundary of a bank.
377 : This is usually when we jump to a parent slot, but if an
378 : equivocation occured, this could also be the middle of the slot.
379 :
380 : 0 ── 32 ── 64 ── 96 (confirmed)
381 : └── 64' ── 96' ── 128' (eqvoc)
382 :
383 : Note we only have execute a slot twice (have 2 bank idxs for it)
384 : if the slot equivocated, we replayed the wrong version, and then
385 : replayed the confirmed version afterwards.
386 :
387 : The state after executing the wrong version first is:
388 :
389 : 0 ──── 32 ──── 64 ──── 96 ──── 128
390 : (bank_idx=1) (bank_idx=1) .... (all bank_idx=1)
391 :
392 : After receiving and executing the confirmed version, the state
393 : looks like:
394 :
395 : 0 (b=2) ── 32 (b=2) ── 64 (b=2) ── 96 (b=2) (confirmed)
396 : └── 64' (b=1) ── 96' (b=1) ── 128' (b=1) (eqvoc)
397 :
398 : Here we want to evict only until fec 64'. Or let's say we are
399 : getting around to executing the confirmed version, but we haven't
400 : executed it yet.
401 :
402 : 0 (b=1) ── 32 (b=1) ── 64 (b=ULONG_MAX) ── 96 (b=ULONG_MAX) (confirmed, but not executed yet)
403 : └── 64' (b=1) ── 96' (b=1) ── 128'(b=1) (eqvoc)
404 :
405 : Now we know we should evict until the max(parent has > 1 child, or fec set idx == 0) */
406 :
407 0 : while( FD_LIKELY( head ) ) {
408 0 : fd_reasm_fec_t * parent = fd_reasm_parent( reasm, head ); /* parent must exist. It's not possible to walk up past root */
409 0 : FD_TEST( head->bank_idx == tail->bank_idx );
410 :
411 0 : if( FD_UNLIKELY( head->fec_set_idx==0 ) ) break;
412 0 : if( FD_UNLIKELY( head->sibling != pool_idx_null( pool ) ) ) break; /* if the parent has more than 1 child, we know for sure the parent is a slot boundary or eqvoc point, so we can stop here. */
413 0 : if( FD_UNLIKELY( parent->child != pool_idx( pool, head ) ) ) break; /* if the parent has more than 1 child, we know for sure the parent is a slot boundary or eqvoc point, so we can stop here. */
414 0 : if( FD_UNLIKELY( parent->slot_complete ) ) break; /* specifically catches case where slot complete is in middle of slot. */
415 0 : head = parent;
416 0 : }
417 0 : }
418 0 : FD_LOG_INFO(( "evicting reasm slot %lu, fec idx %u, down to %u bank_idx %lu", head->slot, head->fec_set_idx, tail->fec_set_idx, head->bank_idx ));
419 :
420 : /* Orphan the entire subtree from the tail :( */
421 0 : if( FD_UNLIKELY( fd_reasm_child( reasm, tail ) ) ) {
422 : /* subtree this child. This code path should only be hit if this is
423 : banks-driven eviction, so children are guaranteed to be in main
424 : tree right now. */
425 0 : ulong * bfs = reasm->bfs;
426 0 : bfs_push_tail( bfs, pool_idx( pool, tail ) );
427 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
428 0 : fd_reasm_fec_t * ele = pool_ele( pool, bfs_pop_head( bfs ) );
429 0 : fd_reasm_fec_t * child = fd_reasm_child( reasm, ele );
430 0 : while( FD_LIKELY( child ) ) {
431 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
432 0 : child = pool_ele( pool, child->sibling );
433 0 : }
434 :
435 0 : if( FD_UNLIKELY( ele == tail ) ) continue;
436 : /* remove each child from the maps */
437 0 : if( FD_UNLIKELY( !ancestry_ele_remove( ancestry, &ele->key, NULL, pool ) ) ) frontier_ele_remove( frontier, &ele->key, NULL, pool );
438 0 : if( FD_LIKELY( ele->in_out ) ) { out_ele_remove( reasm->out, ele, pool ); ele->in_out = 0; }
439 :
440 0 : if( FD_UNLIKELY( ele->parent == pool_idx( pool, tail ) ) ) {
441 0 : subtrees_ele_insert( subtrees, ele, pool );
442 0 : subtreel_ele_push_tail( reasm->subtreel, ele, pool );
443 0 : ele->parent = ULONG_MAX;
444 0 : ele->sibling = ULONG_MAX;
445 0 : } else {
446 0 : orphaned_ele_insert( orphaned, ele, pool );
447 0 : }
448 0 : }
449 : /* unlink the leaf from its children. */
450 0 : tail->child = ULONG_MAX;
451 0 : }
452 :
453 0 : fd_reasm_fec_t * parent = fd_reasm_parent( reasm, head );
454 0 : if( FD_LIKELY( parent ) ) {
455 : /* Clean up the parent pointing to this head, and remove block from the maps
456 : remove the block from the parent's child list */
457 :
458 0 : fd_reasm_fec_t * child = pool_ele( pool, parent->child );
459 0 : if( FD_LIKELY( 0==memcmp( &child->key, &head->key, sizeof(fd_hash_t) ) ) ) { /* evicted is left-child (or only child) */
460 0 : parent->child = child->sibling;
461 0 : } else {
462 : /* evicted is a right-sibling */
463 0 : fd_reasm_fec_t * sibling = pool_ele( pool, child->sibling );
464 0 : fd_reasm_fec_t * prev = child;
465 0 : while( FD_LIKELY( sibling && memcmp( &sibling->key, &head->key, sizeof(fd_hash_t) ) ) ) {
466 0 : prev = sibling;
467 0 : sibling = pool_ele( pool, sibling->sibling );
468 0 : }
469 0 : prev->sibling = sibling->sibling;
470 0 : }
471 :
472 : /* remove the chain itself from the maps */
473 :
474 0 : fd_reasm_fec_t * removed_orphan = NULL;
475 0 : if( FD_LIKELY ( removed_orphan = orphaned_ele_remove( orphaned, &head->key, NULL, pool ) ) ) {
476 0 : clear_slot_metadata( reasm, head );
477 0 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
478 0 : return head;
479 0 : }
480 :
481 : /* remove from ancestry and frontier */
482 0 : fd_reasm_fec_t * curr = head;
483 0 : while( FD_LIKELY( curr ) ) {
484 0 : fd_reasm_fec_t * removed = ancestry_ele_remove( ancestry, &curr->key, NULL, pool );
485 0 : if( !removed ) removed = frontier_ele_remove( frontier, &curr->key, NULL, pool );
486 0 : if( FD_LIKELY( removed->in_out ) ) { out_ele_remove( reasm->out, removed, pool ); removed->in_out = 0; }
487 :
488 0 : curr = fd_reasm_child( reasm, curr ); /* guaranteed only one child */
489 0 : clear_slot_metadata( reasm, removed );
490 0 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &removed->key );
491 0 : }
492 :
493 : /* We removed from the main tree, so we might need to insert parent into the frontier.
494 : Only need to add parent to the frontier if it doesn't have any other children. */
495 :
496 0 : if( parent->child == pool_idx_null( pool ) ) {
497 0 : parent = ancestry_ele_remove( ancestry, &parent->key, NULL, pool );
498 0 : FD_TEST( parent );
499 0 : frontier_ele_insert( frontier, parent, pool );
500 0 : }
501 0 : return head;
502 0 : }
503 :
504 : /* No parent, remove from subtrees and subtree list */
505 0 : subtrees_ele_remove( subtrees, &head->key, NULL, pool );
506 0 : subtreel_ele_remove( subtreel, head, pool );
507 0 : clear_slot_metadata( reasm, head );
508 0 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
509 0 : return head;
510 0 : }
511 :
512 : fd_reasm_fec_t *
513 : latest_confirmed_fec( fd_reasm_t * reasm,
514 0 : ulong subtree_root ) {
515 0 : ulong * bfs = reasm->bfs;
516 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
517 0 : bfs_push_tail( bfs, subtree_root );
518 0 : fd_reasm_fec_t * latest_confirmed = NULL;
519 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
520 0 : fd_reasm_fec_t * ele = pool_ele( pool, bfs_pop_head( bfs ) );
521 0 : if( FD_LIKELY( ele->confirmed ) ) {
522 0 : if( FD_LIKELY( latest_confirmed == NULL ||
523 0 : latest_confirmed->slot < ele->slot ||
524 0 : (latest_confirmed->slot == ele->slot && latest_confirmed->fec_set_idx < ele->fec_set_idx)) )
525 0 : latest_confirmed = ele;
526 0 : }
527 0 : fd_reasm_fec_t * child = fd_reasm_child( reasm, ele );
528 0 : while( FD_LIKELY( child ) ) {
529 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
530 0 : child = pool_ele( pool, child->sibling );
531 0 : }
532 0 : }
533 0 : return latest_confirmed;
534 0 : }
535 :
536 : static fd_reasm_fec_t *
537 : gca( fd_reasm_t * reasm,
538 : fd_reasm_fec_t * a,
539 0 : fd_reasm_fec_t * b ) {
540 0 : fd_reasm_fec_t * parent1 = a;
541 0 : fd_reasm_fec_t * parent2 = b;
542 0 : while( FD_LIKELY( parent1 && parent2 ) ) {
543 0 : if( FD_LIKELY( parent1 == parent2 ) ) return parent1;
544 0 : if( parent1->slot > parent2->slot ||
545 0 : ( parent1->slot == parent2->slot && parent1->fec_set_idx > parent2->fec_set_idx ) ) parent1 = fd_reasm_parent( reasm, parent1 );
546 0 : else parent2 = fd_reasm_parent( reasm, parent2 );
547 0 : }
548 0 : return NULL;
549 0 : }
550 :
551 : /* Caller guarantees new_root and parent_root are non-NULL */
552 : static fd_reasm_fec_t *
553 : evict( fd_reasm_t * reasm,
554 : fd_store_t * opt_store,
555 : fd_hash_t const * new_root FD_PARAM_UNUSED,
556 0 : fd_hash_t const * parent_root ) {
557 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
558 0 : frontier_t * frontier = reasm->frontier;
559 0 : orphaned_t * orphaned = reasm->orphaned;
560 0 : subtrees_t * subtrees = reasm->subtrees;
561 0 : subtreel_t * subtreel = reasm->subtreel;
562 :
563 : /* Generally, best policy for eviction is to evict in the order of:
564 : 1. Highest unconfirmed orphan leaf - furthest from root
565 : 2. Highest incomplete, unconfirmed leaf in ancestry - furthest from tip of execution
566 : 3. Highest confirmed orphan leaf - evictable, since unrelated to banks, but less ideal */
567 :
568 0 : fd_reasm_fec_t * unconfrmd_orphan = NULL; /* 1st best candidate for eviction is the highest unconfirmed orphan. */
569 0 : fd_reasm_fec_t * confirmed_orphan = NULL; /* 3rd best candidate for eviction is the highest confirmed orphan. */
570 0 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
571 0 : !subtreel_iter_done ( iter, subtreel, pool );
572 0 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
573 0 : fd_reasm_fec_t * ele = subtreel_iter_ele( iter, subtreel, pool );
574 0 : if( ele->child != ULONG_MAX || memcmp( &ele->key, parent_root, sizeof(fd_hash_t) ) == 0 ) continue;
575 0 : if( FD_UNLIKELY( ele->confirmed ) ) confirmed_orphan = fd_ptr_if( !confirmed_orphan || ele->slot > confirmed_orphan->slot, ele, confirmed_orphan );
576 0 : else unconfrmd_orphan = fd_ptr_if( !unconfrmd_orphan || ele->slot > unconfrmd_orphan->slot, ele, unconfrmd_orphan );
577 0 : }
578 0 : for( orphaned_iter_t iter = orphaned_iter_init( orphaned, pool );
579 0 : !orphaned_iter_done( iter, orphaned, pool );
580 0 : iter = orphaned_iter_next( iter, orphaned, pool ) ) {
581 0 : fd_reasm_fec_t * ele = orphaned_iter_ele( iter, orphaned, pool );
582 0 : if( ele->child != ULONG_MAX || memcmp( &ele->key, parent_root, sizeof(fd_hash_t) ) == 0 ) continue;
583 0 : if( FD_UNLIKELY( ele->confirmed ) ) confirmed_orphan = fd_ptr_if( !confirmed_orphan || ele->slot > confirmed_orphan->slot, ele, confirmed_orphan );
584 0 : else unconfrmd_orphan = fd_ptr_if( !unconfrmd_orphan || ele->slot > unconfrmd_orphan->slot, ele, unconfrmd_orphan );
585 0 : }
586 :
587 0 : if( FD_UNLIKELY( unconfrmd_orphan )) {
588 0 : return fd_reasm_remove( reasm, unconfrmd_orphan, opt_store );
589 0 : }
590 :
591 0 : fd_reasm_fec_t * unconfrmd_leaf = NULL; /* 2nd best candidate for eviction is the highest unconfirmed, incomplete slot. */
592 0 : for( frontier_iter_t iter = frontier_iter_init( frontier, pool );
593 0 : !frontier_iter_done( iter, frontier, pool );
594 0 : iter = frontier_iter_next( iter, frontier, pool ) ) {
595 0 : fd_reasm_fec_t * ele = frontier_iter_ele( iter, frontier, pool );
596 0 : if( iter.ele_idx == reasm->root
597 0 : || 0==memcmp( &ele->key, parent_root, sizeof(fd_hash_t) )
598 0 : || ele->confirmed
599 0 : || ele->slot_complete
600 0 : || ele->is_leader ) continue; /* not a candidate */
601 0 : unconfrmd_leaf = fd_ptr_if( !unconfrmd_leaf || ele->slot > unconfrmd_leaf->slot, ele, unconfrmd_leaf );
602 0 : }
603 :
604 0 : if( FD_UNLIKELY( unconfrmd_leaf )) {
605 0 : return fd_reasm_remove( reasm, unconfrmd_leaf, opt_store );
606 0 : }
607 :
608 : /* Already did traversal to find best confirmed orphan candidate,
609 : which is the third choice */
610 :
611 0 : if( FD_UNLIKELY( confirmed_orphan )) {
612 0 : fd_reasm_fec_t * parent = fd_reasm_query( reasm, parent_root );
613 0 : if( !parent ) {
614 0 : return fd_reasm_remove( reasm, confirmed_orphan, opt_store );
615 0 : }
616 : /* for any subtree:
617 : 0 ── 1 ── 2 ── 3 (confirmed) ── 4(confirmed) ── 5 ── 6 ──> add 7 here is valid.
618 : └──> add 7 here is valid.
619 : └──> add 7 here is invalid. */
620 0 : ulong subtree_root = reasm->root;
621 0 : if( subtrees_ele_query( subtrees, parent_root, NULL, pool ) ||
622 0 : orphaned_ele_query( orphaned, parent_root, NULL, pool ) ) {
623 : /* if adding to an orphan, find the root of the orphan subtree. */
624 0 : fd_reasm_fec_t * root = parent;
625 0 : while( FD_LIKELY( root->parent != ULONG_MAX ) ) {
626 0 : root = pool_ele( pool, root->parent );
627 0 : }
628 0 : subtree_root = pool_idx( pool, root );
629 0 : }
630 :
631 0 : fd_reasm_fec_t * latest_confirmed_leaf = latest_confirmed_fec( reasm, subtree_root );
632 0 : if( !latest_confirmed_leaf || latest_confirmed_leaf == gca( reasm, latest_confirmed_leaf, parent )) {
633 0 : return fd_reasm_remove( reasm, confirmed_orphan, opt_store );
634 0 : }
635 : /* is a useless new fork. */
636 0 : return NULL;
637 0 : }
638 0 : return NULL; /* nothing else could be evicted */
639 0 : }
640 :
641 : fd_reasm_fec_t *
642 : fd_reasm_insert( fd_reasm_t * reasm,
643 : fd_hash_t const * merkle_root,
644 : fd_hash_t const * chained_merkle_root,
645 : ulong slot,
646 : uint fec_set_idx,
647 : ushort parent_off,
648 : ushort data_cnt,
649 : int data_complete,
650 : int slot_complete,
651 : int is_leader,
652 : fd_store_t * opt_store,
653 0 : fd_reasm_fec_t ** evicted ) {
654 :
655 : # if LOGGING
656 : FD_BASE58_ENCODE_32_BYTES( merkle_root->key, merkle_root_b58 );
657 : FD_BASE58_ENCODE_32_BYTES( chained_merkle_root->key, chained_merkle_root_b58 );
658 : FD_LOG_NOTICE(( "inserting (%lu %u) %s %s. %u %d %d", slot, fec_set_idx, merkle_root_b58, chained_merkle_root_b58, data_cnt, data_complete, slot_complete ));
659 : # endif
660 :
661 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
662 0 : # if FD_REASM_USE_HANDHOLDING
663 0 : FD_TEST( !fd_reasm_query( reasm, merkle_root ) );
664 0 : # endif
665 :
666 0 : ulong null = pool_idx_null( pool );
667 0 : ancestry_t * ancestry = reasm->ancestry;
668 0 : frontier_t * frontier = reasm->frontier;
669 0 : orphaned_t * orphaned = reasm->orphaned;
670 0 : subtrees_t * subtrees = reasm->subtrees;
671 0 : subtreel_t * subtreel = reasm->subtreel;
672 :
673 0 : ulong * bfs = reasm->bfs;
674 0 : out_t * out = reasm->out;
675 :
676 0 : *evicted = NULL;
677 :
678 0 : if( FD_UNLIKELY( pool_free( pool )==1UL ) ) {
679 0 : FD_TEST( reasm->root!=pool_idx_null( pool ) );
680 : /* The eviction removes evicted elements from the maps, but leaves
681 : the elements in the pool for caller to release. Thus, in order
682 : for the following insert/acquire to succeed, we have to start
683 : evicting when we have 1 remaining free element in the pool. This
684 : element is the one that will be acquired below. reasm is
685 : dependent on the caller to then release the evicted elements back
686 : to the pool before the next insert/acquire. */
687 0 : fd_reasm_fec_t * evicted_fec = evict( reasm, opt_store, merkle_root, chained_merkle_root );
688 0 : if( FD_UNLIKELY( evicted_fec == NULL ) ) {
689 0 : FD_LOG_INFO(("reasm failed to evict a fec set when inserting slot %lu fec set %u", slot, fec_set_idx));
690 :
691 : /* in this case we want to signal to the replay tile that we
692 : failed to insert the FEC set. This is effectively is the same
693 : logic as if we had this FEC set, and then it got evicted, and
694 : then the caller now needs to process the evicted FEC set. So
695 : here we acquire the final pool element for it and return it
696 : to the caller as the evicted FEC set. */
697 :
698 0 : fd_reasm_fec_t * fec = pool_ele_acquire( pool );
699 0 : fec->key = *merkle_root;
700 0 : fec->cmr = *chained_merkle_root;
701 0 : fec->parent = null;
702 0 : fec->child = null;
703 0 : fec->slot = slot;
704 0 : fec->parent_off = parent_off;
705 0 : fec->fec_set_idx = fec_set_idx;
706 0 : fec->bank_idx = null;
707 :
708 0 : *evicted = fec;
709 0 : return NULL;
710 0 : }
711 :
712 0 : *evicted = evicted_fec;
713 0 : }
714 :
715 0 : FD_TEST( pool_free( pool ) );
716 0 : fd_reasm_fec_t * fec = pool_ele_acquire( pool );
717 0 : fec->key = *merkle_root;
718 0 : fec->next = null;
719 0 : fec->parent = null;
720 0 : fec->child = null;
721 0 : fec->sibling = null;
722 0 : fec->slot = slot;
723 0 : fec->parent_off = parent_off;
724 0 : fec->fec_set_idx = fec_set_idx;
725 0 : fec->data_cnt = data_cnt;
726 0 : fec->data_complete = data_complete;
727 0 : fec->slot_complete = slot_complete;
728 0 : fec->is_leader = is_leader;
729 0 : fec->eqvoc = 0;
730 0 : fec->confirmed = 0;
731 0 : fec->popped = 0;
732 0 : fec->bank_dead = 0;
733 0 : fec->bank_idx = null;
734 0 : fec->parent_bank_idx = null;
735 0 : fec->bank_seq = null;
736 0 : fec->parent_bank_seq = null;
737 :
738 : /* set the out and subtreel pointers to null */
739 0 : fec->out.next = null;
740 0 : fec->out.prev = null;
741 0 : fec->in_out = 0;
742 0 : fec->subtreel.next = null;
743 0 : fec->subtreel.prev = null;
744 :
745 0 : if( FD_UNLIKELY( !chained_merkle_root ) ) { /* initialize the reasm with the root */
746 0 : FD_TEST( reasm->root==pool_idx_null( pool ) );
747 0 : fec->confirmed = 1;
748 0 : fec->popped = 1;
749 0 : /* */ xid_update( reasm, slot, UINT_MAX, pool_idx( pool, fec ) );
750 0 : /* */ xid_update( reasm, slot, fec_set_idx, pool_idx( pool, fec ) );
751 0 : reasm->root = pool_idx( pool, fec );
752 0 : reasm->slot0 = slot;
753 0 : frontier_ele_insert( reasm->frontier, fec, pool );
754 0 : return fec;
755 0 : }
756 :
757 0 : fec->cmr = *chained_merkle_root;
758 0 : FD_TEST( memcmp( &fec->cmr, chained_merkle_root, sizeof(fd_hash_t) ) == 0 );
759 :
760 0 : if( FD_UNLIKELY( slot_complete ) ) {
761 0 : xid_t * bid = xid_query( reasm->xid, (slot << 32) | UINT_MAX, NULL );
762 0 : if( FD_UNLIKELY( bid ) ) {
763 0 : fd_reasm_fec_t * orig_fec = pool_ele( pool, bid->idx );
764 0 : FD_BASE58_ENCODE_32_BYTES( orig_fec->key.key, prev_block_id_b58 );
765 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, curr_block_id_b58 );
766 0 : FD_LOG_WARNING(( "equivocating block_id for FEC slot: %lu fec_set_idx: %u prev: %s curr: %s", fec->slot, fec->fec_set_idx, prev_block_id_b58, curr_block_id_b58 )); /* it's possible there's equivocation... */
767 0 : }
768 0 : xid_update( reasm, slot, UINT_MAX, pool_idx( pool, fec ) );
769 0 : }
770 0 : overwrite_invalid_cmr( reasm, fec ); /* handle receiving parent before child */
771 :
772 : /* First, we search for the parent of this new FEC and link if found.
773 : The new FEC set may result in a new leaf or a new orphan tree root
774 : so we need to check that. */
775 :
776 0 : fd_reasm_fec_t * parent = NULL;
777 0 : if( FD_LIKELY ( parent = ancestry_ele_query ( ancestry, &fec->cmr, NULL, pool ) ) ) { /* parent is connected non-leaf */
778 0 : frontier_ele_insert( frontier, fec, pool );
779 0 : out_ele_push_tail ( out, fec, pool );
780 0 : fec->in_out = 1;
781 0 : } else if( FD_LIKELY ( parent = frontier_ele_remove( frontier, &fec->cmr, NULL, pool ) ) ) { /* parent is connected leaf */
782 0 : ancestry_ele_insert( ancestry, parent, pool );
783 0 : frontier_ele_insert( frontier, fec, pool );
784 0 : out_ele_push_tail ( out, fec, pool );
785 0 : fec->in_out = 1;
786 0 : } else if( FD_LIKELY ( parent = orphaned_ele_query ( orphaned, &fec->cmr, NULL, pool ) ) ) { /* parent is orphaned non-root */
787 0 : orphaned_ele_insert( orphaned, fec, pool );
788 0 : } else if( FD_LIKELY ( parent = subtrees_ele_query ( subtrees, &fec->cmr, NULL, pool ) ) ) { /* parent is orphaned root */
789 0 : orphaned_ele_insert( orphaned, fec, pool );
790 0 : } else { /* parent not found */
791 0 : subtrees_ele_insert ( subtrees, fec, pool );
792 0 : subtreel_ele_push_tail( subtreel, fec, pool );
793 0 : }
794 :
795 0 : if( FD_LIKELY( parent ) ) link( reasm, parent, fec );
796 :
797 : /* Second, we search for children of this new FEC and link them to it.
798 : By definition any children must be orphaned (a child cannot be part
799 : of a connected tree before its parent). Therefore, we only search
800 : through the orphaned subtrees. As part of this operation, we also
801 : coalesce connected orphans into the same tree. This way we only
802 : need to search the orphan tree roots (vs. all orphaned nodes). */
803 :
804 0 : ulong min_descendant = ULONG_MAX; /* needed for eqvoc checks below */
805 0 : FD_TEST( bfs_empty( bfs ) );
806 0 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
807 0 : !subtreel_iter_done ( iter, subtreel, pool );
808 0 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
809 0 : bfs_push_tail( bfs, subtreel_iter_idx( iter, subtreel, pool ) );
810 0 : }
811 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) { /* link orphan subtrees to the new FEC */
812 0 : fd_reasm_fec_t * orphan_root = pool_ele( pool, bfs_pop_head( bfs ) );
813 0 : FD_TEST( orphan_root ); // `overwrite_invalid_cmr` relies on orphan_root being non-null
814 0 : overwrite_invalid_cmr( reasm, orphan_root ); /* case 2: received child before parent */
815 0 : if( FD_LIKELY( 0==memcmp( orphan_root->cmr.uc, fec->key.uc, sizeof(fd_hash_t) ) ) ) { /* this orphan_root is a direct child of fec */
816 0 : link( reasm, fec, orphan_root );
817 0 : subtrees_ele_remove( subtrees, &orphan_root->key, NULL, pool );
818 0 : subtreel_ele_remove( subtreel, orphan_root, pool );
819 0 : orphaned_ele_insert( orphaned, orphan_root, pool );
820 0 : min_descendant = fd_ulong_min( min_descendant, orphan_root->slot );
821 0 : }
822 0 : }
823 :
824 : /* Third, we advance the frontier outward beginning from fec as we may
825 : have connected orphaned descendants to fec in the above step. This
826 : does a BFS outward from fec until it reaches leaves, moving fec and
827 : its non-leaf descendants into ancestry and leaves into frontier.
828 :
829 : parent (ancestry) orphan root (subtrees)
830 : | |
831 : fec (frontier) orphan child (orphaned)
832 :
833 : parent
834 : |
835 : fec <- frontier is here
836 : |
837 : orphan root
838 : |
839 : orphan child <- advance to here */
840 :
841 0 : if( FD_LIKELY( frontier_ele_query( frontier, &fec->key, NULL, pool ) ) ) bfs_push_tail( bfs, pool_idx( pool, fec ) );
842 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
843 0 : fd_reasm_fec_t * parent = pool_ele( pool, bfs_pop_head( bfs ) );
844 0 : fd_reasm_fec_t * child = pool_ele( pool, parent->child );
845 0 : if( FD_LIKELY( child ) ) {
846 0 : frontier_ele_remove( frontier, &parent->key, NULL, pool );
847 0 : ancestry_ele_insert( ancestry, parent, pool );
848 0 : }
849 0 : while( FD_LIKELY( child ) ) {
850 0 : FD_TEST( orphaned_ele_remove( orphaned, &child->key, NULL, pool ) );
851 0 : frontier_ele_insert( frontier, child, pool );
852 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
853 0 : out_ele_push_tail( out, child, pool );
854 0 : child->in_out = 1;
855 0 : child = pool_ele( pool, child->sibling );
856 0 : }
857 0 : }
858 :
859 : /* Fourth, check and handle equivocation. There are three cases.
860 :
861 : 1. we've already seen this FEC's xid (slot, fec_set_idx)
862 : 2. this FEC's parent equivocates. */
863 :
864 0 : xid_t * xid = xid_query( reasm->xid, (slot<<32) | fec_set_idx, NULL );
865 0 : if( FD_UNLIKELY( xid ) ) {
866 0 : eqvoc( reasm, fec );
867 0 : eqvoc( reasm, pool_ele( pool, xid->idx ) ); /* first appearance of this xid */
868 0 : }
869 0 : xid_update( reasm, slot, fec_set_idx, pool_idx( pool, fec ) );
870 0 : if( FD_UNLIKELY( parent && parent->eqvoc && !parent->confirmed ) ) eqvoc( reasm, fec );
871 :
872 : /* 3. this FEC's parent is a slot_complete, but this FEC is part of
873 : the same slot. Or this fec is a slot_complete, but it's child
874 : is part of the same slot. i.e.
875 :
876 : A - B - C (slot cmpl) - D - E - F (slot cmpl)
877 :
878 : We do not want to deliver this entire slot if possible. The
879 : block has TWO slot complete flags. This is not honest behavior.
880 :
881 : Two ways this can happen:
882 : scenario 1: A - B - C (slot cmpl) - D - E - F (slot cmpl)
883 :
884 : scenario 2: A - B - C (slot cmpl)
885 : \
886 : D - E - F (slot cmpl) [true equivocation case]
887 :
888 : Scenario 2 is handled first-class in reasm, and we will only
889 : replay this slot if one version gets confirmed (or we did not
890 : see evidence of the other version until after we replayed the
891 : first).
892 :
893 : In scenario 1, it is impossible for the cluster to converge on
894 : ABC(slot cmpl)DEF(slot cmpl), but depending on the order in
895 : which each node receives the FEC sets, the cluster could either
896 : confirm ABC(slot cmpl), or mark the slot dead. In general, if
897 : the majority of nodes received and replayed the shorter version
898 : before seeing the second half, the slot could still end up
899 : getting confirmed. Whereas if the majority of nodes saw shreds
900 : from DEF before finishing replay and voting on ABC, the slot
901 : would likely be marked dead.
902 :
903 : Firedancer handles this case by marking the slot eqvoc upon
904 : detecting a slot complete in the middle of a slot. reasm will
905 : mark the earliest FEC possible in the slot as eqvoc, but may not
906 : be able to detect fec 0 because the FEC may be orphaned, or fec
907 : 0 may not exist yet. Thus, it is possible for Firedancer to
908 : replay ABC(slot cmpl), but it is impossible for reasm to deliver
909 : fecs D, E, or F. The node would then vote for ABC(slot cmpl).
910 :
911 : If the node sees D before replaying A, B, or C, then it would be
912 : able to mark ABCD as eqvoc, and prevent the corresponding FECs
913 : from being delivered. In this case, the Firedancer node would
914 : have an incompletely executed bank that eventually gets pruned
915 : away.
916 :
917 : Agave's handling differs because they key by slot, but our
918 : handling is compatible with the protocol. */
919 :
920 0 : if( FD_UNLIKELY( (parent && parent->slot_complete && parent->slot == slot) ||
921 0 : (fec->slot_complete && min_descendant == slot) ) ) {
922 : /* walk up to the earliest fec in slot */
923 0 : fd_reasm_fec_t * curr = fec;
924 0 : while( FD_LIKELY( curr->parent != pool_idx_null( pool ) && pool_ele( pool, curr->parent )->slot == slot ) ) {
925 0 : curr = pool_ele( pool, curr->parent );
926 0 : }
927 0 : eqvoc( reasm, curr );
928 0 : }
929 :
930 : /* Finally, return the newly inserted FEC. */
931 0 : return fec;
932 0 : }
933 :
934 : fd_reasm_fec_t *
935 : fd_reasm_publish( fd_reasm_t * reasm,
936 : fd_hash_t const * merkle_root,
937 0 : fd_store_t * opt_store ) {
938 :
939 0 : # if FD_REASM_USE_HANDHOLDING
940 0 : if( FD_UNLIKELY( !pool_ele( reasm_pool( reasm ), reasm->root ) ) ) { FD_LOG_WARNING(( "missing root" )); return NULL; }
941 0 : if( FD_UNLIKELY( !fd_reasm_query( reasm, merkle_root ) ) ) {
942 0 : FD_BASE58_ENCODE_32_BYTES( merkle_root->key, merkle_root_b58 );
943 0 : FD_LOG_WARNING(( "merkle root %s not found", merkle_root_b58 ));
944 0 : return NULL;
945 0 : }
946 0 : # endif
947 :
948 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
949 0 : ulong null = pool_idx_null( pool );
950 0 : fd_reasm_fec_t * oldr = pool_ele( pool, reasm->root );
951 0 : fd_reasm_fec_t * newr = fd_reasm_query( reasm, merkle_root );
952 0 : ulong * bfs = reasm->bfs;
953 :
954 0 : bfs_push_tail( bfs, pool_idx( pool, oldr ) );
955 :
956 : /* First, BFS down the tree, pruning all of root's ancestors and also
957 : any descendants of those ancestors. */
958 :
959 : /* Also, prune any subtrees who's root is less than the new root. */
960 :
961 0 : subtreel_t * subtreel = reasm->subtreel;
962 0 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
963 0 : !subtreel_iter_done ( iter, subtreel, pool );
964 0 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
965 0 : fd_reasm_fec_t * ele = subtreel_iter_ele( iter, subtreel, pool );
966 0 : if( ele->slot < newr->slot ) {
967 0 : bfs_push_tail( bfs, pool_idx( pool, ele ) );
968 0 : }
969 0 : }
970 :
971 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
972 0 : fd_reasm_fec_t * head = pool_ele( pool, bfs_pop_head( bfs ) );
973 :
974 0 : fd_reasm_fec_t * fec = ancestry_ele_remove( reasm->ancestry, &head->key, NULL, pool );
975 0 : if( FD_UNLIKELY( !fec ) ) fec = frontier_ele_remove( reasm->frontier, &head->key, NULL, pool );
976 0 : if( FD_UNLIKELY( !fec ) ) fec = orphaned_ele_remove( reasm->orphaned, &head->key, NULL, pool );
977 0 : if( FD_UNLIKELY( !fec ) ) {
978 0 : fec = subtrees_ele_remove( reasm->subtrees, &head->key, NULL, pool );
979 0 : subtreel_ele_remove( reasm->subtreel, head, pool );
980 0 : }
981 :
982 0 : fd_reasm_fec_t * child = pool_ele( pool, head->child );
983 0 : while( FD_LIKELY( child ) ) { /* iterate over children */
984 0 : if( FD_LIKELY( child != newr ) ) { /* stop at new root */
985 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
986 0 : }
987 0 : child = pool_ele( pool, child->sibling ); /* right-sibling */
988 0 : }
989 0 : clear_slot_metadata( reasm, head );
990 0 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
991 0 : if( FD_LIKELY( head->in_out ) ) { out_ele_remove( reasm->out, head, pool ); head->in_out = 0; }
992 0 : pool_ele_release( pool, head );
993 0 : }
994 :
995 0 : newr->parent = null; /* unlink old root */
996 0 : newr->sibling = null;
997 0 : reasm->root = pool_idx( pool, newr ); /* replace with new root */
998 0 : return newr;
999 0 : }
1000 :
1001 : #include <stdio.h>
1002 :
1003 : FD_FN_UNUSED static void
1004 0 : print( fd_reasm_t const * reasm, fd_reasm_fec_t const * fec, int space, const char * prefix ) {
1005 0 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1006 0 :
1007 0 : if( fec == NULL ) return;
1008 0 :
1009 0 : if( space > 0 ) printf( "\n" );
1010 0 : for( int i = 0; i < space; i++ ) printf( " " );
1011 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1012 0 : printf( "%s%s", prefix, key_b58 );
1013 0 :
1014 0 : fd_reasm_fec_t const * curr = pool_ele_const( pool, fec->child );
1015 0 : char new_prefix[1024]; /* FIXME size this correctly */
1016 0 : while( curr ) {
1017 0 : if( pool_ele_const( pool, curr->sibling ) ) {
1018 0 : sprintf( new_prefix, "├── " ); /* branch indicating more siblings follow */
1019 0 : print( reasm, curr, space + 4, new_prefix );
1020 0 : } else {
1021 0 : sprintf( new_prefix, "└── " ); /* end branch */
1022 0 : print( reasm, curr, space + 4, new_prefix );
1023 0 : }
1024 0 : curr = pool_ele_const( pool, curr->sibling );
1025 0 : }
1026 0 : }
1027 :
1028 : static void
1029 0 : ancestry_print( fd_reasm_t const * reasm, fd_reasm_fec_t const * fec, int space, const char * prefix, fd_reasm_fec_t const * prev, ulong recurse_depth ) {
1030 0 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1031 0 : if( fec == NULL ) return;
1032 0 : recurse_depth++;
1033 0 : if( recurse_depth == 2048 ) {
1034 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1035 0 : FD_LOG_NOTICE(("Cutting off ancestry print at depth %lu, slot %lu. Continue printing with this root key %s.", recurse_depth, fec->slot, key_b58 ));
1036 0 : return;
1037 0 : }
1038 0 : fd_reasm_fec_t const * child = fd_reasm_child_const( reasm, fec );
1039 :
1040 0 : if( !prev || /* root OR */
1041 0 : ( fec->slot_complete || (!prev->eqvoc && fec->eqvoc) || fec->child == pool_idx_null( pool ) || child->sibling != pool_idx_null( pool ) )) {
1042 0 : if( space > 0 ) printf( "\n" );
1043 0 : for( int i = 0; i < space; i++ ) printf( " " );
1044 0 : printf( "%s", prefix );
1045 :
1046 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1047 0 : key_b58[5] = '\0'; /* only print first 5 characters of key_b58 */
1048 0 : printf( "%lu(%u) %s", fec->slot, fec->fec_set_idx, key_b58 );
1049 0 : if( fec->eqvoc ) printf( " [eqvoc]" );
1050 0 : if( fec->is_leader ) printf( " [leader]" );
1051 0 : space += 5;
1052 0 : fflush(stdout);
1053 0 : }
1054 :
1055 0 : char new_prefix[1024]; /* FIXME size this correctly */
1056 :
1057 0 : while( child ) {
1058 0 : if( pool_ele_const( pool, child->sibling ) ) {
1059 0 : sprintf( new_prefix, "├── " ); /* branch indicating more siblings follow */
1060 0 : ancestry_print( reasm, child, space, new_prefix, fec, recurse_depth );
1061 0 : } else {
1062 0 : sprintf( new_prefix, "└── " ); /* end branch */
1063 0 : ancestry_print( reasm, child, space, new_prefix, fec, recurse_depth );
1064 0 : }
1065 0 : child = pool_ele_const( pool, child->sibling );
1066 0 : }
1067 0 : }
1068 :
1069 : void
1070 0 : fd_reasm_print( fd_reasm_t const * reasm ) {
1071 0 : FD_LOG_NOTICE( ( "\n\n[Reasm - showing only leaves, slot completes, and branches]" ) );
1072 0 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1073 0 : printf( "ele cnt: %lu\n", pool_used( pool ) );
1074 :
1075 0 : if( FD_LIKELY( reasm->root != pool_idx_null( pool ) ) ) {
1076 0 : printf( "\n\n[Connected Fecs]\n" );
1077 0 : ancestry_print( reasm, fd_reasm_root_const( reasm ), 0, "", NULL, 0 );
1078 0 : }
1079 :
1080 0 : printf( "\n\n[Unconnected Fecs]\n" );
1081 0 : subtreel_t const * subtreel = reasm->_subtrlf;
1082 0 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
1083 0 : !subtreel_iter_done ( iter, subtreel, pool );
1084 0 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
1085 0 : fd_reasm_fec_t const * fec = subtreel_iter_ele_const( iter, subtreel, pool );
1086 0 : ancestry_print( reasm, fec, 0, "", NULL, 0 );
1087 0 : }
1088 :
1089 0 : printf( "\n\n" );
1090 0 : fflush(stdout);
1091 0 : }
|