Line data Source code
1 : #include "fd_prog_load.h"
2 : #include "fd_progcache_user.h"
3 : #include "fd_progcache.h"
4 : #include "fd_progcache_rec.h"
5 : #include "../accdb/fd_accdb_sync.h"
6 : #include "../accdb/fd_accdb_impl_v1.h"
7 : #include "../../util/racesan/fd_racesan_target.h"
8 :
/* Default metrics sink.  fd_progcache_join points cache->metrics here
   so counter updates are always valid even if the caller never installs
   its own metrics object.  FD_TL presumably makes this thread local --
   TODO confirm against fd_util. */
FD_TL fd_progcache_metrics_t fd_progcache_metrics_default;
10 :
11 : fd_progcache_t *
12 : fd_progcache_join( fd_progcache_t * cache,
13 : fd_progcache_shmem_t * shmem,
14 : uchar * scratch,
15 12 : ulong scratch_sz ) {
16 12 : if( FD_UNLIKELY( !cache ) ) {
17 0 : FD_LOG_WARNING(( "NULL cache" ));
18 0 : return NULL;
19 0 : }
20 12 : if( FD_LIKELY( scratch_sz ) ) {
21 12 : if( FD_UNLIKELY( !scratch ) ) {
22 0 : FD_LOG_WARNING(( "NULL scratch" ));
23 0 : return NULL;
24 0 : }
25 12 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)scratch, FD_PROGCACHE_SCRATCH_ALIGN ) ) ) {
26 0 : FD_LOG_WARNING(( "misaligned scratch" ));
27 0 : return NULL;
28 0 : }
29 12 : }
30 12 : memset( cache, 0, sizeof(fd_progcache_t) );
31 12 : if( FD_UNLIKELY( !fd_progcache_shmem_join( cache->join, shmem ) ) ) return NULL;
32 :
33 12 : cache->metrics = &fd_progcache_metrics_default;
34 12 : cache->scratch = scratch;
35 12 : cache->scratch_sz = scratch_sz;
36 :
37 12 : return cache;
38 12 : }
39 :
40 : void *
41 : fd_progcache_leave( fd_progcache_t * cache,
42 12 : fd_progcache_shmem_t ** opt_shmem ) {
43 12 : if( FD_UNLIKELY( !cache ) ) {
44 0 : FD_LOG_WARNING(( "NULL cache" ));
45 0 : return NULL;
46 0 : }
47 12 : if( FD_UNLIKELY( !fd_progcache_shmem_leave( cache->join, opt_shmem ) ) ) return NULL;
48 12 : cache->scratch = NULL;
49 12 : cache->scratch_sz = 0UL;
50 12 : return cache;
51 12 : }
52 :
/* fd_progcache_chain_evict evicts records from the index (hash map) and
   frees program cache memory.  Non-rooted progcache_rec objects are not
   freed immediately (kept as exists=0, deferred to publish/cancel for
   cleanup.) */

static void
fd_progcache_chain_evict( fd_progcache_t * cache,
                          uint chain_idx ) {
  fd_progcache_join_t * ljoin = cache->join;
  fd_prog_recm_shmem_private_chain_t * chain = fd_prog_recm_shmem_private_chain( ljoin->rec.map->map, chain_idx );

  /* Lock chain: a single-chain map txn write-locks the chain so readers
     cannot observe a half-unlinked list. */
  struct {
    fd_prog_recm_txn_t txn[1];
    fd_prog_recm_txn_private_info_t info[1];
  } _map_txn;
  fd_prog_recm_txn_t * map_txn = fd_prog_recm_txn_init( _map_txn.txn, ljoin->rec.map, 1UL );
  _map_txn.info->chain = chain;
  map_txn->lock_cnt = 1UL;
  int txn_err = fd_prog_recm_txn_try( map_txn, FD_MAP_FLAG_BLOCKING );
  FD_TEST( txn_err==FD_MAP_SUCCESS );
  ulong cnt = fd_prog_recm_private_vcnt_cnt( _map_txn.info->ver_cnt );
  if( FD_UNLIKELY( !cnt ) ) goto exit; /* nothing to evict on this chain */
  if( FD_UNLIKELY( chain->head_cidx==UINT_MAX ) ) {
    FD_LOG_CRIT(( "corrupt progcache: chain[%u] cnt=%lu head_cidx=UINT_MAX", chain_idx, cnt ));
  }

  /* Peek key of first record; eviction targets the prefix of records
     sharing that key. */
  fd_progcache_rec_t * rec0 = ljoin->rec.pool->ele;
  fd_funk_rec_key_t key = *rec0[ chain->head_cidx ].pair.key;

  /* Iterate chain and lock all records matching key.  On a failed
     trylock, every lock taken so far is rolled back and eviction is
     abandoned for this chain (lock_cnt reset to 0). */
  ulong lock_cnt = 0UL;
  for( uint node = chain->head_cidx; node!=UINT_MAX; node = rec0[ node ].map_next ) {
    fd_progcache_rec_t * rec = &ljoin->rec.pool->ele[ node ];
    if( FD_UNLIKELY( !fd_funk_rec_key_eq( rec->pair.key, &key ) ) ) break;
    if( FD_UNLIKELY( !fd_rwlock_trywrite( &rec->lock ) ) ) {
      /* CAS failed, unlock records we just locked */
      for( uint node1 = chain->head_cidx; node1!=node; node1 = rec0[ node1 ].map_next ) {
        fd_rwlock_unwrite( &ljoin->rec.pool->ele[ node1 ].lock );
      }
      lock_cnt = 0UL;
      break; /* give up */
    }
    lock_cnt++;
  }

  /* All records locked, now free underlying memory.  Each iteration
     removes the current chain head (the locked prefix), so *next never
     needs to advance.  Rooted records are returned to the pool right
     away; in-flight (non-rooted) records stay allocated with exists=0
     until their txn is published/cancelled. */
  uint * next = &chain->head_cidx;
  for( uint node = chain->head_cidx; node!=UINT_MAX && lock_cnt; ) {
    fd_progcache_rec_t * rec = &ljoin->rec.pool->ele[ node ];
    *next = rec->map_next; /* unlink node from chain */
    node = rec->map_next;
    rec->map_next = UINT_MAX;
    int is_root = fd_funk_txn_xid_eq_root( rec->pair.xid );
    ulong rodata_sz = rec->rodata_sz; /* sampled before free for metrics */
    ulong data_max = rec->data_max;

    fd_progcache_val_free( rec, ljoin );
    rec->exists = 0;
    rec->executable = 0;
    rec->invalidate = 0;
    fd_rwlock_unwrite( &rec->lock );

    if( is_root ) fd_prog_recp_release( ljoin->rec.pool, rec, 1 );
    else {} /* record free deferred to txn cleanup */

    lock_cnt--;
    FD_CRIT( cnt>0, "invariant violation" );
    cnt--;
    cache->metrics->evict_cnt++;
    cache->metrics->evict_tot_sz += rodata_sz;
    cache->metrics->evict_freed_sz += data_max;
  }

  /* Publish the reduced element count (version bits bumped below) */
  chain->ver_cnt = fd_prog_recm_private_vcnt( fd_prog_recm_private_vcnt_ver( chain->ver_cnt ), cnt );

  /* Unlock chain */
exit:
  fd_prog_recm_txn_test( map_txn ); /* increments ver, keeps above cnt */
  fd_prog_recm_txn_fini( map_txn );
}
135 :
/* fd_progcache_clock_evict runs the clock ("second chance") eviction
   algorithm: chains with their clock bit set are spared (bit cleared),
   others with live records are evicted via fd_progcache_chain_evict.
   Stops after roughly 16 MiB of heap has been reclaimed.  Serialized
   by the clock write lock. */

static void
fd_progcache_clock_evict( fd_progcache_t * cache ) {
  fd_progcache_join_t * ljoin = cache->join;
  fd_rwlock_write( &ljoin->shmem->clock.lock );

  /* Evict until a hardcoded threshold worth of data is freed */

  uint head = ljoin->shmem->clock.head;
  uint chain_max = (uint)fd_prog_recm_chain_cnt( ljoin->rec.map );
  ulong free_target = cache->metrics->evict_freed_sz + (16UL<<20); /* 16 MiB */
  for(;;) {
    fd_prog_recm_shmem_private_chain_t * chain = fd_prog_recm_shmem_private_chain( ljoin->rec.map->map, head );
    uint clock_pre = atomic_load_explicit( &chain->clock, memory_order_relaxed );
    ulong ver_cnt = atomic_load_explicit( (atomic_ulong *)&chain->ver_cnt, memory_order_relaxed );
    ulong cnt = fd_prog_recm_private_vcnt_cnt( ver_cnt );
    if( FD_LIKELY( clock_pre ) ) {
      /* Recently touched: clear the bit, spare this chain one round */
      atomic_store_explicit( &chain->clock, 0, memory_order_relaxed );
    } else if( cnt ) {
      fd_progcache_chain_evict( cache, head );
      if( cache->metrics->evict_freed_sz >= free_target ) break;
    }
    /* NOTE(review): if every chain is empty and untouched, this loop
       never reaches free_target -- presumably callers only evict when
       the cache holds data; confirm. */
    head++;
    if( head==chain_max ) head = 0; /* wrap around the chain table */
  }
  ljoin->shmem->clock.head = head; /* resume the sweep here next time */

  fd_rwlock_unwrite( &ljoin->shmem->clock.lock );
}
164 :
/* fd_progcache_load_fork pivots the progcache object to the selected
   fork (identified by tip XID).

   Populates cache->fork, which is a array-backed list of XIDs sorted
   newest to oldest.  Cache lookups only respect records with an XID
   present in that list.

   For any given xid, the epoch_slot0 is assumed to stay constant. */

static void
fd_progcache_load_fork_slow( fd_progcache_t * cache,
                             fd_funk_txn_xid_t const * xid,
                             ulong epoch_slot0 ) {
  fd_accdb_lineage_t * lineage = cache->lineage;
  fd_progcache_join_t const * ljoin = cache->join;
  fd_rwlock_read( &ljoin->shmem->txn.rwlock ); /* freeze the fork graph while walking */

  fd_funk_txn_xid_t next_xid = *xid;
  if( FD_UNLIKELY( next_xid.ul[0]<epoch_slot0 ) ) {
    FD_LOG_CRIT(( "fd_progcache_load_fork: attempted to load xid=%lu:%lu, which predates first slot of bank's epoch (epoch_slot0=%lu)",
                  next_xid.ul[0], next_xid.ul[1], epoch_slot0 ));
  }

  lineage->fork_depth = 0UL;

  /* Walk parent pointers from the tip, recording each XID newest to
     oldest.  Terminates at an XID missing from the fork graph, at the
     root, or at the epoch boundary. */
  ulong i;
  for( i=0UL;; i++ ) {
    if( FD_UNLIKELY( i>=FD_PROGCACHE_DEPTH_MAX ) ) {
      FD_LOG_CRIT(( "fd_progcache_load_fork: fork depth exceeded max of %lu", (ulong)FD_PROGCACHE_DEPTH_MAX ));
    }
    uint next_idx = (uint)fd_prog_txnm_idx_query_const( ljoin->txn.map, &next_xid, UINT_MAX, ljoin->txn.pool );
    if( FD_UNLIKELY( next_idx==UINT_MAX ) ) break; /* xid not in fork graph */
    fd_progcache_txn_t * candidate = &ljoin->txn.pool[ next_idx ];

    uint parent_idx = candidate->parent_idx;
    FD_TEST( parent_idx!=next_idx ); /* self-parent would loop forever */
    lineage->fork[ i ] = next_xid;
    if( parent_idx==UINT_MAX || next_xid.ul[0]<epoch_slot0 ) {
      /* Reached root or fork graph node is from previous epoch */
      i++; /* count the entry just recorded */
      break;
    }
    next_xid = ljoin->txn.pool[ parent_idx ].xid;
  }

  lineage->fork_depth = i;

  fd_rwlock_unread( &ljoin->shmem->txn.rwlock );
}
214 :
215 : static inline void
216 : fd_progcache_load_fork( fd_progcache_t * cache,
217 : fd_funk_txn_xid_t const * xid,
218 13607 : ulong epoch_slot0 ) {
219 : /* Skip if already on the correct fork */
220 13607 : fd_accdb_lineage_t * lineage = cache->lineage;
221 13607 : if( FD_LIKELY( (!!lineage->fork_depth) & (!!fd_funk_txn_xid_eq( &lineage->fork[ 0 ], xid ) ) ) ) return;
222 11410 : fd_progcache_load_fork_slow( cache, xid, epoch_slot0 ); /* switch fork */
223 11410 : }
224 :
/* fd_progcache_query searches for a program cache entry on the current
   fork.  Stops short of an epoch boundary. */

/* fd_progcache_search_chain speculatively walks one rec_map chain
   looking for the newest record matching key that belongs to the
   current fork and is not older than epoch_slot0.  On success stores
   the winner (read locked) or NULL into *out_rec and returns
   FD_MAP_SUCCESS.  Returns FD_MAP_ERR_AGAIN when the walk raced with a
   writer (chain locked, record lock contended, or chain version
   changed); caller must retry. */

static int
fd_progcache_search_chain( fd_progcache_t const * cache,
                           ulong chain_idx,
                           fd_funk_rec_key_t const * key,
                           ulong epoch_slot0,
                           fd_progcache_rec_t ** out_rec ) { /* read locked */
  *out_rec = NULL;

  fd_progcache_join_t const * ljoin = cache->join;
  fd_accdb_lineage_t const * lineage = cache->lineage;
  fd_prog_recm_shmem_t * shmap = ljoin->rec.map->map;
  fd_prog_recm_shmem_private_chain_t const * chain_tbl = fd_prog_recm_shmem_private_chain_const( shmap, 0UL );
  fd_prog_recm_shmem_private_chain_t const * chain = chain_tbl + chain_idx;
  fd_progcache_rec_t * rec_tbl = ljoin->rec.pool->ele;
  ulong rec_max = fd_prog_recp_ele_max( ljoin->rec.pool );
  ulong ver_cnt = FD_VOLATILE_CONST( chain->ver_cnt ); /* snapshot for overrun detection */

  /* Start a speculative transaction for the chain containing revisions
     of the program cache key we are looking for. */
  ulong cnt = fd_prog_recm_private_vcnt_cnt( ver_cnt );
  if( FD_UNLIKELY( fd_prog_recm_private_vcnt_ver( ver_cnt )&1 ) ) {
    return FD_MAP_ERR_AGAIN; /* chain is locked */
  }
  FD_COMPILER_MFENCE();
  uint ele_idx = chain->head_cidx;

  /* Walk the map chain, remember the best entry */
  fd_progcache_rec_t * best = NULL;
  long best_slot = -1L;
  for( ulong i=0UL; i<cnt; i++, ele_idx=FD_VOLATILE_CONST( rec_tbl[ ele_idx ].map_next ) ) {
    if( FD_UNLIKELY( (ulong)ele_idx >= rec_max ) ) return FD_MAP_ERR_AGAIN; /* torn read: index out of bounds */
    fd_progcache_rec_t * rec = &rec_tbl[ ele_idx ];

    /* Skip over unrelated records (hash collision) */
    if( FD_UNLIKELY( !fd_funk_rec_key_eq( rec->pair.key, key ) ) ) continue;

    /* Skip over records from an older epoch (FIXME could bail early
       here if the chain is ordered) */
    ulong found_slot = rec->pair.xid->ul[0];
    if( found_slot==ULONG_MAX ) found_slot = FD_VOLATILE_CONST( ljoin->shmem->txn.last_publish->ul[0] ); /* rooted record: treat as last published slot */

    if( FD_UNLIKELY( found_slot<epoch_slot0 ) ) continue;

    /* Skip over records that are older than what we already have */
    if( FD_UNLIKELY( (long)found_slot<best_slot ) ) continue;

    /* Confirm that record is part of the current fork */
    if( FD_UNLIKELY( !fd_accdb_lineage_has_xid( lineage, rec->pair.xid ) ) ) continue;

    best = rec;
    best_slot = (long)found_slot;
    if( FD_UNLIKELY( rec->map_next==ele_idx ) ) {
      FD_LOG_CRIT(( "fd_progcache_search_chain detected cycle" ));
    }
    if( rec->map_next > rec_max ) {
      /* Out-of-range next index is only legal if it is the null idx */
      if( FD_UNLIKELY( !fd_funk_rec_map_private_idx_is_null( rec->map_next ) ) ) {
        FD_LOG_CRIT(( "fd_progcache_search_chain detected memory corruption: rec->map_next %u is out of bounds (rec_max %lu)",
                      rec->map_next, rec_max ));
      }
    }
  }
  /* Read-lock the winner before validating the chain version so an
     evictor cannot free it out from under us */
  if( best && FD_UNLIKELY( !fd_rwlock_tryread( &best->lock ) ) ) {
    return FD_MAP_ERR_AGAIN;
  }

  /* Retry if we were overrun */
  if( FD_UNLIKELY( FD_VOLATILE_CONST( chain->ver_cnt )!=ver_cnt ) ) {
    if( best ) fd_rwlock_unread( &best->lock );
    return FD_MAP_ERR_AGAIN;
  }

  *out_rec = best;
  return FD_MAP_SUCCESS;
}
302 :
303 : static fd_progcache_rec_t * /* read locked */
304 : fd_progcache_query( fd_progcache_t * cache,
305 : fd_funk_txn_xid_t const * xid,
306 : fd_funk_rec_key_t const * key,
307 6803 : ulong epoch_slot0 ) {
308 : /* Hash key to chain */
309 6803 : fd_funk_xid_key_pair_t pair[1];
310 6803 : fd_funk_txn_xid_copy( pair->xid, xid );
311 6803 : fd_funk_rec_key_copy( pair->key, key );
312 6803 : fd_prog_recm_t const * rec_map = cache->join->rec.map;
313 6803 : ulong hash = fd_funk_rec_map_key_hash( pair, rec_map->map->seed );
314 6803 : ulong chain_idx = (hash & (rec_map->map->chain_cnt-1UL) );
315 :
316 : /* Traverse chain for candidate */
317 6803 : fd_progcache_rec_t * rec = NULL;
318 6803 : for(;;) {
319 6802 : int err = fd_progcache_search_chain( cache, chain_idx, key, epoch_slot0, &rec );
320 6807 : if( FD_LIKELY( err==FD_MAP_SUCCESS ) ) break;
321 >1844*10^16 : FD_SPIN_PAUSE();
322 >1844*10^16 : fd_racesan_hook( "fd_progcache_query_wait" );
323 : /* FIXME backoff */
324 >1844*10^16 : }
325 :
326 6803 : return rec;
327 6803 : }
328 :
329 : fd_progcache_rec_t * /* read locked */
330 : fd_progcache_peek( fd_progcache_t * cache,
331 : fd_funk_txn_xid_t const * xid,
332 : void const * prog_addr,
333 6803 : ulong epoch_slot0 ) {
334 6803 : if( FD_UNLIKELY( !cache || !cache->join->shmem ) ) FD_LOG_CRIT(( "NULL progcache" ));
335 6803 : fd_progcache_load_fork( cache, xid, epoch_slot0 );
336 6803 : fd_funk_rec_key_t key[1]; memcpy( key->uc, prog_addr, 32UL );
337 6803 : fd_progcache_rec_t * rec = fd_progcache_query( cache, xid, key, epoch_slot0 );
338 6803 : if( FD_UNLIKELY( !rec ) ) return NULL;
339 5399 : if( rec->slot < epoch_slot0 ) {
340 0 : fd_rwlock_unread( &rec->lock );
341 0 : rec = NULL;
342 0 : }
343 5399 : cache->metrics->hit_cnt += !!rec;
344 5399 : return rec;
345 6803 : }
346 :
347 : static void
348 : fd_progcache_rec_push_tail( fd_progcache_rec_t * rec_pool,
349 : fd_progcache_rec_t * rec,
350 : uint * rec_head_idx, /* write locked (txn) */
351 1404 : uint * rec_tail_idx ) {
352 1404 : uint rec_idx = (uint)( rec - rec_pool );
353 1404 : uint rec_prev_idx = *rec_tail_idx;
354 :
355 1404 : rec->prev_idx = rec_prev_idx;
356 1404 : rec->next_idx = FD_FUNK_REC_IDX_NULL;
357 :
358 1404 : if( fd_funk_rec_idx_is_null( rec_prev_idx ) ) {
359 913 : *rec_head_idx = rec_idx;
360 913 : } else {
361 491 : rec_pool[ rec_prev_idx ].next_idx = rec_idx;
362 491 : }
363 1404 : *rec_tail_idx = rec_idx;
364 1404 : }
365 :
/* fd_progcache_push publishes rec into the global record index keyed by
   (prog_addr, txn->xid) -- or the root xid when txn is NULL.  txn, if
   given, must be locked by the caller.  target_slot decides whether an
   existing rooted record gets replaced.  Returns 1 on success, 0 when
   an equivalent record already exists (caller must release rec). */

__attribute__((warn_unused_result))
static int
fd_progcache_push( fd_progcache_join_t * cache,
                   fd_progcache_txn_t * txn, /* read locked */
                   fd_progcache_rec_t * rec,
                   void const * prog_addr,
                   ulong target_slot ) {

  /* Determine record's xid-key pair */

  rec->prev_idx = FD_FUNK_REC_IDX_NULL;
  rec->next_idx = FD_FUNK_REC_IDX_NULL;
  memcpy( rec->pair.key, prog_addr, 32UL );
  if( FD_UNLIKELY( txn ) ) {
    fd_funk_txn_xid_copy( rec->pair.xid, &txn->xid );
  } else {
    fd_funk_txn_xid_set_root( rec->pair.xid );
  }

  /* Lock rec_map chain, entering critical section */

  struct {
    fd_prog_recm_txn_t txn[1];
    fd_prog_recm_txn_private_info_t info[1];
  } _map_txn;
  fd_prog_recm_txn_t * map_txn = fd_prog_recm_txn_init( _map_txn.txn, cache->rec.map, 1UL );
  fd_prog_recm_txn_add( map_txn, &rec->pair, 1 );
  int txn_err = fd_prog_recm_txn_try( map_txn, FD_MAP_FLAG_BLOCKING );
  if( FD_UNLIKELY( txn_err!=FD_MAP_SUCCESS ) ) {
    FD_LOG_CRIT(( "Failed to insert progcache record: cannot lock funk rec map chain: %i-%s", txn_err, fd_map_strerror( txn_err ) ));
  }

  /* Mark chain as recently accessed (sets the clock bit so the next
     eviction round spares it) */

  fd_prog_recm_query_t query[1];
  int query_err = fd_prog_recm_txn_query( cache->rec.map, &rec->pair, NULL, query, 0 );
  fd_prog_recm_clock_touch( cache, query->memo );

  /* Check if record exists */

  if( FD_UNLIKELY( query_err==FD_MAP_SUCCESS ) ) {
    /* Always replace existing rooted records */
    fd_progcache_rec_t * prev_rec = query->ele;
    if( fd_funk_txn_xid_eq_root( rec->pair.xid ) && prev_rec->slot < target_slot ) {
      fd_rwlock_write( &prev_rec->lock );
      fd_prog_recm_txn_remove( cache->rec.map, &rec->pair, NULL, query, FD_MAP_FLAG_USE_HINT );
      fd_progcache_val_free( prev_rec, cache );
      fd_rwlock_unwrite( &prev_rec->lock );
      fd_prog_recp_release( cache->rec.pool, prev_rec, 1 );
    } else {
      /* Equivalent record already present (lost an insert race): bail */
      fd_prog_recm_txn_test( map_txn );
      fd_prog_recm_txn_fini( map_txn );
      return 0;
    }
  } else if( FD_UNLIKELY( query_err!=FD_MAP_ERR_KEY ) ) {
    FD_LOG_CRIT(( "fd_prog_recm_txn_query failed: %i-%s", query_err, fd_map_strerror( query_err ) ));
  }

  /* Phase 4: Insert new record */

  int insert_err = fd_prog_recm_txn_insert( cache->rec.map, rec );
  if( FD_UNLIKELY( insert_err!=FD_MAP_SUCCESS ) ) {
    FD_LOG_CRIT(( "fd_prog_recm_txn_insert failed: %i-%s", insert_err, fd_map_strerror( insert_err ) ));
  }

  /* Phase 5: Insert rec into rec_map */

  if( txn ) {
    fd_progcache_rec_push_tail( cache->rec.pool->ele,
                                rec,
                                &txn->rec_head_idx,
                                &txn->rec_tail_idx );
  }

  /* Phase 6: Finish rec_map transaction */

  int test_err = fd_prog_recm_txn_test( map_txn );
  if( FD_UNLIKELY( test_err!=FD_MAP_SUCCESS ) ) FD_LOG_CRIT(( "fd_prog_recm_txn_test failed: %i-%s", test_err, fd_map_strerror( test_err ) ));
  fd_prog_recm_txn_fini( map_txn );
  return 1;
}
447 :
/* fd_progcache_lock_best_txn picks a fork graph node close to
   target_slot and write locks it for program cache entry insertion.

   The cache entry should be placed as far up the fork graph as
   possible (so it can be shared across more downstream forks), but not
   too early (or it would cause non-determinism).

   Returns the write-locked txn, or NULL when target_slot is already
   covered by the last published root (caller roots the record
   directly). */

static fd_progcache_txn_t *
fd_progcache_lock_best_txn( fd_progcache_t * cache,
                            ulong target_slot ) {
  fd_progcache_join_t * ljoin = cache->join;
  fd_accdb_lineage_t * lineage = cache->lineage;

  fd_funk_txn_xid_t last_publish[1];
  fd_funk_txn_xid_ld_atomic( last_publish, ljoin->shmem->txn.last_publish );
  if( target_slot <= last_publish->ul[0] &&
      !fd_funk_txn_xid_eq_root( last_publish ) ) {
    return NULL; /* publishing record immediately */
  }

  /* Scan fork graph for oldest node >= the target slot.  lineage->fork
     is sorted newest to oldest, so the last index that still satisfies
     the slot bound wins. */
  ulong target_xid_idx = ULONG_MAX;
  ulong fork_depth = lineage->fork_depth;
  for( ulong xid_idx=0UL; xid_idx<fork_depth && lineage->fork[ xid_idx ].ul[0]>=target_slot; xid_idx++ ) {
    target_xid_idx = xid_idx;
  }

  if( FD_UNLIKELY( target_xid_idx==ULONG_MAX ) ) FD_LOG_CRIT(( "no target xid idx found for slot %lu", target_slot ));
  fd_funk_txn_xid_t const * xid = &lineage->fork[ target_xid_idx ];
  /* Never insert at the root node itself: step one entry toward the tip
     (idx 0 would wrap to ULONG_MAX and trip the check below) */
  if( FD_UNLIKELY( fd_funk_txn_xid_eq_root( xid ) ) ) target_xid_idx--;
  if( FD_UNLIKELY( target_xid_idx==ULONG_MAX ) ) FD_LOG_CRIT(( "no target xid idx found for slot %lu", target_slot ));
  xid = &lineage->fork[ target_xid_idx ];

  fd_rwlock_read( &ljoin->shmem->txn.rwlock );
  fd_progcache_txn_t * txn = fd_prog_txnm_ele_query( ljoin->txn.map, xid, NULL, ljoin->txn.pool );
  if( FD_UNLIKELY( !txn ) ) {
    /* Did replay tile root this slot in the mean time? */
    fd_funk_txn_xid_ld_atomic( last_publish, ljoin->shmem->txn.last_publish );
    if( FD_LIKELY( target_slot <= last_publish->ul[0] &&
                   !fd_funk_txn_xid_eq_root( last_publish ) ) ) {
      fd_rwlock_unread( &ljoin->shmem->txn.rwlock );
      return NULL; /* published in the meantime */
    }
    FD_LOG_CRIT(( "XID %lu:%lu is missing", xid->ul[0], xid->ul[1] ));
  }
  /* Lock ordering: take the txn write lock before dropping the graph
     read lock so the node cannot disappear in between */
  fd_rwlock_write( &txn->lock );
  fd_rwlock_unread( &ljoin->shmem->txn.rwlock );
  return txn;
}
497 :
/* account_xid_lower_bound tries to find the oldest XID at which the
   given record is exactly present.  sample_xid is an arbitrary XID at
   which the given record is present.  May return a newer XID, if the
   oldest XID cannot be determined exactly. */

static fd_funk_txn_xid_t
account_xid_lower_bound( fd_accdb_user_t * accdb,
                         fd_accdb_ro_t const * record,
                         fd_funk_txn_xid_t const * sample_xid ) {
  switch( record->ref->accdb_type ) {
  case FD_ACCDB_TYPE_V1: { /* possibly rooted */
    fd_funk_rec_t * rec = (fd_funk_rec_t *)record->ref->user_data;
    fd_funk_txn_xid_t res;
    fd_funk_txn_xid_ld_atomic( &res, rec->pair.xid );
    if( FD_UNLIKELY( fd_funk_txn_xid_eq_root( &res ) ) ) {
      /* Rooted record: substitute the last published XID */
      fd_funk_txn_xid_ld_atomic( &res, fd_funk_last_publish( fd_accdb_user_v1_funk( accdb ) ) );
    }
    return res;
  }
  case FD_ACCDB_TYPE_V2: { /* rooted */
    fd_funk_txn_xid_t res;
    /* NOTE(review): the V2 branch also calls fd_accdb_user_v1_funk --
       confirm this is intentional and not a copy/paste of the V1
       accessor */
    fd_funk_txn_xid_ld_atomic( &res, fd_funk_last_publish( fd_accdb_user_v1_funk( accdb ) ) );
    return res;
  }
  default: /* unknown */
    return *sample_xid;
  }
}
526 :
/* fd_progcache_spill_open loads a program into the cache spill buffer.
   The spill area is an "emergency" area for temporary program loads in
   case the record pool/heap are too contended.

   Takes (or re-enters) the spill write lock, which is held until the
   matching fd_progcache_spill_close.  elf_info==NULL marks the record
   non-loadable without attempting a load.  Returns the spill-resident
   record, read locked. */

static fd_progcache_rec_t * /* read locked */
fd_progcache_spill_open( fd_progcache_t * cache,
                         fd_sbpf_elf_info_t const * elf_info,
                         fd_sbpf_loader_config_t const * config,
                         ulong const load_slot,
                         fd_features_t const * features,
                         uchar const * bin,
                         ulong bin_sz ) {
  fd_progcache_join_t * join = cache->join;
  fd_progcache_shmem_t * shmem = join->shmem;
  /* First spill on this cache acquires the lock; nested spills must
     already hold it in write mode */
  if( !cache->spill_active ) fd_rwlock_write( &shmem->spill.lock );
  else FD_TEST( FD_VOLATILE_CONST( shmem->spill.lock.value )==FD_RWLOCK_WRITE_LOCK );

  /* Allocate record (capacity bounded by instruction stack depth) */

  if( FD_UNLIKELY( shmem->spill.rec_used >= FD_MAX_INSTRUCTION_STACK_DEPTH ) ) {
    FD_LOG_CRIT(( "spill buffer overflow: rec_used=%u rec_max=%lu", shmem->spill.rec_used, FD_MAX_INSTRUCTION_STACK_DEPTH ));
  }
  cache->spill_active++;
  uint rec_idx = shmem->spill.rec_used++;
  shmem->spill.spad_off[ rec_idx ] = shmem->spill.spad_used; /* remember offset for LIFO rewind on close */
  fd_progcache_rec_t * rec = &shmem->spill.rec[ rec_idx ];
  memset( rec, 0, sizeof(fd_progcache_rec_t) );
  rec->lock.value = 1; /* read lock; no concurrency, don't need CAS */
  rec->exists = 1;
  rec->slot = load_slot;

  /* Load program */

  if( elf_info ) {
    ulong off0 = fd_ulong_align_up( shmem->spill.spad_used, fd_progcache_val_align() );
    ulong off1 = off0 + fd_progcache_val_footprint( elf_info );
    if( FD_UNLIKELY( off1 > FD_PROGCACHE_SPAD_MAX ) ) {
      FD_LOG_CRIT(( "spill buffer overflow: spad_used=%u val_sz=%lu spad_max=%lu", shmem->spill.spad_used, off1-off0, FD_PROGCACHE_SPAD_MAX ));
    }
    rec->data_gaddr = fd_wksp_gaddr_fast( join->wksp, shmem->spill.spad + off0 );
    rec->data_max = (uint)( off1 - off0 );

    long dt = -fd_tickcount(); /* time the load for metrics */
    if( FD_LIKELY( fd_progcache_rec_load( rec, cache->join->wksp, elf_info, config, load_slot, features, bin, bin_sz, cache->scratch, cache->scratch_sz ) ) ) {
      /* Valid program, allocate data */
      shmem->spill.spad_used = (uint)off1;
    } else {
      /* Load failed: mark non-executable; spad allocation not kept */
      fd_progcache_rec_nx( rec );
    }
    dt += fd_tickcount();
    cache->metrics->cum_load_ticks += (ulong)dt;

  } else {
    rec->data_gaddr = 0UL;
    rec->data_max = 0U;
  }

  cache->metrics->spill_cnt++;
  cache->metrics->spill_tot_sz += rec->rodata_sz;

  FD_TEST( rec->exists );
  return rec;
}
590 :
/* fd_progcache_insert allocates a cache entry, loads a program into it,
   and publishes the cache entry to the global index (recm). If an OOM
   condition is detected, attempts to run the cache eviction algo, and
   finally falls back to using the spill buffer. Returns NULL if the
   insertion raced with another thread (frees any previously allocated
   resource in that case).  Returns INVALID_PROGRAM if the account does
   not provide ELF data at all. */

#define INVALID_PROGRAM ((fd_progcache_rec_t *)0x1)

static fd_progcache_rec_t * /* read locked */
fd_progcache_insert( fd_progcache_t * cache,
                     fd_accdb_user_t * accdb,
                     fd_funk_txn_xid_t const * load_xid,
                     void const * prog_addr,
                     fd_prog_load_env_t const * env,
                     long slot_min ) {
  fd_progcache_join_t * ljoin = cache->join;

  /* XID overview:

     - load_xid: tip of fork currently being executed
     - modify_xid: xid in which program was last modified / deployed
     - txn->xid: xid in which program cache entry is inserted

     slot(load_xid) > slot(entry_xid) >= slot(txn->xid) */

  /* Acquire reference to ELF binary data */

  fd_accdb_ro_t progdata[1];
  ulong elf_offset;
  if( FD_UNLIKELY( !fd_prog_load_elf( accdb, load_xid, progdata, prog_addr, &elf_offset ) ) ) {
    return INVALID_PROGRAM; /* no ELF data to load */
  }

  uchar const * bin = (uchar const *)fd_accdb_ref_data_const( progdata ) + elf_offset;
  ulong bin_sz = /* */fd_accdb_ref_data_sz ( progdata ) - elf_offset;

  /* Pre-flight checks, determine required buffer size */

  fd_features_t const * features = env->features;
  ulong const load_slot = env->slot;
  fd_prog_versions_t versions = fd_prog_versions( features, load_slot );
  fd_sbpf_loader_config_t config = {
    .sbpf_min_version = versions.min_sbpf_version,
    .sbpf_max_version = versions.max_sbpf_version,
  };
  fd_sbpf_elf_info_t elf_info[1];
  int peek_err = fd_sbpf_elf_peek( elf_info, bin, bin_sz, &config );

  /* Derive the slot in which the account was modified in */

  fd_funk_txn_xid_t modify_xid = account_xid_lower_bound( accdb, progdata, load_xid );
  ulong target_slot = modify_xid.ul[0];
  /* Prevent cache entry from crossing epoch boundary */
  target_slot = fd_ulong_max( target_slot, env->epoch_slot0 );
  /* Prevent cache entry from shadowing invalidation */
  target_slot = (ulong)fd_long_max( (long)target_slot, slot_min );

  /* Allocate record and heap space */

  fd_progcache_rec_t * rec = fd_prog_recp_acquire( ljoin->rec.pool, NULL, 1, NULL );
  if( FD_UNLIKELY( !rec ) ) {
    cache->metrics->oom_desc_cnt++;
    fd_progcache_clock_evict( cache );
    rec = fd_prog_recp_acquire( ljoin->rec.pool, NULL, 1, NULL ); /* retry once after eviction */
    if( FD_UNLIKELY( !rec ) ) {
      /* Out of memory (record table): fall back to the spill buffer */
      if( peek_err==FD_SBPF_ELF_SUCCESS ) {
        rec = fd_progcache_spill_open( cache, elf_info, &config, load_slot, features, bin, bin_sz );
      } else {
        rec = fd_progcache_spill_open( cache, NULL, NULL, load_slot, features, NULL, 0UL );
      }
      fd_accdb_close_ro( accdb, progdata );
      return rec;
    }
  }
  memset( rec, 0, sizeof(fd_progcache_rec_t) );
  rec->exists = 1;
  rec->slot = target_slot;

  if( FD_LIKELY( peek_err==FD_SBPF_ELF_SUCCESS ) ) {
    ulong val_align = fd_progcache_val_align();
    ulong val_footprint = fd_progcache_val_footprint( elf_info );
    if( FD_UNLIKELY( !fd_progcache_val_alloc( rec, ljoin, val_align, val_footprint ) ) ) {
      cache->metrics->oom_heap_cnt++;
      fd_progcache_clock_evict( cache );
      if( FD_UNLIKELY( !fd_progcache_val_alloc( rec, ljoin, val_align, val_footprint ) ) ) {
        /* Out of memory (heap): release the record, spill instead */
        rec->exists = 0;
        fd_prog_recp_release( ljoin->rec.pool, rec, 1 );
        rec = fd_progcache_spill_open( cache, elf_info, &config, load_slot, features, bin, bin_sz );
        fd_accdb_close_ro( accdb, progdata );
        return rec;
      }
    }
  } else {
    /* ELF header rejected: cache a non-executable marker */
    fd_progcache_rec_nx( rec );
  }

  /* Publish cache entry to index */

  fd_progcache_txn_t * txn = fd_progcache_lock_best_txn( cache, target_slot );
  fd_rwlock_write( &rec->lock );
  int push_ok = fd_progcache_push( ljoin, txn, rec, prog_addr, target_slot );
  if( txn ) fd_rwlock_unwrite( &txn->lock );
  if( FD_UNLIKELY( !push_ok ) ) {
    /* Raced with another inserter: roll back all allocations */
    fd_rwlock_unwrite( &rec->lock );
    fd_progcache_val_free( rec, ljoin );
    fd_prog_recp_release( ljoin->rec.pool, rec, 1 );
    fd_accdb_close_ro( accdb, progdata );
    return NULL;
  }

  /* Load program
     (The write lock was acquired before loading such that another
     thread trying to load the same record instead waits for us to
     complete) */

  if( FD_LIKELY( peek_err==FD_SBPF_ELF_SUCCESS ) ) {
    long dt = -fd_tickcount();
    if( FD_UNLIKELY( !fd_progcache_rec_load( rec, cache->join->wksp, elf_info, &config, load_slot, features, bin, bin_sz, cache->scratch, cache->scratch_sz ) ) ) {
      /* Not a valid program (mark cache entry as non-executable) */
      fd_progcache_val_free( rec, ljoin );
      fd_progcache_rec_nx( rec );
    }
    dt += fd_tickcount();
    cache->metrics->cum_load_ticks += (ulong)dt;
  }

  fd_rwlock_demote( &rec->lock ); /* hand the caller a read lock */
  fd_accdb_close_ro( accdb, progdata );

  cache->metrics->fill_cnt++;
  cache->metrics->fill_tot_sz += rec->rodata_sz;
  FD_TEST( rec->exists );
  return rec;
}
728 :
/* fd_progcache_pull returns the cache entry for prog_addr on the fork
   ending at xid, inserting (loading the program) on a miss.  The
   returned record is read locked; release with fd_progcache_rec_close.
   Returns NULL when the account does not hold loadable ELF data. */

fd_progcache_rec_t * /* read locked */
fd_progcache_pull( fd_progcache_t * cache,
                   fd_accdb_user_t * accdb,
                   fd_funk_txn_xid_t const * xid,
                   void const * prog_addr,
                   fd_prog_load_env_t const * env ) {
  if( FD_UNLIKELY( !cache || !cache->join->shmem ) ) FD_LOG_CRIT(( "NULL progcache" ));
  long dt = -fd_tickcount();
  fd_progcache_load_fork( cache, xid, env->epoch_slot0 );
  cache->metrics->lookup_cnt++;

retry:;
  fd_progcache_rec_t * found_rec = fd_progcache_peek( cache, xid, prog_addr, env->epoch_slot0 );
  long slot_min = -1L; /* lower bound for a replacement entry's slot */
  if( !found_rec ) goto miss;

  /* Cache invalidation, update next slot */
  if( found_rec->invalidate ) {
    slot_min = (long)found_rec->slot+1L; /* reload must land past the tombstone */
    if( FD_UNLIKELY( xid->ul[0] < (ulong)slot_min ) ) {
      FD_LOG_CRIT(( "Program cache entry %016lx%016lx%016lx%016lx invalidated at slot %lu but loaded at slot %lu",
                    fd_ulong_bswap( FD_LOAD( ulong, (uchar const *)prog_addr ) ),
                    fd_ulong_bswap( FD_LOAD( ulong, (uchar const *)prog_addr+ 8 ) ),
                    fd_ulong_bswap( FD_LOAD( ulong, (uchar const *)prog_addr+16 ) ),
                    fd_ulong_bswap( FD_LOAD( ulong, (uchar const *)prog_addr+24 ) ),
                    found_rec->slot,
                    xid->ul[0] ));
    }
    fd_rwlock_unread( &found_rec->lock );
    goto miss;
  }

  goto done;

miss:
  cache->metrics->miss_cnt++;
  found_rec = fd_progcache_insert( cache, accdb, xid, prog_addr, env, slot_min );
  if( !found_rec ) goto retry; /* lost an insert race; look up the winner */
  if( found_rec==INVALID_PROGRAM ) return NULL; /* account has no loadable ELF */
done:
  dt += fd_tickcount();
  cache->metrics->cum_pull_ticks += (ulong)dt;
  return found_rec;
}
773 :
/* fd_progcache_invalidate inserts a tombstone record for prog_addr at
   (xid, slot): pulls on that fork at later slots will miss the stale
   entry and reload the program past the invalidation point. */

void
fd_progcache_invalidate( fd_progcache_t * cache,
                         fd_funk_txn_xid_t const * xid,
                         void const * prog_addr,
                         ulong slot ) {
  if( FD_UNLIKELY( !cache || !cache->join->shmem ) ) FD_LOG_CRIT(( "NULL progcache" ));
  fd_progcache_join_t * ljoin = cache->join;

  /* Acquire a record, evicting until one frees up */
  fd_progcache_rec_t * rec = NULL;
  for(;;) {
    rec = fd_prog_recp_acquire( ljoin->rec.pool, NULL, 1, NULL );
    if( FD_LIKELY( rec ) ) break;
    fd_progcache_clock_evict( cache );
  }
  rec->exists = 1;
  rec->slot = slot;
  fd_progcache_rec_nx( rec ); /* tombstone is non-executable */
  rec->invalidate = 1;

  fd_rwlock_read( &ljoin->shmem->txn.rwlock );
  fd_progcache_txn_t * txn = (fd_progcache_txn_t * )fd_prog_txnm_ele_query_const( ljoin->txn.map, xid, NULL, ljoin->txn.pool );
  if( FD_UNLIKELY( !txn ) ) {
    FD_LOG_CRIT(( "fd_progcache_invalidate(xid=%lu:%lu) failed: database transaction not found", xid->ul[0], xid->ul[1] ));
  }

  /* Lock ordering: txn write lock taken before releasing the fork
     graph read lock */
  fd_rwlock_write( &txn->lock );
  fd_rwlock_unread( &ljoin->shmem->txn.rwlock );
  int push_ok = fd_progcache_push( cache->join, txn, rec, prog_addr, slot );
  fd_rwlock_unwrite( &txn->lock );
  if( FD_UNLIKELY( !push_ok ) ) {
    /* A record for this pair already exists: roll back */
    rec->exists = 0;
    fd_prog_recp_release( ljoin->rec.pool, rec, 1 );
  }

  cache->metrics->invalidate_cnt++;
  return;
}
811 :
/* fd_progcache_spill_close releases one spill record opened by
   fd_progcache_spill_open, rewinding spill allocations in LIFO order
   and dropping the spill write lock once the last open spill record on
   this cache join is closed. */

static void
fd_progcache_spill_close( fd_progcache_t * cache ) {
  FD_TEST( cache->spill_active );
  cache->spill_active--;

  fd_progcache_shmem_t * shmem = cache->join->shmem;

  /* Cascade: rewind rec_used and spad_used while the top record is
     closed. This reclaims spill spad memory in LIFO order. */
  while( shmem->spill.rec_used > 0 &&
         !shmem->spill.rec[ shmem->spill.rec_used-1 ].exists ) {
    shmem->spill.rec_used--;
    shmem->spill.spad_used = shmem->spill.spad_off[ shmem->spill.rec_used ];
  }

  if( cache->spill_active==0 ) {
    /* Last close: the spill area must be fully drained and the lock
       still held in write mode before unlocking */
    fd_rwlock_t * spill_lock = &shmem->spill.lock;
    FD_TEST( spill_lock->value==0xFFFF );
    FD_TEST( shmem->spill.rec_used==0 );
    FD_TEST( shmem->spill.spad_used==0 );
    fd_rwlock_unwrite( spill_lock );
  }
}
835 :
836 : void
837 : fd_progcache_rec_close( fd_progcache_t * cache,
838 6808 : fd_progcache_rec_t * rec ) {
839 6808 : if( FD_UNLIKELY( !rec ) ) return;
840 6808 : if( FD_UNLIKELY( !rec->exists ) ) FD_LOG_CRIT(( "use-after-free: progcache record %p is dead", (void *)rec ));
841 6808 : FD_TEST( FD_VOLATILE_CONST( rec->lock.value )!=0 );
842 6808 : fd_rwlock_unread( &rec->lock );
843 6808 : fd_progcache_shmem_t * shmem = cache->join->shmem;
844 6808 : if( rec >= shmem->spill.rec &&
845 6808 : rec < shmem->spill.rec + FD_MAX_INSTRUCTION_STACK_DEPTH ) {
846 0 : rec->exists = 0;
847 0 : fd_progcache_spill_close( cache );
848 0 : }
849 6808 : }
|