LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_admin.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 137 433 31.6 %
Date: 2026-03-19 18:19:27 Functions: 8 18 44.4 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : 
       3             : int
       4       49970 : fd_wksp_private_lock( fd_wksp_t * wksp ) {
       5             : # if FD_WKSP_LOCK_RECLAIM
       6             :   int   warning = 0;
       7             : #endif
       8       49970 :   ulong me      = fd_log_group_id();
       9             : 
      10       49970 :   ulong * _owner = &wksp->owner;
      11       77038 :   for(;;) {
      12             : 
      13             :     /* Note that we emulate CAS on platforms without FD_HAS_ATOMIC
      14             :        to minimize the amount of code differences we have to test.  On
      15             :        platforms without FD_HAS_ATOMIC, a workspace should not be used
      16             :        concurrently though. */
      17             : 
      18       77038 :     FD_COMPILER_MFENCE();
      19       77038 : #   if FD_HAS_ATOMIC
      20       77038 :     ulong pid = FD_ATOMIC_CAS( _owner, ULONG_MAX, me );
      21             : #   else
      22             :     ulong pid = FD_VOLATILE_CONST( *_owner );
      23             :     if( pid==ULONG_MAX ) FD_VOLATILE( *_owner ) = me;
      24             : #   endif
      25       77038 :     FD_COMPILER_MFENCE();
      26             : 
      27       77038 :     if( FD_LIKELY( pid==ULONG_MAX ) ) return FD_WKSP_SUCCESS;
      28             : 
      29             : # if FD_WKSP_LOCK_RECLAIM
      30             :     int status = fd_log_group_id_query( pid );
      31             :     if( FD_UNLIKELY( status==FD_LOG_GROUP_ID_QUERY_DEAD ) ) { /* A process died while holding the lock, try to recover the lock */
      32             : 
      33             :       FD_COMPILER_MFENCE();
      34             : #     if FD_HAS_ATOMIC
      35             :       ulong cur = FD_ATOMIC_CAS( _owner, pid, me );
      36             : #     else
      37             :       ulong cur = FD_VOLATILE_CONST( *_owner );
      38             :       if( cur==pid ) FD_VOLATILE( *_owner ) = me;
      39             : #     endif
      40             :       FD_COMPILER_MFENCE();
      41             : 
      42             :       if( FD_LIKELY( cur==pid ) ) { /* We recovered the lock from the dead pid, try to fix up incomplete ops */
      43             : 
      44             :         FD_LOG_WARNING(( "Process %lu died in an operation on wksp %s; verifying", pid, wksp->name ));
      45             :         if( FD_LIKELY( !fd_wksp_verify( wksp ) ) ) { /* logs details of issues detected */
      46             :           FD_LOG_NOTICE(( "wksp verified" ));
      47             :           return FD_WKSP_SUCCESS;
      48             :         }
      49             : 
      50             :         FD_LOG_WARNING(( "Issues detected; rebuilding" ));
      51             :         if( FD_UNLIKELY( fd_wksp_rebuild( wksp, wksp->seed ) ) ) { /* Rebuild failed (logs details of issues detected) */
      52             :           /* Return control of the lock to the previous owner */
      53             :           FD_COMPILER_MFENCE();
      54             :           FD_VOLATILE( *_owner ) = pid;
      55             :           FD_COMPILER_MFENCE();
      56             :           FD_LOG_WARNING(( "corrupt wksp detected" ));
      57             :           return FD_WKSP_ERR_CORRUPT;
      58             :         }
      59             : 
      60             :         FD_LOG_NOTICE(( "wksp rebuilt" ));
      61             :         return FD_WKSP_SUCCESS;
      62             : 
      63             :       }
      64             : 
      65             :       /* Somebody beat us to recovering the lock ... try again */
      66             : 
      67             :     } else if( FD_UNLIKELY( status!=FD_LOG_GROUP_ID_QUERY_LIVE ) ) { /* Unclear pid status ... issue a warning and try again */
      68             : 
      69             :       if( FD_UNLIKELY( !warning ) ) {
      70             :         FD_LOG_WARNING(( "wksp %s is owned by unknown pid %li; attempting to recover", wksp->name, pid ));
      71             :         warning = 1;
      72             :       }
      73             : 
      74             :     }
      75             : 
      76             :     /* At this point, either another thread in this process has the
      77             :        lock, another active thread in another process has the lock,
      78             :        another unknown status thread in other process has the lock or
      79             :        another thread beat us to reclaim the lock from a dead process.
      80             :        In any case, we don't have the lock.  Wait a while to limit O/S
      81             :        contention and try again. */
      82             : 
      83             :     FD_YIELD();
      84             : # else
      85             : 
      86             :     /* If we are running without FD_WKSP_LOCK_RECLAIM then it is assumed
      87             :        that the contention is caused by a tile pinned to another core,
      88             :        and that this core is itself pinned so spin locking is best. */
      89       26796 :     FD_SPIN_PAUSE();
      90             : 
      91       26796 : #endif
      92       26796 :   }
      93             : 
      94             :   /* never get here */
      95       49970 : }
      96             : 
      97             : /* Public APIs ********************************************************/
      98             : 
      99             : ulong
     100             : fd_wksp_part_max_est( ulong footprint,
     101           1 :                       ulong sz_typical ) {
     102           1 :   footprint       = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     103           1 :   ulong data_end  = footprint - 1UL;
     104           1 :   ulong pinfo_off = fd_wksp_private_pinfo_off();
     105           1 :   ulong consumed  = sizeof(fd_wksp_private_pinfo_t) + sz_typical;
     106           1 :   ulong part_max  = (data_end - pinfo_off) / (consumed + (ulong)!consumed); /* avoid div-by-zero */
     107           1 :   if( FD_UNLIKELY( (!footprint) | (!sz_typical) | (sz_typical>consumed) | (pinfo_off>data_end) ) ) return 0UL;
     108           1 :   return fd_ulong_min( part_max, FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     109           1 : }
     110             : 
     111             : ulong
     112             : fd_wksp_data_max_est( ulong footprint,
     113           0 :                       ulong part_max ) {
     114           0 :   footprint = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     115           0 :   ulong data_end  = footprint - 1UL;
     116           0 :   ulong data_off  = fd_wksp_private_data_off( part_max );
     117           0 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) |
     118           0 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* covered above */
     119           0 :                    (!footprint) | (data_off>=data_end) ) ) return 0UL;
     120           0 :   return data_end - data_off;
     121           0 : }
     122             : 
     123             : ulong
     124           0 : fd_wksp_align( void ) {
     125           0 :   return FD_WKSP_ALIGN;
     126           0 : }
     127             : 
     128             : ulong
     129             : fd_wksp_footprint( ulong part_max,
     130           4 :                    ulong data_max ) {
     131           4 :   ulong data_off = fd_wksp_private_data_off( part_max );
     132           4 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) | (!data_max) |
     133           4 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* Covered above */
     134           4 :                    (data_max > (ULONG_MAX - FD_WKSP_ALIGN + 1UL - data_off - 1UL)                         ) ) ) return 0UL;
     135           4 :   return fd_ulong_align_up( data_off + data_max + 1UL, FD_WKSP_ALIGN );
     136           4 : }
     137             : 
     138             : void *
     139             : fd_wksp_new( void *       shmem,
     140             :              char const * name,
     141             :              uint         seed,
     142             :              ulong        part_max,
     143           1 :              ulong        data_max ) {
     144           1 :   fd_wksp_t * wksp = (fd_wksp_t *)shmem;
     145             : 
     146           1 :   if( FD_UNLIKELY( !wksp ) ) {
     147           0 :     FD_LOG_WARNING(( "NULL shmem" ));
     148           0 :     return NULL;
     149           0 :   }
     150             : 
     151           1 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     152           0 :     FD_LOG_WARNING(( "bad align" ));
     153           0 :     return NULL;
     154           0 :   }
     155             : 
     156           1 :   ulong name_len = fd_shmem_name_len( name );
     157           1 :   if( FD_UNLIKELY( !name_len ) ) {
     158           0 :     FD_LOG_WARNING(( "bad name" ));
     159           0 :     return NULL;
     160           0 :   }
     161             : 
     162           1 :   ulong footprint = fd_wksp_footprint( part_max, data_max );
     163           1 :   if( FD_UNLIKELY( !footprint ) ) {
     164           0 :     FD_LOG_WARNING(( "bad part_max and/or data_max" ));
     165           0 :     return NULL;
     166           0 :   }
     167             : 
     168           1 :   fd_memset( wksp, 0, fd_wksp_footprint( part_max, 1UL ) );
     169             : 
     170           1 :   wksp->part_max       = part_max;
     171           1 :   wksp->data_max       = data_max;
     172           1 :   wksp->gaddr_lo       = fd_wksp_private_data_off( part_max );
     173           1 :   wksp->gaddr_hi       = wksp->gaddr_lo + data_max;
     174           1 :   fd_memcpy( wksp->name, name, name_len+1UL );
     175           1 :   wksp->seed           = seed;
     176           1 :   wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     177           1 :   wksp->part_head_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     178           1 :   wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     179           1 :   wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     180           1 :   wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     181           1 :   wksp->cycle_tag      = 4UL;  /* Verify uses tags 0-3 */
     182           1 :   wksp->owner          = 0UL;  /* Mark as locked and in construction */
     183             : 
     184             :   /* Note that wksp->owner was set to zero above, "locking" the wksp by
     185             :      group_id 0.  And the memset above set all the partition tags to
     186             :      zero such that there are no allocated partitions.  So once we set
     187             :      magic below, we can finish the initialization by rebuilding and
     188             :      unlocking.  Since fd_log_group_id is non-zero, the zero owner
     189             :      indicates to any remote observer of the shared memory region that
     190             :      the wksp is being built for the first time. */
     191             : 
     192           1 :   FD_COMPILER_MFENCE();
     193           1 :   FD_VOLATILE( wksp->magic ) = FD_WKSP_MAGIC;
     194           1 :   FD_COMPILER_MFENCE();
     195             : 
     196           1 :   int err = fd_wksp_rebuild( wksp, seed );
     197           1 :   if( FD_UNLIKELY( err ) ) { /* Should be impossible at this point */
     198             : 
     199           0 :     FD_COMPILER_MFENCE();
     200           0 :     FD_VOLATILE( wksp->magic ) = 0UL;
     201           0 :     FD_COMPILER_MFENCE();
     202             : 
     203           0 :     FD_LOG_WARNING(( "fd_wksp_rebuild failed (%i-%s)", err, fd_wksp_strerror( err ) ));
     204           0 :     return NULL;
     205           0 :   }
     206             : 
     207             :   #if FD_HAS_DEEPASAN
     208             :   /* Poison entire workspace except wksp header and the pinfo array. */
     209             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     210             :   fd_asan_poison( wksp_data, footprint - fd_wksp_private_pinfo_off() );
     211             :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     212             :   fd_asan_unpoison( pinfo, FD_WKSP_PRIVATE_PINFO_FOOTPRINT * part_max );
     213             :   #endif
     214             : 
     215           1 :   fd_wksp_private_unlock( wksp );
     216             : 
     217           1 :   return wksp;
     218           1 : }
     219             : 
     220             : fd_wksp_t *
     221           1 : fd_wksp_join( void * shwksp ) {
     222           1 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     223             : 
     224           1 :   if( FD_UNLIKELY( !wksp ) ) {
     225           0 :     FD_LOG_WARNING(( "NULL shwksp" ));
     226           0 :     return NULL;
     227           0 :   }
     228             : 
     229           1 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     230           0 :     FD_LOG_WARNING(( "bad align" ));
     231           0 :     return NULL;
     232           0 :   }
     233             : 
     234           1 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     235           0 :     FD_LOG_WARNING(( "bad magic" ));
     236           0 :     return NULL;
     237           0 :   }
     238             : 
     239           1 :   return wksp;
     240           1 : }
     241             : 
     242             : void *
     243           1 : fd_wksp_leave( fd_wksp_t * wksp ) {
     244           1 :   if( FD_UNLIKELY( !wksp ) ) {
     245           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     246           0 :     return NULL;
     247           0 :   }
     248             : 
     249           1 :   return (void *)wksp;
     250           1 : }
     251             : 
     252             : void *
     253           1 : fd_wksp_delete( void * shwksp ) {
     254           1 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     255             : 
     256           1 :   if( FD_UNLIKELY( !wksp ) ) {
     257           0 :     FD_LOG_WARNING(( "NULL shwksp" ));
     258           0 :     return NULL;
     259           0 :   }
     260             : 
     261           1 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     262           0 :     FD_LOG_WARNING(( "bad align" ));
     263           0 :     return NULL;
     264           0 :   }
     265             : 
     266           1 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     267           0 :     FD_LOG_WARNING(( "bad magic" ));
     268           0 :     return NULL;
     269           0 :   }
     270             : 
     271             :   /* TODO: consider testing owner */
     272             : 
     273           1 :   FD_COMPILER_MFENCE();
     274           1 :   FD_VOLATILE( wksp->magic ) = 0UL;
     275           1 :   FD_COMPILER_MFENCE();
     276             : 
     277             : # if FD_HAS_DEEPASAN
     278             :   /* Unpoison entire wksp region. */
     279             :   ulong footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     280             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     281             :   fd_asan_unpoison( wksp_data, footprint - fd_wksp_private_pinfo_off());
     282             : # endif
     283             : 
     284           1 :   return wksp;
     285           1 : }
     286             : 
     287           0 : char const * fd_wksp_name    ( fd_wksp_t const * wksp ) { return wksp->name;     }
     288           0 : uint         fd_wksp_seed    ( fd_wksp_t const * wksp ) { return wksp->seed;     }
     289           0 : ulong        fd_wksp_part_max( fd_wksp_t const * wksp ) { return wksp->part_max; }
     290           0 : ulong        fd_wksp_data_max( fd_wksp_t const * wksp ) { return wksp->data_max; }
     291             : 
     292             : ulong
     293           0 : fd_wksp_owner( fd_wksp_t const * wksp ) {
     294           0 :   FD_COMPILER_MFENCE();
     295           0 :   ulong owner = FD_VOLATILE_CONST( wksp->owner );
     296           0 :   FD_COMPILER_MFENCE();
     297           0 :   return owner;
     298           0 : }
     299             : 
     300             : char const *
     301           0 : fd_wksp_strerror( int err ) {
     302           0 :   switch( err ) {
     303           0 :   case FD_WKSP_SUCCESS:     return "success";
     304           0 :   case FD_WKSP_ERR_INVAL:   return "inval";
     305           0 :   case FD_WKSP_ERR_FAIL:    return "fail";
     306           0 :   case FD_WKSP_ERR_CORRUPT: return "corrupt";
     307           0 :   default: break;
     308           0 :   }
     309           0 :   return "unknown";
     310           0 : }
     311             : 
     312             : int
     313           0 : fd_wksp_verify( fd_wksp_t * wksp ) {
     314             : 
     315           0 : # define TEST(c) do {                                                                             \
     316           0 :     if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_WKSP_ERR_CORRUPT; } \
     317           0 :   } while(0)
     318             : 
     319             :   /* Validate metadata */
     320             : 
     321           0 :   TEST( wksp );
     322           0 :   TEST( wksp->magic==FD_WKSP_MAGIC );
     323             : 
     324           0 :   ulong part_max = wksp->part_max;
     325           0 :   ulong data_max = wksp->data_max;
     326           0 :   TEST( fd_wksp_footprint( part_max, data_max ) );
     327             : 
     328           0 :   ulong gaddr_lo = wksp->gaddr_lo; TEST( gaddr_lo==fd_wksp_private_data_off( part_max ) );
     329           0 :   ulong gaddr_hi = wksp->gaddr_hi; TEST( gaddr_hi==gaddr_lo+data_max                    );
     330             : 
     331           0 :   TEST( fd_shmem_name_len( wksp->name ) );
     332             : 
     333             :   /* seed is arbitrary */
     334             : 
     335           0 :   TEST( wksp->cycle_tag >= 4UL );
     336             : 
     337             :   /* TODO: consider verifying owner */
     338             : 
     339           0 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     340             : 
     341             :   /* Clear out cycle tags */
     342             : 
     343           0 :   for( ulong i=0UL; i<part_max; i++ ) pinfo[ i ].cycle_tag = 0UL;
     344             : 
     345             :   /* Verify the idle stack */
     346             : 
     347           0 :   ulong idle_cnt = 0UL;
     348             : 
     349           0 :   do {
     350           0 :     ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     351           0 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     352             : 
     353             :       /* Visit i.  Note that i has not been validated yet. */
     354             : 
     355           0 :       TEST( i<part_max            ); /* Validate i */
     356           0 :       TEST( !pinfo[ i ].cycle_tag ); /* Make sure not visited before */
     357           0 :       pinfo[ i ].cycle_tag = 1UL;    /* Mark as visited in idle stack */
     358           0 :       idle_cnt++;                    /* Update the idle cnt */
     359             : 
     360             :       /* Advance to the next idle */
     361             : 
     362           0 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx );
     363           0 :     }
     364           0 :   } while(0);
     365             : 
     366             :   /* Idle stack looks intact, verify partitioning */
     367             : 
     368           0 :   ulong free_cnt = 0UL;
     369           0 :   ulong used_cnt = 0UL;
     370             : 
     371           0 :   do {
     372           0 :     ulong j = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     373           0 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_head_cidx );
     374           0 :     ulong g = gaddr_lo;
     375             : 
     376           0 :     int last_free = 0;
     377             : 
     378           0 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     379             : 
     380             :       /* At this point, we last visited j.  Visit i.  Note that j has
     381             :          been validated but i has not. */
     382             : 
     383           0 :       TEST( i<part_max                                           ); /* Validate i */
     384           0 :       TEST( pinfo[ i ].gaddr_lo==g                               ); /* Make sure partition is tightly adjacent to previous */
     385           0 :       TEST( pinfo[ i ].gaddr_hi> g                               ); /* Make sure partition size is non-zero */
     386           0 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].prev_cidx )==j ); /* Make sure correct prev partition */
     387           0 :       TEST( !pinfo[ i ].cycle_tag                                ); /* Make sure not visited before */
     388           0 :       pinfo[ i ].cycle_tag = 2UL;                                   /* Mark as visited in partitioning */
     389             : 
     390           0 :       g = pinfo[ i ].gaddr_hi;                                      /* Extract where the next partition should start */
     391           0 :       int is_free = !pinfo[ i ].tag;                                /* Determine if this partition is free or used */
     392           0 :       TEST( !(last_free & is_free) );                               /* Make sure no adjacent free partitions */
     393           0 :       free_cnt += (ulong) is_free;                                  /* Update the free cnt */
     394           0 :       used_cnt += (ulong)!is_free;                                  /* Update the used cnt */
     395             : 
     396             :       /* Advance to the next partition */
     397             : 
     398           0 :       last_free = is_free;
     399             : 
     400           0 :       j = i;
     401           0 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].next_cidx );
     402           0 :     }
     403             : 
     404           0 :     TEST( fd_wksp_private_pinfo_idx( wksp->part_tail_cidx )==j ); /* Make sure correct partition tail */
     405           0 :     TEST( g==gaddr_hi );                                          /* Make sure complete partitioning */
     406           0 :     TEST( (idle_cnt + free_cnt + used_cnt)==part_max );           /* Make sure no lost idle partitions */
     407           0 :   } while(0);
     408             : 
     409             :   /* Idle stack and partitioning look intact, validate used treap */
     410             : 
     411           0 :   do {
     412           0 :     ulong visit_cnt = 0UL;
     413             : 
     414           0 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     415           0 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     416           0 :     ulong g = gaddr_lo;
     417             : 
     418           0 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     419           0 :       TEST( i<part_max );                                             /* Validate i */
     420           0 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     421           0 :     }
     422             : 
     423           0 :     for(;;) {
     424             : 
     425             :       /* At this point i is and everything on stack is validated */
     426             : 
     427           0 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     428           0 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     429             : 
     430             :         /* Pop stack */
     431             : 
     432           0 :         i = s;
     433           0 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     434             : 
     435             :         /* Visit i */
     436             : 
     437           0 :         ulong p = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     438             : 
     439           0 :         TEST( pinfo[ i ].gaddr_lo>=g    );            /* Make sure this starts after last visited */
     440           0 :         TEST( pinfo[ i ].tag            );            /* Make sure tagged as a used partition */
     441           0 :         TEST( pinfo[ i ].cycle_tag==2UL );            /* Make sure in partitioning and not visited yet this traversal */
     442           0 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) /* Make sure heap property satisfied */
     443           0 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     444             : 
     445           0 :         TEST( !pinfo[ i ].in_same );                  /* Make sure unique */
     446           0 :         TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx ) ) ); /* " */
     447             : 
     448           0 :         pinfo[ i ].cycle_tag = 3UL;                   /* Mark as visited this traversal */
     449           0 :         visit_cnt++;                                  /* Update the visit cnt */
     450           0 :         g = pinfo[ i ].gaddr_hi;                      /* Get minimum start for next partition */
     451             : 
     452             :         /* Traverse the right subtree */
     453             : 
     454           0 :         p = i;
     455           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     456           0 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     457           0 :           TEST( i<part_max );                                             /* Validate i */
     458           0 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     459           0 :         }
     460             : 
     461           0 :       } else {
     462             : 
     463             :         /* At this point i and everything on the stack is validated.
     464             :            Push i to the stack and recurse on the left subtree. */
     465             : 
     466           0 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     467           0 :         s = i;
     468           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     469           0 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     470           0 :           TEST( i<part_max );                                             /* Validate i */
     471           0 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     472           0 :         }
     473             : 
     474           0 :       }
     475           0 :     }
     476             : 
     477           0 :     TEST( visit_cnt==used_cnt ); /* Make sure all used partitions in used treap */
     478           0 :   } while(0);
     479             : 
     480             :   /* Idle stack, partitioning and used treap look intact, validate the
     481             :      free treap. */
     482             : 
     483           0 :   do {
     484           0 :     ulong visit_cnt = 0UL;
     485             : 
     486           0 :     ulong i  = fd_wksp_private_pinfo_idx( wksp->part_free_cidx );
     487           0 :     ulong s  = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     488           0 :     ulong sz = 0UL;
     489             : 
     490           0 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     491           0 :       TEST( i<part_max );                                             /* Validate i */
     492           0 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     493           0 :     }
     494             : 
     495           0 :     for(;;) {
     496             : 
     497             :       /* At this point i and everything on the stack is validated */
     498             : 
     499           0 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     500           0 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     501             : 
     502             :         /* Pop stack */
     503             : 
     504           0 :         i = s;
     505           0 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     506             : 
     507             :         /* Visit i */
     508             : 
     509           0 :         ulong p   = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     510           0 :         ulong isz = fd_wksp_private_pinfo_sz( pinfo + i );               /* Extract the size */
     511             : 
     512           0 :         TEST( isz>sz              ); /* Make sure this partition i larger than previous */
     513           0 :         TEST( !pinfo[ i ].tag     ); /* Make sure tagged as a free partition */
     514           0 :         TEST( !pinfo[ i ].in_same ); /* Make sure marked as not in same */
     515             : 
     516           0 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) { /* Make sure heap property satisfied */
     517           0 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     518           0 :         }
     519             : 
     520           0 :         sz = isz; /* Update largest size partition seen so far */
     521             : 
     522             :         /* Traverse all same sized partitions */
     523             : 
     524           0 :         ulong j = i;
     525           0 :         for(;;) {
     526             : 
     527             :           /* At this point, j is validated */
     528             : 
     529           0 :           TEST( pinfo[ j ].cycle_tag==2UL ); /* Make sure in partitioning and not visited yet this traversal */
     530           0 :           pinfo[ j ].cycle_tag = 3UL;        /* Mark as visited this traversal */
     531           0 :           visit_cnt++;
     532             : 
     533           0 :           ulong k = fd_wksp_private_pinfo_idx( pinfo[ j ].same_cidx );    /* Get the next same sized */
     534           0 :           if( fd_wksp_private_pinfo_idx_is_null( k ) ) break;             /* If no more, we are done with this node */
     535           0 :           TEST( k<part_max );                                             /* Make sure valid index */
     536           0 :           TEST( fd_wksp_private_pinfo_sz( pinfo + k )==sz );              /* Make sure same size */
     537           0 :           TEST( pinfo[ k ].in_same );                                     /* Make sure marked as in same */
     538           0 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].left_cidx  ) ) );
     539           0 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].right_cidx ) ) );
     540           0 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ k ].parent_cidx )==j ); /* Make sure correct parent */
     541           0 :           j = k;
     542           0 :         }
     543             : 
     544             :         /* Recurse on the right subtree */
     545             : 
     546           0 :         p = i;
     547           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     548           0 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     549           0 :           TEST( i<part_max );                                             /* Validate i */
     550           0 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     551           0 :         }
     552             : 
     553           0 :       } else {
     554             : 
     555           0 :         TEST( i<part_max ); /* Validate i */
     556             : 
     557             :         /* At this point i and everything on the stack is validated.
     558             :            Push i to the stack and recurse on the left subtree. */
     559             : 
     560           0 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     561           0 :         s = i;
     562           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     563           0 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     564           0 :           TEST( i<part_max );                                             /* Validate i */
     565           0 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     566           0 :         }
     567           0 :       }
     568           0 :     }
     569             : 
     570           0 :     TEST( visit_cnt==free_cnt ); /* Make sure all free partitions in free treap */
     571             : 
     572           0 :   } while(0);
     573             : 
     574           0 : # undef TEST
     575             : 
     576           0 :   return FD_WKSP_SUCCESS;
     577           0 : }
     578             : 
     579             : int
     580             : fd_wksp_rebuild( fd_wksp_t * wksp,
     581           1 :                  uint        seed ) {
     582             : 
     583             :   /* Load the wksp metadata, don't rebuild if any of it looks even
     584             :      slightly off. */
     585             : 
     586           1 :   if( FD_UNLIKELY( !wksp ) ) {
     587           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     588           0 :     return FD_WKSP_ERR_CORRUPT;
     589           0 :   }
     590             : 
     591           1 :   ulong magic     = wksp->magic;
     592           1 :   ulong part_max  = wksp->part_max;
     593           1 :   ulong data_max  = wksp->data_max;
     594           1 :   ulong gaddr_lo  = wksp->gaddr_lo;
     595           1 :   ulong gaddr_hi  = wksp->gaddr_hi;
     596           1 :   ulong cycle_tag = wksp->cycle_tag;
     597             : 
     598             :   /* TODO: consider verifying owner */
     599             : 
     600           1 :   ulong footprint    = fd_wksp_footprint( part_max, data_max );
     601           1 :   ulong gaddr_lo_exp = fd_wksp_private_data_off( part_max );
     602           1 :   ulong gaddr_hi_exp = gaddr_lo_exp + data_max;
     603           1 :   if( FD_UNLIKELY( (magic!=FD_WKSP_MAGIC) | (!footprint) | (!fd_shmem_name_len( wksp->name )) |
     604           1 :                    (gaddr_lo!=gaddr_lo_exp) | (gaddr_hi!=gaddr_hi_exp) | (cycle_tag<4UL) ) ) {
     605           0 :     FD_LOG_WARNING(( "bad metadata\n\t"
     606           0 :                      "magic     %016lx (exp %016lx)\n\t"
     607           0 :                      "part_max  %lu data_max %lu (footprint %lu)\n\t"
     608           0 :                      "gaddr_lo  %lu (exp %lu)\n\t"
     609           0 :                      "gaddr_hi  %lu (exp %lu)\n\t"
     610           0 :                      "cycle_tag %lu (exp>=4)",
     611           0 :                      magic, FD_WKSP_MAGIC, part_max, data_max, footprint,
     612           0 :                      gaddr_lo, gaddr_lo_exp, gaddr_hi, gaddr_hi_exp, cycle_tag ));
     613           0 :     return FD_WKSP_ERR_CORRUPT;
     614           0 :   }
     615             : 
     616             :   /* Scan the wksp pinfo and insert any used partitions into the used
     617             :      treap and put the rest on the idle stack.  If there is any sign of
     618             :      corruption (empty, bad range or overlap between used partitions),
     619             :      we abort the rebuild (this is almost certainly data corruption of
     620             :      some form and we don't have enough info to resolve a conflict
     621             :      without potentially making the situation worse).  We do the scan in
     622             :      reverse order to rebuild the idle stack in forward order.
     623             : 
     624             :      Note that we don't ever change the gaddr_lo,gaddr_hi of any tagged
     625             :      partitions such that operation is guaranteed to never change the
     626             :      single source of truth.  As such, this operation can be interrupted
     627             :      and restarted arbitrarily safely.*/
     628             : 
     629           1 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     630             : 
     631           1 :   do {
     632           1 :     wksp->seed           = seed;
     633           1 :     wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush idle stack */
     634           1 :     wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush used treap */
     635           1 :     wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush free treap */
     636             : 
     637           1 :     ulong i = part_max;
     638     1374914 :     while( i ) {
     639     1374913 :       i--;
     640             : 
     641             :       /* Ideally, heap priorities should just be a shuffling of the
     642             :          integers [0,part_max).  fd_uint_hash will generate such a
     643             :          shuffling for part_max = 2^32.  Using the lower 30 bits
     644             :          (reserving bit 31 for bulk operations) will yield something
     645             :          very close.  We use seed to mix it up some more. */
     646             : 
     647     1374913 :       pinfo[ i ].in_same    = 0U;
     648     1374913 :       pinfo[ i ].heap_prio  = fd_uint_hash( seed ^ (uint)i ) & ((1U<<30)-1U);
     649     1374913 :       pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     650     1374913 :       pinfo[ i ].cycle_tag  = 0U;
     651             : 
     652     1374913 :       ulong tag = pinfo[ i ].tag;
     653     1374913 :       if( !tag ) { /* Not used ... make it available for reuse below */
     654     1374913 :         fd_wksp_private_idle_stack_push( i, wksp, pinfo );
     655     1374913 :         continue;
     656     1374913 :       }
     657             : 
     658           0 :       pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     659           0 :       pinfo[ i ].next_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     660             : 
     661           0 :       if( FD_UNLIKELY( fd_wksp_private_used_treap_insert( i, wksp, pinfo ) ) ) return FD_WKSP_ERR_CORRUPT; /* Logs details */
     662           0 :     }
     663           1 :   } while(0);
     664             : 
     665             :   /* At this point, a partition is either in the idle stack or used
     666             :      treap.  Further, we have:
     667             : 
     668             :                  | used                       | idle
     669             :        ----------+----------------------------+--------
     670             :        gaddr_*   | non-empty range            | 0
     671             :                  | no overlap with other used | 0
     672             :        tag       | non-zero                   | 0
     673             :        in_same   | 0                          | 0
     674             :        heap_prio | randomized                 | randomized
     675             :        prev      | NULL                       | NULL
     676             :        next      | NULL                       | NULL
     677             :        left      | used treap managed         | NULL
     678             :        right     | used treap managed         | NULL
     679             :        same      | used treap managed (NULL)  | NULL
     680             :        parent    | used treap managed         | idle stack managed
     681             :        stack     | wksp managed               | wksp managed
     682             :        cycle_tag | wksp managed               | wksp managed
     683             : 
     684             :      In-order traverse the used treap to rebuild the partitioning and
     685             :      the free treap. */
     686             : 
     687           1 :   do {
     688           1 :     uint * j_next_cidx_ptr = &wksp->part_head_cidx;  /* Location of most recently added partition next link */
     689             : 
     690           1 :     ulong  j  = FD_WKSP_PRIVATE_PINFO_IDX_NULL; /* Most recently added partition */
     691           1 :     ulong  g0 = gaddr_lo;                       /* Most recently added partition end */
     692             : 
     693           1 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     694           1 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     695           1 :     for(;;) {
     696           1 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     697           1 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     698             : 
     699             :         /* Pop traversal stack */
     700             : 
     701           0 :         i = s;
     702           0 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     703             : 
     704             :         /* Visit i */
     705             : 
     706           0 :         ulong g1 = pinfo[ i ].gaddr_lo;
     707           0 :         if( g1 > g0 ) { /* There's a gap between i and the most recently added partition */
     708             : 
     709             :           /* Acquire an idle partition to hold the gap */
     710             : 
     711           0 :           if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     712           0 :             FD_LOG_WARNING(( "part_max (%lu) too small to fill gap before partition %lu (tag %lu gaddr_lo %lu gaddr_hi %lu)",
     713           0 :                              part_max, i, pinfo[i].tag, pinfo[i].gaddr_lo, pinfo[i].gaddr_hi ));
     714           0 :             return FD_WKSP_ERR_CORRUPT;
     715           0 :           }
     716           0 :           ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     717             : 
     718             :           /* Populate the acquired partition with the gap details,
     719             :              append it to the wksp partitioning and insert it into the
     720             :              free treap.  Note that stack_push/pop reset gaddr_lo,
     721             :              gaddr_hi, tag, in_same, {prev, next, left, right, same,
     722             :              parent}_cidx.  It preserved heap_prio from its original
     723             :              assignment and didn't touch stack_cidx or cycle_tag. */
     724             : 
     725           0 :           pinfo[ k ].gaddr_lo  = g0;
     726           0 :           pinfo[ k ].gaddr_hi  = g1;
     727           0 :           pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     728           0 :           *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     729           0 :           j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     730           0 :           j  = k;
     731           0 :           g0 = g1;
     732             : 
     733           0 :           fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     734           0 :         }
     735             : 
     736             :         /* Add i to the partitioning. */
     737             : 
     738           0 :         pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     739           0 :         *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( i );
     740           0 :         j_next_cidx_ptr  = &pinfo[ i ].next_cidx;
     741           0 :         j  = i;
     742           0 :         g0 = pinfo[ i ].gaddr_hi;
     743             : 
     744             :         /* Traverse the right subtree */
     745             : 
     746           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     747             : 
     748           0 :       } else {
     749             : 
     750             :         /* Push i to the stack and recurse on the left subtree. */
     751             : 
     752           0 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     753           0 :         s = i;
     754           0 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     755             : 
     756           0 :       }
     757           1 :     }
     758             : 
     759           1 :     if( g0 < gaddr_hi ) { /* Have final gap to fill */
     760             : 
     761             :       /* This works the same as the above */
     762             : 
     763           1 :       if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     764           0 :         FD_LOG_WARNING(( "part_max (%lu) too small to complete partitioning", part_max ));
     765           0 :         return FD_WKSP_ERR_CORRUPT;
     766           0 :       }
     767           1 :       ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     768             : 
     769           1 :       pinfo[ k ].gaddr_lo  = g0;
     770           1 :       pinfo[ k ].gaddr_hi  = gaddr_hi;
     771           1 :       pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     772           1 :       *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     773           1 :       j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     774           1 :       j  = k;
     775             :     //g0 = gaddr_hi;
     776             : 
     777           1 :       fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     778           1 :     }
     779             : 
     780           1 :     wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( j );
     781             : 
     782           1 :   } while(0);
     783             : 
     784           1 :   return FD_WKSP_SUCCESS;
     785           1 : }
     786             : 
     787             : #if defined(__linux__)
     788             : 
     789             : #include <sys/mman.h>
     790             : #include <errno.h>
     791             : 
     792             : void
     793           0 : fd_wksp_mprotect( fd_wksp_t * wksp, int flag ) {
     794           0 :   if( FD_UNLIKELY( !wksp ) ) {
     795           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     796           0 :     return;
     797           0 :   }
     798             : 
     799           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     800           0 :     FD_LOG_WARNING(( "bad align" ));
     801           0 :     return;
     802           0 :   }
     803             : 
     804           0 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     805           0 :     FD_LOG_WARNING(( "bad magic" ));
     806           0 :     return;
     807           0 :   }
     808             : 
     809           0 :   ulong wksp_footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     810           0 :   if( FD_UNLIKELY( mprotect( (void*)wksp, wksp_footprint, ( flag ? PROT_READ : (PROT_READ|PROT_WRITE) ) ) ) ) {
     811           0 :     FD_LOG_WARNING(( "mprotect failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     812           0 :     return;
     813           0 :   }
     814           0 : }
     815             : 
     816             : #endif

Generated by: LCOV version 1.14