LCOV - code coverage report
Current view: top level - ballet/sha256 - fd_sha256.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 180 255 70.6 %
Date: 2026-03-19 18:19:27 Functions: 9 12 75.0 %

          Line data    Source code
       1             : #include "fd_sha256.h"
       2             : #include "fd_sha256_constants.h"
       3             : 
       4             : #if FD_HAS_SHANI
       5             : /* For the optimized repeated hash */
       6             : #include "../../util/simd/fd_sse.h"
       7             : #endif
       8             : 
       9             : ulong
      10       22188 : fd_sha256_align( void ) { /* Alignment that fd_sha256_new, fd_sha256_join and fd_sha256_delete check their pointer argument against */
      11       22188 :   return FD_SHA256_ALIGN;
      12       22188 : }
      13             : 
      14             : ulong
      15       11093 : fd_sha256_footprint( void ) { /* Number of bytes that fd_sha256_new zeroes when it formats a region */
      16       11093 :   return FD_SHA256_FOOTPRINT;
      17       11093 : }
      18             : 
      19             : void *
      20       11098 : fd_sha256_new( void * shmem ) {
      21       11098 :   fd_sha256_t * sha = (fd_sha256_t *)shmem;
      22             :   /* Formats the region at shmem as a sha256 object.  Returns shmem on success; logs a warning and returns NULL if shmem is NULL or not aligned to fd_sha256_align(). */
      23       11098 :   if( FD_UNLIKELY( !shmem ) ) {
      24           0 :     FD_LOG_WARNING(( "NULL shmem" ));
      25           0 :     return NULL;
      26           0 :   }
      27             : 
      28       11098 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_sha256_align() ) ) ) {
      29           0 :     FD_LOG_WARNING(( "misaligned shmem" ));
      30           0 :     return NULL;
      31           0 :   }
      32             : 
      33       11098 :   ulong footprint = fd_sha256_footprint();
      34             :   /* Zero the whole footprint before the object is marked valid */
      35       11098 :   fd_memset( sha, 0, footprint );
      36             :   /* magic is written last, as a volatile store bracketed by compiler fences */
      37       11098 :   FD_COMPILER_MFENCE();
      38       11098 :   FD_VOLATILE( sha->magic ) = FD_SHA256_MAGIC;
      39       11098 :   FD_COMPILER_MFENCE();
      40             : 
      41       11098 :   return (void *)sha;
      42       11098 : }
      43             : 
      44             : fd_sha256_t *
      45       11097 : fd_sha256_join( void * shsha ) {
      46             :   /* Returns shsha typed as a fd_sha256_t * when it is non-NULL, aligned to fd_sha256_align() and its magic equals FD_SHA256_MAGIC.  Otherwise logs a warning and returns NULL. */
      47       11097 :   if( FD_UNLIKELY( !shsha ) ) {
      48           0 :     FD_LOG_WARNING(( "NULL shsha" ));
      49           0 :     return NULL;
      50           0 :   }
      51             : 
      52       11097 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      53           0 :     FD_LOG_WARNING(( "misaligned shsha" ));
      54           0 :     return NULL;
      55           0 :   }
      56             : 
      57       11097 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      58             :   /* magic is only read after the NULL and alignment checks have passed */
      59       11097 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      60           0 :     FD_LOG_WARNING(( "bad magic" ));
      61           0 :     return NULL;
      62           0 :   }
      63             : 
      64       11097 :   return sha;
      65       11097 : }
      66             : 
      67             : void *
      68           0 : fd_sha256_leave( fd_sha256_t * sha ) {
      69             :   /* Returns sha as a void * without touching the object; logs a warning and returns NULL if sha is NULL */
      70           0 :   if( FD_UNLIKELY( !sha ) ) {
      71           0 :     FD_LOG_WARNING(( "NULL sha" ));
      72           0 :     return NULL;
      73           0 :   }
      74             : 
      75           0 :   return (void *)sha;
      76           0 : }
      77             : 
      78             : void *
      79           0 : fd_sha256_delete( void * shsha ) {
      80             :   /* Applies the same NULL / alignment / magic checks as fd_sha256_join, then stores 0 to magic.  Returns shsha on success; logs a warning and returns NULL if a check fails. */
      81           0 :   if( FD_UNLIKELY( !shsha ) ) {
      82           0 :     FD_LOG_WARNING(( "NULL shsha" ));
      83           0 :     return NULL;
      84           0 :   }
      85             : 
      86           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      87           0 :     FD_LOG_WARNING(( "misaligned shsha" ));
      88           0 :     return NULL;
      89           0 :   }
      90             : 
      91           0 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      92             : 
      93           0 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      94           0 :     FD_LOG_WARNING(( "bad magic" ));
      95           0 :     return NULL;
      96           0 :   }
      97             :   /* Clear magic with a volatile store bracketed by compiler fences; the rest of the region is left as is */
      98           0 :   FD_COMPILER_MFENCE();
      99           0 :   FD_VOLATILE( sha->magic ) = 0UL;
     100           0 :   FD_COMPILER_MFENCE();
     101             : 
     102           0 :   return (void *)sha;
     103           0 : }
     104             : 
     105             : #ifndef FD_SHA256_CORE_IMPL /* only pick a default when the build has not already defined one */
     106             : #if FD_HAS_SHANI
     107             : #define FD_SHA256_CORE_IMPL 1 /* selects fd_sha256_core_shaext below */
     108             : #else
     109             : #define FD_SHA256_CORE_IMPL 0 /* selects fd_sha256_core_ref below */
     110             : #endif
     111             : #endif
     112             : 
     113             : #if FD_SHA256_CORE_IMPL==0
     114             : 
     115             : /* The implementation below was derived from OpenSSL's SHA-256
     116             :    implementation (Apache-2.0 licensed).  See in particular:
     117             : 
     118             :     https://github.com/openssl/openssl/blob/master/crypto/sha/sha256.c
     119             : 
     120             :    (link valid circa 2022-Dec).  It has been made more strict with more
     121             :    extensive implementation documentation, has been simplified and has
     122             :    been streamlined specifically for use inside Firedancer base machine
     123             :    model (no machine specific capabilities required).
     124             : 
     125             :    In particular, fd_sha256_core_ref is based on OpenSSL's
     126             :    OPENSSL_SMALL_FOOTPRINT SHA-256 implementation (Apache licensed).
     127             :    This should work anywhere but it is not the highest performance
     128             :    implementation possible.
     129             : 
     130             :    It is also straightforward to replace these implementations with HPC
     131             :    implementations that target specific machine capabilities without
     132             :    requiring any changes to caller code. */
     133             : 
     134             : static void
     135             : fd_sha256_core_ref( uint *        state,
     136             :                     uchar const * block,
     137             :                     ulong         block_cnt ) {
     138             :   /* Updates the 8 words at state with block_cnt consecutive blocks of 16 uint words read from block.  block_cnt must be positive: the do/while below processes a block before it tests the count. */
     139             :   /* ROTATE is a left rotate, so (assuming a 32-bit uint) ROTATE(x,30) is a right rotate by 2, etc. */
     140             : # define ROTATE     fd_uint_rotate_left
     141             : # define Sigma0(x)  (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
     142             : # define Sigma1(x)  (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
     143             : # define sigma0(x)  (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
     144             : # define sigma1(x)  (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
     145             : # define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
     146             : # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
     147             :   /* NOTE(review): block is read through a uint pointer; presumably this relies on block being uint aligned or on the target tolerating unaligned loads - confirm */
     148             :   uint const * W = (uint const *)block;
     149             :   do {
     150             :     uint a = state[0];
     151             :     uint b = state[1];
     152             :     uint c = state[2];
     153             :     uint d = state[3];
     154             :     uint e = state[4];
     155             :     uint f = state[5];
     156             :     uint g = state[6];
     157             :     uint h = state[7];
     158             :     /* X is a 16 entry window over the message schedule, indexed modulo 16 */
     159             :     uint X[16];
     160             :     /* Rounds 0-15 take their schedule word directly from the byte swapped input */
     161             :     ulong i;
     162             :     for( i=0UL; i<16UL; i++ ) {
     163             :       X[i] = fd_uint_bswap( W[i] );
     164             :       uint T1 = X[i] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     165             :       uint T2 = Sigma0(a) + Maj(a, b, c);
     166             :       h = g;
     167             :       g = f;
     168             :       f = e;
     169             :       e = d + T1;
     170             :       d = c;
     171             :       c = b;
     172             :       b = a;
     173             :       a = T1 + T2;
     174             :     }
     175             :     for( ; i<64UL; i++ ) { /* Rounds 16-63 extend the schedule in place inside X */
     176             :       uint s0 = X[(i +  1UL) & 0x0fUL];
     177             :       uint s1 = X[(i + 14UL) & 0x0fUL];
     178             :       s0 = sigma0(s0);
     179             :       s1 = sigma1(s1);
     180             :       X[i & 0xfUL] += s0 + s1 + X[(i + 9UL) & 0xfUL];
     181             :       uint T1 = X[i & 0xfUL ] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     182             :       uint T2 = Sigma0(a) + Maj(a, b, c);
     183             :       h = g;
     184             :       g = f;
     185             :       f = e;
     186             :       e = d + T1;
     187             :       d = c;
     188             :       c = b;
     189             :       b = a;
     190             :       a = T1 + T2;
     191             :     }
     192             :     /* Add this block's working variables back into state */
     193             :     state[0] += a;
     194             :     state[1] += b;
     195             :     state[2] += c;
     196             :     state[3] += d;
     197             :     state[4] += e;
     198             :     state[5] += f;
     199             :     state[6] += g;
     200             :     state[7] += h;
     201             :     /* Advance to the next block of 16 words */
     202             :     W += 16UL;
     203             :   } while( --block_cnt );
     204             : 
     205             : # undef ROTATE
     206             : # undef Sigma0
     207             : # undef Sigma1
     208             : # undef sigma0
     209             : # undef sigma1
     210             : # undef Ch
     211             : # undef Maj
     212             : 
     213             : }
     214             : 
     215             : #define fd_sha256_core fd_sha256_core_ref
     216             : 
     217             : #elif FD_SHA256_CORE_IMPL==1
     218             : 
     219             : /* FOUR_ROUNDS( wk ) advances stateFEBA and stateHGDC (both must be in
     220             :    scope at the expansion site) by four rounds and evaluates wk once.
     221             :    _mm_sha256rnds2_epu32 does two rounds, one from the first uint in wk
     222             :    and one from the second.  wk stores four rounds worth of values, so
     223             :    wk is permuted in between to feed the second call the other two. */
     224      298720 : #define FOUR_ROUNDS( wk ) do {                                                               \
     225      298720 :       vu_t __wk = (wk);                                                                      \
     226      298720 :       vu_t temp_state = stateFEBA;                                                           \
     227      298720 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, __wk );                       \
     228      298720 :       stateHGDC = temp_state;                                                                \
     229      298720 :                                                                                              \
     230      298720 :       temp_state = stateFEBA;                                                                \
     231      298720 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, vu_permute( __wk, 2,3,0,1 ) );\
     232      298720 :       stateHGDC = temp_state;                                                                \
     233      298720 :     } while( 0 )
     234             : 
     235             : 
     236             : /* For completeness, here's the documentation for _mm_sha256msg1_epu32
     237             :    and _mm_sha256msg2_epu32 in a slightly reformatted way, where all
     238             :    values are uints, and "-" indicates a don't-care value:
     239             : 
     240             :        _mm_sha256msg1_epu32( (w[j  ], w[j+1], w[j+2], w[j+3]),
     241             :                              (w[j+4], -,      -,      -     ) )
     242             :          = ( w[j  ]+s0( w[j+1] ),  w[j+1]+s0( w[j+2] ),
     243             :              w[j+2]+s0( w[j+3] ),  w[j+3]+s0( w[j+4] ) ).
     244             : 
     245             : 
     246             :        _mm_sha256msg2_epu32( (v[j  ], v[j+1], v[j+2], v[j+3]),
     247             :                              (-,      -,      w[j-2], w[j-1]) )
     248             :          sets w[j  ] = v[j  ] + s1( w[j-2] ) and
     249             :               w[j+1] = v[j+1] + s1( w[j-1] ), and then returns
     250             : 
     251             :            ( v[j  ]+s1( w[j-2] ), v[j+1]+s1( w[j-1] ),
     252             :              v[j+2]+s1( w[j  ] ), v[j+3]+s1( w[j+1] ) )   */
     253             : 
     254             : 
     255             : /* w[i] for i>= 16 is w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
     256             :    Since our vector size is 4 uints, it's only s1 that is a little
     257             :    problematic, because it references items in the same vector.
     258             :    Thankfully, the msg2 intrinsic takes care of the complexity, but we
     259             :    need to execute it last.
     260             : 
     261             :    We get w[i-16] + s0(w[i-15]) using the msg1 intrinsic, setting j =
     262             :    i-16.  For example, to compute w1013, we pass in w0003 and w0407.
     263             :    Then we can get w[i-7] by using the alignr instruction on
     264             :    (w[i-8], w[i-7], w[i-6], w[i-5]) and (w[i-4], w[i-3], w[i-2], w[i-1])
     265             :    to concatenate them and shift by one uint.  Continuing with the
     266             :    example of w1013, we need w080b and w0c0f.  We then put
     267             :              v[i] = w[i-16] + s0(w[i-15]) + w[i-7],
     268             :    and invoke the msg2 intrinsic with j=i, which gives w[i], as desired.
     269             :    Each invocation of NEXT_W computes 4 values of w. */
     270             : 
     271      224040 : #define NEXT_W( w_minus_16, w_minus_12, w_minus_8, w_minus_4 ) (__extension__({      \
     272      224040 :     vu_t __w_i_16_s0_i_15 = _mm_sha256msg1_epu32( w_minus_16, w_minus_12 );          \
     273      224040 :     vu_t __w_i_7          = _mm_alignr_epi8( w_minus_4, w_minus_8, 4 );              \
     274      224040 :     _mm_sha256msg2_epu32( vu_add( __w_i_7, __w_i_16_s0_i_15 ), w_minus_4 );          \
     275      224040 :     }))
     276             : 
     277             : /* Zen 5's sha256rnds2 has an RTP of 2, while Zen 4's has an RTP of 1. We can win some performance by
     278             :    moving the schedule updates earlier in the loop, which improves the speed by around 1M hashes/s for the
     279             :    repeated hashing.  Both variants read w0003, w0407, w080b, w0c0f and update stateFEBA / stateHGDC in scope. */
     280             : #ifdef __znver5__ /* each NEXT_W is issued 16 rounds ahead of the FOUR_ROUNDS that consumes its result */
     281             : #define FULL_ROUNDS() do {                                                                                        \
     282             :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
     283             :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
     284             :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
     285             :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
     286             :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
     287             :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
     288             :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
     289             :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
     290             :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
     291             :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
     292             :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
     293             :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
     294             :     /*                                              */ FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
     295             :     /*                                              */ FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
     296             :     /*                                              */ FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
     297             :     /*                                              */ FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
     298             :     } while ( 0 )
     299             : #else /* each NEXT_W is issued immediately before the FOUR_ROUNDS that consumes its result */
     300       18670 : #define FULL_ROUNDS() do {                                                                                        \
     301       18670 :     /*                                              */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
     302       18670 :     /*                                              */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
     303       18670 :     /*                                              */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
     304       18670 :     /*                                              */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
     305       18670 :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
     306       18670 :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
     307       18670 :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
     308       18670 :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
     309       18670 :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
     310       18670 :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
     311       18670 :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
     312       18670 :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
     313       18670 :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
     314       18670 :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
     315       18670 :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
     316       18670 :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
     317       18670 :     } while ( 0 )
     318             : #endif /* __znver5__ */
     319             : 
     320             : 
     321             : void /* Updates the 8 words at state with block_cnt 64-byte blocks read from block */
     322             : fd_sha256_core_shaext( uint *        state,       /* 64-byte aligned, 8 entries */
     323             :                        uchar const * block,       /* ideally 128-byte aligned (but not required), 64*block_cnt in size */
     324        9675 :                        ulong         block_cnt ) {/* positive */
     325        9675 :   vu_t stateABCD = vu_ld( state     );
     326        9675 :   vu_t stateEFGH = vu_ld( state+4UL );
     327             :   /* NOTE(review): fd_sha256_hash in this file passes a state declared aligned(32), not 64 - confirm the alignment vu_ld / vu_st actually require */
     328        9675 :   vu_t baseFEBA = vu_permute2( stateEFGH, stateABCD, 1, 0, 1, 0 );
     329        9675 :   vu_t baseHGDC = vu_permute2( stateEFGH, stateABCD, 3, 2, 3, 2 );
     330             :   /* base* carry the running state in the FEBA / HGDC lane order used by FOUR_ROUNDS; state* are the per-block working copy */
     331       28345 :   for( ulong b=0UL; b<block_cnt; b++ ) {
     332       18670 :     vu_t stateFEBA = baseFEBA;
     333       18670 :     vu_t stateHGDC = baseHGDC;
     334             :     /* Unaligned loads of the four 16-byte quarters of block b, byte swapped per vu_bswap */
     335       18670 :     vu_t w0003 = vu_bswap( vu_ldu( block+64UL*b      ) );
     336       18670 :     vu_t w0407 = vu_bswap( vu_ldu( block+64UL*b+16UL ) );
     337       18670 :     vu_t w080b = vu_bswap( vu_ldu( block+64UL*b+32UL ) );
     338       18670 :     vu_t w0c0f = vu_bswap( vu_ldu( block+64UL*b+48UL ) );
     339             : 
     340       18670 :     FULL_ROUNDS();
     341             :     /* Add the block's result into the running state */
     342       18670 :     baseFEBA = vu_add( baseFEBA, stateFEBA );
     343       18670 :     baseHGDC = vu_add( baseHGDC, stateHGDC );
     344             : 
     345       18670 :   }
     346             :   /* Permute back to ABCD / EFGH order and store the updated state */
     347        9675 :   stateABCD = vu_permute2( baseFEBA, baseHGDC, 3, 2, 3, 2 );
     348        9675 :   stateEFGH = vu_permute2( baseFEBA, baseHGDC, 1, 0, 1, 0 );
     349        9675 :   vu_st( state,     stateABCD );
     350        9675 :   vu_st( state+4UL, stateEFGH );
     351        9675 : }
     352             : 
     353        9676 : #define fd_sha256_core fd_sha256_core_shaext
     354             : 
     355             : #else
     356             : #error "Unsupported FD_SHA256_CORE_IMPL"
     357             : #endif
     358             : 
     359             : fd_sha256_t *
     360        2349 : fd_sha256_init( fd_sha256_t * sha ) { /* Resets sha for a new hash: initial state words, empty buffer, zero bit count.  Returns sha.  sha is not validated here. */
     361        2349 :   sha->state[0] = FD_SHA256_INITIAL_A;
     362        2349 :   sha->state[1] = FD_SHA256_INITIAL_B;
     363        2349 :   sha->state[2] = FD_SHA256_INITIAL_C;
     364        2349 :   sha->state[3] = FD_SHA256_INITIAL_D;
     365        2349 :   sha->state[4] = FD_SHA256_INITIAL_E;
     366        2349 :   sha->state[5] = FD_SHA256_INITIAL_F;
     367        2349 :   sha->state[6] = FD_SHA256_INITIAL_G;
     368        2349 :   sha->state[7] = FD_SHA256_INITIAL_H;
     369        2349 :   sha->buf_used = 0UL;
     370        2349 :   sha->bit_cnt  = 0UL;
     371        2349 :   return sha;
     372        2349 : }
     373             : 
     374             : fd_sha256_t *
     375             : fd_sha256_append( fd_sha256_t * sha,
     376             :                   void const *  _data,
     377        7389 :                   ulong         sz ) {
     378             :   /* Appends the sz bytes at _data to the in-progress hash in sha: whole blocks go through fd_sha256_core, any tail stays in sha->buf.  Returns sha. */
     379             :   /* If no data to append, we are done */
     380             : 
     381        7389 :   if( FD_UNLIKELY( !sz ) ) return sha; /* optimize for non-trivial append */
     382             : 
     383             :   /* Unpack inputs */
     384             : 
     385        7336 :   uint *  state    = sha->state;
     386        7336 :   uchar * buf      = sha->buf;
     387        7336 :   ulong   buf_used = sha->buf_used;
     388        7336 :   ulong   bit_cnt  = sha->bit_cnt;
     389             : 
     390        7336 :   uchar const * data = (uchar const *)_data;
     391             : 
     392             :   /* Update bit_cnt */
     393             :   /* FIXME: could accumulate bytes here and do bit conversion in fini */
     394             :   /* FIXME: Overflow handling if more than 2^64 bits (unlikely) */
     395             : 
     396        7336 :   sha->bit_cnt = bit_cnt + (sz<<3);
     397             : 
     398             :   /* Handle buffered bytes from previous appends */
     399             : 
     400        7336 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     401             : 
     402             :     /* If the append isn't large enough to complete the current block,
     403             :        buffer these bytes too and return */
     404             : 
     405        4902 :     ulong buf_rem = FD_SHA256_PRIVATE_BUF_MAX - buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     406        4902 :     if( FD_UNLIKELY( sz < buf_rem ) ) { /* optimize for large append */
     407        2813 :       fd_memcpy( buf + buf_used, data, sz );
     408        2813 :       sha->buf_used = buf_used + sz;
     409        2813 :       return sha;
     410        2813 :     }
     411             : 
     412             :     /* Otherwise, buffer enough leading bytes of data to complete the
     413             :        block, update the hash and then continue processing any remaining
     414             :        bytes of data. */
     415             : 
     416        2089 :     fd_memcpy( buf + buf_used, data, buf_rem );
     417        2089 :     data += buf_rem;
     418        2089 :     sz   -= buf_rem;
     419             : 
     420        2089 :     fd_sha256_core( state, buf, 1UL );
     421        2089 :     sha->buf_used = 0UL;
     422        2089 :   }
     423             : 
     424             :   /* Append the bulk of the data */
     425             : 
     426        4523 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     427        4523 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt ); /* optimized for large append */
     428             : 
     429             :   /* Buffer any leftover bytes */
     430             : 
     431        4523 :   buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL); /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     432        4523 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     433        4369 :     fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     434        4369 :     sha->buf_used = buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     435        4369 :   }
     436             : 
     437        4523 :   return sha;
     438        7336 : }
     439             : 
     440             : void *
     441             : fd_sha256_fini( fd_sha256_t * sha,
     442        2342 :                 void *        _hash ) {
     443             :   /* Finishes the hash in sha: pads the buffered tail, appends the bit count, runs the last block(s) and copies the 32-byte result to _hash.  Returns _hash.  sha->state is byte swapped in place and buf_used / bit_cnt are not reset here. */
     444             :   /* Unpack inputs */
     445             : 
     446        2342 :   uint *  state    = sha->state;
     447        2342 :   uchar * buf      = sha->buf;
     448        2342 :   ulong   buf_used = sha->buf_used; /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     449        2342 :   ulong   bit_cnt  = sha->bit_cnt;
     450             : 
     451             :   /* Append the terminating message byte */
     452             : 
     453        2342 :   buf[ buf_used ] = (uchar)0x80;
     454        2342 :   buf_used++;
     455             : 
     456             :   /* If there isn't enough room to save the message length in bits at
     457             :      the end of the in progress block, clear the rest of the in progress
     458             :      block, update the hash and start a new block. */
     459             : 
     460        2342 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) { /* optimize for well aligned use of append */
     461          87 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     462          87 :     fd_sha256_core( state, buf, 1UL );
     463          87 :     buf_used = 0UL;
     464          87 :   }
     465             : 
     466             :   /* Clear in progress block up to last 64-bits, append the message
     467             :      size in bits in the last 64-bits of the in progress block and
     468             :      update the hash to finalize it. */
     469             : 
     470        2342 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     471        2342 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     472        2342 :   fd_sha256_core( state, buf, 1UL );
     473             : 
     474             :   /* Unpack the result into _hash (annoying bswaps here) */
     475             : 
     476        2342 :   state[0] = fd_uint_bswap( state[0] );
     477        2342 :   state[1] = fd_uint_bswap( state[1] );
     478        2342 :   state[2] = fd_uint_bswap( state[2] );
     479        2342 :   state[3] = fd_uint_bswap( state[3] );
     480        2342 :   state[4] = fd_uint_bswap( state[4] );
     481        2342 :   state[5] = fd_uint_bswap( state[5] );
     482        2342 :   state[6] = fd_uint_bswap( state[6] );
     483        2342 :   state[7] = fd_uint_bswap( state[7] );
     484        2342 :   return memcpy( _hash, state, 32 );
     485        2342 : }
     486             : 
     487             : void *
     488             : fd_sha256_hash( void const * _data,
     489             :                 ulong        sz,
     490        4858 :                 void *       _hash ) {
     491        4858 :   uchar const * data = (uchar const *)_data;
     492             :   /* One-shot hash of the sz bytes at _data: copies the 32-byte result to _hash and returns _hash.  Uses only stack scratch (buf, state). */
     493             :   /* This is just the above streamlined to eliminate all the overheads
     494             :      to support incremental hashing. */
     495             : 
     496        4858 :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     497        4858 :   uint  state[8] __attribute__((aligned(32)));
     498             : 
     499        4858 :   state[0] = FD_SHA256_INITIAL_A;
     500        4858 :   state[1] = FD_SHA256_INITIAL_B;
     501        4858 :   state[2] = FD_SHA256_INITIAL_C;
     502        4858 :   state[3] = FD_SHA256_INITIAL_D;
     503        4858 :   state[4] = FD_SHA256_INITIAL_E;
     504        4858 :   state[5] = FD_SHA256_INITIAL_F;
     505        4858 :   state[6] = FD_SHA256_INITIAL_G;
     506        4858 :   state[7] = FD_SHA256_INITIAL_H;
     507             :   /* Whole blocks are hashed straight from the caller's data; only the tail is copied into buf */
     508        4858 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     509        4858 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt );
     510             : 
     511        4858 :   ulong buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL);
     512        4858 :   if( FD_UNLIKELY( buf_used ) ) fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     513        4858 :   buf[ buf_used ] = (uchar)0x80;
     514        4858 :   buf_used++;
     515             :   /* No room left for the 8-byte length after the 0x80 byte: zero fill and hash this block, then put the length in a fresh one */
     516        4858 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) {
     517           0 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     518           0 :     fd_sha256_core( state, buf, 1UL );
     519           0 :     buf_used = 0UL;
     520           0 :   }
     521             :   /* Zero fill up to the last 8 bytes, store the byte swapped bit count there and hash the final block */
     522        4858 :   ulong bit_cnt = sz << 3;
     523        4858 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     524        4858 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     525        4858 :   fd_sha256_core( state, buf, 1UL );
     526             :   /* Byte swap each state word before copying the 32 bytes out */
     527        4858 :   state[0] = fd_uint_bswap( state[0] );
     528        4858 :   state[1] = fd_uint_bswap( state[1] );
     529        4858 :   state[2] = fd_uint_bswap( state[2] );
     530        4858 :   state[3] = fd_uint_bswap( state[3] );
     531        4858 :   state[4] = fd_uint_bswap( state[4] );
     532        4858 :   state[5] = fd_uint_bswap( state[5] );
     533        4858 :   state[6] = fd_uint_bswap( state[6] );
     534        4858 :   state[7] = fd_uint_bswap( state[7] );
     535        4858 :   return memcpy( _hash, state, 32 );
     536        4858 : }
     537             : 
     538             : 
     539             : 
     540             : void *
     541             : fd_sha256_hash_32_repeated( void const * _data,
     542             :                             void *       _hash,
     543           0 :                             ulong        cnt ) {
                         /* fd_sha256_hash_32_repeated applies the hash cnt times in a
                            chain: the first iteration hashes the 32 bytes at _data and
                            every later iteration hashes the 32-byte result of the one
                            before it.  The final 32 bytes are written to _hash and _hash
                            is returned.  When cnt is 0 no hashing is done and the 32
                            input bytes end up in _hash (in the SHA-NI branch this relies
                            on vu_bswap undoing itself -- NOTE(review): confirm).

                            Because the message is always exactly 32 bytes, it fits with
                            its padding in a single 64-byte block, so each iteration is
                            one block compression.  The implementation is selected at
                            compile time by FD_HAS_SHANI. */
     544           0 :   uchar const * data = (uchar const *)_data;
     545           0 :   uchar       * hash = (uchar       *)_hash;
     546           0 : #if FD_HAS_SHANI
                         /* Message words 0-7: the 32 input bytes, loaded as two vectors
                            and byte-swapped.  These two variables are overwritten with
                            the digest at the end of every iteration. */
     547           0 :   vu_t       w0003 = vu_bswap( vu_ldu( data      ) );
     548           0 :   vu_t       w0407 = vu_bswap( vu_ldu( data+16UL ) );
                         /* Message words 8-15 never change between iterations, so they
                            are constants: a single 0x80 byte followed by zeros, then a
                            trailing length field.  NOTE(review): the byte positions look
                            like the already-byte-swapped form of the padding (0x80 marker
                            right after the message, 0x0100 = 256 bits at the end) --
                            confirm against the lane order of vb(). */
     549           0 :   vb_t const w080b = vb( 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
     550           0 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 );
     551           0 :   vb_t const w0c0f = vb( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     552           0 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ); /* 32 bytes */
     553             : 
                         /* Initial hash values, packed as {F,E,B,A} and {H,G,D,C}.  Every
                            iteration starts from these and adds them back in after the
                            rounds. */
     554           0 :   vu_t const initialFEBA = vu( FD_SHA256_INITIAL_F, FD_SHA256_INITIAL_E, FD_SHA256_INITIAL_B, FD_SHA256_INITIAL_A );
     555           0 :   vu_t const initialHGDC = vu( FD_SHA256_INITIAL_H, FD_SHA256_INITIAL_G, FD_SHA256_INITIAL_D, FD_SHA256_INITIAL_C );
     556             : 
     557           0 :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     558           0 :     vu_t stateFEBA = initialFEBA;
     559           0 :     vu_t stateHGDC = initialHGDC;
     560             : 
                           /* FULL_ROUNDS is a macro defined outside this function and it
                              takes no arguments, so it has to reach the working state and
                              the message-word vectors through their local names.
                              NOTE(review): presumably it runs the block's rounds over
                              stateFEBA/stateHGDC using w0003, w0407, w080b and w0c0f --
                              check the macro before renaming any of these locals. */
     561           0 :     FULL_ROUNDS();
     562             : 
                           /* Add the starting state back into the result of the rounds. */
     563           0 :     stateFEBA = vu_add( stateFEBA, initialFEBA );
     564           0 :     stateHGDC = vu_add( stateHGDC, initialHGDC );
     565             : 
                           /* Regroup the two {F,E,B,A}/{H,G,D,C} vectors into A..D and
                              E..H vectors (lane order per vu_permute2 -- TODO confirm). */
     566           0 :     vu_t stateABCD = vu_permute2( stateFEBA, stateHGDC, 3, 2, 3, 2 );
     567           0 :     vu_t stateEFGH = vu_permute2( stateFEBA, stateHGDC, 1, 0, 1, 0 );
     568             : 
                           /* The digest words become message words 0-7 of the next
                              iteration; no byte swap is needed while staying in word
                              form. */
     569           0 :     w0003 = stateABCD;
     570           0 :     w0407 = stateEFGH;
     571           0 :   }
                         /* Byte-swap back and store the 32 result bytes (unaligned-safe
                            store, judging by the vu_stu name -- TODO confirm). */
     572           0 :   vu_stu( hash,      vu_bswap( w0003 ) );
     573           0 :   vu_stu( hash+16UL, vu_bswap( w0407 ) );
                         /* Retire the round macros; their #defines are not in this
                            function. */
     574           0 : #undef NEXT_W
     575           0 : #undef FOUR_ROUNDS
     576           0 : #undef FULL_ROUNDS
     577             : 
     578             : #else
     579             : 
                         /* Portable branch: buf holds the one 64-byte block that is
                            compressed on every iteration. */
     580             :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     581             : 
     582             :   /* Prepare padding once */
                         /* Only the first 32 bytes of buf are rewritten inside the loop,
                            so the 0x80 marker, the zero fill and the trailing bit count
                            written here stay valid for every iteration. */
     583             :   ulong buf_used = 32UL;
     584             :   memcpy( buf, data, 32UL );
     585             :   buf[ buf_used ] = (uchar)0x80;
     586             :   buf_used++;
     587             : 
                         /* Message length in bits (32 bytes -> 256), stored byte-swapped
                            in the last 8 bytes of the block. */
     588             :   ulong bit_cnt = 32UL << 3;
     589             :   memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     590             :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     591             : 
     592             :   /* This is just the above streamlined to eliminate all the overheads
     593             :      to support incremental hashing. */
     594             :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     595             : 
     596             :     uint  state[8] __attribute__((aligned(32)));
     597             : 
                           /* Every iteration is an independent hash, so the state is
                              reset to the initial values each time. */
     598             :     state[0] = FD_SHA256_INITIAL_A;
     599             :     state[1] = FD_SHA256_INITIAL_B;
     600             :     state[2] = FD_SHA256_INITIAL_C;
     601             :     state[3] = FD_SHA256_INITIAL_D;
     602             :     state[4] = FD_SHA256_INITIAL_E;
     603             :     state[5] = FD_SHA256_INITIAL_F;
     604             :     state[6] = FD_SHA256_INITIAL_G;
     605             :     state[7] = FD_SHA256_INITIAL_H;
     606             : 
                           /* Process the single prepared block (fd_sha256_core is
                              defined outside this function). */
     607             :     fd_sha256_core( state, buf, 1UL );
     608             : 
                           /* Byte-swap each state word so that copying the array out as
                              raw bytes yields the digest byte order. */
     609             :     state[0] = fd_uint_bswap( state[0] );
     610             :     state[1] = fd_uint_bswap( state[1] );
     611             :     state[2] = fd_uint_bswap( state[2] );
     612             :     state[3] = fd_uint_bswap( state[3] );
     613             :     state[4] = fd_uint_bswap( state[4] );
     614             :     state[5] = fd_uint_bswap( state[5] );
     615             :     state[6] = fd_uint_bswap( state[6] );
     616             :     state[7] = fd_uint_bswap( state[7] );
                           /* The digest replaces the message bytes of the block and
                              becomes the input of the next iteration. */
     617             :     memcpy( buf, state, 32UL );
     618             :   }
                         /* buf[0..31] is the last digest, or the untouched input when
                            cnt is 0. */
     619             :   memcpy( hash, buf, 32UL );
     620             : #endif
     621           0 :   return _hash;
     622           0 : }
     623             : 
     624             : #undef fd_sha256_core /* retire the fd_sha256_core macro name here; NOTE(review): its #define is outside this excerpt -- presumably it picks the block-compression implementation, confirm */

Generated by: LCOV version 1.14