LCOV - cov.lcov - ballet/ed25519/fd

LCOV - code coverage report

Current view:	top level - ballet/ed25519 - fd_x25519.c (source / functions)		Hit	Total	Coverage
Test:	cov.lcov	Lines:	0	102	0.0 %
Date:	2026-03-19 18:19:27	Functions:	0	5	0.0 %

          Line data    Source code

       1             : #include "fd_x25519.h"
       2             : #include "fd_f25519.h"
       3             : 
       4             : /* FD_X25519_VECTORIZE calls mul4 instead of sqr2+mul2, and similar.
       5             :    Only useful if the underlying ops are actually vectorized and therefore
       6             :    the cost of 4 muls is <= the cost of 2 sqr + 2 mul. */
       7             : #define FD_X25519_VECTORIZE FD_HAS_AVX512
       8             : 
       9             : /* FD_X25519_ALIGN aligns variables. */
      10             : #if FD_HAS_AVX
      11             : #define FD_X25519_ALIGN __attribute__((aligned(32)))
      12             : #else
      13             : #define FD_X25519_ALIGN
      14             : #endif
      15             : 
      16             : /*
      17             :  * Constant time primitives
      18             :  */
      19             : 
      20             : static inline int FD_FN_SENSITIVE
      21           0 : fd_x25519_is_zero_const_time( uchar const point[ 32 ] ) {
      22             :   //TODO: this is generally done by (x)or-ing the limbs, see also RFC 7748, page 13.
      23           0 :   int is_zero = 1;
      24           0 :   for( ulong i=0UL; i<32UL; i++ ) {
      25           0 :     is_zero &= ( !point[ i ] );
      26           0 :   }
      27           0 :   return is_zero;
      28           0 : }
      29             : 
      30             : #if !FD_HAS_AVX512
      31             : 
      32             : static inline void FD_FN_SENSITIVE
      33             : fd_x25519_montgomery_ladder( fd_f25519_t *       x2,
      34             :                              fd_f25519_t *       z2,
      35             :                              fd_f25519_t const * x1,
      36             :                              uchar const *       secret_scalar ) {
      37             :   /* memory areas that will contain (partial) secrets and will be cleared at the end */
      38             :   fd_f25519_t secret_tmp_f[4];
      39             :   int swap = 0;
      40             :   int b = 0;
      41             : 
      42             :   /* human-readable variables */
      43             :   fd_f25519_t * x3   = &secret_tmp_f[0];
      44             :   fd_f25519_t * z3   = &secret_tmp_f[1];
      45             :   fd_f25519_t * tmp0 = &secret_tmp_f[2];
      46             :   fd_f25519_t * tmp1 = &secret_tmp_f[3];
      47             : 
      48             :   fd_f25519_set( x2, fd_f25519_one );
      49             :   fd_f25519_set( z2, fd_f25519_zero );
      50             : 
      51             :   /* use fd_f25519_add to reduce x1 mod p. this is required (we have a test). */
      52             :   fd_f25519_add( x3, fd_f25519_zero, x1 );
      53             :   fd_f25519_set( z3, fd_f25519_one );
      54             : 
      55             :   for( long pos=254UL; pos>=0; pos-- ) {
      56             :     b = (secret_scalar[ pos / 8L ] >> ( pos & 7L )) & 1;
      57             :     swap ^= b;
      58             :     fd_f25519_swap_if( x2, x3, swap );
      59             :     fd_f25519_swap_if( z2, z3, swap );
      60             :     swap = b;
      61             : 
      62             :     fd_f25519_sub_nr( tmp0, x3,   z3   );
      63             :     fd_f25519_sub_nr( tmp1, x2,   z2   );
      64             :     fd_f25519_add_nr( x2,   x2,   z2   );
      65             :     fd_f25519_add_nr( z2,   x3,   z3   );
      66             : 
      67             : #   if FD_X25519_VECTORIZE /* Note that okay to use less efficient squaring because we get it for free in unused vector lanes */
      68             :     fd_f25519_mul4( z3,   tmp0, x2,
      69             :                     z2,   z2,   tmp1,
      70             :                     tmp0, tmp1, tmp1,
      71             :                     tmp1, x2,   x2   );
      72             : #   else /* Use more efficient squaring if scalar implementation */
      73             :     fd_f25519_mul2( z3,   tmp0, x2,
      74             :                     z2,   z2,   tmp1 );
      75             :     fd_f25519_sqr2( tmp0, tmp1,
      76             :                     tmp1, x2         );
      77             : #   endif
      78             :     fd_f25519_add_nr( x3,   z3,   z2 );
      79             :     fd_f25519_sub_nr( z2,   z3,   z2 );
      80             : #   if FD_X25519_VECTORIZE /* See note above */
      81             :     fd_f25519_mul2( x2,   tmp1, tmp0,
      82             :                     z2,   z2,   z2   );
      83             : #   else
      84             :     fd_f25519_mul(  x2,   tmp1, tmp0 );
      85             :     fd_f25519_sqr(  z2,   z2         );
      86             : #   endif
      87             :     fd_f25519_sub_nr( tmp1, tmp1, tmp0 );
      88             : 
      89             :     fd_f25519_mul_121666( z3, tmp1 );
      90             : 
      91             :     fd_f25519_add_nr( tmp0, tmp0, z3   );
      92             : #   if FD_X25519_VECTORIZE /* See note above */
      93             :     fd_f25519_mul3( x3,   x3,   x3,
      94             :                     z3,   x1,   z2,
      95             :                     z2,   tmp0, tmp1 );
      96             : #   else
      97             :     fd_f25519_sqr ( x3,   x3         );
      98             :     fd_f25519_mul2( z3,   x1,   z2,
      99             :                     z2,   tmp1, tmp0 );
     100             : #   endif
     101             :   }
     102             : 
     103             :   fd_f25519_swap_if( x2, x3, swap );
     104             :   fd_f25519_swap_if( z2, z3, swap );
     105             : 
     106             :   /* Sanitize */
     107             : 
     108             :   fd_memzero_explicit( secret_tmp_f, sizeof(secret_tmp_f) );
     109             :   fd_memzero_explicit( &b, sizeof(int) );
     110             :   fd_memzero_explicit( &swap, sizeof(int) );
     111             : }
     112             : #else
     113             : 
     114             : /* This is the "transposed" version of the Montgomery ladder above.
     115             :    Experimentally, this is 15-20% faster on AVX-512. */
     116             : static inline void FD_FN_SENSITIVE
     117             : fd_x25519_montgomery_ladder( fd_f25519_t *       x2,
     118             :                              fd_f25519_t *       z2,
     119             :                              fd_f25519_t const * x1,
     120           0 :                              uchar const *       secret_scalar ) {
     121           0 :   FD_R43X6_QUAD_DECL( U );
     122           0 :   FD_R43X6_QUAD_DECL( Q );
     123           0 :   FD_R43X6_QUAD_DECL( P );
     124           0 :   FD_R43X6_QUAD_PACK( U, fd_r43x6_zero(),
     125           0 :                          fd_r43x6_zero(),
     126           0 :                          fd_r43x6_zero(),
     127           0 :                          x1->el );                      // x_1 = u, in u44
     128           0 :   FD_R43X6_QUAD_PACK( Q, fd_r43x6_one(),                // x_2 = 1, in u44
     129           0 :                          fd_r43x6_zero(),               // z_2 = 0, in u44
     130           0 :                          x1->el,                        // x_3 = u, in u44
     131           0 :                          fd_r43x6_one() );              // z_3 = 1, in u44
     132             :   /* reduce x1 */
     133           0 :   FD_R43X6_QUAD_FOLD_SIGNED( U, U );
     134           0 :   FD_R43X6_QUAD_FOLD_SIGNED( Q, Q );
     135           0 :   int swap = 0;
     136           0 :   int k_t = 0;
     137           0 :   wwl_t perm;
     138           0 :   fd_r43x6_t AA, E, F, G, H, GG;
     139             : 
     140           0 :   for( int t=254UL; t>=0; t-- ) {                       // For t = bits-1 down to 0:
     141             : 
     142             :     /* At this point, Q and U in u44|u44|u44|u44 */
     143             : 
     144           0 :     k_t = (secret_scalar[ t / 8L ] >> ( t & 7L )) & 1;  //   k_t = (k >> t) & 1;
     145           0 :     swap ^= k_t;                                        //   swap ^= k_t
     146           0 :     perm = wwl_if( (-swap) & 0xff, wwl( 2L,3L,0L,1L, 6L,7L,4L,5L ), wwl( 0L,1L,2L,3L, 4L,5L,6L,7L ) );
     147           0 :     Q03 = wwl_permute( perm, Q03 );                     //   (x_2, x_3) = cswap(swap, x_2, x_3)
     148           0 :     Q14 = wwl_permute( perm, Q14 );                     //   (z_2, z_3) = cswap(swap, z_2, z_3)
     149           0 :     Q25 = wwl_permute( perm, Q25 );
     150           0 :     swap = k_t;                                         //   swap = k_t
     151             : 
     152             :     /* These operations are exactly from the RFC but have been reordered
     153             :        slightly to make it easier to extract ILP. */
     154             : 
     155           0 :     FD_R43X6_QUAD_PERMUTE      ( P, 0,0,2,2, Q );       // A = x_2 + z_2,            P  = x_2|x_2|x_3  |x_3,   in u44|u44|u44|u44
     156           0 :     FD_R43X6_QUAD_PERMUTE      ( Q, 1,1,3,3, Q );       // B = x_2 - z_2,            Q  = z_2|z_2|z_3  |z_3,   in u44|u44|u44|u44
     157           0 :     FD_R43X6_QUAD_LANE_ADD_FAST( P, P, 1,0,1,0, P, Q ); // C = x_3 + z_3,            P  = A  |x_2|C    |x_3,   in u45|u44|u45|u44
     158           0 :     FD_R43X6_QUAD_LANE_SUB_FAST( P, P, 0,1,0,1, P, Q ); // D = x_3 - z_3,            P  = A  |B  |C    |D,     in u45|s44|u45|s44
     159           0 :     FD_R43X6_QUAD_PERMUTE      ( Q, 0,1,1,0, P );       // BB = B^2,                 P  = A  |B  |B    |A,     in u44|u44|u44|u44
     160           0 :     FD_R43X6_QUAD_MUL_FAST     ( P, P, Q );             // DA = D * A,               P  = AA |BB |CB   |DA,    in u62|u62|u62|u62
     161           0 :     FD_R43X6_QUAD_FOLD_UNSIGNED( P, P );                // DA = D * A,               P  = AA |BB |CB   |DA,    in u44|u44|u44|u44
     162           0 :     FD_R43X6_QUAD_PERMUTE      ( Q, 1,0,3,2, P );       // CB = C * B,               Q  = BB |AA |DA   |CB,    in u62|u62|u62|u62
     163           0 :     FD_R43X6_QUAD_LANE_SUB_FAST( P, P, 0,1,0,1, Q, P ); // E = AA-BB,                P  = AA |E  |CB   |CB-DA, in u62|s62|u62|s62
     164           0 :     FD_R43X6_QUAD_LANE_ADD_FAST( P, P, 0,0,1,0, P, Q ); //                           P  = AA |E  |DA+CB|CB-DA, in u62|s62|u63|s62
     165           0 :     FD_R43X6_QUAD_LANE_IF      ( Q, 0,1,1,0, P, Q );    //                           Q  = BB |E  |DA+CB|CB,    in u62|u44|u44|u62
     166           0 :     FD_R43X6_QUAD_LANE_IF      ( Q, 0,0,0,1, U, Q );    // x_3 = (DA + CB)^2,        Q  = BB |E  |DA+CB|x_1,   in u62|u44|u44|u44
     167           0 :     FD_R43X6_QUAD_UNPACK       ( AA, E, F, G, P );
     168           0 :     H  = fd_r43x6_add_fast( AA, fd_r43x6_scale_fast( 121665L, E ) ); //              H  = AA + a24 * E,        in u60
     169           0 :     GG = fd_r43x6_sqr_fast( G );                        //                           GG = (DA - CB)^2,         in u61
     170           0 :     FD_R43X6_QUAD_PACK         ( P, AA, H, F, GG );     // z_2 = E * (AA + a24 * E), P  = AA |H  |DA+CB|GG,    in u44|u60|u44|u61
     171           0 :     FD_R43X6_QUAD_FOLD_UNSIGNED( P, P );                //                           P  = AA |H  |DA+CB|GG,    in u44|u44|u44|u44
     172           0 :     FD_R43X6_QUAD_MUL_FAST     ( P, P, Q );             // z_3 = x_1 * (DA - CB)^2,  Q  = x_2|z_2|x_3  |z_3,   in u62|u62|u62|u62
     173           0 :     FD_R43X6_QUAD_FOLD_UNSIGNED( Q, P    );             //                           Q  = x_2|z_2|x_3  |z_3,   in u44|u44|u44|u44
     174           0 :   }
     175             : 
     176             :   /* At this point, Q in u44|u44|u44|u44 */
     177           0 :   perm = wwl_if( (-swap) & 0xff, wwl( 2L,3L,0L,1L, 6L,7L,4L,5L ), wwl( 0L,1L,2L,3L, 4L,5L,6L,7L ) );
     178           0 :   Q03 = wwl_permute( perm, Q03 );                       // (x_2, x_3) = cswap(swap, x_2, x_3)
     179           0 :   Q14 = wwl_permute( perm, Q14 );                       // (z_2, z_3) = cswap(swap, z_2, z_3)
     180           0 :   Q25 = wwl_permute( perm, Q25 );
     181             : 
     182           0 :   FD_R43X6_QUAD_UNPACK( x2->el, z2->el, E, F, Q );
     183             : 
     184             :   /* Sanitize */
     185             : 
     186           0 :   fd_memzero_explicit( &P03,  sizeof(wwl_t) );
     187           0 :   fd_memzero_explicit( &P14,  sizeof(wwl_t) );
     188           0 :   fd_memzero_explicit( &P25,  sizeof(wwl_t) );
     189           0 :   fd_memzero_explicit( &U03,  sizeof(wwl_t) );
     190           0 :   fd_memzero_explicit( &U14,  sizeof(wwl_t) );
     191           0 :   fd_memzero_explicit( &U25,  sizeof(wwl_t) );
     192           0 :   fd_memzero_explicit( &Q03,  sizeof(wwl_t) );
     193           0 :   fd_memzero_explicit( &Q14,  sizeof(wwl_t) );
     194           0 :   fd_memzero_explicit( &Q25,  sizeof(wwl_t) );
     195           0 :   fd_memzero_explicit( &AA,   sizeof(wwl_t) );
     196           0 :   fd_memzero_explicit( &E,    sizeof(wwl_t) );
     197           0 :   fd_memzero_explicit( &F,    sizeof(wwl_t) );
     198           0 :   fd_memzero_explicit( &G,    sizeof(wwl_t) );
     199           0 :   fd_memzero_explicit( &H,    sizeof(wwl_t) );
     200           0 :   fd_memzero_explicit( &GG,   sizeof(wwl_t) );
     201           0 :   fd_memzero_explicit( &perm, sizeof(wwl_t) );
     202           0 :   fd_memzero_explicit( &swap, sizeof(int) );
     203           0 :   fd_memzero_explicit( &k_t,  sizeof(int) );
     204             : 
     205           0 : }
     206             : #endif
     207             : 
     208             : /*
     209             :  * X25519 Protocol
     210             :  */
     211             : 
     212             : static inline void FD_FN_SENSITIVE
     213             : fd_x25519_scalar_mul_const_time( uchar               out[ 32 ],
     214             :                                  uchar const *       secret_scalar,
     215           0 :                                  fd_f25519_t const * point_x ) {
     216           0 :   fd_f25519_t x2[1], z2[1];
     217             : 
     218           0 :   fd_x25519_montgomery_ladder( x2, z2, point_x, secret_scalar );
     219             : 
     220           0 :   fd_f25519_inv( z2, z2 );
     221           0 :   fd_f25519_mul( x2, x2, z2 );
     222             : 
     223           0 :   fd_f25519_tobytes( out, x2 );
     224           0 : }
     225             : 
     226             : static const uchar fd_x25519_basepoint[ 32 ] FD_X25519_ALIGN = { 9 };
     227             : 
     228             : uchar * FD_FN_SENSITIVE
     229             : fd_x25519_public( uchar       self_public_key [ 32 ],
     230           0 :                   uchar const self_private_key[ 32 ] ) {
     231             :   /* IETF RFC 7748 Section 4.1 (page 3) */
     232           0 :   return fd_x25519_exchange( self_public_key, self_private_key, fd_x25519_basepoint );
     233           0 : }
     234             : 
     235             : uchar * FD_FN_SENSITIVE
     236             : fd_x25519_exchange( uchar       shared_secret   [ 32 ],
     237             :                     uchar const self_private_key[ 32 ],
     238           0 :                     uchar const peer_public_key [ 32 ] ) {
     239             : 
     240             :   /* Memory areas that will contain secrets */
     241           0 :   uchar secret_scalar[ 32UL ] FD_X25519_ALIGN;
     242             : 
     243             :   /* Public local variables */
     244           0 :   fd_f25519_t peer_public_key_point_u[1];
     245             : 
     246             :   //  RFC 7748 - Elliptic Curves for Security
     247             :   //
     248             :   //  5. The X25519 and X448 Functions
     249             :   //
     250             :   //  The "X25519" and "X448" functions perform scalar multiplication on
     251             :   //  the Montgomery form of the above curves.  (This is used when
     252             :   //  implementing Diffie-Hellman.)  The functions take a scalar and a
     253             :   //  u-coordinate as inputs and produce a u-coordinate as output.
     254             :   //  Although the functions work internally with integers, the inputs and
     255             :   //  outputs are 32-byte strings (for X25519) or 56-byte strings (for
     256             :   //  X448) and this specification defines their encoding.
     257             : 
     258             :   //  The u-coordinates are elements of the underlying field GF(2^255 - 19)
     259             :   //  or GF(2^448 - 2^224 - 1) and are encoded as an array of bytes, u, in
     260             :   //  little-endian order such that u[0] + 256*u[1] + 256^2*u[2] + ... +
     261             :   //  256^(n-1)*u[n-1] is congruent to the value modulo p and u[n-1] is
     262             :   //  minimal.  When receiving such an array, implementations of X25519
     263             :   //  (but not X448) MUST mask the most significant bit in the final byte.
     264             :   //  This is done to preserve compatibility with point formats that
     265             :   //  reserve the sign bit for use in other protocols and to increase
     266             :   //  resistance to implementation fingerprinting.
     267             : 
     268             :   //  Implementations MUST accept non-canonical values and process them as
     269             :   //  if they had been reduced modulo the field prime.  The non-canonical
     270             :   //  values are 2^255 - 19 through 2^255 - 1 for X25519 and 2^448 - 2^224
     271             :   //  - 1 through 2^448 - 1 for X448.
     272             : 
     273             :   /* From the text above:
     274             :      1. When receiving such an array, implementations of X25519 [...]
     275             :         MUST mask the most significant bit in the final byte
     276             :         >> this is done by fd_f25519_frombytes
     277             :      2. Implementations MUST accept non-canonical values
     278             :         >> no extra check needed */
     279           0 :   fd_f25519_frombytes( peer_public_key_point_u, peer_public_key );
     280             : 
     281             :   //  Scalars are assumed to be randomly generated bytes.  For X25519, in
     282             :   //  order to decode 32 random bytes as an integer scalar, set the three
     283             :   //  least significant bits of the first byte and the most significant bit
     284             :   //  of the last to zero, set the second most significant bit of the last
     285             :   //  byte to 1 and, finally, decode as little-endian.  This means that the
     286             :   //  resulting integer is of the form 2^254 plus eight times a value
     287             :   //  between 0 and 2^251 - 1 (inclusive).  Likewise, for X448, set the two
     288             :   //  least significant bits of the first byte to 0, and the most
     289             :   //  significant bit of the last byte to 1.  This means that the resulting
     290             :   //  integer is of the form 2^447 plus four times a value between 0 and
     291             :   //  2^445 - 1 (inclusive).
     292             : 
     293             :   /* decodeScalar25519
     294             :      note: e need to copy the private key, because we need to sanitize it. */
     295           0 :   memcpy( secret_scalar, self_private_key, 32UL );
     296           0 :   secret_scalar[ 0] &= (uchar)0xF8;
     297           0 :   secret_scalar[31] &= (uchar)0x7F;
     298           0 :   secret_scalar[31] |= (uchar)0x40;
     299             : 
     300           0 :   fd_x25519_scalar_mul_const_time( shared_secret, secret_scalar, peer_public_key_point_u );
     301             : 
     302             :   /* Sanitize */
     303           0 :   fd_memzero_explicit( secret_scalar, 32UL );
     304             : 
     305             :   /* Reject low order points */
     306           0 :   if( FD_UNLIKELY( fd_x25519_is_zero_const_time( shared_secret ) ) ) {
     307           0 :     return NULL;
     308           0 :   }
     309             : 
     310           0 :   return shared_secret;
     311           0 : }

Generated by: LCOV version 1.14