LCOV - code coverage report
Current view: top level - ballet/bn254 - fd_bn254_glv.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 74 82 90.2 %
Date: 2026-03-19 18:19:27 Functions: 8 9 88.9 %

          Line data    Source code
       1             : /* Included by fd_bn254_g1.c and fd_bn254_g2.c, should not be used elsewhere. */
       2             : 
       3             : /* Rundown of the BN254 GLV implementation:
       4             : 
       5             :    Context: https://bitcointalk.org/index.php?topic=3238.msg45565#msg45565
       6             : 
       7             :    BN254 is a "pairing-friendly" curve E: y^2 = x^3 + 3 over a prime field
       8             :    Fp, where:
       9             :     p = 21888242871839275222246405745257275088696311157297823662689037894645226208583
      10             : 
      11             :    The group G1 = E(Fp) has prime order:
      12             :     r = 21888242871839275222246405745257275088548364400416034343698204186575808495617
      13             : 
      14             :    A naive approach to scalar multiplication, [s]P, requires ~256 doublings
      15             :    and ~128 additions (double-and-add) in the worst case. The GLV method
      16             :    takes advantage of an easy to compute endomorphism to cut this roughly
      17             :    in half.
      18             : 
      19             :    For BN254, there exists a cube root of unity beta in Fp such that:
      20             :     phi: (x, y) -> (beta * x, y)
      21             :    is an endomorphism of E.
      22             :    That is, if point P = (x, y) is on the curve, then phi(P) = (beta * x, y)
      23             :    is also on the curve, since (beta*x)^3 = beta^3 * x^3) = x^3,
      24             :    so the curve equation is preserved.
      25             : 
      26             :    phi satisfies phi(P) = [lambda]P where lambda is a cube root of unity
      27             :    modulo r (the group order we defined above^):
      28             :     lambda = 4407920970296243842393367215006156084916469457145843978461
      29             :              (a root of X^2 + X + 1 = 0 mod r)
      30             : 
      31             :    Computing phi(P) = (beta * x, y) costs just one FP multiplication, but
      32             :    is equivalent to a full scalar multiplication by the ~254-bit scalar lambda.
      33             : 
      34             :    We want to "decompose" the input scalar in a way where a majority of the
      35             :    effort is re-used by the lambda computation, leaving the remaining operations
      36             :    smaller and faster to perform. Given a scalar s, we want to find small
      37             :    k1, k2 (each ~128-bit) such that:
      38             :     s = k1 + k2 * lambda (mod r)
      39             :    Then: [s]P = [k1]P + [k2](lambda * P) = [k1]P + [k2]phi(P).
      40             : 
      41             :    Using the "Straus-Shamir trick", https://pmc.ncbi.nlm.nih.gov/articles/PMC9028562/#app1-sensors-22-03083,
      42             :    this requires only ~128 doublings + ~128 additions instead of ~256 doublings.
      43             : 
      44             :    To decompose the scalar, we have a 2-dimensional lattice
      45             :     L = { (a, b) in Z^2 : a + b*lambda = 0 (mod r) }
      46             : 
      47             :    A reduced basis of L is given by three magnitudes:
      48             :     N_A = 147946756881789319000765030803803410728  (~128 bits, 2 limbs)
      49             :     N_B = 9931322734385697763                      (~64 bits, 1 limb)
      50             :     N_C = 147946756881789319010696353538189108491  (~128 bits, 2 limbs)
      51             : 
      52             :    These are arranged differently depending on the group.
      53             :    G1:
      54             :     | +N_A  +N_B |
      55             :     | -N_B  +N_C |
      56             :    G2:
      57             :     | -N_C  -N_B |
      58             :     | +N_B  -N_A |
      59             : 
      60             :    We avoid big integer divisions by r using Babai's algorithm with
      61             :    precomputed fixed-point inverses:
      62             :     b1 = (s * g1) >> 256, where for group:
      63             :                       G1: g1 = round(2^256 * N_C / r)
      64             :                       G2: g1 = round(2^256 * N_A / r)
      65             :     b2 = (s * g2) >> 256  g2 = round(2^256 * N_B / r), 66-bit, 2 limbs)
      66             : 
      67             :    Then:
      68             :     G1: k1 = s - b1*N_A - b2*N_B,  k2 = b1*N_B - b2*N_C
      69             :     G2: k1 = s - b1*N_C - b2*N_B,  k2 = b2*N_A - b1*N_B
      70             : 
      71             :    For G1, k1 >= 0 always, k2 may be negative.
      72             :    For G2, both k1 and k2 may be negative. */
      73             : 
      74             : /* beta in Montgomery form.
      75             :    0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48 */
      76             : const fd_bn254_fp_t fd_bn254_const_beta_mont[1] = {{{
      77             :   0x3350c88e13e80b9cUL, 0x7dce557cdb5e56b9UL, 0x6001b4b8b615564aUL, 0x2682e617020217e0UL
      78             : }}};
      79             : 
      80             : /* Lattice constants, see glv.py */
      81             : const ulong na[ 2 ] = { 0x8211bbeb7d4f1128UL, 0x6f4d8248eeb859fcUL };
      82             : const ulong nb[ 1 ] = { 0x89d3256894d213e3UL };
      83             : const ulong nc[ 2 ] = { 0x0be4e1541221250bUL, 0x6f4d8248eeb859fdUL };
      84             : 
      85             : /* g2 = round(2^256 * N_B / r), 66-bit (2 limbs). Same for G1 and G2. */
      86             : const ulong g2_const[ 2 ] = { 0xd91d232ec7e0b3d7UL, 0x0000000000000002UL };
      87             : 
      88             : /* Multiply 4-limb scalar s by a 3-limb constant g.
      89             :    Returns top 3 limbs. */
      90             : static inline void
      91             : fd_bn254_glv_sxg3( ulong                   out[ 3 ],
      92             :                    fd_bn254_scalar_t const * s,
      93         361 :                    ulong const               g[ 3 ] ) {
      94         361 :   uint128 s0 = s->limbs[0];
      95         361 :   uint128 s1 = s->limbs[1];
      96         361 :   uint128 s2 = s->limbs[2];
      97         361 :   uint128 s3 = s->limbs[3];
      98         361 :   uint128 acc;
      99         361 :   acc = s0 * g[ 0 ];
     100         361 :   acc = s1 * g[ 0 ] + s0 * g[ 1 ]               + (ulong)(acc >> 64);
     101         361 :   acc = s2 * g[ 0 ] + s1 * g[ 1 ] + s0 * g[ 2 ] + (ulong)(acc >> 64);
     102         361 :   acc = s3 * g[ 0 ] + s2 * g[ 1 ] + s1 * g[ 2 ] + (ulong)(acc >> 64);
     103         361 :   acc =               s3 * g[ 1 ] + s2 * g[ 2 ] + (ulong)(acc >> 64); out[ 0 ] = (ulong)acc;
     104         361 :   acc =                             s3 * g[ 2 ] + (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     105         361 :   acc =                                           (ulong)(acc >> 64); out[ 2 ] = (ulong)acc;
     106         361 : }
     107             : 
     108             : /* Same, but for a 2-limb constant g. */
     109             : static inline void
     110             : fd_bn254_glv_sxg2( ulong                   out[ 2 ],
     111             :                    fd_bn254_scalar_t const * s,
     112         361 :                    ulong             const   g[ 2 ] ) {
     113         361 :   uint128 s0 = s->limbs[0];
     114         361 :   uint128 s1 = s->limbs[1];
     115         361 :   uint128 s2 = s->limbs[2];
     116         361 :   uint128 s3 = s->limbs[3];
     117         361 :   uint128 acc;
     118         361 :   acc = s0 * g[ 0 ];
     119         361 :   acc = s1 * g[ 0 ] + s0 * g[ 1 ] + (ulong)(acc >> 64);
     120         361 :   acc = s2 * g[ 0 ] + s1 * g[ 1 ] + (ulong)(acc >> 64);
     121         361 :   acc = s3 * g[ 0 ] + s2 * g[ 1 ] + (ulong)(acc >> 64);
     122         361 :   acc =               s3 * g[ 1 ] + (ulong)(acc >> 64); out[ 0 ] = (ulong)acc;
     123         361 :   acc =                             (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     124         361 : }
     125             : 
     126             : /* Multiply 3-limb a by 2-limb n, store low 4 limbs into out. */
     127             : static inline void
     128             : fd_bn254_glv_mul3x2( ulong     out[ 4 ],
     129             :                      ulong const a[ 3 ],
     130         361 :                      ulong const n[ 2 ] ) {
     131         361 :   uint128 acc;
     132         361 :   acc = (uint128)a[ 0 ] * n[ 0 ];                                                 out[ 0 ] = (ulong)acc;
     133         361 :   acc = (uint128)a[ 1 ] * n[ 0 ] + (uint128)a[ 0 ] * n[ 1 ] + (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     134         361 :   acc = (uint128)a[ 2 ] * n[ 0 ] + (uint128)a[ 1 ] * n[ 1 ] + (ulong)(acc >> 64); out[ 2 ] = (ulong)acc;
     135         361 :   acc =                            (uint128)a[ 2 ] * n[ 1 ] + (ulong)(acc >> 64); out[ 3 ] = (ulong)acc;
     136         361 : }
     137             : 
     138             : /* Multiply 3-limb by 1-limb, store into 4-limb. */
     139             : static inline void
     140             : fd_bn254_glv_mul3x1( ulong     out[ 4 ],
     141             :                      ulong const a[ 3 ],
     142         361 :                      ulong const n[ 1 ] ) {
     143         361 :   uint128 acc;
     144         361 :   acc = (uint128)a[ 0 ] * n[ 0 ];                      out[ 0 ] = (ulong)acc;
     145         361 :   acc = (uint128)a[ 1 ] * n[ 0 ] + (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     146         361 :   acc = (uint128)a[ 2 ] * n[ 0 ] + (ulong)(acc >> 64); out[ 2 ] = (ulong)acc;
     147         361 :   acc =                            (ulong)(acc >> 64); out[ 3 ] = (ulong)acc;
     148         361 : }
     149             : 
     150             : /* Multiply 2-limb by 2-limb, store into 4-limb. */
     151             : static inline void
     152             : fd_bn254_glv_mul2x2( ulong     out[ 4 ],
     153             :                      ulong const a[ 2 ],
     154         361 :                      ulong const n[ 2 ] ) {
     155         361 :   uint128 acc;
     156         361 :   acc = (uint128)a[ 0 ] * n[ 0 ];                                                 out[ 0 ] = (ulong)acc;
     157         361 :   acc = (uint128)a[ 1 ] * n[ 0 ] + (uint128)a[ 0 ] * n[ 1 ] + (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     158         361 :   acc =                            (uint128)a[ 1 ] * n[ 1 ] + (ulong)(acc >> 64); out[ 2 ] = (ulong)acc;
     159         361 :   acc =                                                       (ulong)(acc >> 64); out[ 3 ] = (ulong)acc;
     160         361 : }
     161             : 
     162             : /* Multiply 2-limb by 1-limb, stores 3-limbs. */
     163             : static inline void
     164             : fd_bn254_glv_mul2x1( ulong     out[ 3 ],
     165             :                      ulong const a[ 2 ],
     166         362 :                      ulong const n[ 1 ] ) {
     167         362 :   uint128 acc;
     168         362 :   acc = (uint128)a[ 0 ] * n[ 0 ];                      out[ 0 ] = (ulong)acc;
     169         362 :   acc = (uint128)a[ 1 ] * n[ 0 ] + (ulong)(acc >> 64); out[ 1 ] = (ulong)acc;
     170         362 :   acc =                            (ulong)(acc >> 64); out[ 2 ] = (ulong)acc;
     171         362 : }
     172             : 
     173             : /* 4-limb addition: out = a + b.  Returns carry. */
     174             : static inline ulong
     175             : fd_bn254_glv_add4( ulong     out[ 4 ],
     176             :                    ulong const a[ 4 ],
     177         361 :                    ulong const b[ 4 ] ) {
     178         361 :   ulong carry = 0;
     179        1805 :   for( int j = 0; j < 4; j++ ) {
     180        1444 :     uint128 acc = (uint128)a[ j ] + b[ j ] + carry;
     181        1444 :     out[ j ] = (ulong)acc;
     182        1444 :     carry    = (ulong)(acc >> 64);
     183        1444 :   }
     184         361 :   return carry;
     185         361 : }
     186             : 
     187             : /* 4-limb subtraction: out = a - b.  Returns borrow (1 if a < b). */
     188             : static inline ulong
     189             : fd_bn254_glv_sub4( ulong     out[ 4 ],
     190             :                    ulong const a[ 4 ],
     191         724 :                    ulong const b[ 4 ] ) {
     192         724 :   ulong borrow = 0;
     193        3616 :   for( int j = 0; j < 4; j++ ) {
     194        2892 :     ulong av = a[ j ];
     195        2892 :     ulong bv = b[ j ];
     196        2892 :     ulong diff = av - bv - borrow;
     197        2892 :     borrow = ( av < bv || (borrow && av == bv) ) ? 1UL : 0UL;
     198        2892 :     out[ j ] = diff;
     199        2892 :   }
     200         724 :   return borrow;
     201         724 : }
     202             : 
     203             : /* Two's complement negation of a 4-limb value in-place. */
     204             : static inline void
     205           0 : fd_bn254_glv_negate4( ulong v[ 4 ] ) {
     206           0 :   ulong carry = 1;
     207           0 :   for( int j = 0; j < 4; j++ ) {
     208           0 :     uint128 sum = (uint128)(~v[ j ]) + carry;
     209           0 :     v[ j ] = (ulong)sum;
     210           0 :     carry  = (ulong)(sum >> 64);
     211           0 :   }
     212           0 : }

Generated by: LCOV version 1.14