/src/blst/src/recip.c

Source
/*
 * Copyright Supranational LLC
 * Licensed under the Apache License, Version 2.0, see LICENSE for details.
 * SPDX-License-Identifier: Apache-2.0
 */

#include "fields.h"

#ifdef __OPTIMIZE_SIZE__
/*
 * 608 multiplications for scalar inversion modulo BLS12-381 prime, 32%
 * more than corresponding optimal addition-chain, plus mispredicted
 * branch penalties on top of that... The addition chain below was
 * measured to be >50% faster.
 */
/*
 * Size-optimized build: hand |inp| together with the exponent P-2 to the
 * generic exp_mont_384 routine instead of using a dedicated addition chain.
 */
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    /* P-2, serialized with TO_BYTES one 64-bit word at a time, low word first */
    static const byte p_minus_2[] = {
        TO_BYTES(0xb9feffffffffaaa9),
        TO_BYTES(0x1eabfffeb153ffff),
        TO_BYTES(0x6730d2a0f6b0f624),
        TO_BYTES(0x64774b84f38512bf),
        TO_BYTES(0x4b1ba7b6434bacd7),
        TO_BYTES(0x1a0111ea397fe69a)
    };

    /* 381: presumably the bit length of the exponent -- confirm against exp_mont_384 */
    exp_mont_384(out, inp, p_minus_2, 381, BLS12_381_P, p0);
}
#else
# define sqr(ret,a)   sqr_fp(ret,a)
# define mul(ret,a,b)   mul_fp(ret,a,b)
# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b)

# include "recip-addchain.h"
/*
 * Default (not size-optimized) build: the whole computation is the
 * RECIPROCAL_MOD_BLS12_381_P macro pulled in from "recip-addchain.h".
 * The macro is presumably written in terms of the sqr/mul/sqr_n_mul
 * helpers that are #defined right before the include and #undef'ed
 * after this function.
 */
static void flt_reciprocal_fp(vec384 out, const vec384 inp)
{
    /* the third argument names the type the macro uses (vec384) */
    RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384);
}
# undef RECIPROCAL_MOD_BLS12_381_P
# undef sqr_n_mul
# undef mul
# undef sqr
#endif

/*
 * Fp2 inversion built on top of flt_reciprocal_fp.
 *
 * With inp = a + b*i (a = inp[0], b = inp[1]):
 *
 *   1/(a + b*i) = (a - b*i) / (a^2 + b^2)
 *
 * so a single Fp inversion of a^2 + b^2 is enough.
 */
static void flt_reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 norm;    /* a^2, then a^2 + b^2 */
    vec384 scale;   /* b^2, then 1/(a^2 + b^2) */

    sqr_fp(norm, inp[0]);
    sqr_fp(scale, inp[1]);
    add_fp(norm, norm, scale);

    flt_reciprocal_fp(scale, norm);

    /* real part: a/(a^2+b^2) */
    mul_fp(out[0], inp[0], scale);

    /* imaginary part: -b/(a^2+b^2) */
    mul_fp(out[1], inp[1], scale);
    neg_fp(out[1], out[1]);
}

/*
 * Fp inversion used by the exported blst_fp_* entry points below.
 *
 * The candidate result comes from ct_inverse_mod_384, post-processed by
 * redc_mont_384 and a multiplication by BLS12_381_RR. Outside of fuzzing
 * builds the candidate is multiplied by |inp| and accepted only if the
 * product equals BLS12_381_Rx.p or is all-zero; otherwise the result is
 * recomputed with flt_reciprocal_fp.
 */
static void reciprocal_fp(vec384 out, const vec384 inp)
{
    /* numerically this is BLS12_381_P << 3, i.e. top bit of the top limb set */
    static const vec384 Px8 = {    /* left-aligned value of the modulus */
        TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd),
        TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb),
        TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2)
    };
    /*
     * |x| receives the wide output of ct_inverse_mod_384; |r| views the
     * same storage as two vec384 values: r[0] holds the candidate result,
     * r[1] is scratch for the check below.
     */
    union { vec768 x; vec384 r[2]; } temp;

    ct_inverse_mod_384(temp.x, inp, BLS12_381_P, Px8);
    /* reduce the wide value into r[0]; output overlaps the input's storage */
    redc_mont_384(temp.r[0], temp.x, BLS12_381_P, p0);
    /* presumably brings the value back into Montgomery form -- confirm */
    mul_mont_384(temp.r[0], temp.r[0], BLS12_381_RR, BLS12_381_P, p0);

#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    /* sign goes straight to flt_reciprocal */
    /* self-check: candidate * inp */
    mul_mont_384(temp.r[1], temp.r[0], inp, BLS12_381_P, p0);
    /*
     * Accept when the product is BLS12_381_Rx.p (presumably Montgomery
     * "one") or zero (presumably the inp == 0 case). Both tests are
     * always evaluated because they are combined with '|', not '||'.
     */
    if (vec_is_equal(temp.r[1],  BLS12_381_Rx.p, sizeof(vec384)) |
        vec_is_zero(temp.r[1], sizeof(vec384)))
        vec_copy(out, temp.r[0], sizeof(vec384));
    else
        flt_reciprocal_fp(out, inp);    /* check failed: recompute */
#else
    /* fuzzing build: return the candidate without the self-check/fallback */
    vec_copy(out, temp.r[0], sizeof(vec384));
#endif
}

/* Exported Fp inversion: writes the result to |out|; forwards to reciprocal_fp. */
void blst_fp_inverse(vec384 out, const vec384 inp)
{   reciprocal_fp(out, inp);   }

/*
 * Exported under the "eucl" name; in this file it forwards to the very
 * same reciprocal_fp that blst_fp_inverse uses.
 */
void blst_fp_eucl_inverse(vec384 ret, const vec384 a)
{   reciprocal_fp(ret, a);   }

/*
 * Fp2 inversion built on top of reciprocal_fp.
 *
 * Writing inp as re + im*i (re = inp[0], im = inp[1]):
 *
 *   |out| = 1/(re + im*i) = re/(re^2+im^2) - im/(re^2+im^2)*i
 */
static void reciprocal_fp2(vec384x out, const vec384x inp)
{
    vec384 denom;       /* re^2, then re^2 + im^2 */
    vec384 inv_denom;   /* im^2, then 1/(re^2 + im^2) */

    /* denominator shared by both output coordinates */
    sqr_fp(denom, inp[0]);
    sqr_fp(inv_denom, inp[1]);
    add_fp(denom, denom, inv_denom);

    /* the only Fp inversion needed */
    reciprocal_fp(inv_denom, denom);

    /* scale the conjugate: out = (re - im*i) * inv_denom */
    mul_fp(out[0], inp[0], inv_denom);
    mul_fp(out[1], inp[1], inv_denom);
    neg_fp(out[1], out[1]);
}

/* Exported Fp2 inversion: writes the result to |out|; forwards to reciprocal_fp2. */
void blst_fp2_inverse(vec384x out, const vec384x inp)
{   reciprocal_fp2(out, inp);   }

/*
 * Exported under the "eucl" name; in this file it forwards to the very
 * same reciprocal_fp2 that blst_fp2_inverse uses.
 */
void blst_fp2_eucl_inverse(vec384x out, const vec384x inp)
{   reciprocal_fp2(out, inp);   }

/*
 * Inversion modulo BLS12_381_r, used by the exported blst_fr_* entry
 * points. Unlike reciprocal_fp there is no self-check or fallback here:
 * the output of the three steps below is returned as is.
 */
static void reciprocal_fr(vec256 out, const vec256 inp)
{
    /* left-aligned value of the modulus (numerically r << 1) */
    static const vec256 r_aligned = {
        TO_LIMB_T(0xfffffffe00000002),
        TO_LIMB_T(0xa77b4805fffcb7fd),
        TO_LIMB_T(0x6673b0101343b00a),
        TO_LIMB_T(0xe7db4ea6533afa90)
    };
    vec512 wide;    /* double-width output of ct_inverse_mod_256 */

    /* 1. raw inverse into the wide buffer */
    ct_inverse_mod_256(wide, inp, BLS12_381_r, r_aligned);

    /* 2. reduce the wide value down to a vec256 */
    redc_mont_256(out, wide, BLS12_381_r, r0);

    /* 3. multiply by BLS12_381_rRR (presumably back to Montgomery form) */
    mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);
}

/* Exported Fr inversion: writes the result to |out|; forwards to reciprocal_fr. */
void blst_fr_inverse(vec256 out, const vec256 inp)
{   reciprocal_fr(out, inp);   }

/*
 * Exported under the "eucl" name; in this file it forwards to the very
 * same reciprocal_fr that blst_fr_inverse uses.
 */
void blst_fr_eucl_inverse(vec256 out, const vec256 inp)
{   reciprocal_fr(out, inp);   }

Coverage Report

Created: 2026-02-14 06:48

Line	Count	Source
1		/*
2		* Copyright Supranational LLC
3		* Licensed under the Apache License, Version 2.0, see LICENSE for details.
4		* SPDX-License-Identifier: Apache-2.0
5		*/
6
7		#include "fields.h"
8
9		#ifdef __OPTIMIZE_SIZE__
10		/*
11		* 608 multiplications for scalar inversion modulo BLS12-381 prime, 32%
12		* more than corresponding optimal addition-chain, plus mispredicted
13		* branch penalties on top of that... The addition chain below was
14		* measured to be >50% faster.
15		*/
16		static void flt_reciprocal_fp(vec384 out, const vec384 inp)
17		{
18		static const byte BLS12_381_P_minus_2[] = {
19		TO_BYTES(0xb9feffffffffaaa9), TO_BYTES(0x1eabfffeb153ffff),
20		TO_BYTES(0x6730d2a0f6b0f624), TO_BYTES(0x64774b84f38512bf),
21		TO_BYTES(0x4b1ba7b6434bacd7), TO_BYTES(0x1a0111ea397fe69a)
22		};
23
24		exp_mont_384(out, inp, BLS12_381_P_minus_2, 381, BLS12_381_P, p0);
25		}
26		#else
27		# define sqr(ret,a) sqr_fp(ret,a)
28		# define mul(ret,a,b) mul_fp(ret,a,b)
29		# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b)
30
31		# include "recip-addchain.h"
32		static void flt_reciprocal_fp(vec384 out, const vec384 inp)
33	0	{
34	0	RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384);
35	0	}
36		# undef RECIPROCAL_MOD_BLS12_381_P
37		# undef sqr_n_mul
38		# undef mul
39		# undef sqr
40		#endif
41
42		static void flt_reciprocal_fp2(vec384x out, const vec384x inp)
43	0	{
44	0	vec384 t0, t1;
45	0
46	0	/*
47	0	* \|out\| = 1/(a + bi) = a/(a^2+b^2) - b/(a^2+b^2)i
48	0	*/
49	0	sqr_fp(t0, inp[0]);
50	0	sqr_fp(t1, inp[1]);
51	0	add_fp(t0, t0, t1);
52	0	flt_reciprocal_fp(t1, t0);
53	0	mul_fp(out[0], inp[0], t1);
54	0	mul_fp(out[1], inp[1], t1);
55	0	neg_fp(out[1], out[1]);
56	0	}
57
58		static void reciprocal_fp(vec384 out, const vec384 inp)
59	1.06k	{
60	1.06k	static const vec384 Px8 = { /* left-aligned value of the modulus */
61	1.06k	TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd),
62	1.06k	TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb),
63	1.06k	TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2)
64	1.06k	};
65	1.06k	union { vec768 x; vec384 r[2]; } temp;
66
67	1.06k	ct_inverse_mod_384(temp.x, inp, BLS12_381_P, Px8);
68	1.06k	redc_mont_384(temp.r[0], temp.x, BLS12_381_P, p0);
69	1.06k	mul_mont_384(temp.r[0], temp.r[0], BLS12_381_RR, BLS12_381_P, p0);
70
71		#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
72		/* sign goes straight to flt_reciprocal */
73		mul_mont_384(temp.r[1], temp.r[0], inp, BLS12_381_P, p0);
74		if (vec_is_equal(temp.r[1], BLS12_381_Rx.p, sizeof(vec384)) \|
75		vec_is_zero(temp.r[1], sizeof(vec384)))
76		vec_copy(out, temp.r[0], sizeof(vec384));
77		else
78		flt_reciprocal_fp(out, inp);
79		#else
80	1.06k	vec_copy(out, temp.r[0], sizeof(vec384));
81	1.06k	#endif
82	1.06k	}
83
84		void blst_fp_inverse(vec384 out, const vec384 inp)
85	13	{ reciprocal_fp(out, inp); }
86
87		void blst_fp_eucl_inverse(vec384 ret, const vec384 a)
88	3	{ reciprocal_fp(ret, a); }
89
90		static void reciprocal_fp2(vec384x out, const vec384x inp)
91	319	{
92	319	vec384 t0, t1;
93
94		/*
95		* \|out\| = 1/(a + bi) = a/(a^2+b^2) - b/(a^2+b^2)i
96		*/
97	319	sqr_fp(t0, inp[0]);
98	319	sqr_fp(t1, inp[1]);
99	319	add_fp(t0, t0, t1);
100	319	reciprocal_fp(t1, t0);
101	319	mul_fp(out[0], inp[0], t1);
102	319	mul_fp(out[1], inp[1], t1);
103	319	neg_fp(out[1], out[1]);
104	319	}
105
106		void blst_fp2_inverse(vec384x out, const vec384x inp)
107	0	{ reciprocal_fp2(out, inp); }
108
109		void blst_fp2_eucl_inverse(vec384x out, const vec384x inp)
110	0	{ reciprocal_fp2(out, inp); }
111
112		static void reciprocal_fr(vec256 out, const vec256 inp)
113	12	{
114	12	static const vec256 rx2 = { /* left-aligned value of the modulus */
115	12	TO_LIMB_T(0xfffffffe00000002), TO_LIMB_T(0xa77b4805fffcb7fd),
116	12	TO_LIMB_T(0x6673b0101343b00a), TO_LIMB_T(0xe7db4ea6533afa90),
117	12	};
118	12	vec512 temp;
119
120	12	ct_inverse_mod_256(temp, inp, BLS12_381_r, rx2);
121	12	redc_mont_256(out, temp, BLS12_381_r, r0);
122	12	mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);
123	12	}
124
125		void blst_fr_inverse(vec256 out, const vec256 inp)
126	8	{ reciprocal_fr(out, inp); }
127
128		void blst_fr_eucl_inverse(vec256 out, const vec256 inp)
129	4	{ reciprocal_fr(out, inp); }