/src/blst_normal/src/ec_ops.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright Supranational LLC  | 
3  |  |  * Licensed under the Apache License, Version 2.0, see LICENSE for details.  | 
4  |  |  * SPDX-License-Identifier: Apache-2.0  | 
5  |  |  */  | 
6  |  | #ifndef __BLS12_384_ASM_EC_OPS_H__  | 
7  |  | #define __BLS12_384_ASM_EC_OPS_H__  | 
8  |  | /*  | 
9  |  |  * Addition that can handle doubling [as well as points at infinity,  | 
10  |  |  * which are encoded as Z==0] in constant time. It naturally comes at  | 
11  |  |  * a cost, but this subroutine should be called only when independent  | 
12  |  |  * points are processed, which is considered a reasonable compromise.  | 
13  |  |  * For example, ptype##s_mult_w5 calls it, but since the *major* gain  | 
14  |  |  * is the result of pure doublings being effectively divided by the  | 
15  |  |  * number of points, a slightly slower addition can be tolerated. But  | 
16  |  |  * what is the additional cost more specifically? The best addition  | 
17  |  |  * result is 11M+5S, while this routine takes 13M+5S (+1M+1S if a4!=0), as per  | 
18  |  |  *  | 
19  |  |  * -------------+-------------  | 
20  |  |  * addition     | doubling  | 
21  |  |  * -------------+-------------  | 
22  |  |  * U1 = X1*Z2^2 | U1 = X1  | 
23  |  |  * U2 = X2*Z1^2 |  | 
24  |  |  * S1 = Y1*Z2^3 | S1 = Y1  | 
25  |  |  * S2 = Y2*Z1^3 |  | 
26  |  |  * zz = Z1*Z2   | zz = Z1  | 
27  |  |  * H = U2-U1    | H' = 2*Y1  | 
28  |  |  * R = S2-S1    | R' = 3*X1^2[+a*Z1^4]  | 
29  |  |  * sx = U1+U2   | sx = X1+X1  | 
30  |  |  * -------------+-------------  | 
31  |  |  * H!=0 || R!=0 | H==0 && R==0  | 
32  |  |  *  | 
33  |  |  *      X3 = R^2-H^2*sx  | 
34  |  |  *      Y3 = R*(H^2*U1-X3)-H^3*S1  | 
35  |  |  *      Z3 = H*zz  | 
36  |  |  *  | 
37  |  |  * As for the R!=0 condition in the context of H==0, a.k.a. P-P: the result  | 
38  |  |  * is infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0.  | 
39  |  |  */  | 
40  |  | #define POINT_DADD_IMPL(ptype, bits, field) \  | 
41  |  | static void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2, \  | 
42  | 624k  |                          const vec##bits a4) \  | 
43  | 624k  | { \ | 
44  | 624k  |     ptype p3; /* starts as (U1, S1, zz) from addition side */\  | 
45  | 624k  |     struct { vec##bits H, R, sx; } add, dbl; \ | 
46  | 624k  |     bool_t p1inf, p2inf, is_dbl; \  | 
47  | 624k  | \  | 
48  | 624k  |     add_##field(dbl.sx, p1->X, p1->X);  /* doubling side: sx = X1+X1 */\  | 
49  | 624k  |     sqr_##field(dbl.R, p1->X);          /* X1^2 */\  | 
50  | 624k  |     mul_by_3_##field(dbl.R, dbl.R);     /* R' = 3*X1^2 */\  | 
51  | 624k  |     add_##field(dbl.H, p1->Y, p1->Y);   /* H' = 2*Y1 */\  | 
52  | 624k  | \  | 
53  | 624k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); /* |p2| is infinity if Z2==0 */\  | 
54  | 624k  |     sqr_##field(p3.X, p2->Z);           /* Z2^2, [borrow p3.X] */\  | 
55  | 624k  |     mul_##field(p3.Z, p1->Z, p2->Z);    /* zz = Z1*Z2 */\  | 
56  | 624k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); /* |p1| is infinity if Z1==0 */\  | 
57  | 624k  |     sqr_##field(add.H, p1->Z);          /* Z1^2, [borrow add.H] */\  | 
58  | 624k  | \  | 
59  | 624k  |     if (a4 != NULL) { \ | 
60  | 2.74k  |         sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\  | 
61  | 2.74k  |         mul_##field(p3.Y, p3.Y, a4);    /* a*Z1^4 */\  | 
62  | 2.74k  |         add_##field(dbl.R, dbl.R, p3.Y);/* R' = 3*X1^2+a*Z1^4 */\  | 
63  | 2.74k  |     } \  | 
64  | 624k  | \  | 
65  | 624k  |     mul_##field(p3.Y, p1->Y, p2->Z);    /* Y1*Z2 */\  | 
66  | 624k  |     mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\  | 
67  | 624k  |     mul_##field(add.R, p2->Y, p1->Z);   /* Y2*Z1 */\  | 
68  | 624k  |     mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\  | 
69  | 624k  |     sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\  | 
70  | 624k  | \  | 
71  | 624k  |     mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\  | 
72  | 624k  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  | 
73  | 624k  | \  | 
74  | 624k  |     add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\  | 
75  | 624k  |     sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\  | 
76  | 624k  | \  | 
77  | 624k  |     /* make the choice between addition and doubling: the zero check spans add.H and the add.R that follows it, i.e. doubling is taken when H==0 && R==0 */\  | 
78  | 624k  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \  | 
79  | 624k  |     vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      /* (U1, S1, zz) = (X1, Y1, Z1) when doubling */\  | 
80  | 624k  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl); /* (H, R, sx) = (H', R', X1+X1) when doubling */\  | 
81  | 624k  |     /* |p3| and |add| hold all inputs now, |p3| will hold output */\  | 
82  | 624k  | \  | 
83  | 624k  |     mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*zz, i.e. H*Z1*Z2 or H'*Z1 */\  | 
84  | 624k  | \  | 
85  | 624k  |     sqr_##field(dbl.H, add.H);          /* H^2, [dbl.* are scratch from here on] */\  | 
86  | 624k  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  | 
87  | 624k  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  | 
88  | 624k  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  | 
89  | 624k  | \  | 
90  | 624k  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  | 
91  | 624k  |     sqr_##field(p3.X, add.R);           /* R^2 */\  | 
92  | 624k  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  | 
93  | 624k  | \  | 
94  | 624k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  | 
95  | 624k  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  | 
96  | 624k  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  | 
97  | 624k  | \  | 
98  | 624k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); /* |p2| at infinity: result is |p1| */\  | 
99  | 624k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); /* |p1| at infinity: result is |p2| */\  | 
100  | 624k  | } Line  | Count  | Source  |  42  | 418k  |                          const vec##bits a4) \  |  43  | 418k  | { \ |  44  | 418k  |     ptype p3; /* starts as (U1, S1, zz) from addition side */\  |  45  | 418k  |     struct { vec##bits H, R, sx; } add, dbl; \ |  46  | 418k  |     bool_t p1inf, p2inf, is_dbl; \  |  47  | 418k  | \  |  48  | 418k  |     add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\  |  49  | 418k  |     sqr_##field(dbl.R, p1->X);          /* X1^2 */\  |  50  | 418k  |     mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\  |  51  | 418k  |     add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\  |  52  | 418k  | \  |  53  | 418k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \  |  54  | 418k  |     sqr_##field(p3.X, p2->Z);           /* Z2^2 */\  |  55  | 418k  |     mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\  |  56  | 418k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  57  | 418k  |     sqr_##field(add.H, p1->Z);          /* Z1^2 */\  |  58  | 418k  | \  |  59  | 418k  |     if (a4 != NULL) { \ |  60  | 436  |         sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\  |  61  | 436  |         mul_##field(p3.Y, p3.Y, a4);    \  |  62  | 436  |         add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\  |  63  | 436  |     } \  |  64  | 418k  | \  |  65  | 418k  |     mul_##field(p3.Y, p1->Y, p2->Z);    \  |  66  | 418k  |     mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\  |  67  | 418k  |     mul_##field(add.R, p2->Y, p1->Z);   \  |  68  | 418k  |     mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\  |  69  | 418k  |     sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\  |  70  | 418k  | \  |  71  | 418k  |     mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\  |  72  | 418k  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  |  73  | 418k  | \  |  74  | 418k  |     add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\  |  75  | 418k  |     
sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\  |  76  | 418k  | \  |  77  | 418k  |     /* make the choice between addition and doubling */\  |  78  | 418k  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \  |  79  | 418k  |     vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \  |  80  | 418k  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \  |  81  | 418k  |     /* |p3| and |add| hold all inputs now, |p3| will hold output */\  |  82  | 418k  | \  |  83  | 418k  |     mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\  |  84  | 418k  | \  |  85  | 418k  |     sqr_##field(dbl.H, add.H);          /* H^2 */\  |  86  | 418k  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  |  87  | 418k  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  |  88  | 418k  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  |  89  | 418k  | \  |  90  | 418k  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  |  91  | 418k  |     sqr_##field(p3.X, add.R);           /* R^2 */\  |  92  | 418k  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  |  93  | 418k  | \  |  94  | 418k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  |  95  | 418k  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  |  96  | 418k  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  |  97  | 418k  | \  |  98  | 418k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \  |  99  | 418k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); \  |  100  | 418k  | }  |  
 Line  | Count  | Source  |  42  | 205k  |                          const vec##bits a4) \  |  43  | 205k  | { \ |  44  | 205k  |     ptype p3; /* starts as (U1, S1, zz) from addition side */\  |  45  | 205k  |     struct { vec##bits H, R, sx; } add, dbl; \ |  46  | 205k  |     bool_t p1inf, p2inf, is_dbl; \  |  47  | 205k  | \  |  48  | 205k  |     add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\  |  49  | 205k  |     sqr_##field(dbl.R, p1->X);          /* X1^2 */\  |  50  | 205k  |     mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\  |  51  | 205k  |     add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\  |  52  | 205k  | \  |  53  | 205k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \  |  54  | 205k  |     sqr_##field(p3.X, p2->Z);           /* Z2^2 */\  |  55  | 205k  |     mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\  |  56  | 205k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  57  | 205k  |     sqr_##field(add.H, p1->Z);          /* Z1^2 */\  |  58  | 205k  | \  |  59  | 205k  |     if (a4 != NULL) { \ |  60  | 2.31k  |         sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\  |  61  | 2.31k  |         mul_##field(p3.Y, p3.Y, a4);    \  |  62  | 2.31k  |         add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\  |  63  | 2.31k  |     } \  |  64  | 205k  | \  |  65  | 205k  |     mul_##field(p3.Y, p1->Y, p2->Z);    \  |  66  | 205k  |     mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\  |  67  | 205k  |     mul_##field(add.R, p2->Y, p1->Z);   \  |  68  | 205k  |     mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\  |  69  | 205k  |     sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\  |  70  | 205k  | \  |  71  | 205k  |     mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\  |  72  | 205k  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  |  73  | 205k  | \  |  74  | 205k  |     add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\  |  75  | 205k  |     sub_##field(add.H, add.H, 
p3.X);    /* H = U2-U1 */\  |  76  | 205k  | \  |  77  | 205k  |     /* make the choice between addition and doubling */\  |  78  | 205k  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \  |  79  | 205k  |     vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \  |  80  | 205k  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \  |  81  | 205k  |     /* |p3| and |add| hold all inputs now, |p3| will hold output */\  |  82  | 205k  | \  |  83  | 205k  |     mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\  |  84  | 205k  | \  |  85  | 205k  |     sqr_##field(dbl.H, add.H);          /* H^2 */\  |  86  | 205k  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  |  87  | 205k  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  |  88  | 205k  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  |  89  | 205k  | \  |  90  | 205k  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  |  91  | 205k  |     sqr_##field(p3.X, add.R);           /* R^2 */\  |  92  | 205k  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  |  93  | 205k  | \  |  94  | 205k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  |  95  | 205k  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  |  96  | 205k  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  |  97  | 205k  | \  |  98  | 205k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \  |  99  | 205k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); \  |  100  | 205k  | }  |  
  | 
101  |  |  | 
102  |  | /*  | 
103  |  |  * Addition with affine point that can handle doubling [as well as  | 
104  |  |  * points at infinity, with |p1| being encoded as Z==0 and |p2| as  | 
105  |  |  * X,Y==0] in constant time. But at what additional cost? Best  | 
106  |  |  * addition result is 7M+4S, while this routine takes 8M+5S, as per  | 
107  |  |  *  | 
108  |  |  * -------------+-------------  | 
109  |  |  * addition     | doubling  | 
110  |  |  * -------------+-------------  | 
111  |  |  * U1 = X1      | U1 = X2  | 
112  |  |  * U2 = X2*Z1^2 |  | 
113  |  |  * S1 = Y1      | S1 = Y2  | 
114  |  |  * S2 = Y2*Z1^3 |  | 
115  |  |  * H = U2-X1    | H' = 2*Y2  | 
116  |  |  * R = S2-Y1    | R' = 3*X2^2[+a]  | 
117  |  |  * sx = X1+U2   | sx = X2+X2  | 
118  |  |  * zz = H*Z1    | zz = H'  | 
119  |  |  * -------------+-------------  | 
120  |  |  * H!=0 || R!=0 | H==0 && R==0  | 
121  |  |  *  | 
122  |  |  *      X3 = R^2-H^2*sx  | 
123  |  |  *      Y3 = R*(H^2*U1-X3)-H^3*S1  | 
124  |  |  *      Z3 = zz  | 
125  |  |  *  | 
126  |  |  * As for the R!=0 condition in the context of H==0, a.k.a. P-P: the result  | 
127  |  |  * is infinity by virtue of Z3 = zz = H*Z1 = (U2-X1)*Z1 = 0*Z1 == 0.  | 
128  |  |  */  | 
129  |  | #define POINT_DADD_AFFINE_IMPL_A0(ptype, bits, field, one) \  | 
130  |  | static void ptype##_dadd_affine(ptype *out, const ptype *p1, \  | 
131  | 3.41k  |                                             const ptype##_affine *p2) \  | 
132  | 3.41k  | { \ | 
133  | 3.41k  |     ptype p3; /* starts as (,, H*Z1) from addition side */\  | 
134  | 3.41k  |     struct { vec##bits H, R, sx; } add, dbl; \ | 
135  | 3.41k  |     bool_t p1inf, p2inf, is_dbl; \  | 
136  | 3.41k  | \  | 
137  | 3.41k  |     p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); /* |p2| is infinity if X2,Y2==0; the check presumes Y directly follows X in ptype##_affine */\  | 
138  | 3.41k  |     add_##field(dbl.sx, p2->X, p2->X);  /* doubling side: sx = X2+X2 */\  | 
139  | 3.41k  |     sqr_##field(dbl.R, p2->X);          /* X2^2 */\  | 
140  | 3.41k  |     mul_by_3_##field(dbl.R, dbl.R);     /* R' = 3*X2^2 */\  | 
141  | 3.41k  |     add_##field(dbl.H, p2->Y, p2->Y);   /* H' = 2*Y2 */\  | 
142  | 3.41k  | \  | 
143  | 3.41k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); /* |p1| is infinity if Z1==0 */\  | 
144  | 3.41k  |     sqr_##field(add.H, p1->Z);          /* Z1^2, [borrow add.H] */\  | 
145  | 3.41k  |     mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\  | 
146  | 3.41k  |     mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\  | 
147  | 3.41k  |     sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\  | 
148  | 3.41k  | \  | 
149  | 3.41k  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  | 
150  | 3.41k  | \  | 
151  | 3.41k  |     add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\  | 
152  | 3.41k  |     sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\  | 
153  | 3.41k  | \  | 
154  | 3.41k  |     mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\  | 
155  | 3.41k  | \  | 
156  | 3.41k  |     /* make the choice between addition and doubling: the zero check spans add.H and the add.R that follows it, i.e. doubling is taken when H==0 && R==0 */ \  | 
157  | 3.41k  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \  | 
158  | 3.41k  |     vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   /* (U1, S1) = (X2, Y2) when doubling, else (X1, Y1) */\  | 
159  | 3.41k  |     vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);/* zz = H' when doubling, else H*Z1 */\  | 
160  | 3.41k  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  /* (H, R, sx) = (H', R', X2+X2) when doubling */\  | 
161  | 3.41k  |     /* |p3| and |add| hold all inputs now, |p3| will hold output */\  | 
162  | 3.41k  | \  | 
163  | 3.41k  |     sqr_##field(dbl.H, add.H);          /* H^2, [dbl.* are scratch from here on] */\  | 
164  | 3.41k  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  | 
165  | 3.41k  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  | 
166  | 3.41k  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  | 
167  | 3.41k  | \  | 
168  | 3.41k  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  | 
169  | 3.41k  |     sqr_##field(p3.X, add.R);           /* R^2 */\  | 
170  | 3.41k  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  | 
171  | 3.41k  | \  | 
172  | 3.41k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  | 
173  | 3.41k  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  | 
174  | 3.41k  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  | 
175  | 3.41k  | \  | 
176  | 3.41k  |     vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); /* |p1| at infinity: (X3, Y3) = (X2, Y2)... */\  | 
177  | 3.41k  |     vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); /* ...and Z3 = |one| */\  | 
178  | 3.41k  |     vec_select(out, p1, &p3, sizeof(ptype), p2inf); /* |p2| at infinity: result is |p1| */\  | 
179  | 3.41k  | } server.c:POINTonE1_dadd_affine Line  | Count  | Source  |  131  | 808  |                                             const ptype##_affine *p2) \  |  132  | 808  | { \ |  133  | 808  |     ptype p3; /* starts as (,, H*Z1) from addition side */\  |  134  | 808  |     struct { vec##bits H, R, sx; } add, dbl; \ |  135  | 808  |     bool_t p1inf, p2inf, is_dbl; \  |  136  | 808  | \  |  137  | 808  |     p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \  |  138  | 808  |     add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\  |  139  | 808  |     sqr_##field(dbl.R, p2->X);          /* X2^2 */\  |  140  | 808  |     mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\  |  141  | 808  |     add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\  |  142  | 808  | \  |  143  | 808  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  144  | 808  |     sqr_##field(add.H, p1->Z);          /* Z1^2 */\  |  145  | 808  |     mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\  |  146  | 808  |     mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\  |  147  | 808  |     sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\  |  148  | 808  | \  |  149  | 808  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  |  150  | 808  | \  |  151  | 808  |     add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\  |  152  | 808  |     sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\  |  153  | 808  | \  |  154  | 808  |     mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\  |  155  | 808  | \  |  156  | 808  |     /* make the choice between addition and doubling */ \  |  157  | 808  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \  |  158  | 808  |     vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \  |  159  | 808  |     vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\  |  160  | 808  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \  |  161  | 808  |     /* |p3| and |add| hold all inputs now, |p3| will hold output */\  |  
162  | 808  | \  |  163  | 808  |     sqr_##field(dbl.H, add.H);          /* H^2 */\  |  164  | 808  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  |  165  | 808  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  |  166  | 808  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  |  167  | 808  | \  |  168  | 808  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  |  169  | 808  |     sqr_##field(p3.X, add.R);           /* R^2 */\  |  170  | 808  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  |  171  | 808  | \  |  172  | 808  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  |  173  | 808  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  |  174  | 808  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  |  175  | 808  | \  |  176  | 808  |     vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \  |  177  | 808  |     vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \  |  178  | 808  |     vec_select(out, p1, &p3, sizeof(ptype), p2inf); \  |  179  | 808  | }  |  
 server.c:POINTonE2_dadd_affine Line  | Count  | Source  |  131  | 2.60k  |                                             const ptype##_affine *p2) \  |  132  | 2.60k  | { \ |  133  | 2.60k  |     ptype p3; /* starts as (,, H*Z1) from addition side */\  |  134  | 2.60k  |     struct { vec##bits H, R, sx; } add, dbl; \ |  135  | 2.60k  |     bool_t p1inf, p2inf, is_dbl; \  |  136  | 2.60k  | \  |  137  | 2.60k  |     p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \  |  138  | 2.60k  |     add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\  |  139  | 2.60k  |     sqr_##field(dbl.R, p2->X);          /* X2^2 */\  |  140  | 2.60k  |     mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\  |  141  | 2.60k  |     add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\  |  142  | 2.60k  | \  |  143  | 2.60k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  144  | 2.60k  |     sqr_##field(add.H, p1->Z);          /* Z1^2 */\  |  145  | 2.60k  |     mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\  |  146  | 2.60k  |     mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\  |  147  | 2.60k  |     sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\  |  148  | 2.60k  | \  |  149  | 2.60k  |     mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\  |  150  | 2.60k  | \  |  151  | 2.60k  |     add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\  |  152  | 2.60k  |     sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\  |  153  | 2.60k  | \  |  154  | 2.60k  |     mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\  |  155  | 2.60k  | \  |  156  | 2.60k  |     /* make the choice between addition and doubling */ \  |  157  | 2.60k  |     is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \  |  158  | 2.60k  |     vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \  |  159  | 2.60k  |     vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\  |  160  | 2.60k  |     vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \  |  161  | 2.60k  |     /* |p3| and |add| hold 
all inputs now, |p3| will hold output */\  |  162  | 2.60k  | \  |  163  | 2.60k  |     sqr_##field(dbl.H, add.H);          /* H^2 */\  |  164  | 2.60k  |     mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\  |  165  | 2.60k  |     mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\  |  166  | 2.60k  |     mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\  |  167  | 2.60k  | \  |  168  | 2.60k  |     mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\  |  169  | 2.60k  |     sqr_##field(p3.X, add.R);           /* R^2 */\  |  170  | 2.60k  |     sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\  |  171  | 2.60k  | \  |  172  | 2.60k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\  |  173  | 2.60k  |     mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\  |  174  | 2.60k  |     sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\  |  175  | 2.60k  | \  |  176  | 2.60k  |     vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \  |  177  | 2.60k  |     vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \  |  178  | 2.60k  |     vec_select(out, p1, &p3, sizeof(ptype), p2inf); \  |  179  | 2.60k  | }  |  
  | 
180  |  |  | 
181  |  | /*  | 
182  |  |  * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl  | 
183  |  |  * with a twist to handle either input at infinity, which is encoded as Z==0.  | 
184  |  |  */  | 
185  |  | #define POINT_ADD_IMPL(ptype, bits, field) \  | 
186  | 166k  | static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \  | 
187  | 166k  | { \ | 
188  | 166k  |     ptype p3; /* p3.Z doubles as scratch for S2 and r until Z3 is computed */\  | 
189  | 166k  |     vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \  | 
190  | 166k  |     bool_t p1inf, p2inf; \  | 
191  | 166k  | \  | 
192  | 166k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); /* |p1| is infinity if Z1==0 */\  | 
193  | 166k  |     sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\  | 
194  | 166k  | \  | 
195  | 166k  |     mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1, [borrow p3.Z] */\  | 
196  | 166k  |     mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\  | 
197  | 166k  | \  | 
198  | 166k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); /* |p2| is infinity if Z2==0 */\  | 
199  | 166k  |     sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\  | 
200  | 166k  | \  | 
201  | 166k  |     mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\  | 
202  | 166k  |     mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\  | 
203  | 166k  | \  | 
204  | 166k  |     sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\  | 
205  | 166k  |     add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\  | 
206  | 166k  | \  | 
207  | 166k  |     mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\  | 
208  | 166k  |     mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\  | 
209  | 166k  | \  | 
210  | 166k  |     sub_##field(H, H, U1);              /* H = U2-U1 */\  | 
211  | 166k  | \  | 
212  | 166k  |     add_##field(I, H, H);               /* 2*H */\  | 
213  | 166k  |     sqr_##field(I, I);                  /* I = (2*H)^2 */\  | 
214  | 166k  | \  | 
215  | 166k  |     mul_##field(J, H, I);               /* J = H*I */\  | 
216  | 166k  |     mul_##field(S1, S1, J);             /* S1*J */\  | 
217  | 166k  | \  | 
218  | 166k  |     mul_##field(p3.Y, U1, I);           /* V = U1*I */\  | 
219  | 166k  | \  | 
220  | 166k  |     sqr_##field(p3.X, p3.Z);            /* r^2 */\  | 
221  | 166k  |     sub_##field(p3.X, p3.X, J);         /* r^2-J */\  | 
222  | 166k  |     sub_##field(p3.X, p3.X, p3.Y);      /* r^2-J-V */\  | 
223  | 166k  |     sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\  | 
224  | 166k  | \  | 
225  | 166k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\  | 
226  | 166k  |     mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\  | 
227  | 166k  |     sub_##field(p3.Y, p3.Y, S1);        /* r*(V-X3)-S1*J */\  | 
228  | 166k  |     sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\  | 
229  | 166k  | \  | 
230  | 166k  |     add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2, [r is no longer needed] */\  | 
231  | 166k  |     sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\  | 
232  | 166k  |     sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\  | 
233  | 166k  |     sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\  | 
234  | 166k  |     mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\  | 
235  | 166k  | \  | 
236  | 166k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); /* |p2| at infinity: result is |p1| */\  | 
237  | 166k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); /* |p1| at infinity: result is |p2| */\  | 
238  | 166k  | } Line  | Count  | Source  |  186  | 121k  | static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \  |  187  | 121k  | { \ |  188  | 121k  |     ptype p3; \  |  189  | 121k  |     vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \  |  190  | 121k  |     bool_t p1inf, p2inf; \  |  191  | 121k  | \  |  192  | 121k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  193  | 121k  |     sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\  |  194  | 121k  | \  |  195  | 121k  |     mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\  |  196  | 121k  |     mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\  |  197  | 121k  | \  |  198  | 121k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \  |  199  | 121k  |     sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\  |  200  | 121k  | \  |  201  | 121k  |     mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\  |  202  | 121k  |     mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\  |  203  | 121k  | \  |  204  | 121k  |     sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\  |  205  | 121k  |     add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\  |  206  | 121k  | \  |  207  | 121k  |     mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\  |  208  | 121k  |     mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\  |  209  | 121k  | \  |  210  | 121k  |     sub_##field(H, H, U1);              /* H = U2-U1 */\  |  211  | 121k  | \  |  212  | 121k  |     add_##field(I, H, H);               /* 2*H */\  |  213  | 121k  |     sqr_##field(I, I);                  /* I = (2*H)^2 */\  |  214  | 121k  | \  |  215  | 121k  |     mul_##field(J, H, I);               /* J = H*I */\  |  216  | 121k  |     mul_##field(S1, S1, J);             /* S1*J */\  |  217  | 121k  | \  |  218  | 121k  |     mul_##field(p3.Y, U1, I);           /* V = U1*I */\  |  219  | 121k  | \  |  220  | 121k  |     sqr_##field(p3.X, p3.Z);            /* r^2 */\  |  221  | 121k  |     sub_##field(p3.X, 
p3.X, J);         /* r^2-J */\  |  222  | 121k  |     sub_##field(p3.X, p3.X, p3.Y);      \  |  223  | 121k  |     sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\  |  224  | 121k  | \  |  225  | 121k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\  |  226  | 121k  |     mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\  |  227  | 121k  |     sub_##field(p3.Y, p3.Y, S1);        \  |  228  | 121k  |     sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\  |  229  | 121k  | \  |  230  | 121k  |     add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\  |  231  | 121k  |     sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\  |  232  | 121k  |     sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\  |  233  | 121k  |     sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\  |  234  | 121k  |     mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\  |  235  | 121k  | \  |  236  | 121k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \  |  237  | 121k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); \  |  238  | 121k  | }  |  
 Line  | Count  | Source  |  186  | 44.6k  | static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \  |  187  | 44.6k  | { \ |  188  | 44.6k  |     ptype p3; \  |  189  | 44.6k  |     vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \  |  190  | 44.6k  |     bool_t p1inf, p2inf; \  |  191  | 44.6k  | \  |  192  | 44.6k  |     p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \  |  193  | 44.6k  |     sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\  |  194  | 44.6k  | \  |  195  | 44.6k  |     mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\  |  196  | 44.6k  |     mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\  |  197  | 44.6k  | \  |  198  | 44.6k  |     p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \  |  199  | 44.6k  |     sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\  |  200  | 44.6k  | \  |  201  | 44.6k  |     mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\  |  202  | 44.6k  |     mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\  |  203  | 44.6k  | \  |  204  | 44.6k  |     sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\  |  205  | 44.6k  |     add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\  |  206  | 44.6k  | \  |  207  | 44.6k  |     mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\  |  208  | 44.6k  |     mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\  |  209  | 44.6k  | \  |  210  | 44.6k  |     sub_##field(H, H, U1);              /* H = U2-U1 */\  |  211  | 44.6k  | \  |  212  | 44.6k  |     add_##field(I, H, H);               /* 2*H */\  |  213  | 44.6k  |     sqr_##field(I, I);                  /* I = (2*H)^2 */\  |  214  | 44.6k  | \  |  215  | 44.6k  |     mul_##field(J, H, I);               /* J = H*I */\  |  216  | 44.6k  |     mul_##field(S1, S1, J);             /* S1*J */\  |  217  | 44.6k  | \  |  218  | 44.6k  |     mul_##field(p3.Y, U1, I);           /* V = U1*I */\  |  219  | 44.6k  | \  |  220  | 44.6k  |     sqr_##field(p3.X, p3.Z);            /* r^2 */\  |  221  | 44.6k  |    
 sub_##field(p3.X, p3.X, J);         /* r^2-J */\  |  222  | 44.6k  |     sub_##field(p3.X, p3.X, p3.Y);      \  |  223  | 44.6k  |     sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\  |  224  | 44.6k  | \  |  225  | 44.6k  |     sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\  |  226  | 44.6k  |     mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\  |  227  | 44.6k  |     sub_##field(p3.Y, p3.Y, S1);        \  |  228  | 44.6k  |     sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\  |  229  | 44.6k  | \  |  230  | 44.6k  |     add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\  |  231  | 44.6k  |     sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\  |  232  | 44.6k  |     sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\  |  233  | 44.6k  |     sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\  |  234  | 44.6k  |     mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\  |  235  | 44.6k  | \  |  236  | 44.6k  |     vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \  |  237  | 44.6k  |     vec_select(out, p2, &p3, sizeof(ptype), p1inf); \  |  238  | 44.6k  | }  |  
  | 
239  |  |  | 
240  |  | /*  | 
241  |  |  * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl  | 
242  |  |  * with twist to handle either input at infinity, with |p1| encoded as Z==0,  | 
243  |  |  * and |p2| as X==Y==0.  | 
244  |  |  */  | 
#define POINT_ADD_AFFINE_IMPL(ptype, bits, field, one) \
static void ptype##_add_affine(ptype *out, const ptype *p1, \
                               const ptype##_affine *p2) \
{ \
    /* out = p1 + p2 with Jacobian |p1| and affine |p2| (madd-2007-bl). */ \
    /* The sum is assembled in a local, so |out| is only written by the */ \
    /* last select.                                                      */ \
    ptype sum; \
    vec##bits z1_sq, h, h_sq, ii, j; \
    /* infinity flags: Z1 == 0 for |p1|; X2 and Y2 both zero for |p2| */ \
    bool_t p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
    bool_t p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
\
    sqr_##field(z1_sq, p1->Z);              /* Z1^2 */ \
\
    mul_##field(sum.Z, z1_sq, p1->Z);       /* Z1^3 */ \
    mul_##field(sum.Z, sum.Z, p2->Y);       /* S2 = Y2*Z1^3 */ \
\
    mul_##field(h, p2->X, z1_sq);           /* U2 = X2*Z1^2 */ \
    sub_##field(h, h, p1->X);               /* H = U2-X1 */ \
\
    sqr_##field(h_sq, h);                   /* HH = H^2 */ \
    add_##field(ii, h_sq, h_sq);            /* 2*HH */ \
    add_##field(ii, ii, ii);                /* I = 4*HH */ \
\
    mul_##field(sum.Y, p1->X, ii);          /* V = X1*I */ \
    mul_##field(j, h, ii);                  /* J = H*I */ \
    mul_##field(ii, j, p1->Y);              /* |ii| reused: Y1*J */ \
\
    sub_##field(sum.Z, sum.Z, p1->Y);       /* S2-Y1 */ \
    add_##field(sum.Z, sum.Z, sum.Z);       /* r = 2*(S2-Y1), parked in Z */ \
\
    sqr_##field(sum.X, sum.Z);              /* r^2 */ \
    sub_##field(sum.X, sum.X, j);           /* r^2-J */ \
    sub_##field(sum.X, sum.X, sum.Y);       /* r^2-J-V */ \
    sub_##field(sum.X, sum.X, sum.Y);       /* X3 = r^2-J-2*V */ \
\
    sub_##field(sum.Y, sum.Y, sum.X);       /* V-X3 */ \
    mul_##field(sum.Y, sum.Y, sum.Z);       /* r*(V-X3) */ \
    sub_##field(sum.Y, sum.Y, ii);          /* r*(V-X3)-Y1*J */ \
    sub_##field(sum.Y, sum.Y, ii);          /* Y3 = r*(V-X3)-2*Y1*J */ \
\
    add_##field(sum.Z, p1->Z, h);           /* Z1+H */ \
    sqr_##field(sum.Z, sum.Z);              /* (Z1+H)^2 */ \
    sub_##field(sum.Z, sum.Z, z1_sq);       /* (Z1+H)^2-Z1^2 */ \
    sub_##field(sum.Z, sum.Z, h_sq);        /* Z3 = (Z1+H)^2-Z1^2-HH */ \
\
    /* |p1| at infinity: take (X2, Y2) from |p2| and |one| as Z */ \
    vec_select(sum.Z, one, sum.Z, sizeof(sum.Z), p1inf); \
    vec_select(sum.X, p2,  sum.X, 2*sizeof(sum.X), p1inf); \
    /* |p2| at infinity: the answer is |p1| itself */ \
    vec_select(out, p1, &sum, sizeof(ptype), p2inf); \
}
 Unexecuted instantiation: server.c:POINTonE2_add_affine  | 
295  |  |  | 
296  |  | /*  | 
297  |  |  * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l  | 
298  |  |  */  | 
#define POINT_DOUBLE_IMPL_A0(ptype, bits, field) \
static void ptype##_double(ptype *p3, const ptype *p1) \
{ \
    /* Jacobian doubling p3 = 2*p1 for a curve with a == 0 (dbl-2009-l). */ \
    /* Each coordinate of |p1| is read for the last time before the same */ \
    /* coordinate of |p3| is stored, so the statement order must stay.   */ \
    vec##bits slope, d, y4; \
\
    sqr_##field(slope, p1->X);              /* A = X1^2 */ \
    sqr_##field(d, p1->Y);                  /* B = Y1^2 */ \
    sqr_##field(y4, d);                     /* C = B^2 = Y1^4 */ \
\
    add_##field(d, d, p1->X);               /* X1+B */ \
    sqr_##field(d, d);                      /* (X1+B)^2 */ \
    sub_##field(d, d, slope);               /* (X1+B)^2-A */ \
    sub_##field(d, d, y4);                  /* (X1+B)^2-A-C */ \
    add_##field(d, d, d);                   /* D = 2*((X1+B)^2-A-C) */ \
\
    mul_by_3_##field(slope, slope);         /* E = 3*A */ \
\
    sqr_##field(p3->X, slope);              /* F = E^2 */ \
    sub_##field(p3->X, p3->X, d);           /* F-D */ \
    sub_##field(p3->X, p3->X, d);           /* X3 = F-2*D */ \
\
    add_##field(p3->Z, p1->Z, p1->Z);       /* 2*Z1 */ \
    mul_##field(p3->Z, p3->Z, p1->Y);       /* Z3 = 2*Z1*Y1 */ \
\
    mul_by_8_##field(y4, y4);               /* 8*C */ \
    sub_##field(p3->Y, d, p3->X);           /* D-X3 */ \
    mul_##field(p3->Y, p3->Y, slope);       /* E*(D-X3) */ \
    sub_##field(p3->Y, p3->Y, y4);          /* Y3 = E*(D-X3)-8*C */ \
}
  | 
328  |  |  | 
#define POINT_LADDER_PRE_IMPL(ptype, bits, field) \
static void ptype##xz_ladder_pre(ptype##xz *pxz, const ptype *p) \
{ \
    /* Map Jacobian |p| to the XZ pair used by the ladder: */ \
    /*   pxz->X = X1*Z1,  pxz->Z = Z1^3                     */ \
    mul_##field(pxz->X, p->X, p->Z); \
    sqr_##field(pxz->Z, p->Z);              /* Z1^2 */ \
    mul_##field(pxz->Z, pxz->Z, p->Z);      /* Z1^3 */ \
}
336  |  |  | 
337  |  | /*  | 
338  |  |  * https://hyperelliptic.org/EFD/g1p/auto-shortw-xz.html#ladder-ladd-2002-it-3  | 
339  |  |  * with twist to handle either input at infinity, which are encoded as Z==0.  | 
340  |  |  * Note that the order of doubling and addition is reversed relative to the  | 
341  |  |  * hyperelliptic.org entry. This was done to minimize temporary storage.  | 
342  |  |  *  | 
343  |  |  * XZ1 is |p|, XZ2&XZ4 are in&out |r|, XZ3&XZ5 are in&out |s|.  | 
344  |  |  */  | 
#define POINT_LADDER_STEP_IMPL_A0(ptype, bits, field, suffix4b) \
static void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s, \
                                  const ptype##xz *p) \
{ \
    /* One ladder step in XZ coordinates, a == 0: first s += r using the */ \
    /* old |r|, then r *= 2 in place.  |p| is the fixed base point.      */ \
    ptype##xz sum; \
    vec##bits ta, tb, tc, td, x_sq, z_sq; \
    bool_t r_at_inf, s_at_inf; \
\
    /* ---- differential addition: sum = r + s ---- */ \
    mul_##field(ta, r->X, s->X);            /* A = X2*X3 */ \
    mul_##field(tb, r->Z, s->Z);            /* B = Z2*Z3 */ \
    mul_##field(tc, r->X, s->Z);            /* C = X2*Z3 */ \
    mul_##field(td, r->Z, s->X);            /* D = X3*Z2 */ \
\
    sqr_##field(ta, ta);                    /* A^2 (the -a*B term is 0) */ \
    add_##field(sum.X, tc, td);             /* C+D */ \
    mul_##field(sum.X, sum.X, tb);          /* B*(C+D) */ \
    mul_by_4b_##suffix4b(tb, sum.X);        /* 4b*B*(C+D) */ \
    sub_##field(sum.X, ta, tb);             /* A^2-4b*B*(C+D) */ \
    mul_##field(sum.X, sum.X, p->Z);        /* X5 = Z1*(A^2-4b*B*(C+D)) */ \
\
    sub_##field(sum.Z, tc, td);             /* C-D */ \
    sqr_##field(sum.Z, sum.Z);              /* (C-D)^2 */ \
    mul_##field(sum.Z, sum.Z, p->X);        /* Z5 = X1*(C-D)^2 */ \
\
    /* infinity (Z == 0) on either side: the sum is the other operand */ \
    r_at_inf = vec_is_zero(r->Z, sizeof(r->Z)); \
    s_at_inf = vec_is_zero(s->Z, sizeof(s->Z)); \
\
    vec_select(&sum, r, &sum, sizeof(ptype##xz), s_at_inf); \
    vec_select(s,    s, &sum, sizeof(ptype##xz), r_at_inf); \
\
    /* ---- doubling: r = 2*r ---- */ \
    sqr_##field(x_sq, r->X);                /* XX = X2^2 */ \
    sqr_##field(z_sq, r->Z);                /* ZZ = Z2^2 */ \
\
    add_##field(r->Z, r->X, r->Z);          /* X2+Z2 */ \
    sqr_##field(r->Z, r->Z);                /* (X2+Z2)^2 */ \
    sub_##field(r->Z, r->Z, x_sq);          /* (X2+Z2)^2-XX */ \
    sub_##field(r->Z, r->Z, z_sq);          /* E = (X2+Z2)^2-XX-ZZ */ \
\
    sqr_##field(ta, x_sq);                  /* XX^2 (the -a*ZZ term is 0) */ \
    mul_##field(tb, r->Z, z_sq);            /* E*ZZ */ \
    mul_by_4b_##suffix4b(tc, tb);           /* 4b*E*ZZ */ \
    sub_##field(r->X, ta, tc);              /* X4 = XX^2-4b*E*ZZ */ \
\
    sqr_##field(z_sq, z_sq);                /* ZZ^2 */ \
    mul_by_4b_##suffix4b(tb, z_sq);         /* 4b*ZZ^2 */ \
    mul_##field(r->Z, r->Z, x_sq);          /* E*XX (the +a*ZZ term is 0) */ \
    add_##field(r->Z, r->Z, r->Z);          /* 2*E*XX */ \
    add_##field(r->Z, r->Z, tb);            /* Z4 = 2*E*XX+4b*ZZ^2 */ \
}
394  |  |  | 
395  |  | /*  | 
396  |  |  * Recover the |r|'s y-coordinate using Eq. (8) from Brier-Joye,  | 
397  |  |  * "Weierstraß Elliptic Curves and Side-Channel Attacks", with XZ twist  | 
398  |  |  * and conversion to Jacobian coordinates from <openssl>/.../ecp_smpl.c,  | 
399  |  |  * and with twist to recover from |s| at infinity [which occurs when  | 
400  |  |  * multiplying by (order-1)].  | 
401  |  |  *  | 
402  |  |  * X4 = 2*Y1*X2*Z3*Z1*Z2  | 
403  |  |  * Y4 = 2*b*Z3*(Z1*Z2)^2 + Z3*(a*Z1*Z2+X1*X2)*(X1*Z2+X2*Z1) - X3*(X1*Z2-X2*Z1)^2  | 
404  |  |  * Z4 = 2*Y1*Z3*Z2^2*Z1  | 
405  |  |  *  | 
406  |  |  * Z3x2 = 2*Z3  | 
407  |  |  * Y1Z3x2 = Y1*Z3x2  | 
408  |  |  * Z1Z2 = Z1*Z2  | 
409  |  |  * X1Z2 = X1*Z2  | 
410  |  |  * X2Z1 = X2*Z1  | 
411  |  |  * X4 = Y1Z3x2*X2*Z1Z2  | 
412  |  |  * A = b*Z3x2*(Z1Z2)^2  | 
413  |  |  * B = Z3*(a*Z1Z2+X1*X2)*(X1Z2+X2Z1)  | 
414  |  |  * C = X3*(X1Z2-X2Z1)^2  | 
415  |  |  * Y4 = A+B-C  | 
416  |  |  * Z4 = Y1Z3x2*Z1Z2*Z2  | 
417  |  |  *  | 
418  |  |  * XZ1 is |p|, XZ2 is |r|, XZ3 is |s|, 'a' is 0.  | 
419  |  |  */  | 
#define POINT_LADDER_POST_IMPL_A0(ptype, bits, field, suffixb) \
static void ptype##xz_ladder_post(ptype *p4, \
                                  const ptype##xz *r, const ptype##xz *s, \
                                  const ptype##xz *p, const vec##bits Y1) \
{ \
    /* Rebuild a full point |p4| from the ladder outputs |r| and |s|,   */ \
    /* the base |p| and its y-coordinate |Y1|, then rescale to Jacobian. */ \
    vec##bits Z3x2, Y1Z3x2, Z1Z2, X1Z2, X2Z1, y4, acc, diff; \
    bool_t s_at_inf; \
\
    add_##field(Z3x2, s->Z, s->Z);          /* 2*Z3 */ \
    mul_##field(Y1Z3x2, Y1, Z3x2);          /* Y1*2*Z3 */ \
    mul_##field(Z1Z2, p->Z, r->Z);          /* Z1*Z2 */ \
    mul_##field(X1Z2, p->X, r->Z);          /* X1*Z2 */ \
    mul_##field(X2Z1, r->X, p->Z);          /* X2*Z1 */ \
\
    mul_##field(p4->X, Y1Z3x2, r->X);       /* Y1Z3x2*X2 */ \
    mul_##field(p4->X, p4->X, Z1Z2);        /* X4 = Y1Z3x2*X2*Z1Z2 */ \
\
    sqr_##field(y4, Z1Z2);                  /* (Z1Z2)^2 */ \
    mul_##field(acc, y4, Z3x2);             /* Z3x2*(Z1Z2)^2 */ \
    mul_by_b_##suffixb(y4, acc);            /* A = b*Z3x2*(Z1Z2)^2 */ \
\
    mul_##field(acc, p->X, r->X);           /* X1*X2 (a*Z1Z2 is 0) */ \
    mul_##field(acc, acc, s->Z);            /* Z3*X1*X2 */ \
    add_##field(diff, X1Z2, X2Z1);          /* X1Z2+X2Z1 */ \
    mul_##field(acc, acc, diff);            /* B = Z3*X1*X2*(X1Z2+X2Z1) */ \
\
    sub_##field(diff, X1Z2, X2Z1);          /* X1Z2-X2Z1 */ \
    sqr_##field(diff, diff);                /* (X1Z2-X2Z1)^2 */ \
    mul_##field(diff, diff, s->X);          /* C = X3*(X1Z2-X2Z1)^2 */ \
\
    add_##field(y4, y4, acc);               /* A+B */ \
    sub_##field(y4, y4, diff);              /* Y4 = A+B-C */ \
\
    mul_##field(p4->Z, Z1Z2, r->Z);         /* Z1Z2*Z2 */ \
    mul_##field(p4->Z, p4->Z, Y1Z3x2);      /* Z4 = Y1Z3x2*Z1Z2*Z2 */ \
\
    /* |s| at infinity: substitute the base point, then conditionally */ \
    /* negate it under the same flag                                  */ \
    s_at_inf = vec_is_zero(s->Z, sizeof(s->Z)); \
    vec_select(p4->X, p->X, p4->X, sizeof(p4->X), s_at_inf); \
    vec_select(p4->Y, Y1,   y4,    sizeof(p4->Y), s_at_inf); \
    vec_select(p4->Z, p->Z, p4->Z, sizeof(p4->Z), s_at_inf); \
    ptype##_cneg(p4, s_at_inf); \
\
    /* ---- convert to Jacobian ---- */ \
    mul_##field(p4->X, p4->X, p4->Z);       /* X4 = X4*Z4 */ \
    sqr_##field(acc, p4->Z);                /* Z4^2 */ \
    mul_##field(p4->Y, p4->Y, acc);         /* Y4 = Y4*Z4^2 */ \
}
466  |  |  | 
#define POINT_IS_EQUAL_IMPL(ptype, bits, field) \
static limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \
{ \
    /* Compare two Jacobian points without inverting: each side is     */ \
    /* cross-multiplied by the other side's Z powers and the results   */ \
    /* are compared.  The points must also agree on being infinity.    */ \
    vec##bits z1_sq, z2_sq; \
    ptype##_affine lhs, rhs; \
    bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \
    bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \
\
    /* left side: |p1| scaled by powers of Z2 */ \
    sqr_##field(z2_sq, p2->Z);              /* Z2^2 */ \
    mul_##field(lhs.X, p1->X, z2_sq);       /* U1 = X1*Z2^2 */ \
    mul_##field(lhs.Y, p1->Y, p2->Z);       /* Y1*Z2 */ \
    mul_##field(lhs.Y, lhs.Y, z2_sq);       /* S1 = Y1*Z2^3 */ \
\
    /* right side: |p2| scaled by powers of Z1 */ \
    sqr_##field(z1_sq, p1->Z);              /* Z1^2 */ \
    mul_##field(rhs.X, p2->X, z1_sq);       /* U2 = X2*Z1^2 */ \
    mul_##field(rhs.Y, p2->Y, p1->Z);       /* Y2*Z1 */ \
    mul_##field(rhs.Y, rhs.Y, z1_sq);       /* S2 = Y2*Z1^3 */ \
\
    /* the xor term is 1 only when the two infinity flags match */ \
    return vec_is_equal(&lhs, &rhs, sizeof(lhs)) & (is_inf1 ^ is_inf2 ^ 1); \
}
  | 
489  |  |  | 
/*
 * Complete projective addition: https://eprint.iacr.org/2015/1060,
 * algorithm 7, adapted so that |p3| may point at either |p1| or |p2|.
 * To allow that, an extra temporary |scratch| takes the place of the
 * first few references to |X3| in the paper's formula (up to step 21).
 * Cost is 12M[+27A]; doubling and infinity are handled by the formula
 * itself. Infinity is to be encoded as [0, !0, 0].
 */
#define POINT_PROJ_DADD_IMPL_A0(ptype, bits, field, suffixb) \
static void ptype##proj_dadd(ptype##proj *p3, const ptype##proj *p1, \
                                              const ptype##proj *p2) \
{ \
    /* Step numbers follow the paper.  The temporaries are named after */ \
    /* the product they first hold and are then updated in place.      */ \
    vec##bits xx, yy, zz, xy, yz, scratch; \
\
    mul_##field(xx, p1->X, p2->X);          /* 1.     xx = X1*X2 */ \
    mul_##field(yy, p1->Y, p2->Y);          /* 2.     yy = Y1*Y2 */ \
    mul_##field(zz, p1->Z, p2->Z);          /* 3.     zz = Z1*Z2 */ \
    add_##field(xy, p1->X, p1->Y);          /* 4.     xy = X1+Y1 */ \
    add_##field(yz, p2->X, p2->Y);          /* 5.     yz = X2+Y2 (borrowed) */ \
    mul_##field(xy, xy, yz);                /* 6.     xy = xy*yz */ \
    add_##field(yz, xx, yy);                /* 7.     yz = xx+yy (borrowed) */ \
    sub_##field(xy, xy, yz);                /* 8.     xy = X1*Y2+X2*Y1 */ \
    add_##field(yz, p1->Y, p1->Z);          /* 9.     yz = Y1+Z1 */ \
    add_##field(scratch, p2->Y, p2->Z);     /* 10.    scratch = Y2+Z2 */ \
    mul_##field(yz, yz, scratch);           /* 11.    yz = yz*scratch */ \
    add_##field(scratch, yy, zz);           /* 12.    scratch = yy+zz */ \
    sub_##field(yz, yz, scratch);           /* 13.    yz = Y1*Z2+Y2*Z1 */ \
    add_##field(scratch, p1->X, p1->Z);     /* 14.    scratch = X1+Z1 */ \
    /* last reads of |p1| and |p2| happen in steps 14-15 */ \
    add_##field(p3->Y, p2->X, p2->Z);       /* 15.    Y3 = X2+Z2 */ \
    mul_##field(scratch, scratch, p3->Y);   /* 16.    scratch = scratch*Y3 */ \
    add_##field(p3->Y, xx, zz);             /* 17.    Y3 = xx+zz */ \
    sub_##field(p3->Y, scratch, p3->Y);     /* 18.    Y3 = X1*Z2+X2*Z1 */ \
    mul_by_3_##field(xx, xx);               /* 19-20. xx = 3*xx */ \
    mul_by_3_##field(scratch, zz);          /* 21.    scratch = 3*zz */ \
    mul_by_b_##suffixb(zz, scratch);        /* 21.    zz = b*scratch */ \
    add_##field(p3->Z, yy, zz);             /* 22.    Z3 = yy+zz */ \
    sub_##field(yy, yy, zz);                /* 23.    yy = yy-zz */ \
    mul_by_3_##field(scratch, p3->Y);       /* 24.    scratch = 3*Y3 */ \
    mul_by_b_##suffixb(p3->Y, scratch);     /* 24.    Y3 = b*scratch */ \
    mul_##field(p3->X, yz, p3->Y);          /* 25.    X3 = yz*Y3 */ \
    mul_##field(zz, xy, yy);                /* 26.    zz = xy*yy */ \
    sub_##field(p3->X, zz, p3->X);          /* 27.    X3 = zz-X3 */ \
    mul_##field(p3->Y, p3->Y, xx);          /* 28.    Y3 = Y3*xx */ \
    mul_##field(yy, yy, p3->Z);             /* 29.    yy = yy*Z3 */ \
    add_##field(p3->Y, yy, p3->Y);          /* 30.    Y3 = yy+Y3 */ \
    mul_##field(xx, xx, xy);                /* 31.    xx = xx*xy */ \
    mul_##field(p3->Z, p3->Z, yz);          /* 32.    Z3 = Z3*yz */ \
    add_##field(p3->Z, p3->Z, xx);          /* 33.    Z3 = Z3+xx */ \
}
538  |  |  | 
539  |  | /*  | 
540  |  |  * https://eprint.iacr.org/2015/1060, algorithm 8 with a twist to handle  | 
541  |  |  * |p2| being infinity encoded as [0, 0]. 11M[+21A].  | 
542  |  |  */  | 
#define POINT_PROJ_DADD_AFFINE_IMPL_A0(ptype, bits, field, suffixb) \
static void ptype##proj_dadd_affine(ptype##proj *out, const ptype##proj *p1, \
                                                      const ptype##_affine *p2) \
{ \
    /* Mixed complete addition out = p1 + p2, projective |p1| and affine */ \
    /* |p2|.  The sum is built in a local; when |p2| is all-zero (its    */ \
    /* infinity encoding) the final select returns |p1| instead.         */ \
    ptype##proj sum; \
    vec##bits xx, yy, tmp, xy, yz; \
    limb_t p2inf = vec_is_zero(p2, sizeof(*p2)); \
\
    mul_##field(xx, p1->X, p2->X);          /* 1.     xx = X1*X2 */ \
    mul_##field(yy, p1->Y, p2->Y);          /* 2.     yy = Y1*Y2 */ \
    add_##field(xy, p1->X, p1->Y);          /* 3.     xy = X1+Y1 */ \
    add_##field(yz, p2->X, p2->Y);          /* 4.     yz = X2+Y2 (borrowed) */ \
    mul_##field(xy, xy, yz);                /* 5.     xy = xy*yz */ \
    add_##field(yz, xx, yy);                /* 6.     yz = xx+yy (borrowed) */ \
    sub_##field(xy, xy, yz);                /* 7.     xy = X1*Y2+X2*Y1 */ \
    mul_##field(yz, p2->Y, p1->Z);          /* 8.     yz = Y2*Z1 */ \
    add_##field(yz, yz, p1->Y);             /* 9.     yz = yz+Y1 */ \
    mul_##field(sum.Y, p2->X, p1->Z);       /* 10.    Y3 = X2*Z1 */ \
    add_##field(sum.Y, sum.Y, p1->X);       /* 11.    Y3 = Y3+X1 */ \
    mul_by_3_##field(xx, xx);               /* 12-13. xx = 3*xx */ \
    mul_by_b_##suffixb(tmp, p1->Z);         /* 14.    tmp = b*Z1 */ \
    mul_by_3_##field(tmp, tmp);             /* 14.    tmp = 3*tmp */ \
    add_##field(sum.Z, yy, tmp);            /* 15.    Z3 = yy+tmp */ \
    sub_##field(yy, yy, tmp);               /* 16.    yy = yy-tmp */ \
    mul_by_b_##suffixb(tmp, sum.Y);         /* 17.    tmp = b*Y3 */ \
    mul_by_3_##field(sum.Y, tmp);           /* 17.    Y3 = 3*tmp */ \
    mul_##field(sum.X, yz, sum.Y);          /* 18.    X3 = yz*Y3 */ \
    mul_##field(tmp, xy, yy);               /* 19.    tmp = xy*yy */ \
    sub_##field(sum.X, tmp, sum.X);         /* 20.    X3 = tmp-X3 */ \
    mul_##field(sum.Y, sum.Y, xx);          /* 21.    Y3 = Y3*xx */ \
    mul_##field(yy, yy, sum.Z);             /* 22.    yy = yy*Z3 */ \
    add_##field(sum.Y, yy, sum.Y);          /* 23.    Y3 = yy+Y3 */ \
    mul_##field(xx, xx, xy);                /* 24.    xx = xx*xy */ \
    mul_##field(sum.Z, sum.Z, yz);          /* 25.    Z3 = Z3*yz */ \
    add_##field(sum.Z, sum.Z, xx);          /* 26.    Z3 = Z3+xx */ \
\
    vec_select(out, p1, &sum, sizeof(*out), p2inf); \
}
581  |  |  | 
/*
 * Projective doubling: https://eprint.iacr.org/2015/1060, algorithm 9,
 * adapted so that |p3| may point at |p1|. An extra temporary |xy| holds
 * X*Y, and the operations are reordered so that every reference to
 * |p1| comes before the first store to |p3|. 6M+2S[+13A].
 */
#define POINT_PROJ_DOUBLE_IMPL_A0(ptype, bits, field, suffixb) \
static void ptype##proj_double(ptype##proj *p3, const ptype##proj *p1) \
{ \
    /* Step numbers follow the paper, hence they are out of sequence. */ \
    vec##bits yy, yz, zz, xy; \
\
    /* all reads of |p1| */ \
    sqr_##field(yy, p1->Y);                 /* 1.     yy = Y*Y */ \
    mul_##field(yz, p1->Y, p1->Z);          /* 5.     yz = Y*Z */ \
    sqr_##field(zz, p1->Z);                 /* 6.     zz = Z*Z */ \
    mul_##field(xy, p1->X, p1->Y);          /* 16.    xy = X*Y */ \
\
    /* from here on only temporaries and |p3| are touched */ \
    lshift_##field(p3->Z, yy, 3);           /* 2-4.   Z3 = 8*yy */ \
    mul_by_b_##suffixb(p3->X, zz);          /* 7.     X3 = b*zz (X3 as scratch) */ \
    mul_by_3_##field(zz, p3->X);            /* 7.     zz = 3*X3 = 3*b*Z*Z */ \
    mul_##field(p3->X, zz, p3->Z);          /* 8.     X3 = zz*Z3 */ \
    add_##field(p3->Y, yy, zz);             /* 9.     Y3 = yy+zz */ \
    mul_##field(p3->Z, yz, p3->Z);          /* 10.    Z3 = yz*Z3 */ \
    mul_by_3_##field(zz, zz);               /* 11-12. zz = 3*zz */ \
    sub_##field(yy, yy, zz);                /* 13.    yy = yy-zz */ \
    mul_##field(p3->Y, yy, p3->Y);          /* 14.    Y3 = yy*Y3 */ \
    add_##field(p3->Y, p3->X, p3->Y);       /* 15.    Y3 = X3+Y3 */ \
    mul_##field(p3->X, yy, xy);             /* 17.    X3 = yy*xy */ \
    add_##field(p3->X, p3->X, p3->X);       /* 18.    X3 = X3+X3 */ \
}
610  |  |  | 
#define POINT_PROJ_TO_JACOBIAN_IMPL(ptype, bits, field) \
static void ptype##proj_to_Jacobian(ptype *out, const ptype##proj *in) \
{ \
    /* Projective (X, Y, Z) -> Jacobian (X*Z, Y*Z^2, Z) */ \
    vec##bits z_sq; \
\
    sqr_##field(z_sq, in->Z);               /* Z^2 */ \
    mul_##field(out->X, in->X, in->Z);      /* X' = X*Z */ \
    mul_##field(out->Y, in->Y, z_sq);       /* Y' = Y*Z^2 */ \
    vec_copy(out->Z, in->Z, sizeof(out->Z)); /* Z' = Z */ \
}
621  |  |  | 
#define POINT_TO_PROJECTIVE_IMPL(ptype, bits, field, one) \
static void ptype##_to_projective(ptype##proj *out, const ptype *in) \
{ \
    /* Jacobian (X, Y, Z) -> projective (X*Z, Y, Z^3).  When Z == 0 the */ \
    /* Y coordinate is replaced by |one|.                                */ \
    vec##bits z_sq; \
    limb_t at_inf = vec_is_zero(in->Z, sizeof(in->Z)); \
\
    sqr_##field(z_sq, in->Z);               /* Z^2 */ \
    mul_##field(out->X, in->X, in->Z);      /* X' = X*Z */ \
    vec_select(out->Y, one, in->Y, sizeof(out->Y), at_inf); \
    mul_##field(out->Z, z_sq, in->Z);       /* Z' = Z^3 */ \
}
633  |  |  | 
634  |  | /******************* !!!!! NOT CONSTANT TIME !!!!! *******************/  | 
635  |  |  | 
636  |  | /*  | 
637  |  |  * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s  | 
638  |  |  * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1  | 
639  |  |  * with twist to handle either input at infinity. Addition costs 12M+2S,  | 
640  |  |  * while conditional doubling - 4M+6M+3S.  | 
641  |  |  */  | 
642  |  | #define POINTXYZZ_DADD_IMPL(ptype, bits, field) /* emits ptype##xyzz_dadd() */ \  | 
643  |  | static void ptype##xyzz_dadd(ptype##xyzz *p3, const ptype##xyzz *p1, \  | 
644  | 0  |                                               const ptype##xyzz *p2) \  | 
645  | 0  | { /* p3 = p1 + p2 in XYZZ form; branches on point values, hence not constant time */ \ | 
646  | 0  |     vec##bits U, S, P, R; \  | 
647  | 0  | \  | 
648  | 0  |     if (vec_is_zero(p2->ZZZ, 2*sizeof(p2->ZZZ))) { /* |p2| is infinity: same span the last branch zeroes */ \ | 
649  | 0  |         vec_copy(p3, p1, sizeof(*p3));          /* p3 = p1 */ \  | 
650  | 0  |         return; \  | 
651  | 0  |     } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { /* |p1| is infinity */ \ | 
652  | 0  |         vec_copy(p3, p2, sizeof(*p3));          /* p3 = p2 */ \  | 
653  | 0  |         return; \  | 
654  | 0  |     } \  | 
655  | 0  | \  | 
656  | 0  |     mul_##field(U, p1->X, p2->ZZ);              /* U1 = X1*ZZ2 */\  | 
657  | 0  |     mul_##field(S, p1->Y, p2->ZZZ);             /* S1 = Y1*ZZZ2 */\  | 
658  | 0  |     mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\  | 
659  | 0  |     mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\  | 
660  | 0  |     sub_##field(P, P, U);                       /* P = U2-U1 */\  | 
661  | 0  |     sub_##field(R, R, S);                       /* R = S2-S1 */\  | 
662  | 0  | \  | 
663  | 0  |     if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\ | 
664  | 0  |         vec##bits PP, PPP, Q;                   /* add |p1| and |p2| */\  | 
665  | 0  | \  | 
666  | 0  |         sqr_##field(PP, P);                     /* PP = P^2 */\  | 
667  | 0  |         mul_##field(PPP, PP, P);                /* PPP = P*PP */\  | 
668  | 0  |         mul_##field(Q, U, PP);                  /* Q = U1*PP */\  | 
669  | 0  |         sqr_##field(p3->X, R);                  /* R^2 */\  | 
670  | 0  |         add_##field(P, Q, Q);                   /* P is recycled to hold 2*Q */ \  | 
671  | 0  |         sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\  | 
672  | 0  |         sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\  | 
673  | 0  |         sub_##field(Q, Q, p3->X);               /* Q-X3 */ \  | 
674  | 0  |         mul_##field(Q, Q, R);                   /* R*(Q-X3) */\  | 
675  | 0  |         mul_##field(p3->Y, S, PPP);             /* S1*PPP */\  | 
676  | 0  |         sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-S1*PPP */\  | 
677  | 0  |         mul_##field(p3->ZZ, p1->ZZ, p2->ZZ);    /* ZZ1*ZZ2 */\  | 
678  | 0  |         mul_##field(p3->ZZZ, p1->ZZZ, p2->ZZZ); /* ZZZ1*ZZZ2 */\  | 
679  | 0  |         mul_##field(p3->ZZ, p3->ZZ, PP);        /* ZZ3 = ZZ1*ZZ2*PP */\  | 
680  | 0  |         mul_##field(p3->ZZZ, p3->ZZZ, PPP);     /* ZZZ3 = ZZZ1*ZZZ2*PPP */\  | 
681  | 0  |     } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\ | 
682  | 0  |         vec##bits V, W, M;                      /* double |p1| */\  | 
683  | 0  | \  | 
684  | 0  |         add_##field(U, p1->Y, p1->Y);           /* U = 2*Y1 */\  | 
685  | 0  |         sqr_##field(V, U);                      /* V = U^2 */\  | 
686  | 0  |         mul_##field(W, V, U);                   /* W = U*V */\  | 
687  | 0  |         mul_##field(S, p1->X, V);               /* S = X1*V */\  | 
688  | 0  |         sqr_##field(M, p1->X);                  /* X1^2 */ \  | 
689  | 0  |         mul_by_3_##field(M, M);                 /* M = 3*X1^2[+a*ZZ1^2] */\  | 
690  | 0  |         sqr_##field(p3->X, M);                  /* M^2 */ \  | 
691  | 0  |         add_##field(U, S, S);                   /* 2*S */\  | 
692  | 0  |         sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\  | 
693  | 0  |         mul_##field(p3->Y, W, p1->Y);           /* W*Y1 */\  | 
694  | 0  |         sub_##field(S, S, p3->X);               /* S-X3 */ \  | 
695  | 0  |         mul_##field(S, S, M);                   /* M*(S-X3) */\  | 
696  | 0  |         sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y1 */\  | 
697  | 0  |         mul_##field(p3->ZZ, p1->ZZ, V);         /* ZZ3 = V*ZZ1 */\  | 
698  | 0  |         mul_##field(p3->ZZZ, p1->ZZZ, W);       /* ZZZ3 = W*ZZZ1 */\  | 
699  | 0  |     } else {                                    /* X1==X2 && Y1==-Y2 */\ | 
700  | 0  |         vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\  | 
701  | 0  |     } \  | 
702  | 0  | } Unexecuted instantiation: server.c:POINTonE1xyzz_dadd Unexecuted instantiation: server.c:POINTonE2xyzz_dadd  | 
703  |  |  | 
704  |  | /*  | 
705  |  |  * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s  | 
706  |  |  * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-mdbl-2008-s-1  | 
707  |  |  * with twists to handle even subtractions and either input at infinity.  | 
708  |  |  * Addition costs 8M+2S, while conditional doubling - 2M+4M+3S.  | 
709  |  |  */  | 
710  |  | #define POINTXYZZ_DADD_AFFINE_IMPL(ptype, bits, field, one) /* emits ptype##xyzz_dadd_affine() */ \  | 
711  |  | static void ptype##xyzz_dadd_affine(ptype##xyzz *p3, const ptype##xyzz *p1, \  | 
712  |  |                                                      const ptype##_affine *p2, \  | 
713  | 0  |                                                      bool_t subtract) \  | 
714  | 0  | { /* p3 = p1 + p2, or p1 - p2 when |subtract| is set; |p2| is affine; not constant time */ \ | 
715  | 0  |     vec##bits P, R; \  | 
716  | 0  | \  | 
717  | 0  |     if (vec_is_zero(p2, sizeof(*p2))) {         /* all-zero |p2| is treated as infinity */ \ | 
718  | 0  |         vec_copy(p3, p1, sizeof(*p3));          /* p3 = p1 */ \  | 
719  | 0  |         return; \  | 
720  | 0  |     } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { /* |p1| is infinity: p3 = +/-p2 */ \ | 
721  | 0  |         vec_copy(p3->X, p2->X, 2*sizeof(p3->X)); /* two coordinates from X on, presumably X and Y */ \  | 
722  | 0  |         cneg_##field(p3->ZZZ, one, subtract);   /* ZZZ3 = |one|, presumably negated when |subtract| is set */ \  | 
723  | 0  |         vec_copy(p3->ZZ, one, sizeof(p3->ZZ));  /* ZZ3 = |one| */ \  | 
724  | 0  |         return; \  | 
725  | 0  |     } \  | 
726  | 0  | \  | 
727  | 0  |     mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\  | 
728  | 0  |     mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\  | 
729  | 0  |     cneg_##field(R, R, subtract);               /* S2 = -S2 when subtracting */ \  | 
730  | 0  |     sub_##field(P, P, p1->X);                   /* P = U2-X1 */\  | 
731  | 0  |     sub_##field(R, R, p1->Y);                   /* R = S2-Y1 */\  | 
732  | 0  | \  | 
733  | 0  |     if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\ | 
734  | 0  |         vec##bits PP, PPP, Q;                   /* add |p2| to |p1| */\  | 
735  | 0  | \  | 
736  | 0  |         sqr_##field(PP, P);                     /* PP = P^2 */\  | 
737  | 0  |         mul_##field(PPP, PP, P);                /* PPP = P*PP */\  | 
738  | 0  |         mul_##field(Q, p1->X, PP);              /* Q = X1*PP */\  | 
739  | 0  |         sqr_##field(p3->X, R);                  /* R^2 */\  | 
740  | 0  |         add_##field(P, Q, Q);                   /* P is recycled to hold 2*Q */ \  | 
741  | 0  |         sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\  | 
742  | 0  |         sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\  | 
743  | 0  |         sub_##field(Q, Q, p3->X);               /* Q-X3 */ \  | 
744  | 0  |         mul_##field(Q, Q, R);                   /* R*(Q-X3) */\  | 
745  | 0  |         mul_##field(p3->Y, p1->Y, PPP);         /* Y1*PPP */\  | 
746  | 0  |         sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-Y1*PPP */\  | 
747  | 0  |         mul_##field(p3->ZZ, p1->ZZ, PP);        /* ZZ3 = ZZ1*PP */\  | 
748  | 0  |         mul_##field(p3->ZZZ, p1->ZZZ, PPP);     /* ZZZ3 = ZZZ1*PPP */\  | 
749  | 0  |     } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\ | 
750  | 0  |         vec##bits U, S, M;                      /* double |p2|; inputs are X2,Y2 below */\  | 
751  | 0  | \  | 
752  | 0  |         add_##field(U, p2->Y, p2->Y);           /* U = 2*Y2 */\  | 
753  | 0  |         sqr_##field(p3->ZZ, U);                 /* [ZZ3 =] V = U^2 */\  | 
754  | 0  |         mul_##field(p3->ZZZ, p3->ZZ, U);        /* [ZZZ3 =] W = U*V */\  | 
755  | 0  |         mul_##field(S, p2->X, p3->ZZ);          /* S = X2*V */\  | 
756  | 0  |         sqr_##field(M, p2->X);                  /* X2^2 */ \  | 
757  | 0  |         mul_by_3_##field(M, M);                 /* M = 3*X2^2[+a] */\  | 
758  | 0  |         sqr_##field(p3->X, M);                  /* M^2 */ \  | 
759  | 0  |         add_##field(U, S, S);                   /* 2*S */\  | 
760  | 0  |         sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\  | 
761  | 0  |         mul_##field(p3->Y, p3->ZZZ, p2->Y);     /* W*Y2 */\  | 
762  | 0  |         sub_##field(S, S, p3->X);               /* S-X3 */ \  | 
763  | 0  |         mul_##field(S, S, M);                   /* M*(S-X3) */\  | 
764  | 0  |         sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y2 */\  | 
765  | 0  |         cneg_##field(p3->ZZZ, p3->ZZZ, subtract); /* ZZZ3 = -ZZZ3 when subtracting */ \  | 
766  | 0  |     } else {                                    /* X1==X2 && Y1==-Y2 */\ | 
767  | 0  |         vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\  | 
768  | 0  |     } \  | 
769  | 0  | } Unexecuted instantiation: server.c:POINTonE1xyzz_dadd_affine Unexecuted instantiation: server.c:POINTonE2xyzz_dadd_affine  | 
770  |  |  | 
771  |  | #define POINTXYZZ_TO_JACOBIAN_IMPL(ptype, bits, field) /* emits ptype##xyzz_to_Jacobian(); |bits| is not referenced */ \  | 
772  | 0  | static void ptype##xyzz_to_Jacobian(ptype *out, const ptype##xyzz *in) \  | 
773  | 0  | { /* out = (X*ZZ, Y*ZZZ, ZZ), computed from the (X, Y, ZZZ, ZZ) of |in| */ \ | 
774  | 0  |     mul_##field(out->X, in->X, in->ZZ);         /* X3 = X*ZZ */ \  | 
775  | 0  |     mul_##field(out->Y, in->Y, in->ZZZ);        /* Y3 = Y*ZZZ */ \  | 
776  | 0  |     vec_copy(out->Z, in->ZZ, sizeof(out->Z));   /* Z3 = ZZ */ \  | 
777  | 0  | } Unexecuted instantiation: server.c:POINTonE1xyzz_to_Jacobian Unexecuted instantiation: server.c:POINTonE2xyzz_to_Jacobian  | 
778  |  |  | 
779  |  | #define POINT_TO_XYZZ_IMPL(ptype, bits, field) /* emits ptype##_to_xyzz(); |bits| is not referenced */ \  | 
780  |  | static void ptype##_to_xyzz(ptype##xyzz *out, const ptype *in) \  | 
781  |  | { /* out->ZZ = Z^2, out->ZZZ = Z^3, leading coordinates copied from |in| */ \ | 
782  |  |     vec_copy(out->X, in->X, 2*sizeof(out->X));  /* two coordinates from X on, presumably X and Y */ \  | 
783  |  |     sqr_##field(out->ZZ, in->Z);                /* ZZ = Z^2 */ \  | 
784  |  |     mul_##field(out->ZZZ, out->ZZ, in->Z);      /* ZZZ = ZZ*Z = Z^3 */ \  | 
785  |  | }  | 
786  |  |  | 
787  |  | #endif  |