Coverage Report

Created: 2023-12-08 07:00

/src/blst_normal/src/ec_ops.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright Supranational LLC
3
 * Licensed under the Apache License, Version 2.0, see LICENSE for details.
4
 * SPDX-License-Identifier: Apache-2.0
5
 */
6
#ifndef __BLS12_384_ASM_EC_OPS_H__
7
#define __BLS12_384_ASM_EC_OPS_H__
8
/*
9
 * Addition that can handle doubling [as well as points at infinity,
10
 * which are encoded as Z==0] in constant time. It naturally comes at
11
 * a cost, but this subroutine should be called only when independent
12
 * points are processed, which is considered a reasonable compromise.
13
 * For example, ptype##s_mult_w5 calls it, but since the *major* gain is
14
 * the result of pure doublings being effectively divided by the number of
15
 * points, slightly slower addition can be tolerated. But what is the
16
 * additional cost more specifically? Best addition result is 11M+5S,
17
 * while this routine takes 13M+5S (+1M+1S if a4!=0), as per
18
 *
19
 * -------------+-------------
20
 * addition     | doubling
21
 * -------------+-------------
22
 * U1 = X1*Z2^2 | U1 = X1
23
 * U2 = X2*Z1^2 |
24
 * S1 = Y1*Z2^3 | S1 = Y1
25
 * S2 = Y2*Z1^3 |
26
 * zz = Z1*Z2   | zz = Z1
27
 * H = U2-U1    | H' = 2*Y1
28
 * R = S2-S1    | R' = 3*X1^2[+a*Z1^4]
29
 * sx = U1+U2   | sx = X1+X1
30
 * -------------+-------------
31
 * H!=0 || R!=0 | H==0 && R==0
32
 *
33
 *      X3 = R^2-H^2*sx
34
 *      Y3 = R*(H^2*U1-X3)-H^3*S1
35
 *      Z3 = H*zz
36
 *
37
 * As for the R!=0 condition in the context of H==0, a.k.a. P-P: the result is
38
 * infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0.
39
 */
40
#define POINT_DADD_IMPL(ptype, bits, field) \
41
static void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2, \
42
624k
                         const vec##bits a4) \
43
624k
{ \
44
624k
    ptype p3; /* starts as (U1, S1, zz) from addition side */\
45
624k
    struct { vec##bits H, R, sx; } add, dbl; \
46
624k
    bool_t p1inf, p2inf, is_dbl; \
47
624k
\
48
624k
    add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\
49
624k
    sqr_##field(dbl.R, p1->X);          /* X1^2 */\
50
624k
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\
51
624k
    add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\
52
624k
\
53
624k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
54
624k
    sqr_##field(p3.X, p2->Z);           /* Z2^2 */\
55
624k
    mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\
56
624k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
57
624k
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
58
624k
\
59
624k
    if (a4 != NULL) { \
60
2.74k
        sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\
61
2.74k
        mul_##field(p3.Y, p3.Y, a4);    \
62
2.74k
        add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\
63
2.74k
    } \
64
624k
\
65
624k
    mul_##field(p3.Y, p1->Y, p2->Z);    \
66
624k
    mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\
67
624k
    mul_##field(add.R, p2->Y, p1->Z);   \
68
624k
    mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\
69
624k
    sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\
70
624k
\
71
624k
    mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\
72
624k
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
73
624k
\
74
624k
    add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\
75
624k
    sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\
76
624k
\
77
624k
    /* make the choice between addition and doubling */\
78
624k
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \
79
624k
    vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \
80
624k
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \
81
624k
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
82
624k
\
83
624k
    mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\
84
624k
\
85
624k
    sqr_##field(dbl.H, add.H);          /* H^2 */\
86
624k
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
87
624k
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
88
624k
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
89
624k
\
90
624k
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
91
624k
    sqr_##field(p3.X, add.R);           /* R^2 */\
92
624k
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
93
624k
\
94
624k
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
95
624k
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
96
624k
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
97
624k
\
98
624k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
99
624k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
100
624k
}
server.c:POINTonE1_dadd
Line
Count
Source
42
418k
                         const vec##bits a4) \
43
418k
{ \
44
418k
    ptype p3; /* starts as (U1, S1, zz) from addition side */\
45
418k
    struct { vec##bits H, R, sx; } add, dbl; \
46
418k
    bool_t p1inf, p2inf, is_dbl; \
47
418k
\
48
418k
    add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\
49
418k
    sqr_##field(dbl.R, p1->X);          /* X1^2 */\
50
418k
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\
51
418k
    add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\
52
418k
\
53
418k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
54
418k
    sqr_##field(p3.X, p2->Z);           /* Z2^2 */\
55
418k
    mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\
56
418k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
57
418k
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
58
418k
\
59
418k
    if (a4 != NULL) { \
60
436
        sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\
61
436
        mul_##field(p3.Y, p3.Y, a4);    \
62
436
        add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\
63
436
    } \
64
418k
\
65
418k
    mul_##field(p3.Y, p1->Y, p2->Z);    \
66
418k
    mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\
67
418k
    mul_##field(add.R, p2->Y, p1->Z);   \
68
418k
    mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\
69
418k
    sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\
70
418k
\
71
418k
    mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\
72
418k
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
73
418k
\
74
418k
    add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\
75
418k
    sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\
76
418k
\
77
418k
    /* make the choice between addition and doubling */\
78
418k
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \
79
418k
    vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \
80
418k
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \
81
418k
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
82
418k
\
83
418k
    mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\
84
418k
\
85
418k
    sqr_##field(dbl.H, add.H);          /* H^2 */\
86
418k
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
87
418k
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
88
418k
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
89
418k
\
90
418k
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
91
418k
    sqr_##field(p3.X, add.R);           /* R^2 */\
92
418k
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
93
418k
\
94
418k
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
95
418k
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
96
418k
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
97
418k
\
98
418k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
99
418k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
100
418k
}
server.c:POINTonE2_dadd
Line
Count
Source
42
205k
                         const vec##bits a4) \
43
205k
{ \
44
205k
    ptype p3; /* starts as (U1, S1, zz) from addition side */\
45
205k
    struct { vec##bits H, R, sx; } add, dbl; \
46
205k
    bool_t p1inf, p2inf, is_dbl; \
47
205k
\
48
205k
    add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\
49
205k
    sqr_##field(dbl.R, p1->X);          /* X1^2 */\
50
205k
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\
51
205k
    add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\
52
205k
\
53
205k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
54
205k
    sqr_##field(p3.X, p2->Z);           /* Z2^2 */\
55
205k
    mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\
56
205k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
57
205k
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
58
205k
\
59
205k
    if (a4 != NULL) { \
60
2.31k
        sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\
61
2.31k
        mul_##field(p3.Y, p3.Y, a4);    \
62
2.31k
        add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\
63
2.31k
    } \
64
205k
\
65
205k
    mul_##field(p3.Y, p1->Y, p2->Z);    \
66
205k
    mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\
67
205k
    mul_##field(add.R, p2->Y, p1->Z);   \
68
205k
    mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\
69
205k
    sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\
70
205k
\
71
205k
    mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\
72
205k
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
73
205k
\
74
205k
    add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\
75
205k
    sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\
76
205k
\
77
205k
    /* make the choice between addition and doubling */\
78
205k
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \
79
205k
    vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \
80
205k
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \
81
205k
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
82
205k
\
83
205k
    mul_##field(p3.Z, p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\
84
205k
\
85
205k
    sqr_##field(dbl.H, add.H);          /* H^2 */\
86
205k
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
87
205k
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
88
205k
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
89
205k
\
90
205k
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
91
205k
    sqr_##field(p3.X, add.R);           /* R^2 */\
92
205k
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
93
205k
\
94
205k
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
95
205k
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
96
205k
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
97
205k
\
98
205k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
99
205k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
100
205k
}
101
102
/*
103
 * Addition with an affine point that can handle doubling [as well as
104
 * points at infinity, with |p1| being encoded as Z==0 and |p2| as
105
 * X,Y==0] in constant time. But at what additional cost? Best
106
 * addition result is 7M+4S, while this routine takes 8M+5S, as per
107
 *
108
 * -------------+-------------
109
 * addition     | doubling
110
 * -------------+-------------
111
 * U1 = X1      | U1 = X2
112
 * U2 = X2*Z1^2 |
113
 * S1 = Y1      | S1 = Y2
114
 * S2 = Y2*Z1^3 |
115
 * H = U2-X1    | H' = 2*Y2
116
 * R = S2-Y1    | R' = 3*X2^2[+a]
117
 * sx = X1+U2   | sx = X2+X2
118
 * zz = H*Z1    | zz = H'
119
 * -------------+-------------
120
 * H!=0 || R!=0 | H==0 && R==0
121
 *
122
 *      X3 = R^2-H^2*sx
123
 *      Y3 = R*(H^2*U1-X3)-H^3*S1
124
 *      Z3 = zz
125
 *
126
 * As for the R!=0 condition in the context of H==0, a.k.a. P-P: the result is
127
 * infinity by virtue of Z3 = zz = H*Z1 = (U2-X1)*Z1 = 0*Z1 == 0.
128
 */
129
#define POINT_DADD_AFFINE_IMPL_A0(ptype, bits, field, one) \
130
static void ptype##_dadd_affine(ptype *out, const ptype *p1, \
131
3.41k
                                            const ptype##_affine *p2) \
132
3.41k
{ \
133
3.41k
    ptype p3; /* starts as (,, H*Z1) from addition side */\
134
3.41k
    struct { vec##bits H, R, sx; } add, dbl; \
135
3.41k
    bool_t p1inf, p2inf, is_dbl; \
136
3.41k
\
137
3.41k
    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
138
3.41k
    add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\
139
3.41k
    sqr_##field(dbl.R, p2->X);          /* X2^2 */\
140
3.41k
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\
141
3.41k
    add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\
142
3.41k
\
143
3.41k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
144
3.41k
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
145
3.41k
    mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\
146
3.41k
    mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\
147
3.41k
    sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\
148
3.41k
\
149
3.41k
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
150
3.41k
\
151
3.41k
    add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\
152
3.41k
    sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\
153
3.41k
\
154
3.41k
    mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\
155
3.41k
\
156
3.41k
    /* make the choice between addition and doubling */ \
157
3.41k
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \
158
3.41k
    vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \
159
3.41k
    vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\
160
3.41k
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \
161
3.41k
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
162
3.41k
\
163
3.41k
    sqr_##field(dbl.H, add.H);          /* H^2 */\
164
3.41k
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
165
3.41k
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
166
3.41k
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
167
3.41k
\
168
3.41k
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
169
3.41k
    sqr_##field(p3.X, add.R);           /* R^2 */\
170
3.41k
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
171
3.41k
\
172
3.41k
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
173
3.41k
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
174
3.41k
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
175
3.41k
\
176
3.41k
    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \
177
3.41k
    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \
178
3.41k
    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \
179
3.41k
}
server.c:POINTonE1_dadd_affine
Line
Count
Source
131
808
                                            const ptype##_affine *p2) \
132
808
{ \
133
808
    ptype p3; /* starts as (,, H*Z1) from addition side */\
134
808
    struct { vec##bits H, R, sx; } add, dbl; \
135
808
    bool_t p1inf, p2inf, is_dbl; \
136
808
\
137
808
    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
138
808
    add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\
139
808
    sqr_##field(dbl.R, p2->X);          /* X2^2 */\
140
808
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\
141
808
    add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\
142
808
\
143
808
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
144
808
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
145
808
    mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\
146
808
    mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\
147
808
    sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\
148
808
\
149
808
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
150
808
\
151
808
    add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\
152
808
    sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\
153
808
\
154
808
    mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\
155
808
\
156
808
    /* make the choice between addition and doubling */ \
157
808
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \
158
808
    vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \
159
808
    vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\
160
808
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \
161
808
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
162
808
\
163
808
    sqr_##field(dbl.H, add.H);          /* H^2 */\
164
808
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
165
808
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
166
808
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
167
808
\
168
808
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
169
808
    sqr_##field(p3.X, add.R);           /* R^2 */\
170
808
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
171
808
\
172
808
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
173
808
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
174
808
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
175
808
\
176
808
    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \
177
808
    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \
178
808
    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \
179
808
}
server.c:POINTonE2_dadd_affine
Line
Count
Source
131
2.60k
                                            const ptype##_affine *p2) \
132
2.60k
{ \
133
2.60k
    ptype p3; /* starts as (,, H*Z1) from addition side */\
134
2.60k
    struct { vec##bits H, R, sx; } add, dbl; \
135
2.60k
    bool_t p1inf, p2inf, is_dbl; \
136
2.60k
\
137
2.60k
    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
138
2.60k
    add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\
139
2.60k
    sqr_##field(dbl.R, p2->X);          /* X2^2 */\
140
2.60k
    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\
141
2.60k
    add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\
142
2.60k
\
143
2.60k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
144
2.60k
    sqr_##field(add.H, p1->Z);          /* Z1^2 */\
145
2.60k
    mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\
146
2.60k
    mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\
147
2.60k
    sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\
148
2.60k
\
149
2.60k
    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\
150
2.60k
\
151
2.60k
    add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\
152
2.60k
    sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\
153
2.60k
\
154
2.60k
    mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\
155
2.60k
\
156
2.60k
    /* make the choice between addition and doubling */ \
157
2.60k
    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \
158
2.60k
    vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \
159
2.60k
    vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\
160
2.60k
    vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \
161
2.60k
    /* |p3| and |add| hold all inputs now, |p3| will hold output */\
162
2.60k
\
163
2.60k
    sqr_##field(dbl.H, add.H);          /* H^2 */\
164
2.60k
    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\
165
2.60k
    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\
166
2.60k
    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\
167
2.60k
\
168
2.60k
    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\
169
2.60k
    sqr_##field(p3.X, add.R);           /* R^2 */\
170
2.60k
    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\
171
2.60k
\
172
2.60k
    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\
173
2.60k
    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\
174
2.60k
    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\
175
2.60k
\
176
2.60k
    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \
177
2.60k
    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \
178
2.60k
    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \
179
2.60k
}
180
181
/*
182
 * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
183
 * with a twist to handle either input at infinity, which is encoded as Z==0.
184
 */
185
#define POINT_ADD_IMPL(ptype, bits, field) \
186
166k
static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \
187
166k
{ \
188
166k
    ptype p3; \
189
166k
    vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \
190
166k
    bool_t p1inf, p2inf; \
191
166k
\
192
166k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
193
166k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
194
166k
\
195
166k
    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\
196
166k
    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\
197
166k
\
198
166k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
199
166k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
200
166k
\
201
166k
    mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\
202
166k
    mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\
203
166k
\
204
166k
    sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\
205
166k
    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\
206
166k
\
207
166k
    mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\
208
166k
    mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\
209
166k
\
210
166k
    sub_##field(H, H, U1);              /* H = U2-U1 */\
211
166k
\
212
166k
    add_##field(I, H, H);               /* 2*H */\
213
166k
    sqr_##field(I, I);                  /* I = (2*H)^2 */\
214
166k
\
215
166k
    mul_##field(J, H, I);               /* J = H*I */\
216
166k
    mul_##field(S1, S1, J);             /* S1*J */\
217
166k
\
218
166k
    mul_##field(p3.Y, U1, I);           /* V = U1*I */\
219
166k
\
220
166k
    sqr_##field(p3.X, p3.Z);            /* r^2 */\
221
166k
    sub_##field(p3.X, p3.X, J);         /* r^2-J */\
222
166k
    sub_##field(p3.X, p3.X, p3.Y);      \
223
166k
    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\
224
166k
\
225
166k
    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\
226
166k
    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\
227
166k
    sub_##field(p3.Y, p3.Y, S1);        \
228
166k
    sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\
229
166k
\
230
166k
    add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\
231
166k
    sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\
232
166k
    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\
233
166k
    sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\
234
166k
    mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\
235
166k
\
236
166k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
237
166k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
238
166k
}
server.c:POINTonE1_add
Line
Count
Source
186
121k
static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \
187
121k
{ \
188
121k
    ptype p3; \
189
121k
    vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \
190
121k
    bool_t p1inf, p2inf; \
191
121k
\
192
121k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
193
121k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
194
121k
\
195
121k
    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\
196
121k
    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\
197
121k
\
198
121k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
199
121k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
200
121k
\
201
121k
    mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\
202
121k
    mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\
203
121k
\
204
121k
    sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\
205
121k
    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\
206
121k
\
207
121k
    mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\
208
121k
    mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\
209
121k
\
210
121k
    sub_##field(H, H, U1);              /* H = U2-U1 */\
211
121k
\
212
121k
    add_##field(I, H, H);               /* 2*H */\
213
121k
    sqr_##field(I, I);                  /* I = (2*H)^2 */\
214
121k
\
215
121k
    mul_##field(J, H, I);               /* J = H*I */\
216
121k
    mul_##field(S1, S1, J);             /* S1*J */\
217
121k
\
218
121k
    mul_##field(p3.Y, U1, I);           /* V = U1*I */\
219
121k
\
220
121k
    sqr_##field(p3.X, p3.Z);            /* r^2 */\
221
121k
    sub_##field(p3.X, p3.X, J);         /* r^2-J */\
222
121k
    sub_##field(p3.X, p3.X, p3.Y);      \
223
121k
    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\
224
121k
\
225
121k
    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\
226
121k
    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\
227
121k
    sub_##field(p3.Y, p3.Y, S1);        \
228
121k
    sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\
229
121k
\
230
121k
    add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\
231
121k
    sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\
232
121k
    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\
233
121k
    sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\
234
121k
    mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\
235
121k
\
236
121k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
237
121k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
238
121k
}
server.c:POINTonE2_add
Line
Count
Source
186
44.6k
static void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \
187
44.6k
{ \
188
44.6k
    ptype p3; \
189
44.6k
    vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \
190
44.6k
    bool_t p1inf, p2inf; \
191
44.6k
\
192
44.6k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
193
44.6k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
194
44.6k
\
195
44.6k
    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\
196
44.6k
    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\
197
44.6k
\
198
44.6k
    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \
199
44.6k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
200
44.6k
\
201
44.6k
    mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\
202
44.6k
    mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\
203
44.6k
\
204
44.6k
    sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\
205
44.6k
    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\
206
44.6k
\
207
44.6k
    mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\
208
44.6k
    mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\
209
44.6k
\
210
44.6k
    sub_##field(H, H, U1);              /* H = U2-U1 */\
211
44.6k
\
212
44.6k
    add_##field(I, H, H);               /* 2*H */\
213
44.6k
    sqr_##field(I, I);                  /* I = (2*H)^2 */\
214
44.6k
\
215
44.6k
    mul_##field(J, H, I);               /* J = H*I */\
216
44.6k
    mul_##field(S1, S1, J);             /* S1*J */\
217
44.6k
\
218
44.6k
    mul_##field(p3.Y, U1, I);           /* V = U1*I */\
219
44.6k
\
220
44.6k
    sqr_##field(p3.X, p3.Z);            /* r^2 */\
221
44.6k
    sub_##field(p3.X, p3.X, J);         /* r^2-J */\
222
44.6k
    sub_##field(p3.X, p3.X, p3.Y);      \
223
44.6k
    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\
224
44.6k
\
225
44.6k
    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\
226
44.6k
    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\
227
44.6k
    sub_##field(p3.Y, p3.Y, S1);        \
228
44.6k
    sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\
229
44.6k
\
230
44.6k
    add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\
231
44.6k
    sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\
232
44.6k
    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\
233
44.6k
    sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\
234
44.6k
    mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\
235
44.6k
\
236
44.6k
    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \
237
44.6k
    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \
238
44.6k
}
239
240
/*
241
 * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
242
 * with a twist to handle either input at infinity, with |p1| encoded as Z==0,
243
 * and |p2| as X==Y==0.
244
 */
245
#define POINT_ADD_AFFINE_IMPL(ptype, bits, field, one) \
246
static void ptype##_add_affine(ptype *out, const ptype *p1, \
247
1.62k
                                           const ptype##_affine *p2) \
248
1.62k
{ \
249
1.62k
    ptype p3; \
250
1.62k
    vec##bits Z1Z1, H, HH, I, J; \
251
1.62k
    bool_t p1inf, p2inf; \
252
1.62k
\
253
1.62k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
254
1.62k
\
255
1.62k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
256
1.62k
\
257
1.62k
    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\
258
1.62k
    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\
259
1.62k
\
260
1.62k
    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
261
1.62k
\
262
1.62k
    mul_##field(H, p2->X, Z1Z1);        /* U2 = X2*Z1Z1 */\
263
1.62k
    sub_##field(H, H, p1->X);           /* H = U2-X1 */\
264
1.62k
\
265
1.62k
    sqr_##field(HH, H);                 /* HH = H^2 */\
266
1.62k
    add_##field(I, HH, HH);             \
267
1.62k
    add_##field(I, I, I);               /* I = 4*HH */\
268
1.62k
\
269
1.62k
    mul_##field(p3.Y, p1->X, I);        /* V = X1*I */\
270
1.62k
    mul_##field(J, H, I);               /* J = H*I */\
271
1.62k
    mul_##field(I, J, p1->Y);           /* Y1*J */\
272
1.62k
\
273
1.62k
    sub_##field(p3.Z, p3.Z, p1->Y);     /* S2-Y1 */\
274
1.62k
    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-Y1) */\
275
1.62k
\
276
1.62k
    sqr_##field(p3.X, p3.Z);            /* r^2 */\
277
1.62k
    sub_##field(p3.X, p3.X, J);         /* r^2-J */\
278
1.62k
    sub_##field(p3.X, p3.X, p3.Y);      \
279
1.62k
    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\
280
1.62k
\
281
1.62k
    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\
282
1.62k
    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\
283
1.62k
    sub_##field(p3.Y, p3.Y, I);         \
284
1.62k
    sub_##field(p3.Y, p3.Y, I);         /* Y3 = r*(V-X3)-2*Y1*J */\
285
1.62k
\
286
1.62k
    add_##field(p3.Z, p1->Z, H);        /* Z1+H */\
287
1.62k
    sqr_##field(p3.Z, p3.Z);            /* (Z1+H)^2 */\
288
1.62k
    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+H)^2-Z1Z1 */\
289
1.62k
    sub_##field(p3.Z, p3.Z, HH);        /* Z3 = (Z1+H)^2-Z1Z1-HH */\
290
1.62k
\
291
1.62k
    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \
292
1.62k
    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \
293
1.62k
    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \
294
1.62k
}
server.c:POINTonE1_add_affine
Line
Count
Source
247
1.62k
                                           const ptype##_affine *p2) \
248
1.62k
{ \
249
1.62k
    ptype p3; \
250
1.62k
    vec##bits Z1Z1, H, HH, I, J; \
251
1.62k
    bool_t p1inf, p2inf; \
252
1.62k
\
253
1.62k
    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \
254
1.62k
\
255
1.62k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
256
1.62k
\
257
1.62k
    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\
258
1.62k
    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\
259
1.62k
\
260
1.62k
    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \
261
1.62k
\
262
1.62k
    mul_##field(H, p2->X, Z1Z1);        /* U2 = X2*Z1Z1 */\
263
1.62k
    sub_##field(H, H, p1->X);           /* H = U2-X1 */\
264
1.62k
\
265
1.62k
    sqr_##field(HH, H);                 /* HH = H^2 */\
266
1.62k
    add_##field(I, HH, HH);             \
267
1.62k
    add_##field(I, I, I);               /* I = 4*HH */\
268
1.62k
\
269
1.62k
    mul_##field(p3.Y, p1->X, I);        /* V = X1*I */\
270
1.62k
    mul_##field(J, H, I);               /* J = H*I */\
271
1.62k
    mul_##field(I, J, p1->Y);           /* Y1*J */\
272
1.62k
\
273
1.62k
    sub_##field(p3.Z, p3.Z, p1->Y);     /* S2-Y1 */\
274
1.62k
    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-Y1) */\
275
1.62k
\
276
1.62k
    sqr_##field(p3.X, p3.Z);            /* r^2 */\
277
1.62k
    sub_##field(p3.X, p3.X, J);         /* r^2-J */\
278
1.62k
    sub_##field(p3.X, p3.X, p3.Y);      \
279
1.62k
    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\
280
1.62k
\
281
1.62k
    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\
282
1.62k
    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\
283
1.62k
    sub_##field(p3.Y, p3.Y, I);         \
284
1.62k
    sub_##field(p3.Y, p3.Y, I);         /* Y3 = r*(V-X3)-2*Y1*J */\
285
1.62k
\
286
1.62k
    add_##field(p3.Z, p1->Z, H);        /* Z1+H */\
287
1.62k
    sqr_##field(p3.Z, p3.Z);            /* (Z1+H)^2 */\
288
1.62k
    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+H)^2-Z1Z1 */\
289
1.62k
    sub_##field(p3.Z, p3.Z, HH);        /* Z3 = (Z1+H)^2-Z1Z1-HH */\
290
1.62k
\
291
1.62k
    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \
292
1.62k
    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \
293
1.62k
    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \
294
1.62k
}
Unexecuted instantiation: server.c:POINTonE2_add_affine
295
296
/*
297
 * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
298
 */
299
/*
 * Expands to a static ptype##_double(p3, p1) that stores 2*|p1| in |p3|,
 * following the Jacobian dbl-2009-l formula for curves with a == 0 (there
 * is no a*Z1^4 term in E). The body is straight-line field arithmetic with
 * no branches. No coordinate of |p1| is read after the matching coordinate
 * of |p3| has been written, so calling it with p3 == p1 looks safe.
 *
 * ptype  point struct with X, Y, Z members
 * bits   suffix of the field-element type, vec##bits
 * field  suffix of the field primitives (sqr_, add_, sub_, mul_, ...)
 */
#define POINT_DOUBLE_IMPL_A0(ptype, bits, field) \
300
4.93M
static void ptype##_double(ptype *p3, const ptype *p1) \
301
4.93M
{ \
302
4.93M
    vec##bits A, B, C; \
303
4.93M
\
304
4.93M
    sqr_##field(A, p1->X);              /* A = X1^2 */\
305
4.93M
    sqr_##field(B, p1->Y);              /* B = Y1^2 */\
306
4.93M
    sqr_##field(C, B);                  /* C = B^2 */\
307
4.93M
\
308
4.93M
    add_##field(B, B, p1->X);           /* X1+B */\
309
4.93M
    sqr_##field(B, B);                  /* (X1+B)^2 */\
310
4.93M
    sub_##field(B, B, A);               /* (X1+B)^2-A */\
311
4.93M
    sub_##field(B, B, C);               /* (X1+B)^2-A-C */\
312
4.93M
    add_##field(B, B, B);               /* D = 2*((X1+B)^2-A-C), kept in B */\
313
4.93M
\
314
4.93M
    mul_by_3_##field(A, A);             /* E = 3*A, kept in A */\
315
4.93M
\
316
4.93M
    sqr_##field(p3->X, A);              /* F = E^2 */\
317
4.93M
    sub_##field(p3->X, p3->X, B);       /* F-D */\
318
4.93M
    sub_##field(p3->X, p3->X, B);       /* X3 = F-2*D */\
319
4.93M
\
320
4.93M
    add_##field(p3->Z, p1->Z, p1->Z);   /* 2*Z1 */\
321
4.93M
    mul_##field(p3->Z, p3->Z, p1->Y);   /* Z3 = 2*Z1*Y1 */\
322
4.93M
\
323
4.93M
    mul_by_8_##field(C, C);             /* 8*C */\
324
4.93M
    sub_##field(p3->Y, B, p3->X);       /* D-X3 */\
325
4.93M
    mul_##field(p3->Y, p3->Y, A);       /* E*(D-X3) */\
326
4.93M
    sub_##field(p3->Y, p3->Y, C);       /* Y3 = E*(D-X3)-8*C */\
327
4.93M
}
server.c:POINTonE1_double
Line
Count
Source
300
3.36M
static void ptype##_double(ptype *p3, const ptype *p1) \
301
3.36M
{ \
302
3.36M
    vec##bits A, B, C; \
303
3.36M
\
304
3.36M
    sqr_##field(A, p1->X);              /* A = X1^2 */\
305
3.36M
    sqr_##field(B, p1->Y);              /* B = Y1^2 */\
306
3.36M
    sqr_##field(C, B);                  /* C = B^2 */\
307
3.36M
\
308
3.36M
    add_##field(B, B, p1->X);           /* X1+B */\
309
3.36M
    sqr_##field(B, B);                  /* (X1+B)^2 */\
310
3.36M
    sub_##field(B, B, A);               /* (X1+B)^2-A */\
311
3.36M
    sub_##field(B, B, C);               /* (X1+B)^2-A-C */\
312
3.36M
    add_##field(B, B, B);               /* D = 2*((X1+B)^2-A-C) */\
313
3.36M
\
314
3.36M
    mul_by_3_##field(A, A);             /* E = 3*A */\
315
3.36M
\
316
3.36M
    sqr_##field(p3->X, A);              /* F = E^2 */\
317
3.36M
    sub_##field(p3->X, p3->X, B);       \
318
3.36M
    sub_##field(p3->X, p3->X, B);       /* X3 = F-2*D */\
319
3.36M
\
320
3.36M
    add_##field(p3->Z, p1->Z, p1->Z);   /* 2*Z1 */\
321
3.36M
    mul_##field(p3->Z, p3->Z, p1->Y);   /* Z3 = 2*Z1*Y1 */\
322
3.36M
\
323
3.36M
    mul_by_8_##field(C, C);             /* 8*C */\
324
3.36M
    sub_##field(p3->Y, B, p3->X);       /* D-X3 */\
325
3.36M
    mul_##field(p3->Y, p3->Y, A);       /* E*(D-X3) */\
326
3.36M
    sub_##field(p3->Y, p3->Y, C);       /* Y3 = E*(D-X3)-8*C */\
327
3.36M
}
server.c:POINTonE2_double
Line
Count
Source
300
1.56M
static void ptype##_double(ptype *p3, const ptype *p1) \
301
1.56M
{ \
302
1.56M
    vec##bits A, B, C; \
303
1.56M
\
304
1.56M
    sqr_##field(A, p1->X);              /* A = X1^2 */\
305
1.56M
    sqr_##field(B, p1->Y);              /* B = Y1^2 */\
306
1.56M
    sqr_##field(C, B);                  /* C = B^2 */\
307
1.56M
\
308
1.56M
    add_##field(B, B, p1->X);           /* X1+B */\
309
1.56M
    sqr_##field(B, B);                  /* (X1+B)^2 */\
310
1.56M
    sub_##field(B, B, A);               /* (X1+B)^2-A */\
311
1.56M
    sub_##field(B, B, C);               /* (X1+B)^2-A-C */\
312
1.56M
    add_##field(B, B, B);               /* D = 2*((X1+B)^2-A-C) */\
313
1.56M
\
314
1.56M
    mul_by_3_##field(A, A);             /* E = 3*A */\
315
1.56M
\
316
1.56M
    sqr_##field(p3->X, A);              /* F = E^2 */\
317
1.56M
    sub_##field(p3->X, p3->X, B);       \
318
1.56M
    sub_##field(p3->X, p3->X, B);       /* X3 = F-2*D */\
319
1.56M
\
320
1.56M
    add_##field(p3->Z, p1->Z, p1->Z);   /* 2*Z1 */\
321
1.56M
    mul_##field(p3->Z, p3->Z, p1->Y);   /* Z3 = 2*Z1*Y1 */\
322
1.56M
\
323
1.56M
    mul_by_8_##field(C, C);             /* 8*C */\
324
1.56M
    sub_##field(p3->Y, B, p3->X);       /* D-X3 */\
325
1.56M
    mul_##field(p3->Y, p3->Y, A);       /* E*(D-X3) */\
326
1.56M
    sub_##field(p3->Y, p3->Y, C);       /* Y3 = E*(D-X3)-8*C */\
327
1.56M
}
328
329
/*
 * Expands to a static ptype##xz_ladder_pre(pxz, p) that fills the X/Z-only
 * point |pxz| from |p| as X = X1*Z1 and Z = Z1^3, i.e. X/Z == X1/Z1^2.
 * NOTE(review): presumably |p| is in Jacobian coordinates, which would make
 * X/Z its affine x - confirm against the callers.
 */
#define POINT_LADDER_PRE_IMPL(ptype, bits, field) \
330
static void ptype##xz_ladder_pre(ptype##xz *pxz, const ptype *p) \
331
{ \
332
    mul_##field(pxz->X, p->X, p->Z);    /* X2 = X1*Z1 */\
333
    sqr_##field(pxz->Z, p->Z);          /* Z1^2 */\
334
    mul_##field(pxz->Z, pxz->Z, p->Z);  /* Z2 = Z1^3 */\
335
}
336
337
/*
338
 * https://hyperelliptic.org/EFD/g1p/auto-shortw-xz.html#ladder-ladd-2002-it-3
339
 * with twist to handle either input at infinity, which are encoded as Z==0.
340
 * Note that the order of doubling and addition is reversed in comparison to
341
 * hyperelliptic.org entry. This was done to minimize temporary storage.
342
 *
343
 * XZ1 is |p|, XZ2&XZ4 are in&out |r|, XZ3&XZ5 are in&out |s|.
344
 */
345
/*
 * Expands to a static ptype##xz_ladder_step(r, s, p) performing one ladder
 * step on X/Z-only points, in place: first s = s + r (differential
 * addition using |p|'s X and Z), then r = 2*r. This is the a == 0 variant;
 * the bracketed [..a..] terms in the comments are the parts that vanish.
 * Multiplication by the curve's 4*b is delegated to mul_by_4b_##suffix4b.
 * Inputs with Z == 0 are handled with vec_select rather than branches.
 */
#define POINT_LADDER_STEP_IMPL_A0(ptype, bits, field, suffix4b) \
346
static void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s, \
347
                                  const ptype##xz *p) \
348
{ \
349
    ptype##xz p5; \
350
    vec##bits A, B, C, D, XX, ZZ; \
351
    bool_t r_inf, s_inf; \
352
                                        /* s += r */\
353
    mul_##field(A, r->X, s->X);         /* A = X2*X3 */\
354
    mul_##field(B, r->Z, s->Z);         /* B = Z2*Z3 */\
355
    mul_##field(C, r->X, s->Z);         /* C = X2*Z3 */\
356
    mul_##field(D, r->Z, s->X);         /* D = X3*Z2 */\
357
\
358
    sqr_##field(A, A);                  /* (A[-a*B])^2 */\
359
    add_##field(p5.X, C, D);            /* C+D */\
360
    mul_##field(p5.X, p5.X, B);         /* B*(C+D) */\
361
    mul_by_4b_##suffix4b(B, p5.X);      /* b4*B*(C+D) */\
362
    sub_##field(p5.X, A, B);            /* (A[-a*B])^2-b4*B*(C+D) */\
363
    mul_##field(p5.X, p5.X, p->Z);      /* X5 = Z1*((A[-a*B])^2-b4*B*(C+D)) */\
364
\
365
    sub_##field(p5.Z, C, D);            /* C-D */\
366
    sqr_##field(p5.Z, p5.Z);            /* (C-D)^2 */\
367
    mul_##field(p5.Z, p5.Z, p->X);      /* Z5 = X1*(C-D)^2 */\
368
\
369
    r_inf = vec_is_zero(r->Z, sizeof(r->Z)); \
370
    s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \
371
\
372
    vec_select(&p5, r, &p5, sizeof(ptype##xz), s_inf); /* presumably: s at infinity -> sum is r */\
373
    vec_select(s,   s, &p5, sizeof(ptype##xz), r_inf); /* presumably: r at infinity -> s unchanged */\
374
                                        /* r *= 2 */\
375
    sqr_##field(XX, r->X);              /* XX = X2^2 */\
376
    sqr_##field(ZZ, r->Z);              /* ZZ = Z2^2 */\
377
\
378
    add_##field(r->Z, r->X, r->Z);      /* X2+Z2 */\
379
    sqr_##field(r->Z, r->Z);            /* (X2+Z2)^2 */\
380
    sub_##field(r->Z, r->Z, XX);        /* (X2+Z2)^2-XX */\
381
    sub_##field(r->Z, r->Z, ZZ);        /* E = (X2+Z2)^2-XX-ZZ, kept in r->Z */\
382
\
383
    sqr_##field(A, XX);                 /* (XX[-a*ZZ])^2 */\
384
    mul_##field(B, r->Z, ZZ);           /* E*ZZ */\
385
    mul_by_4b_##suffix4b(C, B);         /* b4*E*ZZ */\
386
    sub_##field(r->X, A, C);            /* X4 = (XX[-a*ZZ])^2-b4*E*ZZ */\
387
\
388
    sqr_##field(ZZ, ZZ);                /* ZZ^2 */\
389
    mul_by_4b_##suffix4b(B, ZZ);        /* b4*ZZ^2 */\
390
    mul_##field(r->Z, r->Z, XX);        /* E*(XX[+a*ZZ]) */\
391
    add_##field(r->Z, r->Z, r->Z);      /* 2*E*(XX[+a*ZZ]) */\
392
    add_##field(r->Z, r->Z, B);         /* Z4 = 2*E*(XX[+a*ZZ])+b4*ZZ^2 */\
393
}
394
395
/*
396
 * Recover the |r|'s y-coordinate using Eq. (8) from Brier-Joye,
397
 * "Weierstraß Elliptic Curves and Side-Channel Attacks", with XZ twist
398
 * and conversion to Jacobian coordinates from <openssl>/.../ecp_smpl.c,
399
 * and with twist to recover from |s| at infinity [which occurs when
400
 * multiplying by (order-1)].
401
 *
402
 * X4 = 2*Y1*X2*Z3*Z1*Z2
403
 * Y4 = 2*b*Z3*(Z1*Z2)^2 + Z3*(a*Z1*Z2+X1*X2)*(X1*Z2+X2*Z1) - X3*(X1*Z2-X2*Z1)^2
404
 * Z4 = 2*Y1*Z3*Z2^2*Z1
405
 *
406
 * Z3x2 = 2*Z3
407
 * Y1Z3x2 = Y1*Z3x2
408
 * Z1Z2 = Z1*Z2
409
 * X1Z2 = X1*Z2
410
 * X2Z1 = X2*Z1
411
 * X4 = Y1Z3x2*X2*Z1Z2
412
 * A = b*Z3x2*(Z1Z2)^2
413
 * B = Z3*(a*Z1Z2+X1*X2)*(X1Z2+X2Z1)
414
 * C = X3*(X1Z2-X2Z1)^2
415
 * Y4 = A+B-C
416
 * Z4 = Y1Z3x2*Z1Z2*Z2
417
 *
418
 * XZ1 is |p|, XZ2 is |r|, XZ3 is |s|, 'a' is 0.
419
 */
420
/*
 * Expands to a static ptype##xz_ladder_post(p4, r, s, p, Y1) that rebuilds
 * a full X/Y/Z point |p4| from the X/Z-only ladder outputs |r| and |s|,
 * the X/Z-only base |p| and the base y-coordinate |Y1| (a == 0 variant;
 * multiplication by the curve's b is delegated to mul_by_b_##suffixb).
 * When |s| has Z == 0 the recovered values are replaced by (p->X, Y1, p->Z)
 * and ptype##_cneg(p4, s_inf) is applied. The final three statements
 * rescale X by Z and Y by Z^2 ("to Jacobian").
 */
#define POINT_LADDER_POST_IMPL_A0(ptype, bits, field, suffixb) \
421
static void ptype##xz_ladder_post(ptype *p4, \
422
                                  const ptype##xz *r, const ptype##xz *s, \
423
                                  const ptype##xz *p, const vec##bits Y1) \
424
{ \
425
    vec##bits Z3x2, Y1Z3x2, Z1Z2, X1Z2, X2Z1, A, B, C; \
426
    bool_t s_inf; \
427
\
428
    add_##field(Z3x2, s->Z, s->Z);      /* Z3x2 = 2*Z3 */\
429
    mul_##field(Y1Z3x2, Y1, Z3x2);      /* Y1Z3x2 = Y1*Z3x2 */\
430
    mul_##field(Z1Z2, p->Z, r->Z);      /* Z1Z2 = Z1*Z2 */\
431
    mul_##field(X1Z2, p->X, r->Z);      /* X1Z2 = X1*Z2 */\
432
    mul_##field(X2Z1, r->X, p->Z);      /* X2Z1 = X2*Z1 */\
433
\
434
    mul_##field(p4->X, Y1Z3x2, r->X);   /* Y1Z3x2*X2 */\
435
    mul_##field(p4->X, p4->X, Z1Z2);    /* X4 = Y1Z3x2*X2*Z1Z2 */\
436
\
437
    sqr_##field(A, Z1Z2);               /* (Z1Z2)^2 */\
438
    mul_##field(B, A, Z3x2);            /* Z3x2*(Z1Z2)^2 */\
439
    mul_by_b_##suffixb(A, B);           /* A = b*Z3x2*(Z1Z2)^2 */\
440
\
441
    mul_##field(B, p->X, r->X);         /* [a*Z1Z2+]X1*X2 */\
442
    mul_##field(B, B, s->Z);            /* Z3*([a*Z1Z2+]X1*X2) */\
443
    add_##field(C, X1Z2, X2Z1);         /* X1Z2+X2Z1 */\
444
    mul_##field(B, B, C);               /* B = Z3*([a*Z1Z2+]X1*X2)*(X1Z2+X2Z1) */\
445
\
446
    sub_##field(C, X1Z2, X2Z1);         /* X1Z2-X2Z1 */\
447
    sqr_##field(C, C);                  /* (X1Z2-X2Z1)^2 */\
448
    mul_##field(C, C, s->X);            /* C = X3*(X1Z2-X2Z1)^2 */\
449
\
450
    add_##field(A, A, B);               /* A+B */\
451
    sub_##field(A, A, C);               /* Y4 = A+B-C, kept in A */\
452
\
453
    mul_##field(p4->Z, Z1Z2, r->Z);     /* Z1Z2*Z2 */\
454
    mul_##field(p4->Z, p4->Z, Y1Z3x2);  /* Z4 = Y1Z3x2*Z1Z2*Z2 */\
455
\
456
    s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \
457
    vec_select(p4->X, p->X, p4->X, sizeof(p4->X), s_inf); \
458
    vec_select(p4->Y, Y1,   A,     sizeof(p4->Y), s_inf); \
459
    vec_select(p4->Z, p->Z, p4->Z, sizeof(p4->Z), s_inf); \
460
    ptype##_cneg(p4, s_inf);            /* presumably negates |p4| only when s_inf is set */\
461
                                        /* to Jacobian */\
462
    mul_##field(p4->X, p4->X, p4->Z);   /* X4 = X4*Z4 */\
463
    sqr_##field(B, p4->Z);              /* Z4^2 */\
464
    mul_##field(p4->Y, p4->Y, B);       /* Y4 = Y4*Z4^2 */\
465
}
466
467
/*
 * Expands to a static ptype##_is_equal(p1, p2) that compares two X/Y/Z
 * points without inverting Z: each point's X is scaled by the other's Z^2
 * and its Y by the other's Z^3, and the scaled pairs are compared with
 * vec_is_equal. The result is additionally masked so that it can only be
 * non-zero when both or neither of the inputs have Z == 0. The body has no
 * branches.
 */
#define POINT_IS_EQUAL_IMPL(ptype, bits, field) \
468
39.9k
static limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \
469
39.9k
{ \
470
39.9k
    vec##bits Z1Z1, Z2Z2; \
471
39.9k
    ptype##_affine a1, a2; \
472
39.9k
    bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \
473
39.9k
    bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \
474
39.9k
\
475
39.9k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
476
39.9k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
477
39.9k
\
478
39.9k
    mul_##field(a1.X, p1->X, Z2Z2);     /* U1 = X1*Z2Z2 */\
479
39.9k
    mul_##field(a2.X, p2->X, Z1Z1);     /* U2 = X2*Z1Z1 */\
480
39.9k
\
481
39.9k
    mul_##field(a1.Y, p1->Y, p2->Z);    /* Y1*Z2 */\
482
39.9k
    mul_##field(a2.Y, p2->Y, p1->Z);    /* Y2*Z1 */\
483
39.9k
\
484
39.9k
    mul_##field(a1.Y, a1.Y, Z2Z2);      /* S1 = Y1*Z2*Z2Z2 */\
485
39.9k
    mul_##field(a2.Y, a2.Y, Z1Z1);      /* S2 = Y2*Z1*Z1Z1 */\
486
39.9k
\
487
39.9k
    return vec_is_equal(&a1, &a2, sizeof(a1)) & (is_inf1 ^ is_inf2 ^ 1); /* (U1,S1)==(U2,S2) and same infinity status */\
488
39.9k
}
server.c:POINTonE1_is_equal
Line
Count
Source
468
21.3k
static limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \
469
21.3k
{ \
470
21.3k
    vec##bits Z1Z1, Z2Z2; \
471
21.3k
    ptype##_affine a1, a2; \
472
21.3k
    bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \
473
21.3k
    bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \
474
21.3k
\
475
21.3k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
476
21.3k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
477
21.3k
\
478
21.3k
    mul_##field(a1.X, p1->X, Z2Z2);     /* U1 = X1*Z2Z2 */\
479
21.3k
    mul_##field(a2.X, p2->X, Z1Z1);     /* U2 = X2*Z1Z1 */\
480
21.3k
\
481
21.3k
    mul_##field(a1.Y, p1->Y, p2->Z);    /* Y1*Z2 */\
482
21.3k
    mul_##field(a2.Y, p2->Y, p1->Z);    /* Y2*Z1 */\
483
21.3k
\
484
21.3k
    mul_##field(a1.Y, a1.Y, Z2Z2);      /* S1 = Y1*Z2*Z2Z2 */\
485
21.3k
    mul_##field(a2.Y, a2.Y, Z1Z1);      /* S2 = Y2*Z1*Z1Z1 */\
486
21.3k
\
487
21.3k
    return vec_is_equal(&a1, &a2, sizeof(a1)) & (is_inf1 ^ is_inf2 ^ 1); \
488
21.3k
}
server.c:POINTonE2_is_equal
Line
Count
Source
468
18.6k
static limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \
469
18.6k
{ \
470
18.6k
    vec##bits Z1Z1, Z2Z2; \
471
18.6k
    ptype##_affine a1, a2; \
472
18.6k
    bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \
473
18.6k
    bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \
474
18.6k
\
475
18.6k
    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\
476
18.6k
    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\
477
18.6k
\
478
18.6k
    mul_##field(a1.X, p1->X, Z2Z2);     /* U1 = X1*Z2Z2 */\
479
18.6k
    mul_##field(a2.X, p2->X, Z1Z1);     /* U2 = X2*Z1Z1 */\
480
18.6k
\
481
18.6k
    mul_##field(a1.Y, p1->Y, p2->Z);    /* Y1*Z2 */\
482
18.6k
    mul_##field(a2.Y, p2->Y, p1->Z);    /* Y2*Z1 */\
483
18.6k
\
484
18.6k
    mul_##field(a1.Y, a1.Y, Z2Z2);      /* S1 = Y1*Z2*Z2Z2 */\
485
18.6k
    mul_##field(a2.Y, a2.Y, Z1Z1);      /* S2 = Y2*Z1*Z1Z1 */\
486
18.6k
\
487
18.6k
    return vec_is_equal(&a1, &a2, sizeof(a1)) & (is_inf1 ^ is_inf2 ^ 1); \
488
18.6k
}
489
490
/*
491
 * https://eprint.iacr.org/2015/1060, algorithm 7 with a twist to handle
492
 * |p3| pointing at either |p1| or |p2|. This is resolved by adding |t5|
493
 * and replacing the first few references to |X3| in the formula, up to step
494
 * 21, with it. 12M[+27A], doubling and infinity are handled by the
495
 * formula itself. Infinity is to be encoded as [0, !0, 0].
496
 */
497
/*
 * Expands to a static ptype##proj_dadd(p3, p1, p2) that stores |p1|+|p2|
 * in |p3| using a single branch-free formula on X/Y/Z points (a == 0
 * variant; multiplication by the curve's b is delegated to
 * mul_by_b_##suffixb). Numbers in the comments are the step numbers of the
 * formula being followed. The first write to |p3| (step 15) is also the
 * last read of |p1|/|p2|, and |t5| is used where the formula would touch
 * X3 earlier, so |p3| may point at |p1| or |p2|.
 */
#define POINT_PROJ_DADD_IMPL_A0(ptype, bits, field, suffixb) \
498
static void ptype##proj_dadd(ptype##proj *p3, const ptype##proj *p1, \
499
                                              const ptype##proj *p2) \
500
{ \
501
    vec##bits t0, t1, t2, t3, t4, t5; \
502
\
503
    mul_##field(t0, p1->X, p2->X);      /* 1.     t0 = X1*X2 */\
504
    mul_##field(t1, p1->Y, p2->Y);      /* 2.     t1 = Y1*Y2 */\
505
    mul_##field(t2, p1->Z, p2->Z);      /* 3.     t2 = Z1*Z2 */\
506
    add_##field(t3, p1->X, p1->Y);      /* 4.     t3 = X1+Y1 */\
507
    add_##field(t4, p2->X, p2->Y);      /* 5.     t4 = X2+Y2 */\
508
    mul_##field(t3, t3, t4);            /* 6.     t3 = t3*t4 */\
509
    add_##field(t4, t0, t1);            /* 7.     t4 = t0+t1 */\
510
    sub_##field(t3, t3, t4);            /* 8.     t3 = t3-t4 */\
511
    add_##field(t4, p1->Y, p1->Z);      /* 9.     t4 = Y1+Z1 */\
512
    add_##field(t5, p2->Y, p2->Z);      /* 10.    t5 = Y2+Z2 */\
513
    mul_##field(t4, t4, t5);            /* 11.    t4 = t4*t5 */\
514
    add_##field(t5, t1, t2);            /* 12.    t5 = t1+t2 */\
515
    sub_##field(t4, t4, t5);            /* 13.    t4 = t4-t5 */\
516
    add_##field(t5, p1->X, p1->Z);      /* 14.    t5 = X1+Z1 */\
517
    add_##field(p3->Y, p2->X, p2->Z);   /* 15.    Y3 = X2+Z2 */\
518
    mul_##field(t5, t5, p3->Y);         /* 16.    t5 = t5*Y3 */\
519
    add_##field(p3->Y, t0, t2);         /* 17.    Y3 = t0+t2 */\
520
    sub_##field(p3->Y, t5, p3->Y);      /* 18.    Y3 = t5-Y3 */\
521
    mul_by_3_##field(t0, t0);           /* 19-20. t0 = 3*t0  */\
522
    mul_by_3_##field(t5, t2);           /* 21.    t5 = 3*t2  */\
523
    mul_by_b_##suffixb(t2, t5);         /* 21.    t2 = b*t5  */\
524
    add_##field(p3->Z, t1, t2);         /* 22.    Z3 = t1+t2 */\
525
    sub_##field(t1, t1, t2);            /* 23.    t1 = t1-t2 */\
526
    mul_by_3_##field(t5, p3->Y);        /* 24.    t5 = 3*Y3  */\
527
    mul_by_b_##suffixb(p3->Y, t5);      /* 24.    Y3 = b*t5  */\
528
    mul_##field(p3->X, t4, p3->Y);      /* 25.    X3 = t4*Y3 */\
529
    mul_##field(t2, t3, t1);            /* 26.    t2 = t3*t1 */\
530
    sub_##field(p3->X, t2, p3->X);      /* 27.    X3 = t2-X3 */\
531
    mul_##field(p3->Y, p3->Y, t0);      /* 28.    Y3 = Y3*t0 */\
532
    mul_##field(t1, t1, p3->Z);         /* 29.    t1 = t1*Z3 */\
533
    add_##field(p3->Y, t1, p3->Y);      /* 30.    Y3 = t1+Y3 */\
534
    mul_##field(t0, t0, t3);            /* 31.    t0 = t0*t3 */\
535
    mul_##field(p3->Z, p3->Z, t4);      /* 32.    Z3 = Z3*t4 */\
536
    add_##field(p3->Z, p3->Z, t0);      /* 33.    Z3 = Z3+t0 */\
537
}
538
539
/*
540
 * https://eprint.iacr.org/2015/1060, algorithm 8 with a twist to handle
541
 * |p2| being infinity encoded as [0, 0]. 11M[+21A].
542
 */
543
/*
 * Expands to a static ptype##proj_dadd_affine(out, p1, p2) that adds the
 * X/Y-only point |p2| to the X/Y/Z point |p1| with a single branch-free
 * formula (a == 0 variant; multiplication by the curve's b is delegated to
 * mul_by_b_##suffixb). Numbers in the comments are the step numbers of the
 * formula being followed. The sum is built in a local |p3|; |p2| counts as
 * infinity when all of its bytes are zero, and the final vec_select then
 * presumably yields |p1| instead of |p3|. Because |out| is written only by
 * that last statement, it may point at |p1|.
 */
#define POINT_PROJ_DADD_AFFINE_IMPL_A0(ptype, bits, field, suffixb) \
544
static void ptype##proj_dadd_affine(ptype##proj *out, const ptype##proj *p1, \
545
                                                      const ptype##_affine *p2) \
546
{ \
547
    ptype##proj p3[1]; \
548
    vec##bits t0, t1, t2, t3, t4; \
549
    limb_t p2inf = vec_is_zero(p2, sizeof(*p2)); \
550
\
551
    mul_##field(t0, p1->X, p2->X);      /* 1.     t0 = X1*X2 */\
552
    mul_##field(t1, p1->Y, p2->Y);      /* 2.     t1 = Y1*Y2 */\
553
    add_##field(t3, p1->X, p1->Y);      /* 3.     t3 = X1+Y1 */\
554
    add_##field(t4, p2->X, p2->Y);      /* 4.     t4 = X2+Y2 */\
555
    mul_##field(t3, t3, t4);            /* 5.     t3 = t3*t4 */\
556
    add_##field(t4, t0, t1);            /* 6.     t4 = t0+t1 */\
557
    sub_##field(t3, t3, t4);            /* 7.     t3 = t3-t4 */\
558
    mul_##field(t4, p2->Y, p1->Z);      /* 8.     t4 = Y2*Z1 */\
559
    add_##field(t4, t4, p1->Y);         /* 9.     t4 = t4+Y1 */\
560
    mul_##field(p3->Y, p2->X, p1->Z);   /* 10.    Y3 = X2*Z1 */\
561
    add_##field(p3->Y, p3->Y, p1->X);   /* 11.    Y3 = Y3+X1 */\
562
    mul_by_3_##field(t0, t0);           /* 12-13. t0 = 3*t0  */\
563
    mul_by_b_##suffixb(t2, p1->Z);      /* 14.    t2 = b*Z1  */\
564
    mul_by_3_##field(t2, t2);           /* 14.    t2 = 3*t2  */\
565
    add_##field(p3->Z, t1, t2);         /* 15.    Z3 = t1+t2 */\
566
    sub_##field(t1, t1, t2);            /* 16.    t1 = t1-t2 */\
567
    mul_by_b_##suffixb(t2, p3->Y);      /* 17.    t2 = b*Y3  */\
568
    mul_by_3_##field(p3->Y, t2);        /* 17.    Y3 = 3*t2  */\
569
    mul_##field(p3->X, t4, p3->Y);      /* 18.    X3 = t4*Y3 */\
570
    mul_##field(t2, t3, t1);            /* 19.    t2 = t3*t1 */\
571
    sub_##field(p3->X, t2, p3->X);      /* 20.    X3 = t2-X3 */\
572
    mul_##field(p3->Y, p3->Y, t0);      /* 21.    Y3 = Y3*t0 */\
573
    mul_##field(t1, t1, p3->Z);         /* 22.    t1 = t1*Z3 */\
574
    add_##field(p3->Y, t1, p3->Y);      /* 23.    Y3 = t1+Y3 */\
575
    mul_##field(t0, t0, t3);            /* 24.    t0 = t0*t3 */\
576
    mul_##field(p3->Z, p3->Z, t4);      /* 25.    Z3 = Z3*t4 */\
577
    add_##field(p3->Z, p3->Z, t0);      /* 26.    Z3 = Z3+t0 */\
578
\
579
    vec_select(out, p1, p3, sizeof(*out), p2inf); /* presumably: p2 at infinity -> result is p1 */\
580
}
581
582
/*
583
 * https://eprint.iacr.org/2015/1060, algorithm 9 with a twist to handle
584
 * |p3| pointing at |p1|. This is resolved by adding |t3| to hold X*Y
585
 * and reordering operations to bring references to |p1| forward.
586
 * 6M+2S[+13A].
587
 */
588
/*
 * Expands to a static ptype##proj_double(p3, p1) that stores 2*|p1| in
 * |p3| with a branch-free formula on X/Y/Z points (a == 0 variant;
 * multiplication by the curve's b is delegated to mul_by_b_##suffixb).
 * Numbers in the comments are the step numbers of the formula being
 * followed; they appear out of order because every read of |p1| has been
 * moved into the first four statements, before anything is written to
 * |p3|, so |p3| may point at |p1|.
 */
#define POINT_PROJ_DOUBLE_IMPL_A0(ptype, bits, field, suffixb) \
589
static void ptype##proj_double(ptype##proj *p3, const ptype##proj *p1) \
590
{ \
591
    vec##bits t0, t1, t2, t3; \
592
\
593
    sqr_##field(t0, p1->Y);             /* 1.     t0 = Y*Y   */\
594
    mul_##field(t1, p1->Y, p1->Z);      /* 5.     t1 = Y*Z   */\
595
    sqr_##field(t2, p1->Z);             /* 6.     t2 = Z*Z   */\
596
    mul_##field(t3, p1->X, p1->Y);      /* 16.    t3 = X*Y   */\
597
    lshift_##field(p3->Z, t0, 3);       /* 2-4.   Z3 = 8*t0  */\
598
    mul_by_b_##suffixb(p3->X, t2);      /* 7.     X3 = b*t2 (X3 used as scratch) */\
599
    mul_by_3_##field(t2, p3->X);        /* 7.     t2 = 3*X3, i.e. 3*b*Z*Z */\
600
    mul_##field(p3->X, t2, p3->Z);      /* 8.     X3 = t2*Z3 */\
601
    add_##field(p3->Y, t0, t2);         /* 9.     Y3 = t0+t2 */\
602
    mul_##field(p3->Z, t1, p3->Z);      /* 10.    Z3 = t1*Z3 */\
603
    mul_by_3_##field(t2, t2);           /* 11-12. t2 = 3*t2  */\
604
    sub_##field(t0, t0, t2);            /* 13.    t0 = t0-t2 */\
605
    mul_##field(p3->Y, t0, p3->Y);      /* 14.    Y3 = t0*Y3 */\
606
    add_##field(p3->Y, p3->X, p3->Y);   /* 15.    Y3 = X3+Y3 */\
607
    mul_##field(p3->X, t0, t3);         /* 17.    X3 = t0*t3 */\
608
    add_##field(p3->X, p3->X, p3->X);   /* 18.    X3 = X3+X3 */\
609
}
610
611
/*
 * Expands to a static ptype##proj_to_Jacobian(out, in) that rescales an
 * X/Y/Z point: out = (X*Z, Y*Z^2, Z). With this scaling out->X/out->Z^2
 * equals in->X/in->Z and out->Y/out->Z^3 equals in->Y/in->Z.
 */
#define POINT_PROJ_TO_JACOBIAN_IMPL(ptype, bits, field) \
612
static void ptype##proj_to_Jacobian(ptype *out, const ptype##proj *in) \
613
{ \
614
    vec##bits ZZ; \
615
\
616
    sqr_##field(ZZ, in->Z);                   /* ZZ = Z^2 */\
617
    mul_##field(out->X, in->X, in->Z);        /* X' = X*Z */\
618
    mul_##field(out->Y, in->Y, ZZ);           /* Y' = Y*Z^2 */\
619
    vec_copy(out->Z, in->Z, sizeof(out->Z));  /* Z' = Z */\
620
}
621
622
/*
 * Expands to a static ptype##_to_projective(out, in) that rescales an
 * X/Y/Z point: out = (X*Z, Y, Z^3). With this scaling out->X/out->Z equals
 * in->X/in->Z^2 and out->Y/out->Z equals in->Y/in->Z^3. When in->Z is zero
 * the Y output is taken from the |one| macro argument instead of in->Y
 * (presumably vec_select picks its second argument when the flag is set),
 * so the result has X == 0, Z == 0 and a fixed Y.
 */
#define POINT_TO_PROJECTIVE_IMPL(ptype, bits, field, one) \
623
static void ptype##_to_projective(ptype##proj *out, const ptype *in) \
624
{ \
625
    vec##bits ZZ; \
626
    limb_t is_inf = vec_is_zero(in->Z, sizeof(in->Z)); \
627
\
628
    sqr_##field(ZZ, in->Z);                   /* ZZ = Z^2 */\
629
    mul_##field(out->X, in->X, in->Z);        /* X' = X*Z */\
630
    vec_select(out->Y, one, in->Y, sizeof(out->Y), is_inf); \
631
    mul_##field(out->Z, ZZ, in->Z);           /* Z' = Z^3 */\
632
}
633
634
/******************* !!!!! NOT CONSTANT TIME !!!!! *******************/
635
636
/*
637
 * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s
638
 * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1
639
 * with twist to handle either input at infinity. Addition costs 12M+2S,
640
 * while conditional doubling - 4M+6M+3S.
641
 */
642
/*
 * Expands to a static ptype##xyzz_dadd(p3, p1, p2) that stores |p1|+|p2|
 * in |p3| for points held as (X, Y, ZZZ, ZZ). NOT constant time: it
 * branches on the point values.
 *
 *  - |p2| at infinity: |p1| is copied to |p3|; else |p1| at infinity:
 *    |p2| is copied. Infinity is detected by vec_is_zero over
 *    2*sizeof(ZZZ) bytes starting at ZZZ (presumably covering both ZZZ
 *    and the adjacent ZZ member - confirm against the struct layout);
 *  - P != 0: general addition;
 *  - P == 0 and R == 0: the inputs are the same point, |p1| is doubled
 *    (a == 0 form, M has no a*ZZ1^2 term);
 *  - P == 0 and R != 0: the inputs are opposite, the ZZZ/ZZ part of |p3|
 *    is zeroed.
 */
#define POINTXYZZ_DADD_IMPL(ptype, bits, field) \
643
static void ptype##xyzz_dadd(ptype##xyzz *p3, const ptype##xyzz *p1, \
644
0
                                              const ptype##xyzz *p2) \
645
0
{ \
646
0
    vec##bits U, S, P, R; \
647
0
\
648
0
    if (vec_is_zero(p2->ZZZ, 2*sizeof(p2->ZZZ))) { \
649
0
        vec_copy(p3, p1, sizeof(*p3));  \
650
0
        return; \
651
0
    } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \
652
0
        vec_copy(p3, p2, sizeof(*p3));  \
653
0
        return; \
654
0
    } \
655
0
\
656
0
    mul_##field(U, p1->X, p2->ZZ);              /* U1 = X1*ZZ2 */\
657
0
    mul_##field(S, p1->Y, p2->ZZZ);             /* S1 = Y1*ZZZ2 */\
658
0
    mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\
659
0
    mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\
660
0
    sub_##field(P, P, U);                       /* P = U2-U1 */\
661
0
    sub_##field(R, R, S);                       /* R = S2-S1 */\
662
0
\
663
0
    if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\
664
0
        vec##bits PP, PPP, Q;                   /* add |p1| and |p2| */\
665
0
\
666
0
        sqr_##field(PP, P);                     /* PP = P^2 */\
667
0
        mul_##field(PPP, PP, P);                /* PPP = P*PP */\
668
0
        mul_##field(Q, U, PP);                  /* Q = U1*PP */\
669
0
        sqr_##field(p3->X, R);                  /* R^2 */\
670
0
        add_##field(P, Q, Q);                   /* 2*Q, P reused as scratch */\
671
0
        sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\
672
0
        sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\
673
0
        sub_##field(Q, Q, p3->X);               /* Q-X3 */\
674
0
        mul_##field(Q, Q, R);                   /* R*(Q-X3) */\
675
0
        mul_##field(p3->Y, S, PPP);             /* S1*PPP */\
676
0
        sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-S1*PPP */\
677
0
        mul_##field(p3->ZZ, p1->ZZ, p2->ZZ);    /* ZZ1*ZZ2 */\
678
0
        mul_##field(p3->ZZZ, p1->ZZZ, p2->ZZZ); /* ZZZ1*ZZZ2 */\
679
0
        mul_##field(p3->ZZ, p3->ZZ, PP);        /* ZZ3 = ZZ1*ZZ2*PP */\
680
0
        mul_##field(p3->ZZZ, p3->ZZZ, PPP);     /* ZZZ3 = ZZZ1*ZZZ2*PPP */\
681
0
    } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\
682
0
        vec##bits V, W, M;                      /* double |p1| */\
683
0
\
684
0
        add_##field(U, p1->Y, p1->Y);           /* U = 2*Y1 */\
685
0
        sqr_##field(V, U);                      /* V = U^2 */\
686
0
        mul_##field(W, V, U);                   /* W = U*V */\
687
0
        mul_##field(S, p1->X, V);               /* S = X1*V */\
688
0
        sqr_##field(M, p1->X);                  /* X1^2 */\
689
0
        mul_by_3_##field(M, M);                 /* M = 3*X1^2[+a*ZZ1^2] */\
690
0
        sqr_##field(p3->X, M);                  /* M^2 */\
691
0
        add_##field(U, S, S);                   /* 2*S */\
692
0
        sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\
693
0
        mul_##field(p3->Y, W, p1->Y);           /* W*Y1 */\
694
0
        sub_##field(S, S, p3->X);               /* S-X3 */\
695
0
        mul_##field(S, S, M);                   /* M*(S-X3) */\
696
0
        sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y1 */\
697
0
        mul_##field(p3->ZZ, p1->ZZ, V);         /* ZZ3 = V*ZZ1 */\
698
0
        mul_##field(p3->ZZZ, p1->ZZZ, W);       /* ZZZ3 = W*ZZZ1 */\
699
0
    } else {                                    /* X1==X2 && Y1==-Y2 */\
700
0
        vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\
701
0
    } \
702
0
}
Unexecuted instantiation: server.c:POINTonE1xyzz_dadd
Unexecuted instantiation: server.c:POINTonE2xyzz_dadd
703
704
/*
705
 * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s
706
 * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-mdbl-2008-s-1
707
 * with twists to handle even subtractions and either input at infinity.
708
 * Addition costs 8M+2S, while conditional doubling - 2M+4M+3S.
709
 */
710
/*
 * Expands to a static ptype##xyzz_dadd_affine(p3, p1, p2, subtract) that
 * stores |p1|+|p2| - or |p1|-|p2| when |subtract| is set - in |p3|, where
 * |p1|/|p3| are held as (X, Y, ZZZ, ZZ) and |p2| is an X/Y-only point.
 * NOT constant time: it branches on the point values.
 *
 *  - |p2| all-zero bytes (infinity): |p1| is copied to |p3|;
 *  - else |p1| at infinity: |p3| becomes |p2| with ZZ = one and
 *    ZZZ = one, conditionally negated on |subtract| (negating ZZZ flips
 *    the sign of Y/ZZZ). The copy of 2*sizeof(X) bytes presumably covers
 *    both X and the adjacent Y member - confirm against the struct layout;
 *  - P != 0: general mixed addition (S2 is conditionally negated first);
 *  - P == 0 and R == 0: the operands coincide, |p2| is doubled (a == 0
 *    form; "X1"/"Y1" in that branch's comments denote |p2|'s coordinates)
 *    and ZZZ3 is conditionally negated on |subtract|;
 *  - P == 0 and R != 0: the operands are opposite, the ZZZ/ZZ part of
 *    |p3| is zeroed.
 *
 * |one| is the macro argument used as the field element 1.
 */
#define POINTXYZZ_DADD_AFFINE_IMPL(ptype, bits, field, one) \
711
static void ptype##xyzz_dadd_affine(ptype##xyzz *p3, const ptype##xyzz *p1, \
712
                                                     const ptype##_affine *p2, \
713
0
                                                     bool_t subtract) \
714
0
{ \
715
0
    vec##bits P, R; \
716
0
\
717
0
    if (vec_is_zero(p2, sizeof(*p2))) { \
718
0
        vec_copy(p3, p1, sizeof(*p3));  \
719
0
        return; \
720
0
    } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \
721
0
        vec_copy(p3->X, p2->X, 2*sizeof(p3->X));\
722
0
        cneg_##field(p3->ZZZ, one, subtract);   \
723
0
        vec_copy(p3->ZZ, one, sizeof(p3->ZZ));  \
724
0
        return; \
725
0
    } \
726
0
\
727
0
    mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\
728
0
    mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\
729
0
    cneg_##field(R, R, subtract);               /* S2 = -S2 when subtracting */\
730
0
    sub_##field(P, P, p1->X);                   /* P = U2-X1 */\
731
0
    sub_##field(R, R, p1->Y);                   /* R = S2-Y1 */\
732
0
\
733
0
    if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\
734
0
        vec##bits PP, PPP, Q;                   /* add |p2| to |p1| */\
735
0
\
736
0
        sqr_##field(PP, P);                     /* PP = P^2 */\
737
0
        mul_##field(PPP, PP, P);                /* PPP = P*PP */\
738
0
        mul_##field(Q, p1->X, PP);              /* Q = X1*PP */\
739
0
        sqr_##field(p3->X, R);                  /* R^2 */\
740
0
        add_##field(P, Q, Q);                   /* 2*Q, P reused as scratch */\
741
0
        sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\
742
0
        sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\
743
0
        sub_##field(Q, Q, p3->X);               /* Q-X3 */\
744
0
        mul_##field(Q, Q, R);                   /* R*(Q-X3) */\
745
0
        mul_##field(p3->Y, p1->Y, PPP);         /* Y1*PPP */\
746
0
        sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-Y1*PPP */\
747
0
        mul_##field(p3->ZZ, p1->ZZ, PP);        /* ZZ3 = ZZ1*PP */\
748
0
        mul_##field(p3->ZZZ, p1->ZZZ, PPP);     /* ZZZ3 = ZZZ1*PPP */\
749
0
    } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\
750
0
        vec##bits U, S, M;                      /* double |p2| */\
751
0
\
752
0
        add_##field(U, p2->Y, p2->Y);           /* U = 2*Y1 */\
753
0
        sqr_##field(p3->ZZ, U);                 /* [ZZ3 =] V = U^2 */\
754
0
        mul_##field(p3->ZZZ, p3->ZZ, U);        /* [ZZZ3 =] W = U*V */\
755
0
        mul_##field(S, p2->X, p3->ZZ);          /* S = X1*V */\
756
0
        sqr_##field(M, p2->X);                  /* X1^2 */\
757
0
        mul_by_3_##field(M, M);                 /* M = 3*X1^2[+a] */\
758
0
        sqr_##field(p3->X, M);                  /* M^2 */\
759
0
        add_##field(U, S, S);                   /* 2*S */\
760
0
        sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\
761
0
        mul_##field(p3->Y, p3->ZZZ, p2->Y);     /* W*Y1 */\
762
0
        sub_##field(S, S, p3->X);               /* S-X3 */\
763
0
        mul_##field(S, S, M);                   /* M*(S-X3) */\
764
0
        sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y1 */\
765
0
        cneg_##field(p3->ZZZ, p3->ZZZ, subtract); /* negate the result when subtracting */\
766
0
    } else {                                    /* X1==X2 && Y1==-Y2 */\
767
0
        vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\
768
0
    } \
769
0
}
Unexecuted instantiation: server.c:POINTonE1xyzz_dadd_affine
Unexecuted instantiation: server.c:POINTonE2xyzz_dadd_affine
770
771
/*
 * Expands to a static ptype##xyzz_to_Jacobian(out, in) that converts an
 * (X, Y, ZZZ, ZZ) point to X/Y/Z form as out = (X*ZZ, Y*ZZZ, ZZ).
 * Provided ZZ^3 == ZZZ^2 holds for the input, out->X/out->Z^2 equals
 * in->X/in->ZZ and out->Y/out->Z^3 equals in->Y/in->ZZZ.
 */
#define POINTXYZZ_TO_JACOBIAN_IMPL(ptype, bits, field) \
772
0
static void ptype##xyzz_to_Jacobian(ptype *out, const ptype##xyzz *in) \
773
0
{ \
774
0
    mul_##field(out->X, in->X, in->ZZ);       /* X' = X*ZZ */\
775
0
    mul_##field(out->Y, in->Y, in->ZZZ);      /* Y' = Y*ZZZ */\
776
0
    vec_copy(out->Z, in->ZZ, sizeof(out->Z)); /* Z' = ZZ */\
777
0
}
Unexecuted instantiation: server.c:POINTonE1xyzz_to_Jacobian
Unexecuted instantiation: server.c:POINTonE2xyzz_to_Jacobian
778
779
/*
 * Expands to a static ptype##_to_xyzz(out, in) that converts an X/Y/Z
 * point to (X, Y, ZZZ, ZZ) form with ZZ = Z^2 and ZZZ = Z^3. The single
 * vec_copy moves 2*sizeof(X) bytes starting at X.
 * NOTE(review): that presumably copies X together with the adjacent Y
 * member - confirm against the struct layouts.
 */
#define POINT_TO_XYZZ_IMPL(ptype, bits, field) \
780
static void ptype##_to_xyzz(ptype##xyzz *out, const ptype *in) \
781
{ \
782
    vec_copy(out->X, in->X, 2*sizeof(out->X)); \
783
    sqr_##field(out->ZZ, in->Z);              /* ZZ = Z^2 */\
784
    mul_##field(out->ZZZ, out->ZZ, in->Z);    /* ZZZ = Z^3 */\
785
}
786
787
#endif