/src/openssl30/crypto/ec/ecp_nistp521.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2011-2021 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | /* Copyright 2011 Google Inc.  | 
11  |  |  *  | 
12  |  |  * Licensed under the Apache License, Version 2.0 (the "License");  | 
13  |  |  *  | 
14  |  |  * you may not use this file except in compliance with the License.  | 
15  |  |  * You may obtain a copy of the License at  | 
16  |  |  *  | 
17  |  |  *     http://www.apache.org/licenses/LICENSE-2.0  | 
18  |  |  *  | 
19  |  |  *  Unless required by applicable law or agreed to in writing, software  | 
20  |  |  *  distributed under the License is distributed on an "AS IS" BASIS,  | 
21  |  |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
22  |  |  *  See the License for the specific language governing permissions and  | 
23  |  |  *  limitations under the License.  | 
24  |  |  */  | 
25  |  |  | 
26  |  | /*  | 
27  |  |  * ECDSA low level APIs are deprecated for public use, but still ok for  | 
28  |  |  * internal use.  | 
29  |  |  */  | 
30  |  | #include "internal/deprecated.h"  | 
31  |  |  | 
32  |  | /*  | 
33  |  |  * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication  | 
34  |  |  *  | 
35  |  |  * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.  | 
36  |  |  * Otherwise based on Emilia's P224 work, which was inspired by my curve25519  | 
37  |  |  * work which got its smarts from Daniel J. Bernstein's work on the same.  | 
38  |  |  */  | 
39  |  |  | 
40  |  | #include <openssl/e_os2.h>  | 
41  |  |  | 
42  |  | #include <string.h>  | 
43  |  | #include <openssl/err.h>  | 
44  |  | #include "ec_local.h"  | 
45  |  |  | 
46  |  | #include "internal/numbers.h"  | 
47  |  |  | 
48  |  | #ifndef INT128_MAX  | 
49  |  | # error "Your compiler doesn't appear to support 128-bit integer types"  | 
50  |  | #endif  | 
51  |  |  | 
52  |  | typedef uint8_t u8;  | 
53  |  | typedef uint64_t u64;  | 
54  |  |  | 
55  |  | /*  | 
56  |  |  * The underlying field. P521 operates over GF(2^521-1). We can serialize an  | 
57  |  |  * element of this field into 66 bytes where the most significant byte  | 
58  |  |  * contains only a single bit. We call this an felem_bytearray.  | 
59  |  |  */  | 
60  |  |  | 
61  |  | typedef u8 felem_bytearray[66];  | 
62  |  |  | 
63  |  | /*  | 
64  |  |  * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.  | 
65  |  |  * These values are big-endian.  | 
66  |  |  */  | 
67  |  | static const felem_bytearray nistp521_curve_params[5] = { | 
68  |  |     {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ | 
69  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
70  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
71  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
72  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
73  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
74  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
75  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
76  |  |      0xff, 0xff},  | 
77  |  |     {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ | 
78  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
79  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
80  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
81  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
82  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
83  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
84  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  | 
85  |  |      0xff, 0xfc},  | 
86  |  |     {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ | 
87  |  |      0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,  | 
88  |  |      0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,  | 
89  |  |      0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,  | 
90  |  |      0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,  | 
91  |  |      0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,  | 
92  |  |      0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,  | 
93  |  |      0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,  | 
94  |  |      0x3f, 0x00},  | 
95  |  |     {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ | 
96  |  |      0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,  | 
97  |  |      0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,  | 
98  |  |      0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,  | 
99  |  |      0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,  | 
100  |  |      0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,  | 
101  |  |      0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,  | 
102  |  |      0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,  | 
103  |  |      0xbd, 0x66},  | 
104  |  |     {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ | 
105  |  |      0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,  | 
106  |  |      0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,  | 
107  |  |      0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,  | 
108  |  |      0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,  | 
109  |  |      0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,  | 
110  |  |      0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,  | 
111  |  |      0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,  | 
112  |  |      0x66, 0x50}  | 
113  |  | };  | 
114  |  |  | 
115  |  | /*-  | 
116  |  |  * The representation of field elements.  | 
117  |  |  * ------------------------------------  | 
118  |  |  *  | 
119  |  |  * We represent field elements with nine values. These values are either 64 or  | 
120  |  |  * 128 bits and the field element represented is:  | 
121  |  |  *   v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464  (mod p)  | 
122  |  |  * Each of the nine values is called a 'limb'. Since the limbs are spaced only  | 
123  |  |  * 58 bits apart, but are greater than 58 bits in length, the most significant  | 
124  |  |  * bits of each limb overlap with the least significant bits of the next.  | 
125  |  |  *  | 
126  |  |  * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a  | 
127  |  |  * 'largefelem' */  | 
128  |  |  | 
129  | 46.3M  | #define NLIMBS 9  | 
130  |  |  | 
131  |  | typedef uint64_t limb;  | 
132  |  | typedef limb limb_aX __attribute((__aligned__(1)));  | 
133  |  | typedef limb felem[NLIMBS];  | 
134  |  | typedef uint128_t largefelem[NLIMBS];  | 
135  |  |  | 
136  |  | static const limb bottom57bits = 0x1ffffffffffffff;  | 
137  |  | static const limb bottom58bits = 0x3ffffffffffffff;  | 
138  |  |  | 
139  |  | /*  | 
140  |  |  * bin66_to_felem takes a little-endian byte array and converts it into felem  | 
141  |  |  * form. This assumes that the CPU is little-endian.  | 
142  |  |  */  | 
143  |  | static void bin66_to_felem(felem out, const u8 in[66])  | 
144  | 2.67k  | { | 
145  | 2.67k  |     out[0] = (*((limb *) & in[0])) & bottom58bits;  | 
146  | 2.67k  |     out[1] = (*((limb_aX *) & in[7]) >> 2) & bottom58bits;  | 
147  | 2.67k  |     out[2] = (*((limb_aX *) & in[14]) >> 4) & bottom58bits;  | 
148  | 2.67k  |     out[3] = (*((limb_aX *) & in[21]) >> 6) & bottom58bits;  | 
149  | 2.67k  |     out[4] = (*((limb_aX *) & in[29])) & bottom58bits;  | 
150  | 2.67k  |     out[5] = (*((limb_aX *) & in[36]) >> 2) & bottom58bits;  | 
151  | 2.67k  |     out[6] = (*((limb_aX *) & in[43]) >> 4) & bottom58bits;  | 
152  | 2.67k  |     out[7] = (*((limb_aX *) & in[50]) >> 6) & bottom58bits;  | 
153  | 2.67k  |     out[8] = (*((limb_aX *) & in[58])) & bottom57bits;  | 
154  | 2.67k  | }  | 
155  |  |  | 
156  |  | /*  | 
157  |  |  * felem_to_bin66 takes an felem and serializes into a little endian, 66 byte  | 
158  |  |  * array. This assumes that the CPU is little-endian.  | 
159  |  |  */  | 
160  |  | static void felem_to_bin66(u8 out[66], const felem in)  | 
161  | 5.34k  | { | 
162  | 5.34k  |     memset(out, 0, 66);  | 
163  | 5.34k  |     (*((limb *) & out[0])) = in[0];  | 
164  | 5.34k  |     (*((limb_aX *) & out[7])) |= in[1] << 2;  | 
165  | 5.34k  |     (*((limb_aX *) & out[14])) |= in[2] << 4;  | 
166  | 5.34k  |     (*((limb_aX *) & out[21])) |= in[3] << 6;  | 
167  | 5.34k  |     (*((limb_aX *) & out[29])) = in[4];  | 
168  | 5.34k  |     (*((limb_aX *) & out[36])) |= in[5] << 2;  | 
169  | 5.34k  |     (*((limb_aX *) & out[43])) |= in[6] << 4;  | 
170  | 5.34k  |     (*((limb_aX *) & out[50])) |= in[7] << 6;  | 
171  | 5.34k  |     (*((limb_aX *) & out[58])) = in[8];  | 
172  | 5.34k  | }  | 
173  |  |  | 
174  |  | /* BN_to_felem converts an OpenSSL BIGNUM into an felem */  | 
175  |  | static int BN_to_felem(felem out, const BIGNUM *bn)  | 
176  | 2.67k  | { | 
177  | 2.67k  |     felem_bytearray b_out;  | 
178  | 2.67k  |     int num_bytes;  | 
179  |  |  | 
180  | 2.67k  |     if (BN_is_negative(bn)) { | 
181  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
182  | 0  |         return 0;  | 
183  | 0  |     }  | 
184  | 2.67k  |     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));  | 
185  | 2.67k  |     if (num_bytes < 0) { | 
186  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
187  | 0  |         return 0;  | 
188  | 0  |     }  | 
189  | 2.67k  |     bin66_to_felem(out, b_out);  | 
190  | 2.67k  |     return 1;  | 
191  | 2.67k  | }  | 
192  |  |  | 
193  |  | /* felem_to_BN converts an felem into an OpenSSL BIGNUM */  | 
194  |  | static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)  | 
195  | 5.34k  | { | 
196  | 5.34k  |     felem_bytearray b_out;  | 
197  | 5.34k  |     felem_to_bin66(b_out, in);  | 
198  | 5.34k  |     return BN_lebin2bn(b_out, sizeof(b_out), out);  | 
199  | 5.34k  | }  | 
200  |  |  | 
201  |  | /*-  | 
202  |  |  * Field operations  | 
203  |  |  * ----------------  | 
204  |  |  */  | 
205  |  |  | 
206  |  | static void felem_one(felem out)  | 
207  | 0  | { | 
208  | 0  |     out[0] = 1;  | 
209  | 0  |     out[1] = 0;  | 
210  | 0  |     out[2] = 0;  | 
211  | 0  |     out[3] = 0;  | 
212  | 0  |     out[4] = 0;  | 
213  | 0  |     out[5] = 0;  | 
214  | 0  |     out[6] = 0;  | 
215  | 0  |     out[7] = 0;  | 
216  | 0  |     out[8] = 0;  | 
217  | 0  | }  | 
218  |  |  | 
219  |  | static void felem_assign(felem out, const felem in)  | 
220  | 1.56M  | { | 
221  | 1.56M  |     out[0] = in[0];  | 
222  | 1.56M  |     out[1] = in[1];  | 
223  | 1.56M  |     out[2] = in[2];  | 
224  | 1.56M  |     out[3] = in[3];  | 
225  | 1.56M  |     out[4] = in[4];  | 
226  | 1.56M  |     out[5] = in[5];  | 
227  | 1.56M  |     out[6] = in[6];  | 
228  | 1.56M  |     out[7] = in[7];  | 
229  | 1.56M  |     out[8] = in[8];  | 
230  | 1.56M  | }  | 
231  |  |  | 
232  |  | /* felem_sum64 sets out = out + in. */  | 
233  |  | static void felem_sum64(felem out, const felem in)  | 
234  | 446k  | { | 
235  | 446k  |     out[0] += in[0];  | 
236  | 446k  |     out[1] += in[1];  | 
237  | 446k  |     out[2] += in[2];  | 
238  | 446k  |     out[3] += in[3];  | 
239  | 446k  |     out[4] += in[4];  | 
240  | 446k  |     out[5] += in[5];  | 
241  | 446k  |     out[6] += in[6];  | 
242  | 446k  |     out[7] += in[7];  | 
243  | 446k  |     out[8] += in[8];  | 
244  | 446k  | }  | 
245  |  |  | 
246  |  | /* felem_scalar sets out = in * scalar */  | 
247  |  | static void felem_scalar(felem out, const felem in, limb scalar)  | 
248  | 4.12M  | { | 
249  | 4.12M  |     out[0] = in[0] * scalar;  | 
250  | 4.12M  |     out[1] = in[1] * scalar;  | 
251  | 4.12M  |     out[2] = in[2] * scalar;  | 
252  | 4.12M  |     out[3] = in[3] * scalar;  | 
253  | 4.12M  |     out[4] = in[4] * scalar;  | 
254  | 4.12M  |     out[5] = in[5] * scalar;  | 
255  | 4.12M  |     out[6] = in[6] * scalar;  | 
256  | 4.12M  |     out[7] = in[7] * scalar;  | 
257  | 4.12M  |     out[8] = in[8] * scalar;  | 
258  | 4.12M  | }  | 
259  |  |  | 
260  |  | /* felem_scalar64 sets out = out * scalar */  | 
261  |  | static void felem_scalar64(felem out, limb scalar)  | 
262  | 703k  | { | 
263  | 703k  |     out[0] *= scalar;  | 
264  | 703k  |     out[1] *= scalar;  | 
265  | 703k  |     out[2] *= scalar;  | 
266  | 703k  |     out[3] *= scalar;  | 
267  | 703k  |     out[4] *= scalar;  | 
268  | 703k  |     out[5] *= scalar;  | 
269  | 703k  |     out[6] *= scalar;  | 
270  | 703k  |     out[7] *= scalar;  | 
271  | 703k  |     out[8] *= scalar;  | 
272  | 703k  | }  | 
273  |  |  | 
274  |  | /* felem_scalar128 sets out = out * scalar */  | 
275  |  | static void felem_scalar128(largefelem out, limb scalar)  | 
276  | 234k  | { | 
277  | 234k  |     out[0] *= scalar;  | 
278  | 234k  |     out[1] *= scalar;  | 
279  | 234k  |     out[2] *= scalar;  | 
280  | 234k  |     out[3] *= scalar;  | 
281  | 234k  |     out[4] *= scalar;  | 
282  | 234k  |     out[5] *= scalar;  | 
283  | 234k  |     out[6] *= scalar;  | 
284  | 234k  |     out[7] *= scalar;  | 
285  | 234k  |     out[8] *= scalar;  | 
286  | 234k  | }  | 
287  |  |  | 
288  |  | /*-  | 
289  |  |  * felem_neg sets |out| to |-in|  | 
290  |  |  * On entry:  | 
291  |  |  *   in[i] < 2^59 + 2^14  | 
292  |  |  * On exit:  | 
293  |  |  *   out[i] < 2^62  | 
294  |  |  */  | 
295  |  | static void felem_neg(felem out, const felem in)  | 
296  | 13.7k  | { | 
297  |  |     /* In order to prevent underflow, we subtract from 0 mod p. */  | 
298  | 13.7k  |     static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);  | 
299  | 13.7k  |     static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);  | 
300  |  |  | 
301  | 13.7k  |     out[0] = two62m3 - in[0];  | 
302  | 13.7k  |     out[1] = two62m2 - in[1];  | 
303  | 13.7k  |     out[2] = two62m2 - in[2];  | 
304  | 13.7k  |     out[3] = two62m2 - in[3];  | 
305  | 13.7k  |     out[4] = two62m2 - in[4];  | 
306  | 13.7k  |     out[5] = two62m2 - in[5];  | 
307  | 13.7k  |     out[6] = two62m2 - in[6];  | 
308  | 13.7k  |     out[7] = two62m2 - in[7];  | 
309  | 13.7k  |     out[8] = two62m2 - in[8];  | 
310  | 13.7k  | }  | 
311  |  |  | 
312  |  | /*-  | 
313  |  |  * felem_diff64 subtracts |in| from |out|  | 
314  |  |  * On entry:  | 
315  |  |  *   in[i] < 2^59 + 2^14  | 
316  |  |  * On exit:  | 
317  |  |  *   out[i] < out[i] + 2^62  | 
318  |  |  */  | 
319  |  | static void felem_diff64(felem out, const felem in)  | 
320  | 378k  | { | 
321  |  |     /*  | 
322  |  |      * In order to prevent underflow, we add 0 mod p before subtracting.  | 
323  |  |      */  | 
324  | 378k  |     static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);  | 
325  | 378k  |     static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);  | 
326  |  |  | 
327  | 378k  |     out[0] += two62m3 - in[0];  | 
328  | 378k  |     out[1] += two62m2 - in[1];  | 
329  | 378k  |     out[2] += two62m2 - in[2];  | 
330  | 378k  |     out[3] += two62m2 - in[3];  | 
331  | 378k  |     out[4] += two62m2 - in[4];  | 
332  | 378k  |     out[5] += two62m2 - in[5];  | 
333  | 378k  |     out[6] += two62m2 - in[6];  | 
334  | 378k  |     out[7] += two62m2 - in[7];  | 
335  | 378k  |     out[8] += two62m2 - in[8];  | 
336  | 378k  | }  | 
337  |  |  | 
338  |  | /*-  | 
339  |  |  * felem_diff_128_64 subtracts |in| from |out|  | 
340  |  |  * On entry:  | 
341  |  |  *   in[i] < 2^62 + 2^17  | 
342  |  |  * On exit:  | 
343  |  |  *   out[i] < out[i] + 2^63  | 
344  |  |  */  | 
345  |  | static void felem_diff_128_64(largefelem out, const felem in)  | 
346  | 678k  | { | 
347  |  |     /*  | 
348  |  |      * In order to prevent underflow, we add 64p mod p (which is equivalent  | 
349  |  |      * to 0 mod p) before subtracting. p is 2^521 - 1, i.e. in binary a 521  | 
350  |  |      * digit number with all bits set to 1. See "The representation of field  | 
351  |  |      * elements" comment above for a description of how limbs are used to  | 
352  |  |      * represent a number. 64p is represented with 8 limbs containing a number  | 
353  |  |      * with 58 bits set and one limb with a number with 57 bits set.  | 
354  |  |      */  | 
355  | 678k  |     static const limb two63m6 = (((limb) 1) << 63) - (((limb) 1) << 6);  | 
356  | 678k  |     static const limb two63m5 = (((limb) 1) << 63) - (((limb) 1) << 5);  | 
357  |  |  | 
358  | 678k  |     out[0] += two63m6 - in[0];  | 
359  | 678k  |     out[1] += two63m5 - in[1];  | 
360  | 678k  |     out[2] += two63m5 - in[2];  | 
361  | 678k  |     out[3] += two63m5 - in[3];  | 
362  | 678k  |     out[4] += two63m5 - in[4];  | 
363  | 678k  |     out[5] += two63m5 - in[5];  | 
364  | 678k  |     out[6] += two63m5 - in[6];  | 
365  | 678k  |     out[7] += two63m5 - in[7];  | 
366  | 678k  |     out[8] += two63m5 - in[8];  | 
367  | 678k  | }  | 
368  |  |  | 
369  |  | /*-  | 
370  |  |  * felem_diff_128_64 subtracts |in| from |out|  | 
371  |  |  * On entry:  | 
372  |  |  *   in[i] < 2^126  | 
373  |  |  * On exit:  | 
374  |  |  *   out[i] < out[i] + 2^127 - 2^69  | 
375  |  |  */  | 
376  |  | static void felem_diff128(largefelem out, const largefelem in)  | 
377  | 234k  | { | 
378  |  |     /*  | 
379  |  |      * In order to prevent underflow, we add 0 mod p before subtracting.  | 
380  |  |      */  | 
381  | 234k  |     static const uint128_t two127m70 =  | 
382  | 234k  |         (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);  | 
383  | 234k  |     static const uint128_t two127m69 =  | 
384  | 234k  |         (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);  | 
385  |  |  | 
386  | 234k  |     out[0] += (two127m70 - in[0]);  | 
387  | 234k  |     out[1] += (two127m69 - in[1]);  | 
388  | 234k  |     out[2] += (two127m69 - in[2]);  | 
389  | 234k  |     out[3] += (two127m69 - in[3]);  | 
390  | 234k  |     out[4] += (two127m69 - in[4]);  | 
391  | 234k  |     out[5] += (two127m69 - in[5]);  | 
392  | 234k  |     out[6] += (two127m69 - in[6]);  | 
393  | 234k  |     out[7] += (two127m69 - in[7]);  | 
394  | 234k  |     out[8] += (two127m69 - in[8]);  | 
395  | 234k  | }  | 
396  |  |  | 
397  |  | /*-  | 
398  |  |  * felem_square sets |out| = |in|^2  | 
399  |  |  * On entry:  | 
400  |  |  *   in[i] < 2^62  | 
401  |  |  * On exit:  | 
402  |  |  *   out[i] < 17 * max(in[i]) * max(in[i])  | 
403  |  |  */  | 
404  |  | static void felem_square_ref(largefelem out, const felem in)  | 
405  | 1.41M  | { | 
406  | 1.41M  |     felem inx2, inx4;  | 
407  | 1.41M  |     felem_scalar(inx2, in, 2);  | 
408  | 1.41M  |     felem_scalar(inx4, in, 4);  | 
409  |  |  | 
410  |  |     /*-  | 
411  |  |      * We have many cases were we want to do  | 
412  |  |      *   in[x] * in[y] +  | 
413  |  |      *   in[y] * in[x]  | 
414  |  |      * This is obviously just  | 
415  |  |      *   2 * in[x] * in[y]  | 
416  |  |      * However, rather than do the doubling on the 128 bit result, we  | 
417  |  |      * double one of the inputs to the multiplication by reading from  | 
418  |  |      * |inx2|  | 
419  |  |      */  | 
420  |  |  | 
421  | 1.41M  |     out[0] = ((uint128_t) in[0]) * in[0];  | 
422  | 1.41M  |     out[1] = ((uint128_t) in[0]) * inx2[1];  | 
423  | 1.41M  |     out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1];  | 
424  | 1.41M  |     out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2];  | 
425  | 1.41M  |     out[4] = ((uint128_t) in[0]) * inx2[4] +  | 
426  | 1.41M  |              ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2];  | 
427  | 1.41M  |     out[5] = ((uint128_t) in[0]) * inx2[5] +  | 
428  | 1.41M  |              ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3];  | 
429  | 1.41M  |     out[6] = ((uint128_t) in[0]) * inx2[6] +  | 
430  | 1.41M  |              ((uint128_t) in[1]) * inx2[5] +  | 
431  | 1.41M  |              ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3];  | 
432  | 1.41M  |     out[7] = ((uint128_t) in[0]) * inx2[7] +  | 
433  | 1.41M  |              ((uint128_t) in[1]) * inx2[6] +  | 
434  | 1.41M  |              ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4];  | 
435  | 1.41M  |     out[8] = ((uint128_t) in[0]) * inx2[8] +  | 
436  | 1.41M  |              ((uint128_t) in[1]) * inx2[7] +  | 
437  | 1.41M  |              ((uint128_t) in[2]) * inx2[6] +  | 
438  | 1.41M  |              ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4];  | 
439  |  |  | 
440  |  |     /*  | 
441  |  |      * The remaining limbs fall above 2^521, with the first falling at 2^522.  | 
442  |  |      * They correspond to locations one bit up from the limbs produced above  | 
443  |  |      * so we would have to multiply by two to align them. Again, rather than  | 
444  |  |      * operate on the 128-bit result, we double one of the inputs to the  | 
445  |  |      * multiplication. If we want to double for both this reason, and the  | 
446  |  |      * reason above, then we end up multiplying by four.  | 
447  |  |      */  | 
448  |  |  | 
449  |  |     /* 9 */  | 
450  | 1.41M  |     out[0] += ((uint128_t) in[1]) * inx4[8] +  | 
451  | 1.41M  |               ((uint128_t) in[2]) * inx4[7] +  | 
452  | 1.41M  |               ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5];  | 
453  |  |  | 
454  |  |     /* 10 */  | 
455  | 1.41M  |     out[1] += ((uint128_t) in[2]) * inx4[8] +  | 
456  | 1.41M  |               ((uint128_t) in[3]) * inx4[7] +  | 
457  | 1.41M  |               ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5];  | 
458  |  |  | 
459  |  |     /* 11 */  | 
460  | 1.41M  |     out[2] += ((uint128_t) in[3]) * inx4[8] +  | 
461  | 1.41M  |               ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6];  | 
462  |  |  | 
463  |  |     /* 12 */  | 
464  | 1.41M  |     out[3] += ((uint128_t) in[4]) * inx4[8] +  | 
465  | 1.41M  |               ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6];  | 
466  |  |  | 
467  |  |     /* 13 */  | 
468  | 1.41M  |     out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7];  | 
469  |  |  | 
470  |  |     /* 14 */  | 
471  | 1.41M  |     out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7];  | 
472  |  |  | 
473  |  |     /* 15 */  | 
474  | 1.41M  |     out[6] += ((uint128_t) in[7]) * inx4[8];  | 
475  |  |  | 
476  |  |     /* 16 */  | 
477  | 1.41M  |     out[7] += ((uint128_t) in[8]) * inx2[8];  | 
478  | 1.41M  | }  | 
479  |  |  | 
480  |  | /*-  | 
481  |  |  * felem_mul sets |out| = |in1| * |in2|  | 
482  |  |  * On entry:  | 
483  |  |  *   in1[i] < 2^64  | 
484  |  |  *   in2[i] < 2^63  | 
485  |  |  * On exit:  | 
486  |  |  *   out[i] < 17 * max(in1[i]) * max(in2[i])  | 
487  |  |  */  | 
488  |  | static void felem_mul_ref(largefelem out, const felem in1, const felem in2)  | 
489  | 1.21M  | { | 
490  | 1.21M  |     felem in2x2;  | 
491  | 1.21M  |     felem_scalar(in2x2, in2, 2);  | 
492  |  |  | 
493  | 1.21M  |     out[0] = ((uint128_t) in1[0]) * in2[0];  | 
494  |  |  | 
495  | 1.21M  |     out[1] = ((uint128_t) in1[0]) * in2[1] +  | 
496  | 1.21M  |              ((uint128_t) in1[1]) * in2[0];  | 
497  |  |  | 
498  | 1.21M  |     out[2] = ((uint128_t) in1[0]) * in2[2] +  | 
499  | 1.21M  |              ((uint128_t) in1[1]) * in2[1] +  | 
500  | 1.21M  |              ((uint128_t) in1[2]) * in2[0];  | 
501  |  |  | 
502  | 1.21M  |     out[3] = ((uint128_t) in1[0]) * in2[3] +  | 
503  | 1.21M  |              ((uint128_t) in1[1]) * in2[2] +  | 
504  | 1.21M  |              ((uint128_t) in1[2]) * in2[1] +  | 
505  | 1.21M  |              ((uint128_t) in1[3]) * in2[0];  | 
506  |  |  | 
507  | 1.21M  |     out[4] = ((uint128_t) in1[0]) * in2[4] +  | 
508  | 1.21M  |              ((uint128_t) in1[1]) * in2[3] +  | 
509  | 1.21M  |              ((uint128_t) in1[2]) * in2[2] +  | 
510  | 1.21M  |              ((uint128_t) in1[3]) * in2[1] +  | 
511  | 1.21M  |              ((uint128_t) in1[4]) * in2[0];  | 
512  |  |  | 
513  | 1.21M  |     out[5] = ((uint128_t) in1[0]) * in2[5] +  | 
514  | 1.21M  |              ((uint128_t) in1[1]) * in2[4] +  | 
515  | 1.21M  |              ((uint128_t) in1[2]) * in2[3] +  | 
516  | 1.21M  |              ((uint128_t) in1[3]) * in2[2] +  | 
517  | 1.21M  |              ((uint128_t) in1[4]) * in2[1] +  | 
518  | 1.21M  |              ((uint128_t) in1[5]) * in2[0];  | 
519  |  |  | 
520  | 1.21M  |     out[6] = ((uint128_t) in1[0]) * in2[6] +  | 
521  | 1.21M  |              ((uint128_t) in1[1]) * in2[5] +  | 
522  | 1.21M  |              ((uint128_t) in1[2]) * in2[4] +  | 
523  | 1.21M  |              ((uint128_t) in1[3]) * in2[3] +  | 
524  | 1.21M  |              ((uint128_t) in1[4]) * in2[2] +  | 
525  | 1.21M  |              ((uint128_t) in1[5]) * in2[1] +  | 
526  | 1.21M  |              ((uint128_t) in1[6]) * in2[0];  | 
527  |  |  | 
528  | 1.21M  |     out[7] = ((uint128_t) in1[0]) * in2[7] +  | 
529  | 1.21M  |              ((uint128_t) in1[1]) * in2[6] +  | 
530  | 1.21M  |              ((uint128_t) in1[2]) * in2[5] +  | 
531  | 1.21M  |              ((uint128_t) in1[3]) * in2[4] +  | 
532  | 1.21M  |              ((uint128_t) in1[4]) * in2[3] +  | 
533  | 1.21M  |              ((uint128_t) in1[5]) * in2[2] +  | 
534  | 1.21M  |              ((uint128_t) in1[6]) * in2[1] +  | 
535  | 1.21M  |              ((uint128_t) in1[7]) * in2[0];  | 
536  |  |  | 
537  | 1.21M  |     out[8] = ((uint128_t) in1[0]) * in2[8] +  | 
538  | 1.21M  |              ((uint128_t) in1[1]) * in2[7] +  | 
539  | 1.21M  |              ((uint128_t) in1[2]) * in2[6] +  | 
540  | 1.21M  |              ((uint128_t) in1[3]) * in2[5] +  | 
541  | 1.21M  |              ((uint128_t) in1[4]) * in2[4] +  | 
542  | 1.21M  |              ((uint128_t) in1[5]) * in2[3] +  | 
543  | 1.21M  |              ((uint128_t) in1[6]) * in2[2] +  | 
544  | 1.21M  |              ((uint128_t) in1[7]) * in2[1] +  | 
545  | 1.21M  |              ((uint128_t) in1[8]) * in2[0];  | 
546  |  |  | 
547  |  |     /* See comment in felem_square about the use of in2x2 here */  | 
548  |  |  | 
549  | 1.21M  |     out[0] += ((uint128_t) in1[1]) * in2x2[8] +  | 
550  | 1.21M  |               ((uint128_t) in1[2]) * in2x2[7] +  | 
551  | 1.21M  |               ((uint128_t) in1[3]) * in2x2[6] +  | 
552  | 1.21M  |               ((uint128_t) in1[4]) * in2x2[5] +  | 
553  | 1.21M  |               ((uint128_t) in1[5]) * in2x2[4] +  | 
554  | 1.21M  |               ((uint128_t) in1[6]) * in2x2[3] +  | 
555  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[2] +  | 
556  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[1];  | 
557  |  |  | 
558  | 1.21M  |     out[1] += ((uint128_t) in1[2]) * in2x2[8] +  | 
559  | 1.21M  |               ((uint128_t) in1[3]) * in2x2[7] +  | 
560  | 1.21M  |               ((uint128_t) in1[4]) * in2x2[6] +  | 
561  | 1.21M  |               ((uint128_t) in1[5]) * in2x2[5] +  | 
562  | 1.21M  |               ((uint128_t) in1[6]) * in2x2[4] +  | 
563  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[3] +  | 
564  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[2];  | 
565  |  |  | 
566  | 1.21M  |     out[2] += ((uint128_t) in1[3]) * in2x2[8] +  | 
567  | 1.21M  |               ((uint128_t) in1[4]) * in2x2[7] +  | 
568  | 1.21M  |               ((uint128_t) in1[5]) * in2x2[6] +  | 
569  | 1.21M  |               ((uint128_t) in1[6]) * in2x2[5] +  | 
570  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[4] +  | 
571  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[3];  | 
572  |  |  | 
573  | 1.21M  |     out[3] += ((uint128_t) in1[4]) * in2x2[8] +  | 
574  | 1.21M  |               ((uint128_t) in1[5]) * in2x2[7] +  | 
575  | 1.21M  |               ((uint128_t) in1[6]) * in2x2[6] +  | 
576  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[5] +  | 
577  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[4];  | 
578  |  |  | 
579  | 1.21M  |     out[4] += ((uint128_t) in1[5]) * in2x2[8] +  | 
580  | 1.21M  |               ((uint128_t) in1[6]) * in2x2[7] +  | 
581  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[6] +  | 
582  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[5];  | 
583  |  |  | 
584  | 1.21M  |     out[5] += ((uint128_t) in1[6]) * in2x2[8] +  | 
585  | 1.21M  |               ((uint128_t) in1[7]) * in2x2[7] +  | 
586  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[6];  | 
587  |  |  | 
588  | 1.21M  |     out[6] += ((uint128_t) in1[7]) * in2x2[8] +  | 
589  | 1.21M  |               ((uint128_t) in1[8]) * in2x2[7];  | 
590  |  |  | 
591  | 1.21M  |     out[7] += ((uint128_t) in1[8]) * in2x2[8];  | 
592  | 1.21M  | }  | 
593  |  |  | 
594  |  | static const limb bottom52bits = 0xfffffffffffff;  | 
595  |  |  | 
596  |  | /*-  | 
597  |  |  * felem_reduce converts a largefelem to an felem.  | 
598  |  |  * On entry:  | 
599  |  |  *   in[i] < 2^128  | 
600  |  |  * On exit:  | 
601  |  |  *   out[i] < 2^59 + 2^14  | 
602  |  |  */  | 
603  |  | static void felem_reduce(felem out, const largefelem in)  | 
604  | 2.39M  | { | 
605  | 2.39M  |     u64 overflow1, overflow2;  | 
606  |  |  | 
607  | 2.39M  |     out[0] = ((limb) in[0]) & bottom58bits;  | 
608  | 2.39M  |     out[1] = ((limb) in[1]) & bottom58bits;  | 
609  | 2.39M  |     out[2] = ((limb) in[2]) & bottom58bits;  | 
610  | 2.39M  |     out[3] = ((limb) in[3]) & bottom58bits;  | 
611  | 2.39M  |     out[4] = ((limb) in[4]) & bottom58bits;  | 
612  | 2.39M  |     out[5] = ((limb) in[5]) & bottom58bits;  | 
613  | 2.39M  |     out[6] = ((limb) in[6]) & bottom58bits;  | 
614  | 2.39M  |     out[7] = ((limb) in[7]) & bottom58bits;  | 
615  | 2.39M  |     out[8] = ((limb) in[8]) & bottom58bits;  | 
616  |  |  | 
617  |  |     /* out[i] < 2^58 */  | 
618  |  |  | 
619  | 2.39M  |     out[1] += ((limb) in[0]) >> 58;  | 
620  | 2.39M  |     out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;  | 
621  |  |     /*-  | 
622  |  |      * out[1] < 2^58 + 2^6 + 2^58  | 
623  |  |      *        = 2^59 + 2^6  | 
624  |  |      */  | 
625  | 2.39M  |     out[2] += ((limb) (in[0] >> 64)) >> 52;  | 
626  |  |  | 
627  | 2.39M  |     out[2] += ((limb) in[1]) >> 58;  | 
628  | 2.39M  |     out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;  | 
629  | 2.39M  |     out[3] += ((limb) (in[1] >> 64)) >> 52;  | 
630  |  |  | 
631  | 2.39M  |     out[3] += ((limb) in[2]) >> 58;  | 
632  | 2.39M  |     out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;  | 
633  | 2.39M  |     out[4] += ((limb) (in[2] >> 64)) >> 52;  | 
634  |  |  | 
635  | 2.39M  |     out[4] += ((limb) in[3]) >> 58;  | 
636  | 2.39M  |     out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;  | 
637  | 2.39M  |     out[5] += ((limb) (in[3] >> 64)) >> 52;  | 
638  |  |  | 
639  | 2.39M  |     out[5] += ((limb) in[4]) >> 58;  | 
640  | 2.39M  |     out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;  | 
641  | 2.39M  |     out[6] += ((limb) (in[4] >> 64)) >> 52;  | 
642  |  |  | 
643  | 2.39M  |     out[6] += ((limb) in[5]) >> 58;  | 
644  | 2.39M  |     out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;  | 
645  | 2.39M  |     out[7] += ((limb) (in[5] >> 64)) >> 52;  | 
646  |  |  | 
647  | 2.39M  |     out[7] += ((limb) in[6]) >> 58;  | 
648  | 2.39M  |     out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;  | 
649  | 2.39M  |     out[8] += ((limb) (in[6] >> 64)) >> 52;  | 
650  |  |  | 
651  | 2.39M  |     out[8] += ((limb) in[7]) >> 58;  | 
652  | 2.39M  |     out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;  | 
653  |  |     /*-  | 
654  |  |      * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12  | 
655  |  |      *            < 2^59 + 2^13  | 
656  |  |      */  | 
657  | 2.39M  |     overflow1 = ((limb) (in[7] >> 64)) >> 52;  | 
658  |  |  | 
659  | 2.39M  |     overflow1 += ((limb) in[8]) >> 58;  | 
660  | 2.39M  |     overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;  | 
661  | 2.39M  |     overflow2 = ((limb) (in[8] >> 64)) >> 52;  | 
662  |  |  | 
663  | 2.39M  |     overflow1 <<= 1;            /* overflow1 < 2^13 + 2^7 + 2^59 */  | 
664  | 2.39M  |     overflow2 <<= 1;            /* overflow2 < 2^13 */  | 
665  |  |  | 
666  | 2.39M  |     out[0] += overflow1;        /* out[0] < 2^60 */  | 
667  | 2.39M  |     out[1] += overflow2;        /* out[1] < 2^59 + 2^6 + 2^13 */  | 
668  |  |  | 
669  | 2.39M  |     out[1] += out[0] >> 58;  | 
670  | 2.39M  |     out[0] &= bottom58bits;  | 
671  |  |     /*-  | 
672  |  |      * out[0] < 2^58  | 
673  |  |      * out[1] < 2^59 + 2^6 + 2^13 + 2^2  | 
674  |  |      *        < 2^59 + 2^14  | 
675  |  |      */  | 
676  | 2.39M  | }  | 
677  |  |  | 
678  |  | #if defined(ECP_NISTP521_ASM)  | 
679  |  | void felem_square_wrapper(largefelem out, const felem in);  | 
680  |  | void felem_mul_wrapper(largefelem out, const felem in1, const felem in2);  | 
681  |  |  | 
682  |  | static void (*felem_square_p)(largefelem out, const felem in) =  | 
683  |  |     felem_square_wrapper;  | 
684  |  | static void (*felem_mul_p)(largefelem out, const felem in1, const felem in2) =  | 
685  |  |     felem_mul_wrapper;  | 
686  |  |  | 
687  |  | void p521_felem_square(largefelem out, const felem in);  | 
688  |  | void p521_felem_mul(largefelem out, const felem in1, const felem in2);  | 
689  |  |  | 
690  |  | # if defined(_ARCH_PPC64)  | 
691  |  | #  include "crypto/ppc_arch.h"  | 
692  |  | # endif  | 
693  |  |  | 
694  |  | void felem_select(void)  | 
695  |  | { | 
696  |  | # if defined(_ARCH_PPC64)  | 
697  |  |     if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { | 
698  |  |         felem_square_p = p521_felem_square;  | 
699  |  |         felem_mul_p = p521_felem_mul;  | 
700  |  |  | 
701  |  |         return;  | 
702  |  |     }  | 
703  |  | # endif  | 
704  |  |  | 
705  |  |     /* Default */  | 
706  |  |     felem_square_p = felem_square_ref;  | 
707  |  |     felem_mul_p = felem_mul_ref;  | 
708  |  | }  | 
709  |  |  | 
710  |  | void felem_square_wrapper(largefelem out, const felem in)  | 
711  |  | { | 
712  |  |     felem_select();  | 
713  |  |     felem_square_p(out, in);  | 
714  |  | }  | 
715  |  |  | 
716  |  | void felem_mul_wrapper(largefelem out, const felem in1, const felem in2)  | 
717  |  | { | 
718  |  |     felem_select();  | 
719  |  |     felem_mul_p(out, in1, in2);  | 
720  |  | }  | 
721  |  |  | 
722  |  | # define felem_square felem_square_p  | 
723  |  | # define felem_mul felem_mul_p  | 
724  |  | #else  | 
725  | 1.41M  | # define felem_square felem_square_ref  | 
726  | 1.21M  | # define felem_mul felem_mul_ref  | 
727  |  | #endif  | 
728  |  |  | 
729  |  | static void felem_square_reduce(felem out, const felem in)  | 
730  | 0  | { | 
731  | 0  |     largefelem tmp;  | 
732  | 0  |     felem_square(tmp, in);  | 
733  | 0  |     felem_reduce(out, tmp);  | 
734  | 0  | }  | 
735  |  |  | 
736  |  | static void felem_mul_reduce(felem out, const felem in1, const felem in2)  | 
737  | 0  | { | 
738  | 0  |     largefelem tmp;  | 
739  | 0  |     felem_mul(tmp, in1, in2);  | 
740  | 0  |     felem_reduce(out, tmp);  | 
741  | 0  | }  | 
742  |  |  | 
743  |  | /*-  | 
744  |  |  * felem_inv calculates |out| = |in|^{-1} | 
745  |  |  *  | 
746  |  |  * Based on Fermat's Little Theorem:  | 
747  |  |  *   a^p = a (mod p)  | 
748  |  |  *   a^{p-1} = 1 (mod p) | 
749  |  |  *   a^{p-2} = a^{-1} (mod p) | 
750  |  |  */  | 
751  |  | static void felem_inv(felem out, const felem in)  | 
752  | 762  | { | 
753  | 762  |     felem ftmp, ftmp2, ftmp3, ftmp4;  | 
754  | 762  |     largefelem tmp;  | 
755  | 762  |     unsigned i;  | 
756  |  |  | 
757  | 762  |     felem_square(tmp, in);  | 
758  | 762  |     felem_reduce(ftmp, tmp);    /* 2^1 */  | 
759  | 762  |     felem_mul(tmp, in, ftmp);  | 
760  | 762  |     felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */  | 
761  | 762  |     felem_assign(ftmp2, ftmp);  | 
762  | 762  |     felem_square(tmp, ftmp);  | 
763  | 762  |     felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */  | 
764  | 762  |     felem_mul(tmp, in, ftmp);  | 
765  | 762  |     felem_reduce(ftmp, tmp);    /* 2^3 - 2^0 */  | 
766  | 762  |     felem_square(tmp, ftmp);  | 
767  | 762  |     felem_reduce(ftmp, tmp);    /* 2^4 - 2^1 */  | 
768  |  |  | 
769  | 762  |     felem_square(tmp, ftmp2);  | 
770  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^3 - 2^1 */  | 
771  | 762  |     felem_square(tmp, ftmp3);  | 
772  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^4 - 2^2 */  | 
773  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
774  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^4 - 2^0 */  | 
775  |  |  | 
776  | 762  |     felem_assign(ftmp2, ftmp3);  | 
777  | 762  |     felem_square(tmp, ftmp3);  | 
778  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^5 - 2^1 */  | 
779  | 762  |     felem_square(tmp, ftmp3);  | 
780  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^6 - 2^2 */  | 
781  | 762  |     felem_square(tmp, ftmp3);  | 
782  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^7 - 2^3 */  | 
783  | 762  |     felem_square(tmp, ftmp3);  | 
784  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^8 - 2^4 */  | 
785  | 762  |     felem_assign(ftmp4, ftmp3);  | 
786  | 762  |     felem_mul(tmp, ftmp3, ftmp);  | 
787  | 762  |     felem_reduce(ftmp4, tmp);   /* 2^8 - 2^1 */  | 
788  | 762  |     felem_square(tmp, ftmp4);  | 
789  | 762  |     felem_reduce(ftmp4, tmp);   /* 2^9 - 2^2 */  | 
790  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
791  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^8 - 2^0 */  | 
792  | 762  |     felem_assign(ftmp2, ftmp3);  | 
793  |  |  | 
794  | 6.85k  |     for (i = 0; i < 8; i++) { | 
795  | 6.09k  |         felem_square(tmp, ftmp3);  | 
796  | 6.09k  |         felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */  | 
797  | 6.09k  |     }  | 
798  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
799  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^16 - 2^0 */  | 
800  | 762  |     felem_assign(ftmp2, ftmp3);  | 
801  |  |  | 
802  | 12.9k  |     for (i = 0; i < 16; i++) { | 
803  | 12.1k  |         felem_square(tmp, ftmp3);  | 
804  | 12.1k  |         felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */  | 
805  | 12.1k  |     }  | 
806  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
807  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^32 - 2^0 */  | 
808  | 762  |     felem_assign(ftmp2, ftmp3);  | 
809  |  |  | 
810  | 25.1k  |     for (i = 0; i < 32; i++) { | 
811  | 24.3k  |         felem_square(tmp, ftmp3);  | 
812  | 24.3k  |         felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */  | 
813  | 24.3k  |     }  | 
814  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
815  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^64 - 2^0 */  | 
816  | 762  |     felem_assign(ftmp2, ftmp3);  | 
817  |  |  | 
818  | 49.5k  |     for (i = 0; i < 64; i++) { | 
819  | 48.7k  |         felem_square(tmp, ftmp3);  | 
820  | 48.7k  |         felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */  | 
821  | 48.7k  |     }  | 
822  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
823  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^128 - 2^0 */  | 
824  | 762  |     felem_assign(ftmp2, ftmp3);  | 
825  |  |  | 
826  | 98.2k  |     for (i = 0; i < 128; i++) { | 
827  | 97.5k  |         felem_square(tmp, ftmp3);  | 
828  | 97.5k  |         felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */  | 
829  | 97.5k  |     }  | 
830  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
831  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^256 - 2^0 */  | 
832  | 762  |     felem_assign(ftmp2, ftmp3);  | 
833  |  |  | 
834  | 195k  |     for (i = 0; i < 256; i++) { | 
835  | 195k  |         felem_square(tmp, ftmp3);  | 
836  | 195k  |         felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */  | 
837  | 195k  |     }  | 
838  | 762  |     felem_mul(tmp, ftmp3, ftmp2);  | 
839  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^512 - 2^0 */  | 
840  |  |  | 
841  | 7.62k  |     for (i = 0; i < 9; i++) { | 
842  | 6.85k  |         felem_square(tmp, ftmp3);  | 
843  | 6.85k  |         felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */  | 
844  | 6.85k  |     }  | 
845  | 762  |     felem_mul(tmp, ftmp3, ftmp4);  | 
846  | 762  |     felem_reduce(ftmp3, tmp);   /* 2^512 - 2^2 */  | 
847  | 762  |     felem_mul(tmp, ftmp3, in);  | 
848  | 762  |     felem_reduce(out, tmp);     /* 2^512 - 3 */  | 
849  | 762  | }  | 
850  |  |  | 
851  |  | /* This is 2^521-1, expressed as an felem */  | 
852  |  | static const felem kPrime = { | 
853  |  |     0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,  | 
854  |  |     0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,  | 
855  |  |     0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff  | 
856  |  | };  | 
857  |  |  | 
858  |  | /*-  | 
859  |  |  * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0  | 
860  |  |  * otherwise.  | 
861  |  |  * On entry:  | 
862  |  |  *   in[i] < 2^59 + 2^14  | 
863  |  |  */  | 
864  |  | static limb felem_is_zero(const felem in)  | 
865  | 361k  | { | 
866  | 361k  |     felem ftmp;  | 
867  | 361k  |     limb is_zero, is_p;  | 
868  | 361k  |     felem_assign(ftmp, in);  | 
869  |  |  | 
870  | 361k  |     ftmp[0] += ftmp[8] >> 57;  | 
871  | 361k  |     ftmp[8] &= bottom57bits;  | 
872  |  |     /* ftmp[8] < 2^57 */  | 
873  | 361k  |     ftmp[1] += ftmp[0] >> 58;  | 
874  | 361k  |     ftmp[0] &= bottom58bits;  | 
875  | 361k  |     ftmp[2] += ftmp[1] >> 58;  | 
876  | 361k  |     ftmp[1] &= bottom58bits;  | 
877  | 361k  |     ftmp[3] += ftmp[2] >> 58;  | 
878  | 361k  |     ftmp[2] &= bottom58bits;  | 
879  | 361k  |     ftmp[4] += ftmp[3] >> 58;  | 
880  | 361k  |     ftmp[3] &= bottom58bits;  | 
881  | 361k  |     ftmp[5] += ftmp[4] >> 58;  | 
882  | 361k  |     ftmp[4] &= bottom58bits;  | 
883  | 361k  |     ftmp[6] += ftmp[5] >> 58;  | 
884  | 361k  |     ftmp[5] &= bottom58bits;  | 
885  | 361k  |     ftmp[7] += ftmp[6] >> 58;  | 
886  | 361k  |     ftmp[6] &= bottom58bits;  | 
887  | 361k  |     ftmp[8] += ftmp[7] >> 58;  | 
888  | 361k  |     ftmp[7] &= bottom58bits;  | 
889  |  |     /* ftmp[8] < 2^57 + 4 */  | 
890  |  |  | 
891  |  |     /*  | 
892  |  |      * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater  | 
893  |  |      * than our bound for ftmp[8]. Therefore we only have to check if the  | 
894  |  |      * zero is zero or 2^521-1.  | 
895  |  |      */  | 
896  |  |  | 
897  | 361k  |     is_zero = 0;  | 
898  | 361k  |     is_zero |= ftmp[0];  | 
899  | 361k  |     is_zero |= ftmp[1];  | 
900  | 361k  |     is_zero |= ftmp[2];  | 
901  | 361k  |     is_zero |= ftmp[3];  | 
902  | 361k  |     is_zero |= ftmp[4];  | 
903  | 361k  |     is_zero |= ftmp[5];  | 
904  | 361k  |     is_zero |= ftmp[6];  | 
905  | 361k  |     is_zero |= ftmp[7];  | 
906  | 361k  |     is_zero |= ftmp[8];  | 
907  |  |  | 
908  | 361k  |     is_zero--;  | 
909  |  |     /*  | 
910  |  |      * We know that ftmp[i] < 2^63, therefore the only way that the top bit  | 
911  |  |      * can be set is if is_zero was 0 before the decrement.  | 
912  |  |      */  | 
913  | 361k  |     is_zero = 0 - (is_zero >> 63);  | 
914  |  |  | 
915  | 361k  |     is_p = ftmp[0] ^ kPrime[0];  | 
916  | 361k  |     is_p |= ftmp[1] ^ kPrime[1];  | 
917  | 361k  |     is_p |= ftmp[2] ^ kPrime[2];  | 
918  | 361k  |     is_p |= ftmp[3] ^ kPrime[3];  | 
919  | 361k  |     is_p |= ftmp[4] ^ kPrime[4];  | 
920  | 361k  |     is_p |= ftmp[5] ^ kPrime[5];  | 
921  | 361k  |     is_p |= ftmp[6] ^ kPrime[6];  | 
922  | 361k  |     is_p |= ftmp[7] ^ kPrime[7];  | 
923  | 361k  |     is_p |= ftmp[8] ^ kPrime[8];  | 
924  |  |  | 
925  | 361k  |     is_p--;  | 
926  | 361k  |     is_p = 0 - (is_p >> 63);  | 
927  |  |  | 
928  | 361k  |     is_zero |= is_p;  | 
929  | 361k  |     return is_zero;  | 
930  | 361k  | }  | 
931  |  |  | 
932  |  | static int felem_is_zero_int(const void *in)  | 
933  | 0  | { | 
934  | 0  |     return (int)(felem_is_zero(in) & ((limb) 1));  | 
935  | 0  | }  | 
936  |  |  | 
937  |  | /*-  | 
938  |  |  * felem_contract converts |in| to its unique, minimal representation.  | 
939  |  |  * On entry:  | 
940  |  |  *   in[i] < 2^59 + 2^14  | 
941  |  |  */  | 
942  |  | static void felem_contract(felem out, const felem in)  | 
943  | 3.64k  | { | 
944  | 3.64k  |     limb is_p, is_greater, sign;  | 
945  | 3.64k  |     static const limb two58 = ((limb) 1) << 58;  | 
946  |  |  | 
947  | 3.64k  |     felem_assign(out, in);  | 
948  |  |  | 
949  | 3.64k  |     out[0] += out[8] >> 57;  | 
950  | 3.64k  |     out[8] &= bottom57bits;  | 
951  |  |     /* out[8] < 2^57 */  | 
952  | 3.64k  |     out[1] += out[0] >> 58;  | 
953  | 3.64k  |     out[0] &= bottom58bits;  | 
954  | 3.64k  |     out[2] += out[1] >> 58;  | 
955  | 3.64k  |     out[1] &= bottom58bits;  | 
956  | 3.64k  |     out[3] += out[2] >> 58;  | 
957  | 3.64k  |     out[2] &= bottom58bits;  | 
958  | 3.64k  |     out[4] += out[3] >> 58;  | 
959  | 3.64k  |     out[3] &= bottom58bits;  | 
960  | 3.64k  |     out[5] += out[4] >> 58;  | 
961  | 3.64k  |     out[4] &= bottom58bits;  | 
962  | 3.64k  |     out[6] += out[5] >> 58;  | 
963  | 3.64k  |     out[5] &= bottom58bits;  | 
964  | 3.64k  |     out[7] += out[6] >> 58;  | 
965  | 3.64k  |     out[6] &= bottom58bits;  | 
966  | 3.64k  |     out[8] += out[7] >> 58;  | 
967  | 3.64k  |     out[7] &= bottom58bits;  | 
968  |  |     /* out[8] < 2^57 + 4 */  | 
969  |  |  | 
970  |  |     /*  | 
971  |  |      * If the value is greater than 2^521-1 then we have to subtract 2^521-1  | 
972  |  |      * out. See the comments in felem_is_zero regarding why we don't test for  | 
973  |  |      * other multiples of the prime.  | 
974  |  |      */  | 
975  |  |  | 
976  |  |     /*  | 
977  |  |      * First, if |out| is equal to 2^521-1, we subtract it out to get zero.  | 
978  |  |      */  | 
979  |  |  | 
980  | 3.64k  |     is_p = out[0] ^ kPrime[0];  | 
981  | 3.64k  |     is_p |= out[1] ^ kPrime[1];  | 
982  | 3.64k  |     is_p |= out[2] ^ kPrime[2];  | 
983  | 3.64k  |     is_p |= out[3] ^ kPrime[3];  | 
984  | 3.64k  |     is_p |= out[4] ^ kPrime[4];  | 
985  | 3.64k  |     is_p |= out[5] ^ kPrime[5];  | 
986  | 3.64k  |     is_p |= out[6] ^ kPrime[6];  | 
987  | 3.64k  |     is_p |= out[7] ^ kPrime[7];  | 
988  | 3.64k  |     is_p |= out[8] ^ kPrime[8];  | 
989  |  |  | 
990  | 3.64k  |     is_p--;  | 
991  | 3.64k  |     is_p &= is_p << 32;  | 
992  | 3.64k  |     is_p &= is_p << 16;  | 
993  | 3.64k  |     is_p &= is_p << 8;  | 
994  | 3.64k  |     is_p &= is_p << 4;  | 
995  | 3.64k  |     is_p &= is_p << 2;  | 
996  | 3.64k  |     is_p &= is_p << 1;  | 
997  | 3.64k  |     is_p = 0 - (is_p >> 63);  | 
998  | 3.64k  |     is_p = ~is_p;  | 
999  |  |  | 
1000  |  |     /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */  | 
1001  |  |  | 
1002  | 3.64k  |     out[0] &= is_p;  | 
1003  | 3.64k  |     out[1] &= is_p;  | 
1004  | 3.64k  |     out[2] &= is_p;  | 
1005  | 3.64k  |     out[3] &= is_p;  | 
1006  | 3.64k  |     out[4] &= is_p;  | 
1007  | 3.64k  |     out[5] &= is_p;  | 
1008  | 3.64k  |     out[6] &= is_p;  | 
1009  | 3.64k  |     out[7] &= is_p;  | 
1010  | 3.64k  |     out[8] &= is_p;  | 
1011  |  |  | 
1012  |  |     /*  | 
1013  |  |      * In order to test that |out| >= 2^521-1 we need only test if out[8] >>  | 
1014  |  |      * 57 is greater than zero as (2^521-1) + x >= 2^522  | 
1015  |  |      */  | 
1016  | 3.64k  |     is_greater = out[8] >> 57;  | 
1017  | 3.64k  |     is_greater |= is_greater << 32;  | 
1018  | 3.64k  |     is_greater |= is_greater << 16;  | 
1019  | 3.64k  |     is_greater |= is_greater << 8;  | 
1020  | 3.64k  |     is_greater |= is_greater << 4;  | 
1021  | 3.64k  |     is_greater |= is_greater << 2;  | 
1022  | 3.64k  |     is_greater |= is_greater << 1;  | 
1023  | 3.64k  |     is_greater = 0 - (is_greater >> 63);  | 
1024  |  |  | 
1025  | 3.64k  |     out[0] -= kPrime[0] & is_greater;  | 
1026  | 3.64k  |     out[1] -= kPrime[1] & is_greater;  | 
1027  | 3.64k  |     out[2] -= kPrime[2] & is_greater;  | 
1028  | 3.64k  |     out[3] -= kPrime[3] & is_greater;  | 
1029  | 3.64k  |     out[4] -= kPrime[4] & is_greater;  | 
1030  | 3.64k  |     out[5] -= kPrime[5] & is_greater;  | 
1031  | 3.64k  |     out[6] -= kPrime[6] & is_greater;  | 
1032  | 3.64k  |     out[7] -= kPrime[7] & is_greater;  | 
1033  | 3.64k  |     out[8] -= kPrime[8] & is_greater;  | 
1034  |  |  | 
1035  |  |     /* Eliminate negative coefficients */  | 
1036  | 3.64k  |     sign = -(out[0] >> 63);  | 
1037  | 3.64k  |     out[0] += (two58 & sign);  | 
1038  | 3.64k  |     out[1] -= (1 & sign);  | 
1039  | 3.64k  |     sign = -(out[1] >> 63);  | 
1040  | 3.64k  |     out[1] += (two58 & sign);  | 
1041  | 3.64k  |     out[2] -= (1 & sign);  | 
1042  | 3.64k  |     sign = -(out[2] >> 63);  | 
1043  | 3.64k  |     out[2] += (two58 & sign);  | 
1044  | 3.64k  |     out[3] -= (1 & sign);  | 
1045  | 3.64k  |     sign = -(out[3] >> 63);  | 
1046  | 3.64k  |     out[3] += (two58 & sign);  | 
1047  | 3.64k  |     out[4] -= (1 & sign);  | 
1048  | 3.64k  |     sign = -(out[4] >> 63);  | 
1049  | 3.64k  |     out[4] += (two58 & sign);  | 
1050  | 3.64k  |     out[5] -= (1 & sign);  | 
1051  | 3.64k  |     sign = -(out[0] >> 63);  | 
1052  | 3.64k  |     out[5] += (two58 & sign);  | 
1053  | 3.64k  |     out[6] -= (1 & sign);  | 
1054  | 3.64k  |     sign = -(out[6] >> 63);  | 
1055  | 3.64k  |     out[6] += (two58 & sign);  | 
1056  | 3.64k  |     out[7] -= (1 & sign);  | 
1057  | 3.64k  |     sign = -(out[7] >> 63);  | 
1058  | 3.64k  |     out[7] += (two58 & sign);  | 
1059  | 3.64k  |     out[8] -= (1 & sign);  | 
1060  | 3.64k  |     sign = -(out[5] >> 63);  | 
1061  | 3.64k  |     out[5] += (two58 & sign);  | 
1062  | 3.64k  |     out[6] -= (1 & sign);  | 
1063  | 3.64k  |     sign = -(out[6] >> 63);  | 
1064  | 3.64k  |     out[6] += (two58 & sign);  | 
1065  | 3.64k  |     out[7] -= (1 & sign);  | 
1066  | 3.64k  |     sign = -(out[7] >> 63);  | 
1067  | 3.64k  |     out[7] += (two58 & sign);  | 
1068  | 3.64k  |     out[8] -= (1 & sign);  | 
1069  | 3.64k  | }  | 
1070  |  |  | 
1071  |  | /*-  | 
1072  |  |  * Group operations  | 
1073  |  |  * ----------------  | 
1074  |  |  *  | 
1075  |  |  * Building on top of the field operations we have the operations on the  | 
1076  |  |  * elliptic curve group itself. Points on the curve are represented in Jacobian  | 
1077  |  |  * coordinates */  | 
1078  |  |  | 
1079  |  | /*-  | 
1080  |  |  * point_double calculates 2*(x_in, y_in, z_in)  | 
1081  |  |  *  | 
1082  |  |  * The method is taken from:  | 
1083  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b  | 
1084  |  |  *  | 
1085  |  |  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.  | 
1086  |  |  * while x_out == y_in is not (maybe this works, but it's not tested). */  | 
1087  |  | static void  | 
1088  |  | point_double(felem x_out, felem y_out, felem z_out,  | 
1089  |  |              const felem x_in, const felem y_in, const felem z_in)  | 
1090  | 143k  | { | 
1091  | 143k  |     largefelem tmp, tmp2;  | 
1092  | 143k  |     felem delta, gamma, beta, alpha, ftmp, ftmp2;  | 
1093  |  |  | 
1094  | 143k  |     felem_assign(ftmp, x_in);  | 
1095  | 143k  |     felem_assign(ftmp2, x_in);  | 
1096  |  |  | 
1097  |  |     /* delta = z^2 */  | 
1098  | 143k  |     felem_square(tmp, z_in);  | 
1099  | 143k  |     felem_reduce(delta, tmp);   /* delta[i] < 2^59 + 2^14 */  | 
1100  |  |  | 
1101  |  |     /* gamma = y^2 */  | 
1102  | 143k  |     felem_square(tmp, y_in);  | 
1103  | 143k  |     felem_reduce(gamma, tmp);   /* gamma[i] < 2^59 + 2^14 */  | 
1104  |  |  | 
1105  |  |     /* beta = x*gamma */  | 
1106  | 143k  |     felem_mul(tmp, x_in, gamma);  | 
1107  | 143k  |     felem_reduce(beta, tmp);    /* beta[i] < 2^59 + 2^14 */  | 
1108  |  |  | 
1109  |  |     /* alpha = 3*(x-delta)*(x+delta) */  | 
1110  | 143k  |     felem_diff64(ftmp, delta);  | 
1111  |  |     /* ftmp[i] < 2^61 */  | 
1112  | 143k  |     felem_sum64(ftmp2, delta);  | 
1113  |  |     /* ftmp2[i] < 2^60 + 2^15 */  | 
1114  | 143k  |     felem_scalar64(ftmp2, 3);  | 
1115  |  |     /* ftmp2[i] < 3*2^60 + 3*2^15 */  | 
1116  | 143k  |     felem_mul(tmp, ftmp, ftmp2);  | 
1117  |  |     /*-  | 
1118  |  |      * tmp[i] < 17(3*2^121 + 3*2^76)  | 
1119  |  |      *        = 61*2^121 + 61*2^76  | 
1120  |  |      *        < 64*2^121 + 64*2^76  | 
1121  |  |      *        = 2^127 + 2^82  | 
1122  |  |      *        < 2^128  | 
1123  |  |      */  | 
1124  | 143k  |     felem_reduce(alpha, tmp);  | 
1125  |  |  | 
1126  |  |     /* x' = alpha^2 - 8*beta */  | 
1127  | 143k  |     felem_square(tmp, alpha);  | 
1128  |  |     /*  | 
1129  |  |      * tmp[i] < 17*2^120 < 2^125  | 
1130  |  |      */  | 
1131  | 143k  |     felem_assign(ftmp, beta);  | 
1132  | 143k  |     felem_scalar64(ftmp, 8);  | 
1133  |  |     /* ftmp[i] < 2^62 + 2^17 */  | 
1134  | 143k  |     felem_diff_128_64(tmp, ftmp);  | 
1135  |  |     /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */  | 
1136  | 143k  |     felem_reduce(x_out, tmp);  | 
1137  |  |  | 
1138  |  |     /* z' = (y + z)^2 - gamma - delta */  | 
1139  | 143k  |     felem_sum64(delta, gamma);  | 
1140  |  |     /* delta[i] < 2^60 + 2^15 */  | 
1141  | 143k  |     felem_assign(ftmp, y_in);  | 
1142  | 143k  |     felem_sum64(ftmp, z_in);  | 
1143  |  |     /* ftmp[i] < 2^60 + 2^15 */  | 
1144  | 143k  |     felem_square(tmp, ftmp);  | 
1145  |  |     /*  | 
1146  |  |      * tmp[i] < 17(2^122) < 2^127  | 
1147  |  |      */  | 
1148  | 143k  |     felem_diff_128_64(tmp, delta);  | 
1149  |  |     /* tmp[i] < 2^127 + 2^63 */  | 
1150  | 143k  |     felem_reduce(z_out, tmp);  | 
1151  |  |  | 
1152  |  |     /* y' = alpha*(4*beta - x') - 8*gamma^2 */  | 
1153  | 143k  |     felem_scalar64(beta, 4);  | 
1154  |  |     /* beta[i] < 2^61 + 2^16 */  | 
1155  | 143k  |     felem_diff64(beta, x_out);  | 
1156  |  |     /* beta[i] < 2^61 + 2^60 + 2^16 */  | 
1157  | 143k  |     felem_mul(tmp, alpha, beta);  | 
1158  |  |     /*-  | 
1159  |  |      * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))  | 
1160  |  |      *        = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)  | 
1161  |  |      *        = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)  | 
1162  |  |      *        < 2^128  | 
1163  |  |      */  | 
1164  | 143k  |     felem_square(tmp2, gamma);  | 
1165  |  |     /*-  | 
1166  |  |      * tmp2[i] < 17*(2^59 + 2^14)^2  | 
1167  |  |      *         = 17*(2^118 + 2^74 + 2^28)  | 
1168  |  |      */  | 
1169  | 143k  |     felem_scalar128(tmp2, 8);  | 
1170  |  |     /*-  | 
1171  |  |      * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)  | 
1172  |  |      *         = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31  | 
1173  |  |      *         < 2^126  | 
1174  |  |      */  | 
1175  | 143k  |     felem_diff128(tmp, tmp2);  | 
1176  |  |     /*-  | 
1177  |  |      * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)  | 
1178  |  |      *        = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +  | 
1179  |  |      *          2^74 + 2^69 + 2^34 + 2^30  | 
1180  |  |      *        < 2^128  | 
1181  |  |      */  | 
1182  | 143k  |     felem_reduce(y_out, tmp);  | 
1183  | 143k  | }  | 
1184  |  |  | 
1185  |  | /* copy_conditional copies in to out iff mask is all ones. */  | 
1186  |  | static void copy_conditional(felem out, const felem in, limb mask)  | 
1187  | 556k  | { | 
1188  | 556k  |     unsigned i;  | 
1189  | 5.56M  |     for (i = 0; i < NLIMBS; ++i) { | 
1190  | 5.00M  |         const limb tmp = mask & (in[i] ^ out[i]);  | 
1191  | 5.00M  |         out[i] ^= tmp;  | 
1192  | 5.00M  |     }  | 
1193  | 556k  | }  | 
1194  |  |  | 
1195  |  | /*-  | 
1196  |  |  * point_add calculates (x1, y1, z1) + (x2, y2, z2)  | 
1197  |  |  *  | 
1198  |  |  * The method is taken from  | 
1199  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,  | 
1200  |  |  * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).  | 
1201  |  |  *  | 
1202  |  |  * This function includes a branch for checking whether the two input points  | 
1203  |  |  * are equal (while not equal to the point at infinity). See comment below  | 
1204  |  |  * on constant-time.  | 
1205  |  |  */  | 
1206  |  | static void point_add(felem x3, felem y3, felem z3,  | 
1207  |  |                       const felem x1, const felem y1, const felem z1,  | 
1208  |  |                       const int mixed, const felem x2, const felem y2,  | 
1209  |  |                       const felem z2)  | 
1210  | 90.4k  | { | 
1211  | 90.4k  |     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;  | 
1212  | 90.4k  |     largefelem tmp, tmp2;  | 
1213  | 90.4k  |     limb x_equal, y_equal, z1_is_zero, z2_is_zero;  | 
1214  | 90.4k  |     limb points_equal;  | 
1215  |  |  | 
1216  | 90.4k  |     z1_is_zero = felem_is_zero(z1);  | 
1217  | 90.4k  |     z2_is_zero = felem_is_zero(z2);  | 
1218  |  |  | 
1219  |  |     /* ftmp = z1z1 = z1**2 */  | 
1220  | 90.4k  |     felem_square(tmp, z1);  | 
1221  | 90.4k  |     felem_reduce(ftmp, tmp);  | 
1222  |  |  | 
1223  | 90.4k  |     if (!mixed) { | 
1224  |  |         /* ftmp2 = z2z2 = z2**2 */  | 
1225  | 14.5k  |         felem_square(tmp, z2);  | 
1226  | 14.5k  |         felem_reduce(ftmp2, tmp);  | 
1227  |  |  | 
1228  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1229  | 14.5k  |         felem_mul(tmp, x1, ftmp2);  | 
1230  | 14.5k  |         felem_reduce(ftmp3, tmp);  | 
1231  |  |  | 
1232  |  |         /* ftmp5 = z1 + z2 */  | 
1233  | 14.5k  |         felem_assign(ftmp5, z1);  | 
1234  | 14.5k  |         felem_sum64(ftmp5, z2);  | 
1235  |  |         /* ftmp5[i] < 2^61 */  | 
1236  |  |  | 
1237  |  |         /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */  | 
1238  | 14.5k  |         felem_square(tmp, ftmp5);  | 
1239  |  |         /* tmp[i] < 17*2^122 */  | 
1240  | 14.5k  |         felem_diff_128_64(tmp, ftmp);  | 
1241  |  |         /* tmp[i] < 17*2^122 + 2^63 */  | 
1242  | 14.5k  |         felem_diff_128_64(tmp, ftmp2);  | 
1243  |  |         /* tmp[i] < 17*2^122 + 2^64 */  | 
1244  | 14.5k  |         felem_reduce(ftmp5, tmp);  | 
1245  |  |  | 
1246  |  |         /* ftmp2 = z2 * z2z2 */  | 
1247  | 14.5k  |         felem_mul(tmp, ftmp2, z2);  | 
1248  | 14.5k  |         felem_reduce(ftmp2, tmp);  | 
1249  |  |  | 
1250  |  |         /* s1 = ftmp6 = y1 * z2**3 */  | 
1251  | 14.5k  |         felem_mul(tmp, y1, ftmp2);  | 
1252  | 14.5k  |         felem_reduce(ftmp6, tmp);  | 
1253  | 75.9k  |     } else { | 
1254  |  |         /*  | 
1255  |  |          * We'll assume z2 = 1 (special case z2 = 0 is handled later)  | 
1256  |  |          */  | 
1257  |  |  | 
1258  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1259  | 75.9k  |         felem_assign(ftmp3, x1);  | 
1260  |  |  | 
1261  |  |         /* ftmp5 = 2*z1z2 */  | 
1262  | 75.9k  |         felem_scalar(ftmp5, z1, 2);  | 
1263  |  |  | 
1264  |  |         /* s1 = ftmp6 = y1 * z2**3 */  | 
1265  | 75.9k  |         felem_assign(ftmp6, y1);  | 
1266  | 75.9k  |     }  | 
1267  |  |  | 
1268  |  |     /* u2 = x2*z1z1 */  | 
1269  | 90.4k  |     felem_mul(tmp, x2, ftmp);  | 
1270  |  |     /* tmp[i] < 17*2^120 */  | 
1271  |  |  | 
1272  |  |     /* h = ftmp4 = u2 - u1 */  | 
1273  | 90.4k  |     felem_diff_128_64(tmp, ftmp3);  | 
1274  |  |     /* tmp[i] < 17*2^120 + 2^63 */  | 
1275  | 90.4k  |     felem_reduce(ftmp4, tmp);  | 
1276  |  |  | 
1277  | 90.4k  |     x_equal = felem_is_zero(ftmp4);  | 
1278  |  |  | 
1279  |  |     /* z_out = ftmp5 * h */  | 
1280  | 90.4k  |     felem_mul(tmp, ftmp5, ftmp4);  | 
1281  | 90.4k  |     felem_reduce(z_out, tmp);  | 
1282  |  |  | 
1283  |  |     /* ftmp = z1 * z1z1 */  | 
1284  | 90.4k  |     felem_mul(tmp, ftmp, z1);  | 
1285  | 90.4k  |     felem_reduce(ftmp, tmp);  | 
1286  |  |  | 
1287  |  |     /* s2 = tmp = y2 * z1**3 */  | 
1288  | 90.4k  |     felem_mul(tmp, y2, ftmp);  | 
1289  |  |     /* tmp[i] < 17*2^120 */  | 
1290  |  |  | 
1291  |  |     /* r = ftmp5 = (s2 - s1)*2 */  | 
1292  | 90.4k  |     felem_diff_128_64(tmp, ftmp6);  | 
1293  |  |     /* tmp[i] < 17*2^120 + 2^63 */  | 
1294  | 90.4k  |     felem_reduce(ftmp5, tmp);  | 
1295  | 90.4k  |     y_equal = felem_is_zero(ftmp5);  | 
1296  | 90.4k  |     felem_scalar64(ftmp5, 2);  | 
1297  |  |     /* ftmp5[i] < 2^61 */  | 
1298  |  |  | 
1299  |  |     /*  | 
1300  |  |      * The formulae are incorrect if the points are equal, in affine coordinates  | 
1301  |  |      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this  | 
1302  |  |      * happens.  | 
1303  |  |      *  | 
1304  |  |      * We use bitwise operations to avoid potential side-channels introduced by  | 
1305  |  |      * the short-circuiting behaviour of boolean operators.  | 
1306  |  |      *  | 
1307  |  |      * The special case of either point being the point at infinity (z1 and/or  | 
1308  |  |      * z2 are zero), is handled separately later on in this function, so we  | 
1309  |  |      * avoid jumping to point_double here in those special cases.  | 
1310  |  |      *  | 
1311  |  |      * Notice the comment below on the implications of this branching for timing  | 
1312  |  |      * leaks and why it is considered practically irrelevant.  | 
1313  |  |      */  | 
1314  | 90.4k  |     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));  | 
1315  |  |  | 
1316  | 90.4k  |     if (points_equal) { | 
1317  |  |         /*  | 
1318  |  |          * This is obviously not constant-time but it will almost-never happen  | 
1319  |  |          * for ECDH / ECDSA. The case where it can happen is during scalar-mult  | 
1320  |  |          * where the intermediate value gets very close to the group order.  | 
1321  |  |          * Since |ossl_ec_GFp_nistp_recode_scalar_bits| produces signed digits  | 
1322  |  |          * for the scalar, it's possible for the intermediate value to be a small  | 
1323  |  |          * negative multiple of the base point, and for the final signed digit  | 
1324  |  |          * to be the same value. We believe that this only occurs for the scalar  | 
1325  |  |          * 1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff  | 
1326  |  |          * ffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb  | 
1327  |  |          * 71e913863f7, in that case the penultimate intermediate is -9G and  | 
1328  |  |          * the final digit is also -9G. Since this only happens for a single  | 
1329  |  |          * scalar, the timing leak is irrelevant. (Any attacker who wanted to  | 
1330  |  |          * check whether a secret scalar was that exact value, can already do  | 
1331  |  |          * so.)  | 
1332  |  |          */  | 
1333  | 0  |         point_double(x3, y3, z3, x1, y1, z1);  | 
1334  | 0  |         return;  | 
1335  | 0  |     }  | 
1336  |  |  | 
1337  |  |     /* I = ftmp = (2h)**2 */  | 
1338  | 90.4k  |     felem_assign(ftmp, ftmp4);  | 
1339  | 90.4k  |     felem_scalar64(ftmp, 2);  | 
1340  |  |     /* ftmp[i] < 2^61 */  | 
1341  | 90.4k  |     felem_square(tmp, ftmp);  | 
1342  |  |     /* tmp[i] < 17*2^122 */  | 
1343  | 90.4k  |     felem_reduce(ftmp, tmp);  | 
1344  |  |  | 
1345  |  |     /* J = ftmp2 = h * I */  | 
1346  | 90.4k  |     felem_mul(tmp, ftmp4, ftmp);  | 
1347  | 90.4k  |     felem_reduce(ftmp2, tmp);  | 
1348  |  |  | 
1349  |  |     /* V = ftmp4 = U1 * I */  | 
1350  | 90.4k  |     felem_mul(tmp, ftmp3, ftmp);  | 
1351  | 90.4k  |     felem_reduce(ftmp4, tmp);  | 
1352  |  |  | 
1353  |  |     /* x_out = r**2 - J - 2V */  | 
1354  | 90.4k  |     felem_square(tmp, ftmp5);  | 
1355  |  |     /* tmp[i] < 17*2^122 */  | 
1356  | 90.4k  |     felem_diff_128_64(tmp, ftmp2);  | 
1357  |  |     /* tmp[i] < 17*2^122 + 2^63 */  | 
1358  | 90.4k  |     felem_assign(ftmp3, ftmp4);  | 
1359  | 90.4k  |     felem_scalar64(ftmp4, 2);  | 
1360  |  |     /* ftmp4[i] < 2^61 */  | 
1361  | 90.4k  |     felem_diff_128_64(tmp, ftmp4);  | 
1362  |  |     /* tmp[i] < 17*2^122 + 2^64 */  | 
1363  | 90.4k  |     felem_reduce(x_out, tmp);  | 
1364  |  |  | 
1365  |  |     /* y_out = r(V-x_out) - 2 * s1 * J */  | 
1366  | 90.4k  |     felem_diff64(ftmp3, x_out);  | 
1367  |  |     /*  | 
1368  |  |      * ftmp3[i] < 2^60 + 2^60 = 2^61  | 
1369  |  |      */  | 
1370  | 90.4k  |     felem_mul(tmp, ftmp5, ftmp3);  | 
1371  |  |     /* tmp[i] < 17*2^122 */  | 
1372  | 90.4k  |     felem_mul(tmp2, ftmp6, ftmp2);  | 
1373  |  |     /* tmp2[i] < 17*2^120 */  | 
1374  | 90.4k  |     felem_scalar128(tmp2, 2);  | 
1375  |  |     /* tmp2[i] < 17*2^121 */  | 
1376  | 90.4k  |     felem_diff128(tmp, tmp2);  | 
1377  |  |         /*-  | 
1378  |  |          * tmp[i] < 2^127 - 2^69 + 17*2^122  | 
1379  |  |          *        = 2^126 - 2^122 - 2^6 - 2^2 - 1  | 
1380  |  |          *        < 2^127  | 
1381  |  |          */  | 
1382  | 90.4k  |     felem_reduce(y_out, tmp);  | 
1383  |  |  | 
1384  | 90.4k  |     copy_conditional(x_out, x2, z1_is_zero);  | 
1385  | 90.4k  |     copy_conditional(x_out, x1, z2_is_zero);  | 
1386  | 90.4k  |     copy_conditional(y_out, y2, z1_is_zero);  | 
1387  | 90.4k  |     copy_conditional(y_out, y1, z2_is_zero);  | 
1388  | 90.4k  |     copy_conditional(z_out, z2, z1_is_zero);  | 
1389  | 90.4k  |     copy_conditional(z_out, z1, z2_is_zero);  | 
1390  | 90.4k  |     felem_assign(x3, x_out);  | 
1391  | 90.4k  |     felem_assign(y3, y_out);  | 
1392  | 90.4k  |     felem_assign(z3, z_out);  | 
1393  | 90.4k  | }  | 
1394  |  |  | 
1395  |  | /*-  | 
1396  |  |  * Base point pre computation  | 
1397  |  |  * --------------------------  | 
1398  |  |  *  | 
1399  |  |  * Two different sorts of precomputed tables are used in the following code.  | 
1400  |  |  * Each contain various points on the curve, where each point is three field  | 
1401  |  |  * elements (x, y, z).  | 
1402  |  |  *  | 
1403  |  |  * For the base point table, z is usually 1 (0 for the point at infinity).  | 
1404  |  |  * This table has 16 elements:  | 
1405  |  |  * index | bits    | point  | 
1406  |  |  * ------+---------+------------------------------  | 
1407  |  |  *     0 | 0 0 0 0 | 0G  | 
1408  |  |  *     1 | 0 0 0 1 | 1G  | 
1409  |  |  *     2 | 0 0 1 0 | 2^130G  | 
1410  |  |  *     3 | 0 0 1 1 | (2^130 + 1)G  | 
1411  |  |  *     4 | 0 1 0 0 | 2^260G  | 
1412  |  |  *     5 | 0 1 0 1 | (2^260 + 1)G  | 
1413  |  |  *     6 | 0 1 1 0 | (2^260 + 2^130)G  | 
1414  |  |  *     7 | 0 1 1 1 | (2^260 + 2^130 + 1)G  | 
1415  |  |  *     8 | 1 0 0 0 | 2^390G  | 
1416  |  |  *     9 | 1 0 0 1 | (2^390 + 1)G  | 
1417  |  |  *    10 | 1 0 1 0 | (2^390 + 2^130)G  | 
1418  |  |  *    11 | 1 0 1 1 | (2^390 + 2^130 + 1)G  | 
1419  |  |  *    12 | 1 1 0 0 | (2^390 + 2^260)G  | 
1420  |  |  *    13 | 1 1 0 1 | (2^390 + 2^260 + 1)G  | 
1421  |  |  *    14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G  | 
1422  |  |  *    15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G  | 
1423  |  |  *  | 
1424  |  |  * The reason for this is so that we can clock bits into four different  | 
1425  |  |  * locations when doing simple scalar multiplies against the base point.  | 
1426  |  |  *  | 
1427  |  |  * Tables for other points have table[i] = iG for i in 0 .. 16. */  | 
1428  |  |  | 
1429  |  | /* gmul is the table of precomputed base points */  | 
1430  |  | static const felem gmul[16][3] = { | 
1431  |  | {{0, 0, 0, 0, 0, 0, 0, 0, 0}, | 
1432  |  |  {0, 0, 0, 0, 0, 0, 0, 0, 0}, | 
1433  |  |  {0, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1434  |  | {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334, | 
1435  |  |   0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,  | 
1436  |  |   0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},  | 
1437  |  |  {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353, | 
1438  |  |   0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,  | 
1439  |  |   0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},  | 
1440  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1441  |  | {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad, | 
1442  |  |   0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,  | 
1443  |  |   0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},  | 
1444  |  |  {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58, | 
1445  |  |   0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,  | 
1446  |  |   0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},  | 
1447  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1448  |  | {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873, | 
1449  |  |   0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,  | 
1450  |  |   0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},  | 
1451  |  |  {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52, | 
1452  |  |   0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,  | 
1453  |  |   0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},  | 
1454  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1455  |  | {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2, | 
1456  |  |   0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,  | 
1457  |  |   0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},  | 
1458  |  |  {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a, | 
1459  |  |   0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,  | 
1460  |  |   0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},  | 
1461  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1462  |  | {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6, | 
1463  |  |   0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,  | 
1464  |  |   0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},  | 
1465  |  |  {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d, | 
1466  |  |   0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,  | 
1467  |  |   0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},  | 
1468  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1469  |  | {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27, | 
1470  |  |   0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,  | 
1471  |  |   0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},  | 
1472  |  |  {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa, | 
1473  |  |   0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,  | 
1474  |  |   0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},  | 
1475  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1476  |  | {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890, | 
1477  |  |   0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,  | 
1478  |  |   0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},  | 
1479  |  |  {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516, | 
1480  |  |   0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,  | 
1481  |  |   0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},  | 
1482  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1483  |  | {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce, | 
1484  |  |   0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,  | 
1485  |  |   0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},  | 
1486  |  |  {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318, | 
1487  |  |   0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,  | 
1488  |  |   0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},  | 
1489  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1490  |  | {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae, | 
1491  |  |   0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,  | 
1492  |  |   0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},  | 
1493  |  |  {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447, | 
1494  |  |   0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,  | 
1495  |  |   0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},  | 
1496  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1497  |  | {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5, | 
1498  |  |   0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,  | 
1499  |  |   0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},  | 
1500  |  |  {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df, | 
1501  |  |   0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,  | 
1502  |  |   0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},  | 
1503  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1504  |  | {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292, | 
1505  |  |   0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,  | 
1506  |  |   0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},  | 
1507  |  |  {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30, | 
1508  |  |   0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,  | 
1509  |  |   0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},  | 
1510  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1511  |  | {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767, | 
1512  |  |   0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,  | 
1513  |  |   0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},  | 
1514  |  |  {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2, | 
1515  |  |   0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,  | 
1516  |  |   0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},  | 
1517  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1518  |  | {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3, | 
1519  |  |   0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,  | 
1520  |  |   0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},  | 
1521  |  |  {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8, | 
1522  |  |   0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,  | 
1523  |  |   0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},  | 
1524  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1525  |  | {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608, | 
1526  |  |   0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,  | 
1527  |  |   0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},  | 
1528  |  |  {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006, | 
1529  |  |   0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,  | 
1530  |  |   0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},  | 
1531  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}}, | 
1532  |  | {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c, | 
1533  |  |   0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,  | 
1534  |  |   0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},  | 
1535  |  |  {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7, | 
1536  |  |   0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,  | 
1537  |  |   0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},  | 
1538  |  |  {1, 0, 0, 0, 0, 0, 0, 0, 0}} | 
1539  |  | };  | 
1540  |  |  | 
1541  |  | /*  | 
1542  |  |  * select_point selects the |idx|th point from a precomputation table and  | 
1543  |  |  * copies it to out.  | 
1544  |  |  */  | 
1545  |  |  /* pre_comp below is of the size provided in |size| */  | 
1546  |  | static void select_point(const limb idx, unsigned int size,  | 
1547  |  |                          const felem pre_comp[][3], felem out[3])  | 
1548  | 90.2k  | { | 
1549  | 90.2k  |     unsigned i, j;  | 
1550  | 90.2k  |     limb *outlimbs = &out[0][0];  | 
1551  |  |  | 
1552  | 90.2k  |     memset(out, 0, sizeof(*out) * 3);  | 
1553  |  |  | 
1554  | 1.54M  |     for (i = 0; i < size; i++) { | 
1555  | 1.45M  |         const limb *inlimbs = &pre_comp[i][0][0];  | 
1556  | 1.45M  |         limb mask = i ^ idx;  | 
1557  | 1.45M  |         mask |= mask >> 4;  | 
1558  | 1.45M  |         mask |= mask >> 2;  | 
1559  | 1.45M  |         mask |= mask >> 1;  | 
1560  | 1.45M  |         mask &= 1;  | 
1561  | 1.45M  |         mask--;  | 
1562  | 40.8M  |         for (j = 0; j < NLIMBS * 3; j++)  | 
1563  | 39.3M  |             outlimbs[j] |= inlimbs[j] & mask;  | 
1564  | 1.45M  |     }  | 
1565  | 90.2k  | }  | 
1566  |  |  | 
1567  |  | /* get_bit returns the |i|th bit in |in| */  | 
1568  |  | static char get_bit(const felem_bytearray in, int i)  | 
1569  | 386k  | { | 
1570  | 386k  |     if (i < 0)  | 
1571  | 131  |         return 0;  | 
1572  | 386k  |     return (in[i >> 3] >> (i & 7)) & 1;  | 
1573  | 386k  | }  | 
1574  |  |  | 
1575  |  | /*  | 
1576  |  |  * Interleaved point multiplication using precomputed point multiples: The  | 
1577  |  |  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars  | 
1578  |  |  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the  | 
1579  |  |  * generator, using certain (large) precomputed multiples in g_pre_comp.  | 
1580  |  |  * Output point (X, Y, Z) is stored in x_out, y_out, z_out  | 
1581  |  |  */  | 
1582  |  | static void batch_mul(felem x_out, felem y_out, felem z_out,  | 
1583  |  |                       const felem_bytearray scalars[],  | 
1584  |  |                       const unsigned num_points, const u8 *g_scalar,  | 
1585  |  |                       const int mixed, const felem pre_comp[][17][3],  | 
1586  |  |                       const felem g_pre_comp[16][3])  | 
1587  | 706  | { | 
1588  | 706  |     int i, skip;  | 
1589  | 706  |     unsigned num, gen_mul = (g_scalar != NULL);  | 
1590  | 706  |     felem nq[3], tmp[4];  | 
1591  | 706  |     limb bits;  | 
1592  | 706  |     u8 sign, digit;  | 
1593  |  |  | 
1594  |  |     /* set nq to the point at infinity */  | 
1595  | 706  |     memset(nq, 0, sizeof(nq));  | 
1596  |  |  | 
1597  |  |     /*  | 
1598  |  |      * Loop over all scalars msb-to-lsb, interleaving additions of multiples  | 
1599  |  |      * of the generator (last quarter of rounds) and additions of other  | 
1600  |  |      * points multiples (every 5th round).  | 
1601  |  |      */  | 
1602  | 706  |     skip = 1;                   /* save two point operations in the first  | 
1603  |  |                                  * round */  | 
1604  | 144k  |     for (i = (num_points ? 520 : 130); i >= 0; --i) { | 
1605  |  |         /* double */  | 
1606  | 143k  |         if (!skip)  | 
1607  | 142k  |             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);  | 
1608  |  |  | 
1609  |  |         /* add multiples of the generator */  | 
1610  | 143k  |         if (gen_mul && (i <= 130)) { | 
1611  | 76.5k  |             bits = get_bit(g_scalar, i + 390) << 3;  | 
1612  | 76.5k  |             if (i < 130) { | 
1613  | 75.9k  |                 bits |= get_bit(g_scalar, i + 260) << 2;  | 
1614  | 75.9k  |                 bits |= get_bit(g_scalar, i + 130) << 1;  | 
1615  | 75.9k  |                 bits |= get_bit(g_scalar, i);  | 
1616  | 75.9k  |             }  | 
1617  |  |             /* select the point to add, in constant time */  | 
1618  | 76.5k  |             select_point(bits, 16, g_pre_comp, tmp);  | 
1619  | 76.5k  |             if (!skip) { | 
1620  |  |                 /* The 1 argument below is for "mixed" */  | 
1621  | 75.9k  |                 point_add(nq[0], nq[1], nq[2],  | 
1622  | 75.9k  |                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);  | 
1623  | 75.9k  |             } else { | 
1624  | 575  |                 memcpy(nq, tmp, 3 * sizeof(felem));  | 
1625  | 575  |                 skip = 0;  | 
1626  | 575  |             }  | 
1627  | 76.5k  |         }  | 
1628  |  |  | 
1629  |  |         /* do other additions every 5 doublings */  | 
1630  | 143k  |         if (num_points && (i % 5 == 0)) { | 
1631  |  |             /* loop over all scalars */  | 
1632  | 27.5k  |             for (num = 0; num < num_points; ++num) { | 
1633  | 13.7k  |                 bits = get_bit(scalars[num], i + 4) << 5;  | 
1634  | 13.7k  |                 bits |= get_bit(scalars[num], i + 3) << 4;  | 
1635  | 13.7k  |                 bits |= get_bit(scalars[num], i + 2) << 3;  | 
1636  | 13.7k  |                 bits |= get_bit(scalars[num], i + 1) << 2;  | 
1637  | 13.7k  |                 bits |= get_bit(scalars[num], i) << 1;  | 
1638  | 13.7k  |                 bits |= get_bit(scalars[num], i - 1);  | 
1639  | 13.7k  |                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);  | 
1640  |  |  | 
1641  |  |                 /*  | 
1642  |  |                  * select the point to add or subtract, in constant time  | 
1643  |  |                  */  | 
1644  | 13.7k  |                 select_point(digit, 17, pre_comp[num], tmp);  | 
1645  | 13.7k  |                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative  | 
1646  |  |                                             * point */  | 
1647  | 13.7k  |                 copy_conditional(tmp[1], tmp[3], (-(limb) sign));  | 
1648  |  |  | 
1649  | 13.7k  |                 if (!skip) { | 
1650  | 13.6k  |                     point_add(nq[0], nq[1], nq[2],  | 
1651  | 13.6k  |                               nq[0], nq[1], nq[2],  | 
1652  | 13.6k  |                               mixed, tmp[0], tmp[1], tmp[2]);  | 
1653  | 13.6k  |                 } else { | 
1654  | 131  |                     memcpy(nq, tmp, 3 * sizeof(felem));  | 
1655  | 131  |                     skip = 0;  | 
1656  | 131  |                 }  | 
1657  | 13.7k  |             }  | 
1658  | 13.7k  |         }  | 
1659  | 143k  |     }  | 
1660  | 706  |     felem_assign(x_out, nq[0]);  | 
1661  | 706  |     felem_assign(y_out, nq[1]);  | 
1662  | 706  |     felem_assign(z_out, nq[2]);  | 
1663  | 706  | }  | 
1664  |  |  | 
1665  |  | /* Precomputation for the group generator. */  | 
1666  |  | struct nistp521_pre_comp_st { | 
1667  |  |     felem g_pre_comp[16][3];  | 
1668  |  |     CRYPTO_REF_COUNT references;  | 
1669  |  |     CRYPTO_RWLOCK *lock;  | 
1670  |  | };  | 
1671  |  |  | 
1672  |  | const EC_METHOD *EC_GFp_nistp521_method(void)  | 
1673  | 13.5k  | { | 
1674  | 13.5k  |     static const EC_METHOD ret = { | 
1675  | 13.5k  |         EC_FLAGS_DEFAULT_OCT,  | 
1676  | 13.5k  |         NID_X9_62_prime_field,  | 
1677  | 13.5k  |         ossl_ec_GFp_nistp521_group_init,  | 
1678  | 13.5k  |         ossl_ec_GFp_simple_group_finish,  | 
1679  | 13.5k  |         ossl_ec_GFp_simple_group_clear_finish,  | 
1680  | 13.5k  |         ossl_ec_GFp_nist_group_copy,  | 
1681  | 13.5k  |         ossl_ec_GFp_nistp521_group_set_curve,  | 
1682  | 13.5k  |         ossl_ec_GFp_simple_group_get_curve,  | 
1683  | 13.5k  |         ossl_ec_GFp_simple_group_get_degree,  | 
1684  | 13.5k  |         ossl_ec_group_simple_order_bits,  | 
1685  | 13.5k  |         ossl_ec_GFp_simple_group_check_discriminant,  | 
1686  | 13.5k  |         ossl_ec_GFp_simple_point_init,  | 
1687  | 13.5k  |         ossl_ec_GFp_simple_point_finish,  | 
1688  | 13.5k  |         ossl_ec_GFp_simple_point_clear_finish,  | 
1689  | 13.5k  |         ossl_ec_GFp_simple_point_copy,  | 
1690  | 13.5k  |         ossl_ec_GFp_simple_point_set_to_infinity,  | 
1691  | 13.5k  |         ossl_ec_GFp_simple_point_set_affine_coordinates,  | 
1692  | 13.5k  |         ossl_ec_GFp_nistp521_point_get_affine_coordinates,  | 
1693  | 13.5k  |         0 /* point_set_compressed_coordinates */ ,  | 
1694  | 13.5k  |         0 /* point2oct */ ,  | 
1695  | 13.5k  |         0 /* oct2point */ ,  | 
1696  | 13.5k  |         ossl_ec_GFp_simple_add,  | 
1697  | 13.5k  |         ossl_ec_GFp_simple_dbl,  | 
1698  | 13.5k  |         ossl_ec_GFp_simple_invert,  | 
1699  | 13.5k  |         ossl_ec_GFp_simple_is_at_infinity,  | 
1700  | 13.5k  |         ossl_ec_GFp_simple_is_on_curve,  | 
1701  | 13.5k  |         ossl_ec_GFp_simple_cmp,  | 
1702  | 13.5k  |         ossl_ec_GFp_simple_make_affine,  | 
1703  | 13.5k  |         ossl_ec_GFp_simple_points_make_affine,  | 
1704  | 13.5k  |         ossl_ec_GFp_nistp521_points_mul,  | 
1705  | 13.5k  |         ossl_ec_GFp_nistp521_precompute_mult,  | 
1706  | 13.5k  |         ossl_ec_GFp_nistp521_have_precompute_mult,  | 
1707  | 13.5k  |         ossl_ec_GFp_nist_field_mul,  | 
1708  | 13.5k  |         ossl_ec_GFp_nist_field_sqr,  | 
1709  | 13.5k  |         0 /* field_div */ ,  | 
1710  | 13.5k  |         ossl_ec_GFp_simple_field_inv,  | 
1711  | 13.5k  |         0 /* field_encode */ ,  | 
1712  | 13.5k  |         0 /* field_decode */ ,  | 
1713  | 13.5k  |         0,                      /* field_set_to_one */  | 
1714  | 13.5k  |         ossl_ec_key_simple_priv2oct,  | 
1715  | 13.5k  |         ossl_ec_key_simple_oct2priv,  | 
1716  | 13.5k  |         0, /* set private */  | 
1717  | 13.5k  |         ossl_ec_key_simple_generate_key,  | 
1718  | 13.5k  |         ossl_ec_key_simple_check_key,  | 
1719  | 13.5k  |         ossl_ec_key_simple_generate_public_key,  | 
1720  | 13.5k  |         0, /* keycopy */  | 
1721  | 13.5k  |         0, /* keyfinish */  | 
1722  | 13.5k  |         ossl_ecdh_simple_compute_key,  | 
1723  | 13.5k  |         ossl_ecdsa_simple_sign_setup,  | 
1724  | 13.5k  |         ossl_ecdsa_simple_sign_sig,  | 
1725  | 13.5k  |         ossl_ecdsa_simple_verify_sig,  | 
1726  | 13.5k  |         0, /* field_inverse_mod_ord */  | 
1727  | 13.5k  |         0, /* blind_coordinates */  | 
1728  | 13.5k  |         0, /* ladder_pre */  | 
1729  | 13.5k  |         0, /* ladder_step */  | 
1730  | 13.5k  |         0  /* ladder_post */  | 
1731  | 13.5k  |     };  | 
1732  |  |  | 
1733  | 13.5k  |     return &ret;  | 
1734  | 13.5k  | }  | 
1735  |  |  | 
1736  |  | /******************************************************************************/  | 
1737  |  | /*  | 
1738  |  |  * FUNCTIONS TO MANAGE PRECOMPUTATION  | 
1739  |  |  */  | 
1740  |  |  | 
1741  |  | static NISTP521_PRE_COMP *nistp521_pre_comp_new(void)  | 
1742  | 0  | { | 
1743  | 0  |     NISTP521_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));  | 
1744  |  | 
  | 
1745  | 0  |     if (ret == NULL) { | 
1746  | 0  |         ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);  | 
1747  | 0  |         return ret;  | 
1748  | 0  |     }  | 
1749  |  |  | 
1750  | 0  |     ret->references = 1;  | 
1751  |  | 
  | 
1752  | 0  |     ret->lock = CRYPTO_THREAD_lock_new();  | 
1753  | 0  |     if (ret->lock == NULL) { | 
1754  | 0  |         ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);  | 
1755  | 0  |         OPENSSL_free(ret);  | 
1756  | 0  |         return NULL;  | 
1757  | 0  |     }  | 
1758  | 0  |     return ret;  | 
1759  | 0  | }  | 
1760  |  |  | 
1761  |  | NISTP521_PRE_COMP *EC_nistp521_pre_comp_dup(NISTP521_PRE_COMP *p)  | 
1762  | 0  | { | 
1763  | 0  |     int i;  | 
1764  | 0  |     if (p != NULL)  | 
1765  | 0  |         CRYPTO_UP_REF(&p->references, &i, p->lock);  | 
1766  | 0  |     return p;  | 
1767  | 0  | }  | 
1768  |  |  | 
1769  |  | void EC_nistp521_pre_comp_free(NISTP521_PRE_COMP *p)  | 
1770  | 0  | { | 
1771  | 0  |     int i;  | 
1772  |  | 
  | 
1773  | 0  |     if (p == NULL)  | 
1774  | 0  |         return;  | 
1775  |  |  | 
1776  | 0  |     CRYPTO_DOWN_REF(&p->references, &i, p->lock);  | 
1777  | 0  |     REF_PRINT_COUNT("EC_nistp521", p); | 
1778  | 0  |     if (i > 0)  | 
1779  | 0  |         return;  | 
1780  | 0  |     REF_ASSERT_ISNT(i < 0);  | 
1781  |  | 
  | 
1782  | 0  |     CRYPTO_THREAD_lock_free(p->lock);  | 
1783  | 0  |     OPENSSL_free(p);  | 
1784  | 0  | }  | 
1785  |  |  | 
1786  |  | /******************************************************************************/  | 
1787  |  | /*  | 
1788  |  |  * OPENSSL EC_METHOD FUNCTIONS  | 
1789  |  |  */  | 
1790  |  |  | 
1791  |  | int ossl_ec_GFp_nistp521_group_init(EC_GROUP *group)  | 
1792  | 26.9k  | { | 
1793  | 26.9k  |     int ret;  | 
1794  | 26.9k  |     ret = ossl_ec_GFp_simple_group_init(group);  | 
1795  | 26.9k  |     group->a_is_minus3 = 1;  | 
1796  | 26.9k  |     return ret;  | 
1797  | 26.9k  | }  | 
1798  |  |  | 
1799  |  | int ossl_ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,  | 
1800  |  |                                          const BIGNUM *a, const BIGNUM *b,  | 
1801  |  |                                          BN_CTX *ctx)  | 
1802  | 13.5k  | { | 
1803  | 13.5k  |     int ret = 0;  | 
1804  | 13.5k  |     BIGNUM *curve_p, *curve_a, *curve_b;  | 
1805  | 13.5k  | #ifndef FIPS_MODULE  | 
1806  | 13.5k  |     BN_CTX *new_ctx = NULL;  | 
1807  |  |  | 
1808  | 13.5k  |     if (ctx == NULL)  | 
1809  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
1810  | 13.5k  | #endif  | 
1811  | 13.5k  |     if (ctx == NULL)  | 
1812  | 0  |         return 0;  | 
1813  |  |  | 
1814  | 13.5k  |     BN_CTX_start(ctx);  | 
1815  | 13.5k  |     curve_p = BN_CTX_get(ctx);  | 
1816  | 13.5k  |     curve_a = BN_CTX_get(ctx);  | 
1817  | 13.5k  |     curve_b = BN_CTX_get(ctx);  | 
1818  | 13.5k  |     if (curve_b == NULL)  | 
1819  | 0  |         goto err;  | 
1820  | 13.5k  |     BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);  | 
1821  | 13.5k  |     BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);  | 
1822  | 13.5k  |     BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);  | 
1823  | 13.5k  |     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { | 
1824  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);  | 
1825  | 0  |         goto err;  | 
1826  | 0  |     }  | 
1827  | 13.5k  |     group->field_mod_func = BN_nist_mod_521;  | 
1828  | 13.5k  |     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);  | 
1829  | 13.5k  |  err:  | 
1830  | 13.5k  |     BN_CTX_end(ctx);  | 
1831  | 13.5k  | #ifndef FIPS_MODULE  | 
1832  | 13.5k  |     BN_CTX_free(new_ctx);  | 
1833  | 13.5k  | #endif  | 
1834  | 13.5k  |     return ret;  | 
1835  | 13.5k  | }  | 
1836  |  |  | 
1837  |  | /*  | 
1838  |  |  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =  | 
1839  |  |  * (X/Z^2, Y/Z^3)  | 
1840  |  |  */  | 
1841  |  | int ossl_ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,  | 
1842  |  |                                                       const EC_POINT *point,  | 
1843  |  |                                                       BIGNUM *x, BIGNUM *y,  | 
1844  |  |                                                       BN_CTX *ctx)  | 
1845  | 762  | { | 
1846  | 762  |     felem z1, z2, x_in, y_in, x_out, y_out;  | 
1847  | 762  |     largefelem tmp;  | 
1848  |  |  | 
1849  | 762  |     if (EC_POINT_is_at_infinity(group, point)) { | 
1850  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);  | 
1851  | 0  |         return 0;  | 
1852  | 0  |     }  | 
1853  | 762  |     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||  | 
1854  | 762  |         (!BN_to_felem(z1, point->Z)))  | 
1855  | 0  |         return 0;  | 
1856  | 762  |     felem_inv(z2, z1);  | 
1857  | 762  |     felem_square(tmp, z2);  | 
1858  | 762  |     felem_reduce(z1, tmp);  | 
1859  | 762  |     felem_mul(tmp, x_in, z1);  | 
1860  | 762  |     felem_reduce(x_in, tmp);  | 
1861  | 762  |     felem_contract(x_out, x_in);  | 
1862  | 762  |     if (x != NULL) { | 
1863  | 762  |         if (!felem_to_BN(x, x_out)) { | 
1864  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1865  | 0  |             return 0;  | 
1866  | 0  |         }  | 
1867  | 762  |     }  | 
1868  | 762  |     felem_mul(tmp, z1, z2);  | 
1869  | 762  |     felem_reduce(z1, tmp);  | 
1870  | 762  |     felem_mul(tmp, y_in, z1);  | 
1871  | 762  |     felem_reduce(y_in, tmp);  | 
1872  | 762  |     felem_contract(y_out, y_in);  | 
1873  | 762  |     if (y != NULL) { | 
1874  | 716  |         if (!felem_to_BN(y, y_out)) { | 
1875  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1876  | 0  |             return 0;  | 
1877  | 0  |         }  | 
1878  | 716  |     }  | 
1879  | 762  |     return 1;  | 
1880  | 762  | }  | 
1881  |  |  | 
1882  |  | /* points below is of size |num|, and tmp_felems is of size |num+1/ */  | 
1883  |  | static void make_points_affine(size_t num, felem points[][3],  | 
1884  |  |                                felem tmp_felems[])  | 
1885  | 0  | { | 
1886  |  |     /*  | 
1887  |  |      * Runs in constant time, unless an input is the point at infinity (which  | 
1888  |  |      * normally shouldn't happen).  | 
1889  |  |      */  | 
1890  | 0  |     ossl_ec_GFp_nistp_points_make_affine_internal(num,  | 
1891  | 0  |                                                   points,  | 
1892  | 0  |                                                   sizeof(felem),  | 
1893  | 0  |                                                   tmp_felems,  | 
1894  | 0  |                                                   (void (*)(void *))felem_one,  | 
1895  | 0  |                                                   felem_is_zero_int,  | 
1896  | 0  |                                                   (void (*)(void *, const void *))  | 
1897  | 0  |                                                   felem_assign,  | 
1898  | 0  |                                                   (void (*)(void *, const void *))  | 
1899  | 0  |                                                   felem_square_reduce, (void (*)  | 
1900  | 0  |                                                                         (void *,  | 
1901  | 0  |                                                                          const void  | 
1902  | 0  |                                                                          *,  | 
1903  | 0  |                                                                          const void  | 
1904  | 0  |                                                                          *))  | 
1905  | 0  |                                                   felem_mul_reduce,  | 
1906  | 0  |                                                   (void (*)(void *, const void *))  | 
1907  | 0  |                                                   felem_inv,  | 
1908  | 0  |                                                   (void (*)(void *, const void *))  | 
1909  | 0  |                                                   felem_contract);  | 
1910  | 0  | }  | 
1911  |  |  | 
1912  |  | /*  | 
1913  |  |  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL  | 
1914  |  |  * values Result is stored in r (r can equal one of the inputs).  | 
1915  |  |  */  | 
1916  |  | int ossl_ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,  | 
1917  |  |                                     const BIGNUM *scalar, size_t num,  | 
1918  |  |                                     const EC_POINT *points[],  | 
1919  |  |                                     const BIGNUM *scalars[], BN_CTX *ctx)  | 
1920  | 706  | { | 
1921  | 706  |     int ret = 0;  | 
1922  | 706  |     int j;  | 
1923  | 706  |     int mixed = 0;  | 
1924  | 706  |     BIGNUM *x, *y, *z, *tmp_scalar;  | 
1925  | 706  |     felem_bytearray g_secret;  | 
1926  | 706  |     felem_bytearray *secrets = NULL;  | 
1927  | 706  |     felem (*pre_comp)[17][3] = NULL;  | 
1928  | 706  |     felem *tmp_felems = NULL;  | 
1929  | 706  |     unsigned i;  | 
1930  | 706  |     int num_bytes;  | 
1931  | 706  |     int have_pre_comp = 0;  | 
1932  | 706  |     size_t num_points = num;  | 
1933  | 706  |     felem x_in, y_in, z_in, x_out, y_out, z_out;  | 
1934  | 706  |     NISTP521_PRE_COMP *pre = NULL;  | 
1935  | 706  |     felem(*g_pre_comp)[3] = NULL;  | 
1936  | 706  |     EC_POINT *generator = NULL;  | 
1937  | 706  |     const EC_POINT *p = NULL;  | 
1938  | 706  |     const BIGNUM *p_scalar = NULL;  | 
1939  |  |  | 
1940  | 706  |     BN_CTX_start(ctx);  | 
1941  | 706  |     x = BN_CTX_get(ctx);  | 
1942  | 706  |     y = BN_CTX_get(ctx);  | 
1943  | 706  |     z = BN_CTX_get(ctx);  | 
1944  | 706  |     tmp_scalar = BN_CTX_get(ctx);  | 
1945  | 706  |     if (tmp_scalar == NULL)  | 
1946  | 0  |         goto err;  | 
1947  |  |  | 
1948  | 706  |     if (scalar != NULL) { | 
1949  | 584  |         pre = group->pre_comp.nistp521;  | 
1950  | 584  |         if (pre)  | 
1951  |  |             /* we have precomputation, try to use it */  | 
1952  | 0  |             g_pre_comp = &pre->g_pre_comp[0];  | 
1953  | 584  |         else  | 
1954  |  |             /* try to use the standard precomputation */  | 
1955  | 584  |             g_pre_comp = (felem(*)[3]) gmul;  | 
1956  | 584  |         generator = EC_POINT_new(group);  | 
1957  | 584  |         if (generator == NULL)  | 
1958  | 0  |             goto err;  | 
1959  |  |         /* get the generator from precomputation */  | 
1960  | 584  |         if (!felem_to_BN(x, g_pre_comp[1][0]) ||  | 
1961  | 584  |             !felem_to_BN(y, g_pre_comp[1][1]) ||  | 
1962  | 584  |             !felem_to_BN(z, g_pre_comp[1][2])) { | 
1963  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1964  | 0  |             goto err;  | 
1965  | 0  |         }  | 
1966  | 584  |         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,  | 
1967  | 584  |                                                                 generator,  | 
1968  | 584  |                                                                 x, y, z, ctx))  | 
1969  | 0  |             goto err;  | 
1970  | 584  |         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))  | 
1971  |  |             /* precomputation matches generator */  | 
1972  | 584  |             have_pre_comp = 1;  | 
1973  | 0  |         else  | 
1974  |  |             /*  | 
1975  |  |              * we don't have valid precomputation: treat the generator as a  | 
1976  |  |              * random point  | 
1977  |  |              */  | 
1978  | 0  |             num_points++;  | 
1979  | 584  |     }  | 
1980  |  |  | 
1981  | 706  |     if (num_points > 0) { | 
1982  | 131  |         if (num_points >= 2) { | 
1983  |  |             /*  | 
1984  |  |              * unless we precompute multiples for just one point, converting  | 
1985  |  |              * those into affine form is time well spent  | 
1986  |  |              */  | 
1987  | 0  |             mixed = 1;  | 
1988  | 0  |         }  | 
1989  | 131  |         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);  | 
1990  | 131  |         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);  | 
1991  | 131  |         if (mixed)  | 
1992  | 0  |             tmp_felems =  | 
1993  | 0  |                 OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));  | 
1994  | 131  |         if ((secrets == NULL) || (pre_comp == NULL)  | 
1995  | 131  |             || (mixed && (tmp_felems == NULL))) { | 
1996  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);  | 
1997  | 0  |             goto err;  | 
1998  | 0  |         }  | 
1999  |  |  | 
2000  |  |         /*  | 
2001  |  |          * we treat NULL scalars as 0, and NULL points as points at infinity,  | 
2002  |  |          * i.e., they contribute nothing to the linear combination  | 
2003  |  |          */  | 
2004  | 262  |         for (i = 0; i < num_points; ++i) { | 
2005  | 131  |             if (i == num) { | 
2006  |  |                 /*  | 
2007  |  |                  * we didn't have a valid precomputation, so we pick the  | 
2008  |  |                  * generator  | 
2009  |  |                  */  | 
2010  | 0  |                 p = EC_GROUP_get0_generator(group);  | 
2011  | 0  |                 p_scalar = scalar;  | 
2012  | 131  |             } else { | 
2013  |  |                 /* the i^th point */  | 
2014  | 131  |                 p = points[i];  | 
2015  | 131  |                 p_scalar = scalars[i];  | 
2016  | 131  |             }  | 
2017  | 131  |             if ((p_scalar != NULL) && (p != NULL)) { | 
2018  |  |                 /* reduce scalar to 0 <= scalar < 2^521 */  | 
2019  | 131  |                 if ((BN_num_bits(p_scalar) > 521)  | 
2020  | 131  |                     || (BN_is_negative(p_scalar))) { | 
2021  |  |                     /*  | 
2022  |  |                      * this is an unusual input, and we don't guarantee  | 
2023  |  |                      * constant-timeness  | 
2024  |  |                      */  | 
2025  | 0  |                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) { | 
2026  | 0  |                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2027  | 0  |                         goto err;  | 
2028  | 0  |                     }  | 
2029  | 0  |                     num_bytes = BN_bn2lebinpad(tmp_scalar,  | 
2030  | 0  |                                                secrets[i], sizeof(secrets[i]));  | 
2031  | 131  |                 } else { | 
2032  | 131  |                     num_bytes = BN_bn2lebinpad(p_scalar,  | 
2033  | 131  |                                                secrets[i], sizeof(secrets[i]));  | 
2034  | 131  |                 }  | 
2035  | 131  |                 if (num_bytes < 0) { | 
2036  | 0  |                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2037  | 0  |                     goto err;  | 
2038  | 0  |                 }  | 
2039  |  |                 /* precompute multiples */  | 
2040  | 131  |                 if ((!BN_to_felem(x_out, p->X)) ||  | 
2041  | 131  |                     (!BN_to_felem(y_out, p->Y)) ||  | 
2042  | 131  |                     (!BN_to_felem(z_out, p->Z)))  | 
2043  | 0  |                     goto err;  | 
2044  | 131  |                 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));  | 
2045  | 131  |                 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));  | 
2046  | 131  |                 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));  | 
2047  | 2.09k  |                 for (j = 2; j <= 16; ++j) { | 
2048  | 1.96k  |                     if (j & 1) { | 
2049  | 917  |                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],  | 
2050  | 917  |                                   pre_comp[i][j][2], pre_comp[i][1][0],  | 
2051  | 917  |                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,  | 
2052  | 917  |                                   pre_comp[i][j - 1][0],  | 
2053  | 917  |                                   pre_comp[i][j - 1][1],  | 
2054  | 917  |                                   pre_comp[i][j - 1][2]);  | 
2055  | 1.04k  |                     } else { | 
2056  | 1.04k  |                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],  | 
2057  | 1.04k  |                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],  | 
2058  | 1.04k  |                                      pre_comp[i][j / 2][1],  | 
2059  | 1.04k  |                                      pre_comp[i][j / 2][2]);  | 
2060  | 1.04k  |                     }  | 
2061  | 1.96k  |                 }  | 
2062  | 131  |             }  | 
2063  | 131  |         }  | 
2064  | 131  |         if (mixed)  | 
2065  | 0  |             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);  | 
2066  | 131  |     }  | 
2067  |  |  | 
2068  |  |     /* the scalar for the generator */  | 
2069  | 706  |     if ((scalar != NULL) && (have_pre_comp)) { | 
2070  | 584  |         memset(g_secret, 0, sizeof(g_secret));  | 
2071  |  |         /* reduce scalar to 0 <= scalar < 2^521 */  | 
2072  | 584  |         if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) { | 
2073  |  |             /*  | 
2074  |  |              * this is an unusual input, and we don't guarantee  | 
2075  |  |              * constant-timeness  | 
2076  |  |              */  | 
2077  | 25  |             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) { | 
2078  | 0  |                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2079  | 0  |                 goto err;  | 
2080  | 0  |             }  | 
2081  | 25  |             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));  | 
2082  | 559  |         } else { | 
2083  | 559  |             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));  | 
2084  | 559  |         }  | 
2085  |  |         /* do the multiplication with generator precomputation */  | 
2086  | 584  |         batch_mul(x_out, y_out, z_out,  | 
2087  | 584  |                   (const felem_bytearray(*))secrets, num_points,  | 
2088  | 584  |                   g_secret,  | 
2089  | 584  |                   mixed, (const felem(*)[17][3])pre_comp,  | 
2090  | 584  |                   (const felem(*)[3])g_pre_comp);  | 
2091  | 584  |     } else { | 
2092  |  |         /* do the multiplication without generator precomputation */  | 
2093  | 122  |         batch_mul(x_out, y_out, z_out,  | 
2094  | 122  |                   (const felem_bytearray(*))secrets, num_points,  | 
2095  | 122  |                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);  | 
2096  | 122  |     }  | 
2097  |  |     /* reduce the output to its unique minimal representation */  | 
2098  | 706  |     felem_contract(x_in, x_out);  | 
2099  | 706  |     felem_contract(y_in, y_out);  | 
2100  | 706  |     felem_contract(z_in, z_out);  | 
2101  | 706  |     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||  | 
2102  | 706  |         (!felem_to_BN(z, z_in))) { | 
2103  | 0  |         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2104  | 0  |         goto err;  | 
2105  | 0  |     }  | 
2106  | 706  |     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,  | 
2107  | 706  |                                                              ctx);  | 
2108  |  |  | 
2109  | 706  |  err:  | 
2110  | 706  |     BN_CTX_end(ctx);  | 
2111  | 706  |     EC_POINT_free(generator);  | 
2112  | 706  |     OPENSSL_free(secrets);  | 
2113  | 706  |     OPENSSL_free(pre_comp);  | 
2114  | 706  |     OPENSSL_free(tmp_felems);  | 
2115  | 706  |     return ret;  | 
2116  | 706  | }  | 
2117  |  |  | 
2118  |  | int ossl_ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)  | 
2119  | 0  | { | 
2120  | 0  |     int ret = 0;  | 
2121  | 0  |     NISTP521_PRE_COMP *pre = NULL;  | 
2122  | 0  |     int i, j;  | 
2123  | 0  |     BIGNUM *x, *y;  | 
2124  | 0  |     EC_POINT *generator = NULL;  | 
2125  | 0  |     felem tmp_felems[16];  | 
2126  | 0  | #ifndef FIPS_MODULE  | 
2127  | 0  |     BN_CTX *new_ctx = NULL;  | 
2128  | 0  | #endif  | 
2129  |  |  | 
2130  |  |     /* throw away old precomputation */  | 
2131  | 0  |     EC_pre_comp_free(group);  | 
2132  |  | 
  | 
2133  | 0  | #ifndef FIPS_MODULE  | 
2134  | 0  |     if (ctx == NULL)  | 
2135  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
2136  | 0  | #endif  | 
2137  | 0  |     if (ctx == NULL)  | 
2138  | 0  |         return 0;  | 
2139  |  |  | 
2140  | 0  |     BN_CTX_start(ctx);  | 
2141  | 0  |     x = BN_CTX_get(ctx);  | 
2142  | 0  |     y = BN_CTX_get(ctx);  | 
2143  | 0  |     if (y == NULL)  | 
2144  | 0  |         goto err;  | 
2145  |  |     /* get the generator */  | 
2146  | 0  |     if (group->generator == NULL)  | 
2147  | 0  |         goto err;  | 
2148  | 0  |     generator = EC_POINT_new(group);  | 
2149  | 0  |     if (generator == NULL)  | 
2150  | 0  |         goto err;  | 
2151  | 0  |     BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);  | 
2152  | 0  |     BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);  | 
2153  | 0  |     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))  | 
2154  | 0  |         goto err;  | 
2155  | 0  |     if ((pre = nistp521_pre_comp_new()) == NULL)  | 
2156  | 0  |         goto err;  | 
2157  |  |     /*  | 
2158  |  |      * if the generator is the standard one, use built-in precomputation  | 
2159  |  |      */  | 
2160  | 0  |     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { | 
2161  | 0  |         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));  | 
2162  | 0  |         goto done;  | 
2163  | 0  |     }  | 
2164  | 0  |     if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||  | 
2165  | 0  |         (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||  | 
2166  | 0  |         (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))  | 
2167  | 0  |         goto err;  | 
2168  |  |     /* compute 2^130*G, 2^260*G, 2^390*G */  | 
2169  | 0  |     for (i = 1; i <= 4; i <<= 1) { | 
2170  | 0  |         point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],  | 
2171  | 0  |                      pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],  | 
2172  | 0  |                      pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);  | 
2173  | 0  |         for (j = 0; j < 129; ++j) { | 
2174  | 0  |             point_double(pre->g_pre_comp[2 * i][0],  | 
2175  | 0  |                          pre->g_pre_comp[2 * i][1],  | 
2176  | 0  |                          pre->g_pre_comp[2 * i][2],  | 
2177  | 0  |                          pre->g_pre_comp[2 * i][0],  | 
2178  | 0  |                          pre->g_pre_comp[2 * i][1],  | 
2179  | 0  |                          pre->g_pre_comp[2 * i][2]);  | 
2180  | 0  |         }  | 
2181  | 0  |     }  | 
2182  |  |     /* g_pre_comp[0] is the point at infinity */  | 
2183  | 0  |     memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));  | 
2184  |  |     /* the remaining multiples */  | 
2185  |  |     /* 2^130*G + 2^260*G */  | 
2186  | 0  |     point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],  | 
2187  | 0  |               pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],  | 
2188  | 0  |               pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],  | 
2189  | 0  |               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],  | 
2190  | 0  |               pre->g_pre_comp[2][2]);  | 
2191  |  |     /* 2^130*G + 2^390*G */  | 
2192  | 0  |     point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],  | 
2193  | 0  |               pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],  | 
2194  | 0  |               pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],  | 
2195  | 0  |               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],  | 
2196  | 0  |               pre->g_pre_comp[2][2]);  | 
2197  |  |     /* 2^260*G + 2^390*G */  | 
2198  | 0  |     point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],  | 
2199  | 0  |               pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],  | 
2200  | 0  |               pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],  | 
2201  | 0  |               0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],  | 
2202  | 0  |               pre->g_pre_comp[4][2]);  | 
2203  |  |     /* 2^130*G + 2^260*G + 2^390*G */  | 
2204  | 0  |     point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],  | 
2205  | 0  |               pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],  | 
2206  | 0  |               pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],  | 
2207  | 0  |               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],  | 
2208  | 0  |               pre->g_pre_comp[2][2]);  | 
2209  | 0  |     for (i = 1; i < 8; ++i) { | 
2210  |  |         /* odd multiples: add G */  | 
2211  | 0  |         point_add(pre->g_pre_comp[2 * i + 1][0],  | 
2212  | 0  |                   pre->g_pre_comp[2 * i + 1][1],  | 
2213  | 0  |                   pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],  | 
2214  | 0  |                   pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,  | 
2215  | 0  |                   pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],  | 
2216  | 0  |                   pre->g_pre_comp[1][2]);  | 
2217  | 0  |     }  | 
2218  | 0  |     make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);  | 
2219  |  | 
  | 
2220  | 0  |  done:  | 
2221  | 0  |     SETPRECOMP(group, nistp521, pre);  | 
2222  | 0  |     ret = 1;  | 
2223  | 0  |     pre = NULL;  | 
2224  | 0  |  err:  | 
2225  | 0  |     BN_CTX_end(ctx);  | 
2226  | 0  |     EC_POINT_free(generator);  | 
2227  | 0  | #ifndef FIPS_MODULE  | 
2228  | 0  |     BN_CTX_free(new_ctx);  | 
2229  | 0  | #endif  | 
2230  | 0  |     EC_nistp521_pre_comp_free(pre);  | 
2231  | 0  |     return ret;  | 
2232  | 0  | }  | 
2233  |  |  | 
2234  |  | int ossl_ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)  | 
2235  | 0  | { | 
2236  | 0  |     return HAVEPRECOMP(group, nistp521);  | 
2237  | 0  | }  |