/src/openssl/crypto/ec/ecp_nistp256.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | /* Copyright 2011 Google Inc.  | 
11  |  |  *  | 
12  |  |  * Licensed under the Apache License, Version 2.0 (the "License");  | 
13  |  |  *  | 
14  |  |  * you may not use this file except in compliance with the License.  | 
15  |  |  * You may obtain a copy of the License at  | 
16  |  |  *  | 
17  |  |  *     http://www.apache.org/licenses/LICENSE-2.0  | 
18  |  |  *  | 
19  |  |  *  Unless required by applicable law or agreed to in writing, software  | 
20  |  |  *  distributed under the License is distributed on an "AS IS" BASIS,  | 
21  |  |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
22  |  |  *  See the License for the specific language governing permissions and  | 
23  |  |  *  limitations under the License.  | 
24  |  |  */  | 
25  |  |  | 
26  |  | /*  | 
27  |  |  * ECDSA low level APIs are deprecated for public use, but still ok for  | 
28  |  |  * internal use.  | 
29  |  |  */  | 
30  |  | #include "internal/deprecated.h"  | 
31  |  |  | 
32  |  | /*  | 
33  |  |  * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication  | 
34  |  |  *  | 
35  |  |  * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.  | 
36  |  |  * Otherwise based on Emilia's P224 work, which was inspired by my curve25519  | 
37  |  |  * work which got its smarts from Daniel J. Bernstein's work on the same.  | 
38  |  |  */  | 
39  |  |  | 
40  |  | #include <openssl/opensslconf.h>  | 
41  |  |  | 
42  |  | #include <stdint.h>  | 
43  |  | #include <string.h>  | 
44  |  | #include <openssl/err.h>  | 
45  |  | #include "ec_local.h"  | 
46  |  |  | 
47  |  | #include "internal/numbers.h"  | 
48  |  |  | 
49  |  | #ifndef INT128_MAX  | 
50  |  | # error "Your compiler doesn't appear to support 128-bit integer types"  | 
51  |  | #endif  | 
52  |  |  | 
53  |  | typedef uint8_t u8;  | 
54  |  | typedef uint32_t u32;  | 
55  |  | typedef uint64_t u64;  | 
56  |  |  | 
57  |  | /*  | 
58  |  |  * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We  | 
59  |  |  * can serialize an element of this field into 32 bytes. We call this an  | 
60  |  |  * felem_bytearray.  | 
61  |  |  */  | 
62  |  |  | 
63  |  | typedef u8 felem_bytearray[32];  | 
64  |  |  | 
65  |  | /*  | 
66  |  |  * These are the parameters of P256, taken from FIPS 186-3, page 86. These  | 
67  |  |  * values are big-endian.  | 
68  |  |  */  | 
69  |  | static const felem_bytearray nistp256_curve_params[5] = { | 
70  |  |     {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */ | 
71  |  |      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  | 
72  |  |      0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,  | 
73  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},  | 
74  |  |     {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */ | 
75  |  |      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  | 
76  |  |      0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,  | 
77  |  |      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc},  | 
78  |  |     {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, /* b */ | 
79  |  |      0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,  | 
80  |  |      0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,  | 
81  |  |      0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},  | 
82  |  |     {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */ | 
83  |  |      0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,  | 
84  |  |      0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,  | 
85  |  |      0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},  | 
86  |  |     {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */ | 
87  |  |      0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,  | 
88  |  |      0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,  | 
89  |  |      0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}  | 
90  |  | };  | 
91  |  |  | 
92  |  | /*-  | 
93  |  |  * The representation of field elements.  | 
94  |  |  * ------------------------------------  | 
95  |  |  *  | 
96  |  |  * We represent field elements with either four 128-bit values, eight 128-bit  | 
97  |  |  * values, or four 64-bit values. The field element represented is:  | 
98  |  |  *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192  (mod p)  | 
99  |  |  * or:  | 
100  |  |  *   v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[7]*2^448  (mod p)  | 
101  |  |  *  | 
102  |  |  * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits  | 
103  |  |  * apart, but are 128-bits wide, the most significant bits of each limb overlap  | 
104  |  |  * with the least significant bits of the next.  | 
105  |  |  *  | 
106  |  |  * A field element with four limbs is an 'felem'. One with eight limbs is a  | 
107  |  |  * 'longfelem'  | 
108  |  |  *  | 
109  |  |  * A field element with four, 64-bit values is called a 'smallfelem'. Small  | 
110  |  |  * values are used as intermediate values before multiplication.  | 
111  |  |  */  | 
112  |  |  | 
113  | 0  | #define NLIMBS 4  | 
114  |  |  | 
115  |  | typedef uint128_t limb;  | 
116  |  | typedef limb felem[NLIMBS];  | 
117  |  | typedef limb longfelem[NLIMBS * 2];  | 
118  |  | typedef u64 smallfelem[NLIMBS];  | 
119  |  |  | 
120  |  | /* This is the value of the prime as four 64-bit words, little-endian. */  | 
121  |  | static const u64 kPrime[4] = { | 
122  |  |     0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul  | 
123  |  | };  | 
124  |  | static const u64 bottom63bits = 0x7ffffffffffffffful;  | 
125  |  |  | 
126  |  | /*  | 
127  |  |  * bin32_to_felem takes a little-endian byte array and converts it into felem  | 
128  |  |  * form. This assumes that the CPU is little-endian.  | 
129  |  |  */  | 
130  |  | static void bin32_to_felem(felem out, const u8 in[32])  | 
131  | 0  | { | 
132  | 0  |     out[0] = *((u64 *)&in[0]);  | 
133  | 0  |     out[1] = *((u64 *)&in[8]);  | 
134  | 0  |     out[2] = *((u64 *)&in[16]);  | 
135  | 0  |     out[3] = *((u64 *)&in[24]);  | 
136  | 0  | }  | 
137  |  |  | 
138  |  | /*  | 
139  |  |  * smallfelem_to_bin32 takes a smallfelem and serializes into a little  | 
140  |  |  * endian, 32 byte array. This assumes that the CPU is little-endian.  | 
141  |  |  */  | 
142  |  | static void smallfelem_to_bin32(u8 out[32], const smallfelem in)  | 
143  | 0  | { | 
144  | 0  |     *((u64 *)&out[0]) = in[0];  | 
145  | 0  |     *((u64 *)&out[8]) = in[1];  | 
146  | 0  |     *((u64 *)&out[16]) = in[2];  | 
147  | 0  |     *((u64 *)&out[24]) = in[3];  | 
148  | 0  | }  | 
149  |  |  | 
150  |  | /* BN_to_felem converts an OpenSSL BIGNUM into an felem */  | 
151  |  | static int BN_to_felem(felem out, const BIGNUM *bn)  | 
152  | 0  | { | 
153  | 0  |     felem_bytearray b_out;  | 
154  | 0  |     int num_bytes;  | 
155  |  | 
  | 
156  | 0  |     if (BN_is_negative(bn)) { | 
157  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
158  | 0  |         return 0;  | 
159  | 0  |     }  | 
160  | 0  |     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));  | 
161  | 0  |     if (num_bytes < 0) { | 
162  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
163  | 0  |         return 0;  | 
164  | 0  |     }  | 
165  | 0  |     bin32_to_felem(out, b_out);  | 
166  | 0  |     return 1;  | 
167  | 0  | }  | 
168  |  |  | 
169  |  | /* felem_to_BN converts an felem into an OpenSSL BIGNUM */  | 
170  |  | static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)  | 
171  | 0  | { | 
172  | 0  |     felem_bytearray b_out;  | 
173  | 0  |     smallfelem_to_bin32(b_out, in);  | 
174  | 0  |     return BN_lebin2bn(b_out, sizeof(b_out), out);  | 
175  | 0  | }  | 
176  |  |  | 
177  |  | /*-  | 
178  |  |  * Field operations  | 
179  |  |  * ----------------  | 
180  |  |  */  | 
181  |  |  | 
182  |  | static void smallfelem_one(smallfelem out)  | 
183  | 0  | { | 
184  | 0  |     out[0] = 1;  | 
185  | 0  |     out[1] = 0;  | 
186  | 0  |     out[2] = 0;  | 
187  | 0  |     out[3] = 0;  | 
188  | 0  | }  | 
189  |  |  | 
190  |  | static void smallfelem_assign(smallfelem out, const smallfelem in)  | 
191  | 0  | { | 
192  | 0  |     out[0] = in[0];  | 
193  | 0  |     out[1] = in[1];  | 
194  | 0  |     out[2] = in[2];  | 
195  | 0  |     out[3] = in[3];  | 
196  | 0  | }  | 
197  |  |  | 
198  |  | static void felem_assign(felem out, const felem in)  | 
199  | 0  | { | 
200  | 0  |     out[0] = in[0];  | 
201  | 0  |     out[1] = in[1];  | 
202  | 0  |     out[2] = in[2];  | 
203  | 0  |     out[3] = in[3];  | 
204  | 0  | }  | 
205  |  |  | 
206  |  | /* felem_sum sets out = out + in. */  | 
207  |  | static void felem_sum(felem out, const felem in)  | 
208  | 0  | { | 
209  | 0  |     out[0] += in[0];  | 
210  | 0  |     out[1] += in[1];  | 
211  | 0  |     out[2] += in[2];  | 
212  | 0  |     out[3] += in[3];  | 
213  | 0  | }  | 
214  |  |  | 
215  |  | /* felem_small_sum sets out = out + in. */  | 
216  |  | static void felem_small_sum(felem out, const smallfelem in)  | 
217  | 0  | { | 
218  | 0  |     out[0] += in[0];  | 
219  | 0  |     out[1] += in[1];  | 
220  | 0  |     out[2] += in[2];  | 
221  | 0  |     out[3] += in[3];  | 
222  | 0  | }  | 
223  |  |  | 
224  |  | /* felem_scalar sets out = out * scalar */  | 
225  |  | static void felem_scalar(felem out, const u64 scalar)  | 
226  | 0  | { | 
227  | 0  |     out[0] *= scalar;  | 
228  | 0  |     out[1] *= scalar;  | 
229  | 0  |     out[2] *= scalar;  | 
230  | 0  |     out[3] *= scalar;  | 
231  | 0  | }  | 
232  |  |  | 
233  |  | /* longfelem_scalar sets out = out * scalar */  | 
234  |  | static void longfelem_scalar(longfelem out, const u64 scalar)  | 
235  | 0  | { | 
236  | 0  |     out[0] *= scalar;  | 
237  | 0  |     out[1] *= scalar;  | 
238  | 0  |     out[2] *= scalar;  | 
239  | 0  |     out[3] *= scalar;  | 
240  | 0  |     out[4] *= scalar;  | 
241  | 0  |     out[5] *= scalar;  | 
242  | 0  |     out[6] *= scalar;  | 
243  | 0  |     out[7] *= scalar;  | 
244  | 0  | }  | 
245  |  |  | 
246  |  | #define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)  | 
247  |  | #define two105 (((limb)1) << 105)  | 
248  |  | #define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)  | 
249  |  |  | 
250  |  | /* zero105 is 0 mod p */  | 
251  |  | static const felem zero105 =  | 
252  |  |     { two105m41m9, two105, two105m41p9, two105m41p9 }; | 
253  |  |  | 
254  |  | /*-  | 
255  |  |  * smallfelem_neg sets |out| to |-small|  | 
256  |  |  * On exit:  | 
257  |  |  *   out[i] < out[i] + 2^105  | 
258  |  |  */  | 
259  |  | static void smallfelem_neg(felem out, const smallfelem small)  | 
260  | 0  | { | 
261  |  |     /* In order to prevent underflow, we subtract from 0 mod p. */  | 
262  | 0  |     out[0] = zero105[0] - small[0];  | 
263  | 0  |     out[1] = zero105[1] - small[1];  | 
264  | 0  |     out[2] = zero105[2] - small[2];  | 
265  | 0  |     out[3] = zero105[3] - small[3];  | 
266  | 0  | }  | 
267  |  |  | 
268  |  | /*-  | 
269  |  |  * felem_diff subtracts |in| from |out|  | 
270  |  |  * On entry:  | 
271  |  |  *   in[i] < 2^104  | 
272  |  |  * On exit:  | 
273  |  |  *   out[i] < out[i] + 2^105  | 
274  |  |  */  | 
275  |  | static void felem_diff(felem out, const felem in)  | 
276  | 0  | { | 
277  |  |     /*  | 
278  |  |      * In order to prevent underflow, we add 0 mod p before subtracting.  | 
279  |  |      */  | 
280  | 0  |     out[0] += zero105[0];  | 
281  | 0  |     out[1] += zero105[1];  | 
282  | 0  |     out[2] += zero105[2];  | 
283  | 0  |     out[3] += zero105[3];  | 
284  |  | 
  | 
285  | 0  |     out[0] -= in[0];  | 
286  | 0  |     out[1] -= in[1];  | 
287  | 0  |     out[2] -= in[2];  | 
288  | 0  |     out[3] -= in[3];  | 
289  | 0  | }  | 
290  |  |  | 
291  |  | #define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)  | 
292  |  | #define two107 (((limb)1) << 107)  | 
293  |  | #define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)  | 
294  |  |  | 
295  |  | /* zero107 is 0 mod p */  | 
296  |  | static const felem zero107 = { | 
297  |  |     two107m43m11, two107, two107m43p11, two107m43p11  | 
298  |  | };  | 
299  |  |  | 
300  |  | /*-  | 
301  |  |  * An alternative felem_diff for larger inputs |in|  | 
302  |  |  * felem_diff_zero107 subtracts |in| from |out|  | 
303  |  |  * On entry:  | 
304  |  |  *   in[i] < 2^106  | 
305  |  |  * On exit:  | 
306  |  |  *   out[i] < out[i] + 2^107  | 
307  |  |  */  | 
308  |  | static void felem_diff_zero107(felem out, const felem in)  | 
309  | 0  | { | 
310  |  |     /*  | 
311  |  |      * In order to prevent underflow, we add 0 mod p before subtracting.  | 
312  |  |      */  | 
313  | 0  |     out[0] += zero107[0];  | 
314  | 0  |     out[1] += zero107[1];  | 
315  | 0  |     out[2] += zero107[2];  | 
316  | 0  |     out[3] += zero107[3];  | 
317  |  | 
  | 
318  | 0  |     out[0] -= in[0];  | 
319  | 0  |     out[1] -= in[1];  | 
320  | 0  |     out[2] -= in[2];  | 
321  | 0  |     out[3] -= in[3];  | 
322  | 0  | }  | 
323  |  |  | 
324  |  | /*-  | 
325  |  |  * longfelem_diff subtracts |in| from |out|  | 
326  |  |  * On entry:  | 
327  |  |  *   in[i] < 7*2^67  | 
328  |  |  * On exit:  | 
329  |  |  *   out[i] < out[i] + 2^70 + 2^40  | 
330  |  |  */  | 
331  |  | static void longfelem_diff(longfelem out, const longfelem in)  | 
332  | 0  | { | 
333  | 0  |     static const limb two70m8p6 =  | 
334  | 0  |         (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);  | 
335  | 0  |     static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);  | 
336  | 0  |     static const limb two70 = (((limb) 1) << 70);  | 
337  | 0  |     static const limb two70m40m38p6 =  | 
338  | 0  |         (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +  | 
339  | 0  |         (((limb) 1) << 6);  | 
340  | 0  |     static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);  | 
341  |  |  | 
342  |  |     /* add 0 mod p to avoid underflow */  | 
343  | 0  |     out[0] += two70m8p6;  | 
344  | 0  |     out[1] += two70p40;  | 
345  | 0  |     out[2] += two70;  | 
346  | 0  |     out[3] += two70m40m38p6;  | 
347  | 0  |     out[4] += two70m6;  | 
348  | 0  |     out[5] += two70m6;  | 
349  | 0  |     out[6] += two70m6;  | 
350  | 0  |     out[7] += two70m6;  | 
351  |  |  | 
352  |  |     /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */  | 
353  | 0  |     out[0] -= in[0];  | 
354  | 0  |     out[1] -= in[1];  | 
355  | 0  |     out[2] -= in[2];  | 
356  | 0  |     out[3] -= in[3];  | 
357  | 0  |     out[4] -= in[4];  | 
358  | 0  |     out[5] -= in[5];  | 
359  | 0  |     out[6] -= in[6];  | 
360  | 0  |     out[7] -= in[7];  | 
361  | 0  | }  | 
362  |  |  | 
363  |  | #define two64m0 (((limb)1) << 64) - 1  | 
364  |  | #define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1  | 
365  |  | #define two64m46 (((limb)1) << 64) - (((limb)1) << 46)  | 
366  |  | #define two64m32 (((limb)1) << 64) - (((limb)1) << 32)  | 
367  |  |  | 
368  |  | /* zero110 is 0 mod p */  | 
369  |  | static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 }; | 
370  |  |  | 
371  |  | /*-  | 
372  |  |  * felem_shrink converts an felem into a smallfelem. The result isn't quite  | 
373  |  |  * minimal as the value may be greater than p.  | 
374  |  |  *  | 
375  |  |  * On entry:  | 
376  |  |  *   in[i] < 2^109  | 
377  |  |  * On exit:  | 
378  |  |  *   out[i] < 2^64  | 
379  |  |  */  | 
380  |  | static void felem_shrink(smallfelem out, const felem in)  | 
381  | 0  | { | 
382  | 0  |     felem tmp;  | 
383  | 0  |     u64 a, b, mask;  | 
384  | 0  |     u64 high, low;  | 
385  | 0  |     static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */  | 
386  |  |  | 
387  |  |     /* Carry 2->3 */  | 
388  | 0  |     tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));  | 
389  |  |     /* tmp[3] < 2^110 */  | 
390  |  | 
  | 
391  | 0  |     tmp[2] = zero110[2] + (u64)in[2];  | 
392  | 0  |     tmp[0] = zero110[0] + in[0];  | 
393  | 0  |     tmp[1] = zero110[1] + in[1];  | 
394  |  |     /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */  | 
395  |  |  | 
396  |  |     /*  | 
397  |  |      * We perform two partial reductions where we eliminate the high-word of  | 
398  |  |      * tmp[3]. We don't update the other words till the end.  | 
399  |  |      */  | 
400  | 0  |     a = tmp[3] >> 64;           /* a < 2^46 */  | 
401  | 0  |     tmp[3] = (u64)tmp[3];  | 
402  | 0  |     tmp[3] -= a;  | 
403  | 0  |     tmp[3] += ((limb) a) << 32;  | 
404  |  |     /* tmp[3] < 2^79 */  | 
405  |  | 
  | 
406  | 0  |     b = a;  | 
407  | 0  |     a = tmp[3] >> 64;           /* a < 2^15 */  | 
408  | 0  |     b += a;                     /* b < 2^46 + 2^15 < 2^47 */  | 
409  | 0  |     tmp[3] = (u64)tmp[3];  | 
410  | 0  |     tmp[3] -= a;  | 
411  | 0  |     tmp[3] += ((limb) a) << 32;  | 
412  |  |     /* tmp[3] < 2^64 + 2^47 */  | 
413  |  |  | 
414  |  |     /*  | 
415  |  |      * This adjusts the other two words to complete the two partial  | 
416  |  |      * reductions.  | 
417  |  |      */  | 
418  | 0  |     tmp[0] += b;  | 
419  | 0  |     tmp[1] -= (((limb) b) << 32);  | 
420  |  |  | 
421  |  |     /*  | 
422  |  |      * In order to make space in tmp[3] for the carry from 2 -> 3, we  | 
423  |  |      * conditionally subtract kPrime if tmp[3] is large enough.  | 
424  |  |      */  | 
425  | 0  |     high = (u64)(tmp[3] >> 64);  | 
426  |  |     /* As tmp[3] < 2^65, high is either 1 or 0 */  | 
427  | 0  |     high = 0 - high;  | 
428  |  |     /*-  | 
429  |  |      * high is:  | 
430  |  |      *   all ones   if the high word of tmp[3] is 1  | 
431  |  |      *   all zeros  if the high word of tmp[3] if 0  | 
432  |  |      */  | 
433  | 0  |     low = (u64)tmp[3];  | 
434  | 0  |     mask = 0 - (low >> 63);  | 
435  |  |     /*-  | 
436  |  |      * mask is:  | 
437  |  |      *   all ones   if the MSB of low is 1  | 
438  |  |      *   all zeros  if the MSB of low if 0  | 
439  |  |      */  | 
440  | 0  |     low &= bottom63bits;  | 
441  | 0  |     low -= kPrime3Test;  | 
442  |  |     /* if low was greater than kPrime3Test then the MSB is zero */  | 
443  | 0  |     low = ~low;  | 
444  | 0  |     low = 0 - (low >> 63);  | 
445  |  |     /*-  | 
446  |  |      * low is:  | 
447  |  |      *   all ones   if low was > kPrime3Test  | 
448  |  |      *   all zeros  if low was <= kPrime3Test  | 
449  |  |      */  | 
450  | 0  |     mask = (mask & low) | high;  | 
451  | 0  |     tmp[0] -= mask & kPrime[0];  | 
452  | 0  |     tmp[1] -= mask & kPrime[1];  | 
453  |  |     /* kPrime[2] is zero, so omitted */  | 
454  | 0  |     tmp[3] -= mask & kPrime[3];  | 
455  |  |     /* tmp[3] < 2**64 - 2**32 + 1 */  | 
456  |  | 
  | 
457  | 0  |     tmp[1] += ((u64)(tmp[0] >> 64));  | 
458  | 0  |     tmp[0] = (u64)tmp[0];  | 
459  | 0  |     tmp[2] += ((u64)(tmp[1] >> 64));  | 
460  | 0  |     tmp[1] = (u64)tmp[1];  | 
461  | 0  |     tmp[3] += ((u64)(tmp[2] >> 64));  | 
462  | 0  |     tmp[2] = (u64)tmp[2];  | 
463  |  |     /* tmp[i] < 2^64 */  | 
464  |  | 
  | 
465  | 0  |     out[0] = tmp[0];  | 
466  | 0  |     out[1] = tmp[1];  | 
467  | 0  |     out[2] = tmp[2];  | 
468  | 0  |     out[3] = tmp[3];  | 
469  | 0  | }  | 
470  |  |  | 
471  |  | /* smallfelem_expand converts a smallfelem to an felem */  | 
472  |  | static void smallfelem_expand(felem out, const smallfelem in)  | 
473  | 0  | { | 
474  | 0  |     out[0] = in[0];  | 
475  | 0  |     out[1] = in[1];  | 
476  | 0  |     out[2] = in[2];  | 
477  | 0  |     out[3] = in[3];  | 
478  | 0  | }  | 
479  |  |  | 
480  |  | /*-  | 
481  |  |  * smallfelem_square sets |out| = |small|^2  | 
482  |  |  * On entry:  | 
483  |  |  *   small[i] < 2^64  | 
484  |  |  * On exit:  | 
485  |  |  *   out[i] < 7 * 2^64 < 2^67  | 
486  |  |  */  | 
487  |  | static void smallfelem_square(longfelem out, const smallfelem small)  | 
488  | 0  | { | 
489  | 0  |     limb a;  | 
490  | 0  |     u64 high, low;  | 
491  |  | 
  | 
492  | 0  |     a = ((uint128_t) small[0]) * small[0];  | 
493  | 0  |     low = a;  | 
494  | 0  |     high = a >> 64;  | 
495  | 0  |     out[0] = low;  | 
496  | 0  |     out[1] = high;  | 
497  |  | 
  | 
498  | 0  |     a = ((uint128_t) small[0]) * small[1];  | 
499  | 0  |     low = a;  | 
500  | 0  |     high = a >> 64;  | 
501  | 0  |     out[1] += low;  | 
502  | 0  |     out[1] += low;  | 
503  | 0  |     out[2] = high;  | 
504  |  | 
  | 
505  | 0  |     a = ((uint128_t) small[0]) * small[2];  | 
506  | 0  |     low = a;  | 
507  | 0  |     high = a >> 64;  | 
508  | 0  |     out[2] += low;  | 
509  | 0  |     out[2] *= 2;  | 
510  | 0  |     out[3] = high;  | 
511  |  | 
  | 
512  | 0  |     a = ((uint128_t) small[0]) * small[3];  | 
513  | 0  |     low = a;  | 
514  | 0  |     high = a >> 64;  | 
515  | 0  |     out[3] += low;  | 
516  | 0  |     out[4] = high;  | 
517  |  | 
  | 
518  | 0  |     a = ((uint128_t) small[1]) * small[2];  | 
519  | 0  |     low = a;  | 
520  | 0  |     high = a >> 64;  | 
521  | 0  |     out[3] += low;  | 
522  | 0  |     out[3] *= 2;  | 
523  | 0  |     out[4] += high;  | 
524  |  | 
  | 
525  | 0  |     a = ((uint128_t) small[1]) * small[1];  | 
526  | 0  |     low = a;  | 
527  | 0  |     high = a >> 64;  | 
528  | 0  |     out[2] += low;  | 
529  | 0  |     out[3] += high;  | 
530  |  | 
  | 
531  | 0  |     a = ((uint128_t) small[1]) * small[3];  | 
532  | 0  |     low = a;  | 
533  | 0  |     high = a >> 64;  | 
534  | 0  |     out[4] += low;  | 
535  | 0  |     out[4] *= 2;  | 
536  | 0  |     out[5] = high;  | 
537  |  | 
  | 
538  | 0  |     a = ((uint128_t) small[2]) * small[3];  | 
539  | 0  |     low = a;  | 
540  | 0  |     high = a >> 64;  | 
541  | 0  |     out[5] += low;  | 
542  | 0  |     out[5] *= 2;  | 
543  | 0  |     out[6] = high;  | 
544  | 0  |     out[6] += high;  | 
545  |  | 
  | 
546  | 0  |     a = ((uint128_t) small[2]) * small[2];  | 
547  | 0  |     low = a;  | 
548  | 0  |     high = a >> 64;  | 
549  | 0  |     out[4] += low;  | 
550  | 0  |     out[5] += high;  | 
551  |  | 
  | 
552  | 0  |     a = ((uint128_t) small[3]) * small[3];  | 
553  | 0  |     low = a;  | 
554  | 0  |     high = a >> 64;  | 
555  | 0  |     out[6] += low;  | 
556  | 0  |     out[7] = high;  | 
557  | 0  | }  | 
558  |  |  | 
559  |  | /*-  | 
560  |  |  * felem_square sets |out| = |in|^2  | 
561  |  |  * On entry:  | 
562  |  |  *   in[i] < 2^109  | 
563  |  |  * On exit:  | 
564  |  |  *   out[i] < 7 * 2^64 < 2^67  | 
565  |  |  */  | 
566  |  | static void felem_square(longfelem out, const felem in)  | 
567  | 0  | { | 
568  | 0  |     u64 small[4];  | 
569  | 0  |     felem_shrink(small, in);  | 
570  | 0  |     smallfelem_square(out, small);  | 
571  | 0  | }  | 
572  |  |  | 
573  |  | /*-  | 
574  |  |  * smallfelem_mul sets |out| = |small1| * |small2|  | 
575  |  |  * On entry:  | 
576  |  |  *   small1[i] < 2^64  | 
577  |  |  *   small2[i] < 2^64  | 
578  |  |  * On exit:  | 
579  |  |  *   out[i] < 7 * 2^64 < 2^67  | 
580  |  |  */  | 
581  |  | static void smallfelem_mul(longfelem out, const smallfelem small1,  | 
582  |  |                            const smallfelem small2)  | 
583  | 0  | { | 
584  | 0  |     limb a;  | 
585  | 0  |     u64 high, low;  | 
586  |  | 
  | 
587  | 0  |     a = ((uint128_t) small1[0]) * small2[0];  | 
588  | 0  |     low = a;  | 
589  | 0  |     high = a >> 64;  | 
590  | 0  |     out[0] = low;  | 
591  | 0  |     out[1] = high;  | 
592  |  | 
  | 
593  | 0  |     a = ((uint128_t) small1[0]) * small2[1];  | 
594  | 0  |     low = a;  | 
595  | 0  |     high = a >> 64;  | 
596  | 0  |     out[1] += low;  | 
597  | 0  |     out[2] = high;  | 
598  |  | 
  | 
599  | 0  |     a = ((uint128_t) small1[1]) * small2[0];  | 
600  | 0  |     low = a;  | 
601  | 0  |     high = a >> 64;  | 
602  | 0  |     out[1] += low;  | 
603  | 0  |     out[2] += high;  | 
604  |  | 
  | 
605  | 0  |     a = ((uint128_t) small1[0]) * small2[2];  | 
606  | 0  |     low = a;  | 
607  | 0  |     high = a >> 64;  | 
608  | 0  |     out[2] += low;  | 
609  | 0  |     out[3] = high;  | 
610  |  | 
  | 
611  | 0  |     a = ((uint128_t) small1[1]) * small2[1];  | 
612  | 0  |     low = a;  | 
613  | 0  |     high = a >> 64;  | 
614  | 0  |     out[2] += low;  | 
615  | 0  |     out[3] += high;  | 
616  |  | 
  | 
617  | 0  |     a = ((uint128_t) small1[2]) * small2[0];  | 
618  | 0  |     low = a;  | 
619  | 0  |     high = a >> 64;  | 
620  | 0  |     out[2] += low;  | 
621  | 0  |     out[3] += high;  | 
622  |  | 
  | 
623  | 0  |     a = ((uint128_t) small1[0]) * small2[3];  | 
624  | 0  |     low = a;  | 
625  | 0  |     high = a >> 64;  | 
626  | 0  |     out[3] += low;  | 
627  | 0  |     out[4] = high;  | 
628  |  | 
  | 
629  | 0  |     a = ((uint128_t) small1[1]) * small2[2];  | 
630  | 0  |     low = a;  | 
631  | 0  |     high = a >> 64;  | 
632  | 0  |     out[3] += low;  | 
633  | 0  |     out[4] += high;  | 
634  |  | 
  | 
635  | 0  |     a = ((uint128_t) small1[2]) * small2[1];  | 
636  | 0  |     low = a;  | 
637  | 0  |     high = a >> 64;  | 
638  | 0  |     out[3] += low;  | 
639  | 0  |     out[4] += high;  | 
640  |  | 
  | 
641  | 0  |     a = ((uint128_t) small1[3]) * small2[0];  | 
642  | 0  |     low = a;  | 
643  | 0  |     high = a >> 64;  | 
644  | 0  |     out[3] += low;  | 
645  | 0  |     out[4] += high;  | 
646  |  | 
  | 
647  | 0  |     a = ((uint128_t) small1[1]) * small2[3];  | 
648  | 0  |     low = a;  | 
649  | 0  |     high = a >> 64;  | 
650  | 0  |     out[4] += low;  | 
651  | 0  |     out[5] = high;  | 
652  |  | 
  | 
653  | 0  |     a = ((uint128_t) small1[2]) * small2[2];  | 
654  | 0  |     low = a;  | 
655  | 0  |     high = a >> 64;  | 
656  | 0  |     out[4] += low;  | 
657  | 0  |     out[5] += high;  | 
658  |  | 
  | 
659  | 0  |     a = ((uint128_t) small1[3]) * small2[1];  | 
660  | 0  |     low = a;  | 
661  | 0  |     high = a >> 64;  | 
662  | 0  |     out[4] += low;  | 
663  | 0  |     out[5] += high;  | 
664  |  | 
  | 
665  | 0  |     a = ((uint128_t) small1[2]) * small2[3];  | 
666  | 0  |     low = a;  | 
667  | 0  |     high = a >> 64;  | 
668  | 0  |     out[5] += low;  | 
669  | 0  |     out[6] = high;  | 
670  |  | 
  | 
671  | 0  |     a = ((uint128_t) small1[3]) * small2[2];  | 
672  | 0  |     low = a;  | 
673  | 0  |     high = a >> 64;  | 
674  | 0  |     out[5] += low;  | 
675  | 0  |     out[6] += high;  | 
676  |  | 
  | 
677  | 0  |     a = ((uint128_t) small1[3]) * small2[3];  | 
678  | 0  |     low = a;  | 
679  | 0  |     high = a >> 64;  | 
680  | 0  |     out[6] += low;  | 
681  | 0  |     out[7] = high;  | 
682  | 0  | }  | 
683  |  |  | 
684  |  | /*-  | 
685  |  |  * felem_mul sets |out| = |in1| * |in2|  | 
686  |  |  * On entry:  | 
687  |  |  *   in1[i] < 2^109  | 
688  |  |  *   in2[i] < 2^109  | 
689  |  |  * On exit:  | 
690  |  |  *   out[i] < 7 * 2^64 < 2^67  | 
691  |  |  */  | 
692  |  | static void felem_mul(longfelem out, const felem in1, const felem in2)  | 
693  | 0  | { | 
694  | 0  |     smallfelem small1, small2;  | 
695  | 0  |     felem_shrink(small1, in1);  | 
696  | 0  |     felem_shrink(small2, in2);  | 
697  | 0  |     smallfelem_mul(out, small1, small2);  | 
698  | 0  | }  | 
699  |  |  | 
700  |  | /*-  | 
701  |  |  * felem_small_mul sets |out| = |small1| * |in2|  | 
702  |  |  * On entry:  | 
703  |  |  *   small1[i] < 2^64  | 
704  |  |  *   in2[i] < 2^109  | 
705  |  |  * On exit:  | 
706  |  |  *   out[i] < 7 * 2^64 < 2^67  | 
707  |  |  */  | 
708  |  | static void felem_small_mul(longfelem out, const smallfelem small1,  | 
709  |  |                             const felem in2)  | 
710  | 0  | { | 
711  | 0  |     smallfelem small2;  | 
712  | 0  |     felem_shrink(small2, in2);  | 
713  | 0  |     smallfelem_mul(out, small1, small2);  | 
714  | 0  | }  | 
715  |  |  | 
716  |  | #define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)  | 
717  |  | #define two100 (((limb)1) << 100)  | 
718  |  | #define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)  | 
719  |  | /* zero100 is 0 mod p */  | 
720  |  | static const felem zero100 =  | 
721  |  |     { two100m36m4, two100, two100m36p4, two100m36p4 }; | 
722  |  |  | 
723  |  | /*-  | 
724  |  |  * Internal function for the different flavours of felem_reduce.  | 
725  |  |  * felem_reduce_ reduces the higher coefficients in[4]-in[7].  | 
726  |  |  * On entry:  | 
727  |  |  *   out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]  | 
728  |  |  *   out[1] >= in[7] + 2^32*in[4]  | 
729  |  |  *   out[2] >= in[5] + 2^32*in[5]  | 
730  |  |  *   out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]  | 
731  |  |  * On exit:  | 
732  |  |  *   out[0] <= out[0] + in[4] + 2^32*in[5]  | 
733  |  |  *   out[1] <= out[1] + in[5] + 2^33*in[6]  | 
734  |  |  *   out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]  | 
735  |  |  *   out[3] <= out[3] + 2^32*in[4] + 3*in[7]  | 
736  |  |  */  | 
737  |  | static void felem_reduce_(felem out, const longfelem in)  | 
738  | 0  | { | 
739  | 0  |     int128_t c;  | 
740  |  |     /* combine common terms from below */  | 
741  | 0  |     c = in[4] + (in[5] << 32);  | 
742  | 0  |     out[0] += c;  | 
743  | 0  |     out[3] -= c;  | 
744  |  | 
  | 
745  | 0  |     c = in[5] - in[7];  | 
746  | 0  |     out[1] += c;  | 
747  | 0  |     out[2] -= c;  | 
748  |  |  | 
749  |  |     /* the remaining terms */  | 
750  |  |     /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */  | 
751  | 0  |     out[1] -= (in[4] << 32);  | 
752  | 0  |     out[3] += (in[4] << 32);  | 
753  |  |  | 
754  |  |     /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */  | 
755  | 0  |     out[2] -= (in[5] << 32);  | 
756  |  |  | 
757  |  |     /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */  | 
758  | 0  |     out[0] -= in[6];  | 
759  | 0  |     out[0] -= (in[6] << 32);  | 
760  | 0  |     out[1] += (in[6] << 33);  | 
761  | 0  |     out[2] += (in[6] * 2);  | 
762  | 0  |     out[3] -= (in[6] << 32);  | 
763  |  |  | 
764  |  |     /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */  | 
765  | 0  |     out[0] -= in[7];  | 
766  | 0  |     out[0] -= (in[7] << 32);  | 
767  | 0  |     out[2] += (in[7] << 33);  | 
768  | 0  |     out[3] += (in[7] * 3);  | 
769  | 0  | }  | 
770  |  |  | 
771  |  | /*-  | 
772  |  |  * felem_reduce converts a longfelem into an felem.  | 
773  |  |  * To be called directly after felem_square or felem_mul.  | 
774  |  |  * On entry:  | 
775  |  |  *   in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64  | 
776  |  |  *   in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64  | 
777  |  |  * On exit:  | 
778  |  |  *   out[i] < 2^101  | 
779  |  |  */  | 
780  |  | static void felem_reduce(felem out, const longfelem in)  | 
781  | 0  | { | 
782  | 0  |     out[0] = zero100[0] + in[0];  | 
783  | 0  |     out[1] = zero100[1] + in[1];  | 
784  | 0  |     out[2] = zero100[2] + in[2];  | 
785  | 0  |     out[3] = zero100[3] + in[3];  | 
786  |  | 
  | 
787  | 0  |     felem_reduce_(out, in);  | 
788  |  |  | 
789  |  |     /*-  | 
790  |  |      * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0  | 
791  |  |      * out[1] > 2^100 - 2^64 - 7*2^96 > 0  | 
792  |  |      * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0  | 
793  |  |      * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0  | 
794  |  |      *  | 
795  |  |      * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101  | 
796  |  |      * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101  | 
797  |  |      * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101  | 
798  |  |      * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101  | 
799  |  |      */  | 
800  | 0  | }  | 
801  |  |  | 
802  |  | /*-  | 
803  |  |  * felem_reduce_zero105 converts a larger longfelem into an felem.  | 
804  |  |  * On entry:  | 
805  |  |  *   in[0] < 2^71  | 
806  |  |  * On exit:  | 
807  |  |  *   out[i] < 2^106  | 
808  |  |  */  | 
809  |  | static void felem_reduce_zero105(felem out, const longfelem in)  | 
810  | 0  | { | 
811  | 0  |     out[0] = zero105[0] + in[0];  | 
812  | 0  |     out[1] = zero105[1] + in[1];  | 
813  | 0  |     out[2] = zero105[2] + in[2];  | 
814  | 0  |     out[3] = zero105[3] + in[3];  | 
815  |  | 
  | 
816  | 0  |     felem_reduce_(out, in);  | 
817  |  |  | 
818  |  |     /*-  | 
819  |  |      * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0  | 
820  |  |      * out[1] > 2^105 - 2^71 - 2^103 > 0  | 
821  |  |      * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0  | 
822  |  |      * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0  | 
823  |  |      *  | 
824  |  |      * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106  | 
825  |  |      * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106  | 
826  |  |      * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106  | 
827  |  |      * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106  | 
828  |  |      */  | 
829  | 0  | }  | 
830  |  |  | 
831  |  | /*  | 
832  |  |  * subtract_u64 sets *result = *result - v and *carry to one if the  | 
833  |  |  * subtraction underflowed.  | 
834  |  |  */  | 
835  |  | static void subtract_u64(u64 *result, u64 *carry, u64 v)  | 
836  | 0  | { | 
837  | 0  |     uint128_t r = *result;  | 
838  | 0  |     r -= v;  | 
839  | 0  |     *carry = (r >> 64) & 1;  | 
840  | 0  |     *result = (u64)r;  | 
841  | 0  | }  | 
842  |  |  | 
843  |  | /*  | 
844  |  |  * felem_contract converts |in| to its unique, minimal representation. On  | 
845  |  |  * entry: in[i] < 2^109  | 
846  |  |  */  | 
847  |  | static void felem_contract(smallfelem out, const felem in)  | 
848  | 0  | { | 
849  | 0  |     unsigned i;  | 
850  | 0  |     u64 all_equal_so_far = 0, result = 0, carry;  | 
851  |  | 
  | 
852  | 0  |     felem_shrink(out, in);  | 
853  |  |     /* small is minimal except that the value might be > p */  | 
854  |  | 
  | 
855  | 0  |     all_equal_so_far--;  | 
856  |  |     /*  | 
857  |  |      * We are doing a constant time test if out >= kPrime. We need to compare  | 
858  |  |      * each u64, from most-significant to least significant. For each one, if  | 
859  |  |      * all words so far have been equal (m is all ones) then a non-equal  | 
860  |  |      * result is the answer. Otherwise we continue.  | 
861  |  |      */  | 
862  | 0  |     for (i = 3; i < 4; i--) { | 
863  | 0  |         u64 equal;  | 
864  | 0  |         uint128_t a = ((uint128_t) kPrime[i]) - out[i];  | 
865  |  |         /*  | 
866  |  |          * if out[i] > kPrime[i] then a will underflow and the high 64-bits  | 
867  |  |          * will all be set.  | 
868  |  |          */  | 
869  | 0  |         result |= all_equal_so_far & ((u64)(a >> 64));  | 
870  |  |  | 
871  |  |         /*  | 
872  |  |          * if kPrime[i] == out[i] then |equal| will be all zeros and the  | 
873  |  |          * decrement will make it all ones.  | 
874  |  |          */  | 
875  | 0  |         equal = kPrime[i] ^ out[i];  | 
876  | 0  |         equal--;  | 
877  | 0  |         equal &= equal << 32;  | 
878  | 0  |         equal &= equal << 16;  | 
879  | 0  |         equal &= equal << 8;  | 
880  | 0  |         equal &= equal << 4;  | 
881  | 0  |         equal &= equal << 2;  | 
882  | 0  |         equal &= equal << 1;  | 
883  | 0  |         equal = 0 - (equal >> 63);  | 
884  |  | 
  | 
885  | 0  |         all_equal_so_far &= equal;  | 
886  | 0  |     }  | 
887  |  |  | 
888  |  |     /*  | 
889  |  |      * if all_equal_so_far is still all ones then the two values are equal  | 
890  |  |      * and so out >= kPrime is true.  | 
891  |  |      */  | 
892  | 0  |     result |= all_equal_so_far;  | 
893  |  |  | 
894  |  |     /* if out >= kPrime then we subtract kPrime. */  | 
895  | 0  |     subtract_u64(&out[0], &carry, result & kPrime[0]);  | 
896  | 0  |     subtract_u64(&out[1], &carry, carry);  | 
897  | 0  |     subtract_u64(&out[2], &carry, carry);  | 
898  | 0  |     subtract_u64(&out[3], &carry, carry);  | 
899  |  | 
  | 
900  | 0  |     subtract_u64(&out[1], &carry, result & kPrime[1]);  | 
901  | 0  |     subtract_u64(&out[2], &carry, carry);  | 
902  | 0  |     subtract_u64(&out[3], &carry, carry);  | 
903  |  | 
  | 
904  | 0  |     subtract_u64(&out[2], &carry, result & kPrime[2]);  | 
905  | 0  |     subtract_u64(&out[3], &carry, carry);  | 
906  |  | 
  | 
907  | 0  |     subtract_u64(&out[3], &carry, result & kPrime[3]);  | 
908  | 0  | }  | 
909  |  |  | 
910  |  | static void smallfelem_square_contract(smallfelem out, const smallfelem in)  | 
911  | 0  | { | 
912  | 0  |     longfelem longtmp;  | 
913  | 0  |     felem tmp;  | 
914  |  | 
  | 
915  | 0  |     smallfelem_square(longtmp, in);  | 
916  | 0  |     felem_reduce(tmp, longtmp);  | 
917  | 0  |     felem_contract(out, tmp);  | 
918  | 0  | }  | 
919  |  |  | 
920  |  | static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,  | 
921  |  |                                     const smallfelem in2)  | 
922  | 0  | { | 
923  | 0  |     longfelem longtmp;  | 
924  | 0  |     felem tmp;  | 
925  |  | 
  | 
926  | 0  |     smallfelem_mul(longtmp, in1, in2);  | 
927  | 0  |     felem_reduce(tmp, longtmp);  | 
928  | 0  |     felem_contract(out, tmp);  | 
929  | 0  | }  | 
930  |  |  | 
931  |  | /*-  | 
932  |  |  * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0  | 
933  |  |  * otherwise.  | 
934  |  |  * On entry:  | 
935  |  |  *   small[i] < 2^64  | 
936  |  |  */  | 
937  |  | static limb smallfelem_is_zero(const smallfelem small)  | 
938  | 0  | { | 
939  | 0  |     limb result;  | 
940  | 0  |     u64 is_p;  | 
941  |  | 
  | 
942  | 0  |     u64 is_zero = small[0] | small[1] | small[2] | small[3];  | 
943  | 0  |     is_zero--;  | 
944  | 0  |     is_zero &= is_zero << 32;  | 
945  | 0  |     is_zero &= is_zero << 16;  | 
946  | 0  |     is_zero &= is_zero << 8;  | 
947  | 0  |     is_zero &= is_zero << 4;  | 
948  | 0  |     is_zero &= is_zero << 2;  | 
949  | 0  |     is_zero &= is_zero << 1;  | 
950  | 0  |     is_zero = 0 - (is_zero >> 63);  | 
951  |  | 
  | 
952  | 0  |     is_p = (small[0] ^ kPrime[0]) |  | 
953  | 0  |         (small[1] ^ kPrime[1]) |  | 
954  | 0  |         (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);  | 
955  | 0  |     is_p--;  | 
956  | 0  |     is_p &= is_p << 32;  | 
957  | 0  |     is_p &= is_p << 16;  | 
958  | 0  |     is_p &= is_p << 8;  | 
959  | 0  |     is_p &= is_p << 4;  | 
960  | 0  |     is_p &= is_p << 2;  | 
961  | 0  |     is_p &= is_p << 1;  | 
962  | 0  |     is_p = 0 - (is_p >> 63);  | 
963  |  | 
  | 
964  | 0  |     is_zero |= is_p;  | 
965  |  | 
  | 
966  | 0  |     result = is_zero;  | 
967  | 0  |     result |= ((limb) is_zero) << 64;  | 
968  | 0  |     return result;  | 
969  | 0  | }  | 
970  |  |  | 
971  |  | static int smallfelem_is_zero_int(const void *small)  | 
972  | 0  | { | 
973  | 0  |     return (int)(smallfelem_is_zero(small) & ((limb) 1));  | 
974  | 0  | }  | 
975  |  |  | 
976  |  | /*-  | 
977  |  |  * felem_inv calculates |out| = |in|^{-1} | 
978  |  |  *  | 
979  |  |  * Based on Fermat's Little Theorem:  | 
980  |  |  *   a^p = a (mod p)  | 
981  |  |  *   a^{p-1} = 1 (mod p) | 
982  |  |  *   a^{p-2} = a^{-1} (mod p) | 
983  |  |  */  | 
984  |  | static void felem_inv(felem out, const felem in)  | 
985  | 0  | { | 
986  | 0  |     felem ftmp, ftmp2;  | 
987  |  |     /* each e_I will hold |in|^{2^I - 1} */ | 
988  | 0  |     felem e2, e4, e8, e16, e32, e64;  | 
989  | 0  |     longfelem tmp;  | 
990  | 0  |     unsigned i;  | 
991  |  | 
  | 
992  | 0  |     felem_square(tmp, in);  | 
993  | 0  |     felem_reduce(ftmp, tmp);    /* 2^1 */  | 
994  | 0  |     felem_mul(tmp, in, ftmp);  | 
995  | 0  |     felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */  | 
996  | 0  |     felem_assign(e2, ftmp);  | 
997  | 0  |     felem_square(tmp, ftmp);  | 
998  | 0  |     felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */  | 
999  | 0  |     felem_square(tmp, ftmp);  | 
1000  | 0  |     felem_reduce(ftmp, tmp);    /* 2^4 - 2^2 */  | 
1001  | 0  |     felem_mul(tmp, ftmp, e2);  | 
1002  | 0  |     felem_reduce(ftmp, tmp);    /* 2^4 - 2^0 */  | 
1003  | 0  |     felem_assign(e4, ftmp);  | 
1004  | 0  |     felem_square(tmp, ftmp);  | 
1005  | 0  |     felem_reduce(ftmp, tmp);    /* 2^5 - 2^1 */  | 
1006  | 0  |     felem_square(tmp, ftmp);  | 
1007  | 0  |     felem_reduce(ftmp, tmp);    /* 2^6 - 2^2 */  | 
1008  | 0  |     felem_square(tmp, ftmp);  | 
1009  | 0  |     felem_reduce(ftmp, tmp);    /* 2^7 - 2^3 */  | 
1010  | 0  |     felem_square(tmp, ftmp);  | 
1011  | 0  |     felem_reduce(ftmp, tmp);    /* 2^8 - 2^4 */  | 
1012  | 0  |     felem_mul(tmp, ftmp, e4);  | 
1013  | 0  |     felem_reduce(ftmp, tmp);    /* 2^8 - 2^0 */  | 
1014  | 0  |     felem_assign(e8, ftmp);  | 
1015  | 0  |     for (i = 0; i < 8; i++) { | 
1016  | 0  |         felem_square(tmp, ftmp);  | 
1017  | 0  |         felem_reduce(ftmp, tmp);  | 
1018  | 0  |     }                           /* 2^16 - 2^8 */  | 
1019  | 0  |     felem_mul(tmp, ftmp, e8);  | 
1020  | 0  |     felem_reduce(ftmp, tmp);    /* 2^16 - 2^0 */  | 
1021  | 0  |     felem_assign(e16, ftmp);  | 
1022  | 0  |     for (i = 0; i < 16; i++) { | 
1023  | 0  |         felem_square(tmp, ftmp);  | 
1024  | 0  |         felem_reduce(ftmp, tmp);  | 
1025  | 0  |     }                           /* 2^32 - 2^16 */  | 
1026  | 0  |     felem_mul(tmp, ftmp, e16);  | 
1027  | 0  |     felem_reduce(ftmp, tmp);    /* 2^32 - 2^0 */  | 
1028  | 0  |     felem_assign(e32, ftmp);  | 
1029  | 0  |     for (i = 0; i < 32; i++) { | 
1030  | 0  |         felem_square(tmp, ftmp);  | 
1031  | 0  |         felem_reduce(ftmp, tmp);  | 
1032  | 0  |     }                           /* 2^64 - 2^32 */  | 
1033  | 0  |     felem_assign(e64, ftmp);  | 
1034  | 0  |     felem_mul(tmp, ftmp, in);  | 
1035  | 0  |     felem_reduce(ftmp, tmp);    /* 2^64 - 2^32 + 2^0 */  | 
1036  | 0  |     for (i = 0; i < 192; i++) { | 
1037  | 0  |         felem_square(tmp, ftmp);  | 
1038  | 0  |         felem_reduce(ftmp, tmp);  | 
1039  | 0  |     }                           /* 2^256 - 2^224 + 2^192 */  | 
1040  |  | 
  | 
1041  | 0  |     felem_mul(tmp, e64, e32);  | 
1042  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^64 - 2^0 */  | 
1043  | 0  |     for (i = 0; i < 16; i++) { | 
1044  | 0  |         felem_square(tmp, ftmp2);  | 
1045  | 0  |         felem_reduce(ftmp2, tmp);  | 
1046  | 0  |     }                           /* 2^80 - 2^16 */  | 
1047  | 0  |     felem_mul(tmp, ftmp2, e16);  | 
1048  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^80 - 2^0 */  | 
1049  | 0  |     for (i = 0; i < 8; i++) { | 
1050  | 0  |         felem_square(tmp, ftmp2);  | 
1051  | 0  |         felem_reduce(ftmp2, tmp);  | 
1052  | 0  |     }                           /* 2^88 - 2^8 */  | 
1053  | 0  |     felem_mul(tmp, ftmp2, e8);  | 
1054  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^88 - 2^0 */  | 
1055  | 0  |     for (i = 0; i < 4; i++) { | 
1056  | 0  |         felem_square(tmp, ftmp2);  | 
1057  | 0  |         felem_reduce(ftmp2, tmp);  | 
1058  | 0  |     }                           /* 2^92 - 2^4 */  | 
1059  | 0  |     felem_mul(tmp, ftmp2, e4);  | 
1060  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^92 - 2^0 */  | 
1061  | 0  |     felem_square(tmp, ftmp2);  | 
1062  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^93 - 2^1 */  | 
1063  | 0  |     felem_square(tmp, ftmp2);  | 
1064  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^94 - 2^2 */  | 
1065  | 0  |     felem_mul(tmp, ftmp2, e2);  | 
1066  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^94 - 2^0 */  | 
1067  | 0  |     felem_square(tmp, ftmp2);  | 
1068  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^95 - 2^1 */  | 
1069  | 0  |     felem_square(tmp, ftmp2);  | 
1070  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^96 - 2^2 */  | 
1071  | 0  |     felem_mul(tmp, ftmp2, in);  | 
1072  | 0  |     felem_reduce(ftmp2, tmp);   /* 2^96 - 3 */  | 
1073  |  | 
  | 
1074  | 0  |     felem_mul(tmp, ftmp2, ftmp);  | 
1075  | 0  |     felem_reduce(out, tmp);     /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */  | 
1076  | 0  | }  | 
1077  |  |  | 
1078  |  | static void smallfelem_inv_contract(smallfelem out, const smallfelem in)  | 
1079  | 0  | { | 
1080  | 0  |     felem tmp;  | 
1081  |  | 
  | 
1082  | 0  |     smallfelem_expand(tmp, in);  | 
1083  | 0  |     felem_inv(tmp, tmp);  | 
1084  | 0  |     felem_contract(out, tmp);  | 
1085  | 0  | }  | 
1086  |  |  | 
1087  |  | /*-  | 
1088  |  |  * Group operations  | 
1089  |  |  * ----------------  | 
1090  |  |  *  | 
1091  |  |  * Building on top of the field operations we have the operations on the  | 
1092  |  |  * elliptic curve group itself. Points on the curve are represented in Jacobian  | 
1093  |  |  * coordinates  | 
1094  |  |  */  | 
1095  |  |  | 
1096  |  | /*-  | 
1097  |  |  * point_double calculates 2*(x_in, y_in, z_in)  | 
1098  |  |  *  | 
1099  |  |  * The method is taken from:  | 
1100  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b  | 
1101  |  |  *  | 
1102  |  |  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.  | 
1103  |  |  * while x_out == y_in is not (maybe this works, but it's not tested).  | 
1104  |  |  */  | 
1105  |  | static void  | 
1106  |  | point_double(felem x_out, felem y_out, felem z_out,  | 
1107  |  |              const felem x_in, const felem y_in, const felem z_in)  | 
1108  | 0  | { | 
1109  | 0  |     longfelem tmp, tmp2;  | 
1110  | 0  |     felem delta, gamma, beta, alpha, ftmp, ftmp2;  | 
1111  | 0  |     smallfelem small1, small2;  | 
1112  |  | 
  | 
1113  | 0  |     felem_assign(ftmp, x_in);  | 
1114  |  |     /* ftmp[i] < 2^106 */  | 
1115  | 0  |     felem_assign(ftmp2, x_in);  | 
1116  |  |     /* ftmp2[i] < 2^106 */  | 
1117  |  |  | 
1118  |  |     /* delta = z^2 */  | 
1119  | 0  |     felem_square(tmp, z_in);  | 
1120  | 0  |     felem_reduce(delta, tmp);  | 
1121  |  |     /* delta[i] < 2^101 */  | 
1122  |  |  | 
1123  |  |     /* gamma = y^2 */  | 
1124  | 0  |     felem_square(tmp, y_in);  | 
1125  | 0  |     felem_reduce(gamma, tmp);  | 
1126  |  |     /* gamma[i] < 2^101 */  | 
1127  | 0  |     felem_shrink(small1, gamma);  | 
1128  |  |  | 
1129  |  |     /* beta = x*gamma */  | 
1130  | 0  |     felem_small_mul(tmp, small1, x_in);  | 
1131  | 0  |     felem_reduce(beta, tmp);  | 
1132  |  |     /* beta[i] < 2^101 */  | 
1133  |  |  | 
1134  |  |     /* alpha = 3*(x-delta)*(x+delta) */  | 
1135  | 0  |     felem_diff(ftmp, delta);  | 
1136  |  |     /* ftmp[i] < 2^105 + 2^106 < 2^107 */  | 
1137  | 0  |     felem_sum(ftmp2, delta);  | 
1138  |  |     /* ftmp2[i] < 2^105 + 2^106 < 2^107 */  | 
1139  | 0  |     felem_scalar(ftmp2, 3);  | 
1140  |  |     /* ftmp2[i] < 3 * 2^107 < 2^109 */  | 
1141  | 0  |     felem_mul(tmp, ftmp, ftmp2);  | 
1142  | 0  |     felem_reduce(alpha, tmp);  | 
1143  |  |     /* alpha[i] < 2^101 */  | 
1144  | 0  |     felem_shrink(small2, alpha);  | 
1145  |  |  | 
1146  |  |     /* x' = alpha^2 - 8*beta */  | 
1147  | 0  |     smallfelem_square(tmp, small2);  | 
1148  | 0  |     felem_reduce(x_out, tmp);  | 
1149  | 0  |     felem_assign(ftmp, beta);  | 
1150  | 0  |     felem_scalar(ftmp, 8);  | 
1151  |  |     /* ftmp[i] < 8 * 2^101 = 2^104 */  | 
1152  | 0  |     felem_diff(x_out, ftmp);  | 
1153  |  |     /* x_out[i] < 2^105 + 2^101 < 2^106 */  | 
1154  |  |  | 
1155  |  |     /* z' = (y + z)^2 - gamma - delta */  | 
1156  | 0  |     felem_sum(delta, gamma);  | 
1157  |  |     /* delta[i] < 2^101 + 2^101 = 2^102 */  | 
1158  | 0  |     felem_assign(ftmp, y_in);  | 
1159  | 0  |     felem_sum(ftmp, z_in);  | 
1160  |  |     /* ftmp[i] < 2^106 + 2^106 = 2^107 */  | 
1161  | 0  |     felem_square(tmp, ftmp);  | 
1162  | 0  |     felem_reduce(z_out, tmp);  | 
1163  | 0  |     felem_diff(z_out, delta);  | 
1164  |  |     /* z_out[i] < 2^105 + 2^101 < 2^106 */  | 
1165  |  |  | 
1166  |  |     /* y' = alpha*(4*beta - x') - 8*gamma^2 */  | 
1167  | 0  |     felem_scalar(beta, 4);  | 
1168  |  |     /* beta[i] < 4 * 2^101 = 2^103 */  | 
1169  | 0  |     felem_diff_zero107(beta, x_out);  | 
1170  |  |     /* beta[i] < 2^107 + 2^103 < 2^108 */  | 
1171  | 0  |     felem_small_mul(tmp, small2, beta);  | 
1172  |  |     /* tmp[i] < 7 * 2^64 < 2^67 */  | 
1173  | 0  |     smallfelem_square(tmp2, small1);  | 
1174  |  |     /* tmp2[i] < 7 * 2^64 */  | 
1175  | 0  |     longfelem_scalar(tmp2, 8);  | 
1176  |  |     /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */  | 
1177  | 0  |     longfelem_diff(tmp, tmp2);  | 
1178  |  |     /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */  | 
1179  | 0  |     felem_reduce_zero105(y_out, tmp);  | 
1180  |  |     /* y_out[i] < 2^106 */  | 
1181  | 0  | }  | 
1182  |  |  | 
1183  |  | /*  | 
1184  |  |  * point_double_small is the same as point_double, except that it operates on  | 
1185  |  |  * smallfelems  | 
1186  |  |  */  | 
1187  |  | static void  | 
1188  |  | point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,  | 
1189  |  |                    const smallfelem x_in, const smallfelem y_in,  | 
1190  |  |                    const smallfelem z_in)  | 
1191  | 0  | { | 
1192  | 0  |     felem felem_x_out, felem_y_out, felem_z_out;  | 
1193  | 0  |     felem felem_x_in, felem_y_in, felem_z_in;  | 
1194  |  | 
  | 
1195  | 0  |     smallfelem_expand(felem_x_in, x_in);  | 
1196  | 0  |     smallfelem_expand(felem_y_in, y_in);  | 
1197  | 0  |     smallfelem_expand(felem_z_in, z_in);  | 
1198  | 0  |     point_double(felem_x_out, felem_y_out, felem_z_out,  | 
1199  | 0  |                  felem_x_in, felem_y_in, felem_z_in);  | 
1200  | 0  |     felem_shrink(x_out, felem_x_out);  | 
1201  | 0  |     felem_shrink(y_out, felem_y_out);  | 
1202  | 0  |     felem_shrink(z_out, felem_z_out);  | 
1203  | 0  | }  | 
1204  |  |  | 
1205  |  | /* copy_conditional copies in to out iff mask is all ones. */  | 
1206  |  | static void copy_conditional(felem out, const felem in, limb mask)  | 
1207  | 0  | { | 
1208  | 0  |     unsigned i;  | 
1209  | 0  |     for (i = 0; i < NLIMBS; ++i) { | 
1210  | 0  |         const limb tmp = mask & (in[i] ^ out[i]);  | 
1211  | 0  |         out[i] ^= tmp;  | 
1212  | 0  |     }  | 
1213  | 0  | }  | 
1214  |  |  | 
1215  |  | /* copy_small_conditional copies in to out iff mask is all ones. */  | 
1216  |  | static void copy_small_conditional(felem out, const smallfelem in, limb mask)  | 
1217  | 0  | { | 
1218  | 0  |     unsigned i;  | 
1219  | 0  |     const u64 mask64 = mask;  | 
1220  | 0  |     for (i = 0; i < NLIMBS; ++i) { | 
1221  | 0  |         out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);  | 
1222  | 0  |     }  | 
1223  | 0  | }  | 
1224  |  |  | 
1225  |  | /*-  | 
1226  |  |  * point_add calculates (x1, y1, z1) + (x2, y2, z2)  | 
1227  |  |  *  | 
1228  |  |  * The method is taken from:  | 
1229  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,  | 
1230  |  |  * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).  | 
1231  |  |  *  | 
1232  |  |  * This function includes a branch for checking whether the two input points  | 
1233  |  |  * are equal, (while not equal to the point at infinity). This case never  | 
1234  |  |  * happens during single point multiplication, so there is no timing leak for  | 
1235  |  |  * ECDH or ECDSA signing.  | 
1236  |  |  */  | 
1237  |  | static void point_add(felem x3, felem y3, felem z3,  | 
1238  |  |                       const felem x1, const felem y1, const felem z1,  | 
1239  |  |                       const int mixed, const smallfelem x2,  | 
1240  |  |                       const smallfelem y2, const smallfelem z2)  | 
1241  | 0  | { | 
1242  | 0  |     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;  | 
1243  | 0  |     longfelem tmp, tmp2;  | 
1244  | 0  |     smallfelem small1, small2, small3, small4, small5;  | 
1245  | 0  |     limb x_equal, y_equal, z1_is_zero, z2_is_zero;  | 
1246  | 0  |     limb points_equal;  | 
1247  |  | 
  | 
1248  | 0  |     felem_shrink(small3, z1);  | 
1249  |  | 
  | 
1250  | 0  |     z1_is_zero = smallfelem_is_zero(small3);  | 
1251  | 0  |     z2_is_zero = smallfelem_is_zero(z2);  | 
1252  |  |  | 
1253  |  |     /* ftmp = z1z1 = z1**2 */  | 
1254  | 0  |     smallfelem_square(tmp, small3);  | 
1255  | 0  |     felem_reduce(ftmp, tmp);  | 
1256  |  |     /* ftmp[i] < 2^101 */  | 
1257  | 0  |     felem_shrink(small1, ftmp);  | 
1258  |  | 
  | 
1259  | 0  |     if (!mixed) { | 
1260  |  |         /* ftmp2 = z2z2 = z2**2 */  | 
1261  | 0  |         smallfelem_square(tmp, z2);  | 
1262  | 0  |         felem_reduce(ftmp2, tmp);  | 
1263  |  |         /* ftmp2[i] < 2^101 */  | 
1264  | 0  |         felem_shrink(small2, ftmp2);  | 
1265  |  | 
  | 
1266  | 0  |         felem_shrink(small5, x1);  | 
1267  |  |  | 
1268  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1269  | 0  |         smallfelem_mul(tmp, small5, small2);  | 
1270  | 0  |         felem_reduce(ftmp3, tmp);  | 
1271  |  |         /* ftmp3[i] < 2^101 */  | 
1272  |  |  | 
1273  |  |         /* ftmp5 = z1 + z2 */  | 
1274  | 0  |         felem_assign(ftmp5, z1);  | 
1275  | 0  |         felem_small_sum(ftmp5, z2);  | 
1276  |  |         /* ftmp5[i] < 2^107 */  | 
1277  |  |  | 
1278  |  |         /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */  | 
1279  | 0  |         felem_square(tmp, ftmp5);  | 
1280  | 0  |         felem_reduce(ftmp5, tmp);  | 
1281  |  |         /* ftmp2 = z2z2 + z1z1 */  | 
1282  | 0  |         felem_sum(ftmp2, ftmp);  | 
1283  |  |         /* ftmp2[i] < 2^101 + 2^101 = 2^102 */  | 
1284  | 0  |         felem_diff(ftmp5, ftmp2);  | 
1285  |  |         /* ftmp5[i] < 2^105 + 2^101 < 2^106 */  | 
1286  |  |  | 
1287  |  |         /* ftmp2 = z2 * z2z2 */  | 
1288  | 0  |         smallfelem_mul(tmp, small2, z2);  | 
1289  | 0  |         felem_reduce(ftmp2, tmp);  | 
1290  |  |  | 
1291  |  |         /* s1 = ftmp2 = y1 * z2**3 */  | 
1292  | 0  |         felem_mul(tmp, y1, ftmp2);  | 
1293  | 0  |         felem_reduce(ftmp6, tmp);  | 
1294  |  |         /* ftmp6[i] < 2^101 */  | 
1295  | 0  |     } else { | 
1296  |  |         /*  | 
1297  |  |          * We'll assume z2 = 1 (special case z2 = 0 is handled later)  | 
1298  |  |          */  | 
1299  |  |  | 
1300  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1301  | 0  |         felem_assign(ftmp3, x1);  | 
1302  |  |         /* ftmp3[i] < 2^106 */  | 
1303  |  |  | 
1304  |  |         /* ftmp5 = 2z1z2 */  | 
1305  | 0  |         felem_assign(ftmp5, z1);  | 
1306  | 0  |         felem_scalar(ftmp5, 2);  | 
1307  |  |         /* ftmp5[i] < 2*2^106 = 2^107 */  | 
1308  |  |  | 
1309  |  |         /* s1 = ftmp2 = y1 * z2**3 */  | 
1310  | 0  |         felem_assign(ftmp6, y1);  | 
1311  |  |         /* ftmp6[i] < 2^106 */  | 
1312  | 0  |     }  | 
1313  |  |  | 
1314  |  |     /* u2 = x2*z1z1 */  | 
1315  | 0  |     smallfelem_mul(tmp, x2, small1);  | 
1316  | 0  |     felem_reduce(ftmp4, tmp);  | 
1317  |  |  | 
1318  |  |     /* h = ftmp4 = u2 - u1 */  | 
1319  | 0  |     felem_diff_zero107(ftmp4, ftmp3);  | 
1320  |  |     /* ftmp4[i] < 2^107 + 2^101 < 2^108 */  | 
1321  | 0  |     felem_shrink(small4, ftmp4);  | 
1322  |  | 
  | 
1323  | 0  |     x_equal = smallfelem_is_zero(small4);  | 
1324  |  |  | 
1325  |  |     /* z_out = ftmp5 * h */  | 
1326  | 0  |     felem_small_mul(tmp, small4, ftmp5);  | 
1327  | 0  |     felem_reduce(z_out, tmp);  | 
1328  |  |     /* z_out[i] < 2^101 */  | 
1329  |  |  | 
1330  |  |     /* ftmp = z1 * z1z1 */  | 
1331  | 0  |     smallfelem_mul(tmp, small1, small3);  | 
1332  | 0  |     felem_reduce(ftmp, tmp);  | 
1333  |  |  | 
1334  |  |     /* s2 = tmp = y2 * z1**3 */  | 
1335  | 0  |     felem_small_mul(tmp, y2, ftmp);  | 
1336  | 0  |     felem_reduce(ftmp5, tmp);  | 
1337  |  |  | 
1338  |  |     /* r = ftmp5 = (s2 - s1)*2 */  | 
1339  | 0  |     felem_diff_zero107(ftmp5, ftmp6);  | 
1340  |  |     /* ftmp5[i] < 2^107 + 2^107 = 2^108 */  | 
1341  | 0  |     felem_scalar(ftmp5, 2);  | 
1342  |  |     /* ftmp5[i] < 2^109 */  | 
1343  | 0  |     felem_shrink(small1, ftmp5);  | 
1344  | 0  |     y_equal = smallfelem_is_zero(small1);  | 
1345  |  |  | 
1346  |  |     /*  | 
1347  |  |      * The formulae are incorrect if the points are equal, in affine coordinates  | 
1348  |  |      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this  | 
1349  |  |      * happens.  | 
1350  |  |      *  | 
1351  |  |      * We use bitwise operations to avoid potential side-channels introduced by  | 
1352  |  |      * the short-circuiting behaviour of boolean operators.  | 
1353  |  |      *  | 
1354  |  |      * The special case of either point being the point at infinity (z1 and/or  | 
1355  |  |      * z2 are zero), is handled separately later on in this function, so we  | 
1356  |  |      * avoid jumping to point_double here in those special cases.  | 
1357  |  |      */  | 
1358  | 0  |     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));  | 
1359  |  | 
  | 
1360  | 0  |     if (points_equal) { | 
1361  |  |         /*  | 
1362  |  |          * This is obviously not constant-time but, as mentioned before, this  | 
1363  |  |          * case never happens during single point multiplication, so there is no  | 
1364  |  |          * timing leak for ECDH or ECDSA signing.  | 
1365  |  |          */  | 
1366  | 0  |         point_double(x3, y3, z3, x1, y1, z1);  | 
1367  | 0  |         return;  | 
1368  | 0  |     }  | 
1369  |  |  | 
1370  |  |     /* I = ftmp = (2h)**2 */  | 
1371  | 0  |     felem_assign(ftmp, ftmp4);  | 
1372  | 0  |     felem_scalar(ftmp, 2);  | 
1373  |  |     /* ftmp[i] < 2*2^108 = 2^109 */  | 
1374  | 0  |     felem_square(tmp, ftmp);  | 
1375  | 0  |     felem_reduce(ftmp, tmp);  | 
1376  |  |  | 
1377  |  |     /* J = ftmp2 = h * I */  | 
1378  | 0  |     felem_mul(tmp, ftmp4, ftmp);  | 
1379  | 0  |     felem_reduce(ftmp2, tmp);  | 
1380  |  |  | 
1381  |  |     /* V = ftmp4 = U1 * I */  | 
1382  | 0  |     felem_mul(tmp, ftmp3, ftmp);  | 
1383  | 0  |     felem_reduce(ftmp4, tmp);  | 
1384  |  |  | 
1385  |  |     /* x_out = r**2 - J - 2V */  | 
1386  | 0  |     smallfelem_square(tmp, small1);  | 
1387  | 0  |     felem_reduce(x_out, tmp);  | 
1388  | 0  |     felem_assign(ftmp3, ftmp4);  | 
1389  | 0  |     felem_scalar(ftmp4, 2);  | 
1390  | 0  |     felem_sum(ftmp4, ftmp2);  | 
1391  |  |     /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */  | 
1392  | 0  |     felem_diff(x_out, ftmp4);  | 
1393  |  |     /* x_out[i] < 2^105 + 2^101 */  | 
1394  |  |  | 
1395  |  |     /* y_out = r(V-x_out) - 2 * s1 * J */  | 
1396  | 0  |     felem_diff_zero107(ftmp3, x_out);  | 
1397  |  |     /* ftmp3[i] < 2^107 + 2^101 < 2^108 */  | 
1398  | 0  |     felem_small_mul(tmp, small1, ftmp3);  | 
1399  | 0  |     felem_mul(tmp2, ftmp6, ftmp2);  | 
1400  | 0  |     longfelem_scalar(tmp2, 2);  | 
1401  |  |     /* tmp2[i] < 2*2^67 = 2^68 */  | 
1402  | 0  |     longfelem_diff(tmp, tmp2);  | 
1403  |  |     /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */  | 
1404  | 0  |     felem_reduce_zero105(y_out, tmp);  | 
1405  |  |     /* y_out[i] < 2^106 */  | 
1406  |  | 
  | 
1407  | 0  |     copy_small_conditional(x_out, x2, z1_is_zero);  | 
1408  | 0  |     copy_conditional(x_out, x1, z2_is_zero);  | 
1409  | 0  |     copy_small_conditional(y_out, y2, z1_is_zero);  | 
1410  | 0  |     copy_conditional(y_out, y1, z2_is_zero);  | 
1411  | 0  |     copy_small_conditional(z_out, z2, z1_is_zero);  | 
1412  | 0  |     copy_conditional(z_out, z1, z2_is_zero);  | 
1413  | 0  |     felem_assign(x3, x_out);  | 
1414  | 0  |     felem_assign(y3, y_out);  | 
1415  | 0  |     felem_assign(z3, z_out);  | 
1416  | 0  | }  | 
1417  |  |  | 
1418  |  | /*  | 
1419  |  |  * point_add_small is the same as point_add, except that it operates on  | 
1420  |  |  * smallfelems  | 
1421  |  |  */  | 
1422  |  | static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,  | 
1423  |  |                             smallfelem x1, smallfelem y1, smallfelem z1,  | 
1424  |  |                             smallfelem x2, smallfelem y2, smallfelem z2)  | 
1425  | 0  | { | 
1426  | 0  |     felem felem_x3, felem_y3, felem_z3;  | 
1427  | 0  |     felem felem_x1, felem_y1, felem_z1;  | 
1428  | 0  |     smallfelem_expand(felem_x1, x1);  | 
1429  | 0  |     smallfelem_expand(felem_y1, y1);  | 
1430  | 0  |     smallfelem_expand(felem_z1, z1);  | 
1431  | 0  |     point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,  | 
1432  | 0  |               x2, y2, z2);  | 
1433  | 0  |     felem_shrink(x3, felem_x3);  | 
1434  | 0  |     felem_shrink(y3, felem_y3);  | 
1435  | 0  |     felem_shrink(z3, felem_z3);  | 
1436  | 0  | }  | 
1437  |  |  | 
1438  |  | /*-  | 
1439  |  |  * Base point pre computation  | 
1440  |  |  * --------------------------  | 
1441  |  |  *  | 
1442  |  |  * Two different sorts of precomputed tables are used in the following code.  | 
1443  |  |  * Each contain various points on the curve, where each point is three field  | 
1444  |  |  * elements (x, y, z).  | 
1445  |  |  *  | 
1446  |  |  * For the base point table, z is usually 1 (0 for the point at infinity).  | 
1447  |  |  * This table has 2 * 16 elements, starting with the following:  | 
1448  |  |  * index | bits    | point  | 
1449  |  |  * ------+---------+------------------------------  | 
1450  |  |  *     0 | 0 0 0 0 | 0G  | 
1451  |  |  *     1 | 0 0 0 1 | 1G  | 
1452  |  |  *     2 | 0 0 1 0 | 2^64G  | 
1453  |  |  *     3 | 0 0 1 1 | (2^64 + 1)G  | 
1454  |  |  *     4 | 0 1 0 0 | 2^128G  | 
1455  |  |  *     5 | 0 1 0 1 | (2^128 + 1)G  | 
1456  |  |  *     6 | 0 1 1 0 | (2^128 + 2^64)G  | 
1457  |  |  *     7 | 0 1 1 1 | (2^128 + 2^64 + 1)G  | 
1458  |  |  *     8 | 1 0 0 0 | 2^192G  | 
1459  |  |  *     9 | 1 0 0 1 | (2^192 + 1)G  | 
1460  |  |  *    10 | 1 0 1 0 | (2^192 + 2^64)G  | 
1461  |  |  *    11 | 1 0 1 1 | (2^192 + 2^64 + 1)G  | 
1462  |  |  *    12 | 1 1 0 0 | (2^192 + 2^128)G  | 
1463  |  |  *    13 | 1 1 0 1 | (2^192 + 2^128 + 1)G  | 
1464  |  |  *    14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G  | 
1465  |  |  *    15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G  | 
1466  |  |  * followed by a copy of this with each element multiplied by 2^32.  | 
1467  |  |  *  | 
1468  |  |  * The reason for this is so that we can clock bits into four different  | 
1469  |  |  * locations when doing simple scalar multiplies against the base point,  | 
1470  |  |  * and then another four locations using the second 16 elements.  | 
1471  |  |  *  | 
1472  |  |  * Tables for other points have table[i] = iG for i in 0 .. 16. */  | 
1473  |  |  | 
1474  |  | /* gmul is the table of precomputed base points */  | 
1475  |  | static const smallfelem gmul[2][16][3] = { | 
1476  |  |     {{{0, 0, 0, 0}, | 
1477  |  |       {0, 0, 0, 0}, | 
1478  |  |       {0, 0, 0, 0}}, | 
1479  |  |      {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, | 
1480  |  |        0x6b17d1f2e12c4247},  | 
1481  |  |       {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, | 
1482  |  |        0x4fe342e2fe1a7f9b},  | 
1483  |  |       {1, 0, 0, 0}}, | 
1484  |  |      {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, | 
1485  |  |        0x0fa822bc2811aaa5},  | 
1486  |  |       {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, | 
1487  |  |        0xbff44ae8f5dba80d},  | 
1488  |  |       {1, 0, 0, 0}}, | 
1489  |  |      {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, | 
1490  |  |        0x300a4bbc89d6726f},  | 
1491  |  |       {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, | 
1492  |  |        0x72aac7e0d09b4644},  | 
1493  |  |       {1, 0, 0, 0}}, | 
1494  |  |      {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, | 
1495  |  |        0x447d739beedb5e67},  | 
1496  |  |       {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, | 
1497  |  |        0x2d4825ab834131ee},  | 
1498  |  |       {1, 0, 0, 0}}, | 
1499  |  |      {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, | 
1500  |  |        0xef9519328a9c72ff},  | 
1501  |  |       {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, | 
1502  |  |        0x611e9fc37dbb2c9b},  | 
1503  |  |       {1, 0, 0, 0}}, | 
1504  |  |      {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, | 
1505  |  |        0x550663797b51f5d8},  | 
1506  |  |       {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, | 
1507  |  |        0x157164848aecb851},  | 
1508  |  |       {1, 0, 0, 0}}, | 
1509  |  |      {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, | 
1510  |  |        0xeb5d7745b21141ea},  | 
1511  |  |       {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, | 
1512  |  |        0xeafd72ebdbecc17b},  | 
1513  |  |       {1, 0, 0, 0}}, | 
1514  |  |      {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, | 
1515  |  |        0xa6d39677a7849276},  | 
1516  |  |       {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, | 
1517  |  |        0x674f84749b0b8816},  | 
1518  |  |       {1, 0, 0, 0}}, | 
1519  |  |      {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, | 
1520  |  |        0x4e769e7672c9ddad},  | 
1521  |  |       {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, | 
1522  |  |        0x42b99082de830663},  | 
1523  |  |       {1, 0, 0, 0}}, | 
1524  |  |      {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, | 
1525  |  |        0x78878ef61c6ce04d},  | 
1526  |  |       {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, | 
1527  |  |        0xb6cb3f5d7b72c321},  | 
1528  |  |       {1, 0, 0, 0}}, | 
1529  |  |      {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, | 
1530  |  |        0x0c88bc4d716b1287},  | 
1531  |  |       {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, | 
1532  |  |        0xdd5ddea3f3901dc6},  | 
1533  |  |       {1, 0, 0, 0}}, | 
1534  |  |      {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, | 
1535  |  |        0x68f344af6b317466},  | 
1536  |  |       {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, | 
1537  |  |        0x31b9c405f8540a20},  | 
1538  |  |       {1, 0, 0, 0}}, | 
1539  |  |      {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, | 
1540  |  |        0x4052bf4b6f461db9},  | 
1541  |  |       {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, | 
1542  |  |        0xfecf4d5190b0fc61},  | 
1543  |  |       {1, 0, 0, 0}}, | 
1544  |  |      {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, | 
1545  |  |        0x1eddbae2c802e41a},  | 
1546  |  |       {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, | 
1547  |  |        0x43104d86560ebcfc},  | 
1548  |  |       {1, 0, 0, 0}}, | 
1549  |  |      {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, | 
1550  |  |        0xb48e26b484f7a21c},  | 
1551  |  |       {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, | 
1552  |  |        0xfac015404d4d3dab},  | 
1553  |  |       {1, 0, 0, 0}}}, | 
1554  |  |     {{{0, 0, 0, 0}, | 
1555  |  |       {0, 0, 0, 0}, | 
1556  |  |       {0, 0, 0, 0}}, | 
1557  |  |      {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, | 
1558  |  |        0x7fe36b40af22af89},  | 
1559  |  |       {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, | 
1560  |  |        0xe697d45825b63624},  | 
1561  |  |       {1, 0, 0, 0}}, | 
1562  |  |      {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, | 
1563  |  |        0x4a5b506612a677a6},  | 
1564  |  |       {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, | 
1565  |  |        0xeb13461ceac089f1},  | 
1566  |  |       {1, 0, 0, 0}}, | 
1567  |  |      {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, | 
1568  |  |        0x0781b8291c6a220a},  | 
1569  |  |       {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, | 
1570  |  |        0x690cde8df0151593},  | 
1571  |  |       {1, 0, 0, 0}}, | 
1572  |  |      {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, | 
1573  |  |        0x8a535f566ec73617},  | 
1574  |  |       {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, | 
1575  |  |        0x0455c08468b08bd7},  | 
1576  |  |       {1, 0, 0, 0}}, | 
1577  |  |      {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, | 
1578  |  |        0x06bada7ab77f8276},  | 
1579  |  |       {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, | 
1580  |  |        0x5b476dfd0e6cb18a},  | 
1581  |  |       {1, 0, 0, 0}}, | 
1582  |  |      {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, | 
1583  |  |        0x3e29864e8a2ec908},  | 
1584  |  |       {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, | 
1585  |  |        0x239b90ea3dc31e7e},  | 
1586  |  |       {1, 0, 0, 0}}, | 
1587  |  |      {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, | 
1588  |  |        0x820f4dd949f72ff7},  | 
1589  |  |       {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, | 
1590  |  |        0x140406ec783a05ec},  | 
1591  |  |       {1, 0, 0, 0}}, | 
1592  |  |      {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, | 
1593  |  |        0x68f6b8542783dfee},  | 
1594  |  |       {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, | 
1595  |  |        0xcbe1feba92e40ce6},  | 
1596  |  |       {1, 0, 0, 0}}, | 
1597  |  |      {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, | 
1598  |  |        0xd0b2f94d2f420109},  | 
1599  |  |       {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, | 
1600  |  |        0x971459828b0719e5},  | 
1601  |  |       {1, 0, 0, 0}}, | 
1602  |  |      {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, | 
1603  |  |        0x961610004a866aba},  | 
1604  |  |       {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, | 
1605  |  |        0x7acb9fadcee75e44},  | 
1606  |  |       {1, 0, 0, 0}}, | 
1607  |  |      {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, | 
1608  |  |        0x24eb9acca333bf5b},  | 
1609  |  |       {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, | 
1610  |  |        0x69f891c5acd079cc},  | 
1611  |  |       {1, 0, 0, 0}}, | 
1612  |  |      {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, | 
1613  |  |        0xe51f547c5972a107},  | 
1614  |  |       {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, | 
1615  |  |        0x1c309a2b25bb1387},  | 
1616  |  |       {1, 0, 0, 0}}, | 
1617  |  |      {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, | 
1618  |  |        0x20b87b8aa2c4e503},  | 
1619  |  |       {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, | 
1620  |  |        0xf5c6fa49919776be},  | 
1621  |  |       {1, 0, 0, 0}}, | 
1622  |  |      {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, | 
1623  |  |        0x1ed7d1b9332010b9},  | 
1624  |  |       {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, | 
1625  |  |        0x3a2b03f03217257a},  | 
1626  |  |       {1, 0, 0, 0}}, | 
1627  |  |      {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, | 
1628  |  |        0x15fee545c78dd9f6},  | 
1629  |  |       {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, | 
1630  |  |        0x4ab5b6b2b8753f81},  | 
1631  |  |       {1, 0, 0, 0}}} | 
1632  |  | };  | 
1633  |  |  | 
1634  |  | /*  | 
1635  |  |  * select_point selects the |idx|th point from a precomputation table and  | 
1636  |  |  * copies it to out.  | 
1637  |  |  */  | 
1638  |  | static void select_point(const u64 idx, unsigned int size,  | 
1639  |  |                          const smallfelem pre_comp[16][3], smallfelem out[3])  | 
1640  | 0  | { | 
1641  | 0  |     unsigned i, j;  | 
1642  | 0  |     u64 *outlimbs = &out[0][0];  | 
1643  |  | 
  | 
1644  | 0  |     memset(out, 0, sizeof(*out) * 3);  | 
1645  |  | 
  | 
1646  | 0  |     for (i = 0; i < size; i++) { | 
1647  | 0  |         const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];  | 
1648  | 0  |         u64 mask = i ^ idx;  | 
1649  | 0  |         mask |= mask >> 4;  | 
1650  | 0  |         mask |= mask >> 2;  | 
1651  | 0  |         mask |= mask >> 1;  | 
1652  | 0  |         mask &= 1;  | 
1653  | 0  |         mask--;  | 
1654  | 0  |         for (j = 0; j < NLIMBS * 3; j++)  | 
1655  | 0  |             outlimbs[j] |= inlimbs[j] & mask;  | 
1656  | 0  |     }  | 
1657  | 0  | }  | 
1658  |  |  | 
1659  |  | /* get_bit returns the |i|th bit in |in| */  | 
1660  |  | static char get_bit(const felem_bytearray in, int i)  | 
1661  | 0  | { | 
1662  | 0  |     if ((i < 0) || (i >= 256))  | 
1663  | 0  |         return 0;  | 
1664  | 0  |     return (in[i >> 3] >> (i & 7)) & 1;  | 
1665  | 0  | }  | 
1666  |  |  | 
1667  |  | /*  | 
1668  |  |  * Interleaved point multiplication using precomputed point multiples: The  | 
1669  |  |  * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars  | 
1670  |  |  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the  | 
1671  |  |  * generator, using certain (large) precomputed multiples in g_pre_comp.  | 
1672  |  |  * Output point (X, Y, Z) is stored in x_out, y_out, z_out  | 
1673  |  |  */  | 
1674  |  | static void batch_mul(felem x_out, felem y_out, felem z_out,  | 
1675  |  |                       const felem_bytearray scalars[],  | 
1676  |  |                       const unsigned num_points, const u8 *g_scalar,  | 
1677  |  |                       const int mixed, const smallfelem pre_comp[][17][3],  | 
1678  |  |                       const smallfelem g_pre_comp[2][16][3])  | 
1679  | 0  | { | 
1680  | 0  |     int i, skip;  | 
1681  | 0  |     unsigned num, gen_mul = (g_scalar != NULL);  | 
1682  | 0  |     felem nq[3], ftmp;  | 
1683  | 0  |     smallfelem tmp[3];  | 
1684  | 0  |     u64 bits;  | 
1685  | 0  |     u8 sign, digit;  | 
1686  |  |  | 
1687  |  |     /* set nq to the point at infinity */  | 
1688  | 0  |     memset(nq, 0, sizeof(nq));  | 
1689  |  |  | 
1690  |  |     /*  | 
1691  |  |      * Loop over all scalars msb-to-lsb, interleaving additions of multiples  | 
1692  |  |      * of the generator (two in each of the last 32 rounds) and additions of  | 
1693  |  |      * other points multiples (every 5th round).  | 
1694  |  |      */  | 
1695  | 0  |     skip = 1;                   /* save two point operations in the first  | 
1696  |  |                                  * round */  | 
1697  | 0  |     for (i = (num_points ? 255 : 31); i >= 0; --i) { | 
1698  |  |         /* double */  | 
1699  | 0  |         if (!skip)  | 
1700  | 0  |             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);  | 
1701  |  |  | 
1702  |  |         /* add multiples of the generator */  | 
1703  | 0  |         if (gen_mul && (i <= 31)) { | 
1704  |  |             /* first, look 32 bits upwards */  | 
1705  | 0  |             bits = get_bit(g_scalar, i + 224) << 3;  | 
1706  | 0  |             bits |= get_bit(g_scalar, i + 160) << 2;  | 
1707  | 0  |             bits |= get_bit(g_scalar, i + 96) << 1;  | 
1708  | 0  |             bits |= get_bit(g_scalar, i + 32);  | 
1709  |  |             /* select the point to add, in constant time */  | 
1710  | 0  |             select_point(bits, 16, g_pre_comp[1], tmp);  | 
1711  |  | 
  | 
1712  | 0  |             if (!skip) { | 
1713  |  |                 /* Arg 1 below is for "mixed" */  | 
1714  | 0  |                 point_add(nq[0], nq[1], nq[2],  | 
1715  | 0  |                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);  | 
1716  | 0  |             } else { | 
1717  | 0  |                 smallfelem_expand(nq[0], tmp[0]);  | 
1718  | 0  |                 smallfelem_expand(nq[1], tmp[1]);  | 
1719  | 0  |                 smallfelem_expand(nq[2], tmp[2]);  | 
1720  | 0  |                 skip = 0;  | 
1721  | 0  |             }  | 
1722  |  |  | 
1723  |  |             /* second, look at the current position */  | 
1724  | 0  |             bits = get_bit(g_scalar, i + 192) << 3;  | 
1725  | 0  |             bits |= get_bit(g_scalar, i + 128) << 2;  | 
1726  | 0  |             bits |= get_bit(g_scalar, i + 64) << 1;  | 
1727  | 0  |             bits |= get_bit(g_scalar, i);  | 
1728  |  |             /* select the point to add, in constant time */  | 
1729  | 0  |             select_point(bits, 16, g_pre_comp[0], tmp);  | 
1730  |  |             /* Arg 1 below is for "mixed" */  | 
1731  | 0  |             point_add(nq[0], nq[1], nq[2],  | 
1732  | 0  |                       nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);  | 
1733  | 0  |         }  | 
1734  |  |  | 
1735  |  |         /* do other additions every 5 doublings */  | 
1736  | 0  |         if (num_points && (i % 5 == 0)) { | 
1737  |  |             /* loop over all scalars */  | 
1738  | 0  |             for (num = 0; num < num_points; ++num) { | 
1739  | 0  |                 bits = get_bit(scalars[num], i + 4) << 5;  | 
1740  | 0  |                 bits |= get_bit(scalars[num], i + 3) << 4;  | 
1741  | 0  |                 bits |= get_bit(scalars[num], i + 2) << 3;  | 
1742  | 0  |                 bits |= get_bit(scalars[num], i + 1) << 2;  | 
1743  | 0  |                 bits |= get_bit(scalars[num], i) << 1;  | 
1744  | 0  |                 bits |= get_bit(scalars[num], i - 1);  | 
1745  | 0  |                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);  | 
1746  |  |  | 
1747  |  |                 /*  | 
1748  |  |                  * select the point to add or subtract, in constant time  | 
1749  |  |                  */  | 
1750  | 0  |                 select_point(digit, 17, pre_comp[num], tmp);  | 
1751  | 0  |                 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative  | 
1752  |  |                                                * point */  | 
1753  | 0  |                 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));  | 
1754  | 0  |                 felem_contract(tmp[1], ftmp);  | 
1755  |  | 
  | 
1756  | 0  |                 if (!skip) { | 
1757  | 0  |                     point_add(nq[0], nq[1], nq[2],  | 
1758  | 0  |                               nq[0], nq[1], nq[2],  | 
1759  | 0  |                               mixed, tmp[0], tmp[1], tmp[2]);  | 
1760  | 0  |                 } else { | 
1761  | 0  |                     smallfelem_expand(nq[0], tmp[0]);  | 
1762  | 0  |                     smallfelem_expand(nq[1], tmp[1]);  | 
1763  | 0  |                     smallfelem_expand(nq[2], tmp[2]);  | 
1764  | 0  |                     skip = 0;  | 
1765  | 0  |                 }  | 
1766  | 0  |             }  | 
1767  | 0  |         }  | 
1768  | 0  |     }  | 
1769  | 0  |     felem_assign(x_out, nq[0]);  | 
1770  | 0  |     felem_assign(y_out, nq[1]);  | 
1771  | 0  |     felem_assign(z_out, nq[2]);  | 
1772  | 0  | }  | 
1773  |  |  | 
1774  |  | /* Precomputation for the group generator. */  | 
1775  |  | struct nistp256_pre_comp_st { | 
1776  |  |     smallfelem g_pre_comp[2][16][3];  | 
1777  |  |     CRYPTO_REF_COUNT references;  | 
1778  |  | };  | 
1779  |  |  | 
1780  |  | const EC_METHOD *EC_GFp_nistp256_method(void)  | 
1781  | 0  | { | 
1782  | 0  |     static const EC_METHOD ret = { | 
1783  | 0  |         EC_FLAGS_DEFAULT_OCT,  | 
1784  | 0  |         NID_X9_62_prime_field,  | 
1785  | 0  |         ossl_ec_GFp_nistp256_group_init,  | 
1786  | 0  |         ossl_ec_GFp_simple_group_finish,  | 
1787  | 0  |         ossl_ec_GFp_simple_group_clear_finish,  | 
1788  | 0  |         ossl_ec_GFp_nist_group_copy,  | 
1789  | 0  |         ossl_ec_GFp_nistp256_group_set_curve,  | 
1790  | 0  |         ossl_ec_GFp_simple_group_get_curve,  | 
1791  | 0  |         ossl_ec_GFp_simple_group_get_degree,  | 
1792  | 0  |         ossl_ec_group_simple_order_bits,  | 
1793  | 0  |         ossl_ec_GFp_simple_group_check_discriminant,  | 
1794  | 0  |         ossl_ec_GFp_simple_point_init,  | 
1795  | 0  |         ossl_ec_GFp_simple_point_finish,  | 
1796  | 0  |         ossl_ec_GFp_simple_point_clear_finish,  | 
1797  | 0  |         ossl_ec_GFp_simple_point_copy,  | 
1798  | 0  |         ossl_ec_GFp_simple_point_set_to_infinity,  | 
1799  | 0  |         ossl_ec_GFp_simple_point_set_affine_coordinates,  | 
1800  | 0  |         ossl_ec_GFp_nistp256_point_get_affine_coordinates,  | 
1801  | 0  |         0 /* point_set_compressed_coordinates */ ,  | 
1802  | 0  |         0 /* point2oct */ ,  | 
1803  | 0  |         0 /* oct2point */ ,  | 
1804  | 0  |         ossl_ec_GFp_simple_add,  | 
1805  | 0  |         ossl_ec_GFp_simple_dbl,  | 
1806  | 0  |         ossl_ec_GFp_simple_invert,  | 
1807  | 0  |         ossl_ec_GFp_simple_is_at_infinity,  | 
1808  | 0  |         ossl_ec_GFp_simple_is_on_curve,  | 
1809  | 0  |         ossl_ec_GFp_simple_cmp,  | 
1810  | 0  |         ossl_ec_GFp_simple_make_affine,  | 
1811  | 0  |         ossl_ec_GFp_simple_points_make_affine,  | 
1812  | 0  |         ossl_ec_GFp_nistp256_points_mul,  | 
1813  | 0  |         ossl_ec_GFp_nistp256_precompute_mult,  | 
1814  | 0  |         ossl_ec_GFp_nistp256_have_precompute_mult,  | 
1815  | 0  |         ossl_ec_GFp_nist_field_mul,  | 
1816  | 0  |         ossl_ec_GFp_nist_field_sqr,  | 
1817  | 0  |         0 /* field_div */ ,  | 
1818  | 0  |         ossl_ec_GFp_simple_field_inv,  | 
1819  | 0  |         0 /* field_encode */ ,  | 
1820  | 0  |         0 /* field_decode */ ,  | 
1821  | 0  |         0,                      /* field_set_to_one */  | 
1822  | 0  |         ossl_ec_key_simple_priv2oct,  | 
1823  | 0  |         ossl_ec_key_simple_oct2priv,  | 
1824  | 0  |         0, /* set private */  | 
1825  | 0  |         ossl_ec_key_simple_generate_key,  | 
1826  | 0  |         ossl_ec_key_simple_check_key,  | 
1827  | 0  |         ossl_ec_key_simple_generate_public_key,  | 
1828  | 0  |         0, /* keycopy */  | 
1829  | 0  |         0, /* keyfinish */  | 
1830  | 0  |         ossl_ecdh_simple_compute_key,  | 
1831  | 0  |         ossl_ecdsa_simple_sign_setup,  | 
1832  | 0  |         ossl_ecdsa_simple_sign_sig,  | 
1833  | 0  |         ossl_ecdsa_simple_verify_sig,  | 
1834  | 0  |         0, /* field_inverse_mod_ord */  | 
1835  | 0  |         0, /* blind_coordinates */  | 
1836  | 0  |         0, /* ladder_pre */  | 
1837  | 0  |         0, /* ladder_step */  | 
1838  | 0  |         0  /* ladder_post */  | 
1839  | 0  |     };  | 
1840  |  | 
  | 
1841  | 0  |     return &ret;  | 
1842  | 0  | }  | 
1843  |  |  | 
1844  |  | /******************************************************************************/  | 
1845  |  | /*  | 
1846  |  |  * FUNCTIONS TO MANAGE PRECOMPUTATION  | 
1847  |  |  */  | 
1848  |  |  | 
1849  |  | static NISTP256_PRE_COMP *nistp256_pre_comp_new(void)  | 
1850  | 0  | { | 
1851  | 0  |     NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));  | 
1852  |  | 
  | 
1853  | 0  |     if (ret == NULL)  | 
1854  | 0  |         return ret;  | 
1855  |  |  | 
1856  | 0  |     if (!CRYPTO_NEW_REF(&ret->references, 1)) { | 
1857  | 0  |         OPENSSL_free(ret);  | 
1858  | 0  |         return NULL;  | 
1859  | 0  |     }  | 
1860  | 0  |     return ret;  | 
1861  | 0  | }  | 
1862  |  |  | 
1863  |  | NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)  | 
1864  | 0  | { | 
1865  | 0  |     int i;  | 
1866  | 0  |     if (p != NULL)  | 
1867  | 0  |         CRYPTO_UP_REF(&p->references, &i);  | 
1868  | 0  |     return p;  | 
1869  | 0  | }  | 
1870  |  |  | 
1871  |  | void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)  | 
1872  | 0  | { | 
1873  | 0  |     int i;  | 
1874  |  | 
  | 
1875  | 0  |     if (pre == NULL)  | 
1876  | 0  |         return;  | 
1877  |  |  | 
1878  | 0  |     CRYPTO_DOWN_REF(&pre->references, &i);  | 
1879  | 0  |     REF_PRINT_COUNT("EC_nistp256", i, pre); | 
1880  | 0  |     if (i > 0)  | 
1881  | 0  |         return;  | 
1882  | 0  |     REF_ASSERT_ISNT(i < 0);  | 
1883  |  | 
  | 
1884  | 0  |     CRYPTO_FREE_REF(&pre->references);  | 
1885  | 0  |     OPENSSL_free(pre);  | 
1886  | 0  | }  | 
1887  |  |  | 
1888  |  | /******************************************************************************/  | 
1889  |  | /*  | 
1890  |  |  * OPENSSL EC_METHOD FUNCTIONS  | 
1891  |  |  */  | 
1892  |  |  | 
1893  |  | int ossl_ec_GFp_nistp256_group_init(EC_GROUP *group)  | 
1894  | 0  | { | 
1895  | 0  |     int ret;  | 
1896  | 0  |     ret = ossl_ec_GFp_simple_group_init(group);  | 
1897  | 0  |     group->a_is_minus3 = 1;  | 
1898  | 0  |     return ret;  | 
1899  | 0  | }  | 
1900  |  |  | 
1901  |  | int ossl_ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,  | 
1902  |  |                                          const BIGNUM *a, const BIGNUM *b,  | 
1903  |  |                                          BN_CTX *ctx)  | 
1904  | 0  | { | 
1905  | 0  |     int ret = 0;  | 
1906  | 0  |     BIGNUM *curve_p, *curve_a, *curve_b;  | 
1907  | 0  | #ifndef FIPS_MODULE  | 
1908  | 0  |     BN_CTX *new_ctx = NULL;  | 
1909  |  | 
  | 
1910  | 0  |     if (ctx == NULL)  | 
1911  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
1912  | 0  | #endif  | 
1913  | 0  |     if (ctx == NULL)  | 
1914  | 0  |         return 0;  | 
1915  |  |  | 
1916  | 0  |     BN_CTX_start(ctx);  | 
1917  | 0  |     curve_p = BN_CTX_get(ctx);  | 
1918  | 0  |     curve_a = BN_CTX_get(ctx);  | 
1919  | 0  |     curve_b = BN_CTX_get(ctx);  | 
1920  | 0  |     if (curve_b == NULL)  | 
1921  | 0  |         goto err;  | 
1922  | 0  |     BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);  | 
1923  | 0  |     BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);  | 
1924  | 0  |     BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);  | 
1925  | 0  |     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { | 
1926  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);  | 
1927  | 0  |         goto err;  | 
1928  | 0  |     }  | 
1929  | 0  |     group->field_mod_func = BN_nist_mod_256;  | 
1930  | 0  |     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);  | 
1931  | 0  |  err:  | 
1932  | 0  |     BN_CTX_end(ctx);  | 
1933  | 0  | #ifndef FIPS_MODULE  | 
1934  | 0  |     BN_CTX_free(new_ctx);  | 
1935  | 0  | #endif  | 
1936  | 0  |     return ret;  | 
1937  | 0  | }  | 
1938  |  |  | 
1939  |  | /*  | 
1940  |  |  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =  | 
1941  |  |  * (X/Z^2, Y/Z^3)  | 
1942  |  |  */  | 
1943  |  | int ossl_ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,  | 
1944  |  |                                                       const EC_POINT *point,  | 
1945  |  |                                                       BIGNUM *x, BIGNUM *y,  | 
1946  |  |                                                       BN_CTX *ctx)  | 
1947  | 0  | { | 
1948  | 0  |     felem z1, z2, x_in, y_in;  | 
1949  | 0  |     smallfelem x_out, y_out;  | 
1950  | 0  |     longfelem tmp;  | 
1951  |  | 
  | 
1952  | 0  |     if (EC_POINT_is_at_infinity(group, point)) { | 
1953  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);  | 
1954  | 0  |         return 0;  | 
1955  | 0  |     }  | 
1956  | 0  |     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||  | 
1957  | 0  |         (!BN_to_felem(z1, point->Z)))  | 
1958  | 0  |         return 0;  | 
1959  | 0  |     felem_inv(z2, z1);  | 
1960  | 0  |     felem_square(tmp, z2);  | 
1961  | 0  |     felem_reduce(z1, tmp);  | 
1962  | 0  |     felem_mul(tmp, x_in, z1);  | 
1963  | 0  |     felem_reduce(x_in, tmp);  | 
1964  | 0  |     felem_contract(x_out, x_in);  | 
1965  | 0  |     if (x != NULL) { | 
1966  | 0  |         if (!smallfelem_to_BN(x, x_out)) { | 
1967  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1968  | 0  |             return 0;  | 
1969  | 0  |         }  | 
1970  | 0  |     }  | 
1971  | 0  |     felem_mul(tmp, z1, z2);  | 
1972  | 0  |     felem_reduce(z1, tmp);  | 
1973  | 0  |     felem_mul(tmp, y_in, z1);  | 
1974  | 0  |     felem_reduce(y_in, tmp);  | 
1975  | 0  |     felem_contract(y_out, y_in);  | 
1976  | 0  |     if (y != NULL) { | 
1977  | 0  |         if (!smallfelem_to_BN(y, y_out)) { | 
1978  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1979  | 0  |             return 0;  | 
1980  | 0  |         }  | 
1981  | 0  |     }  | 
1982  | 0  |     return 1;  | 
1983  | 0  | }  | 
1984  |  |  | 
1985  |  | /* points below is of size |num|, and tmp_smallfelems is of size |num+1| */  | 
1986  |  | static void make_points_affine(size_t num, smallfelem points[][3],  | 
1987  |  |                                smallfelem tmp_smallfelems[])  | 
1988  | 0  | { | 
1989  |  |     /*  | 
1990  |  |      * Runs in constant time, unless an input is the point at infinity (which  | 
1991  |  |      * normally shouldn't happen).  | 
1992  |  |      */  | 
1993  | 0  |     ossl_ec_GFp_nistp_points_make_affine_internal(num,  | 
1994  | 0  |                                                   points,  | 
1995  | 0  |                                                   sizeof(smallfelem),  | 
1996  | 0  |                                                   tmp_smallfelems,  | 
1997  | 0  |                                                   (void (*)(void *))smallfelem_one,  | 
1998  | 0  |                                                   smallfelem_is_zero_int,  | 
1999  | 0  |                                                   (void (*)(void *, const void *))  | 
2000  | 0  |                                                   smallfelem_assign,  | 
2001  | 0  |                                                   (void (*)(void *, const void *))  | 
2002  | 0  |                                                   smallfelem_square_contract,  | 
2003  | 0  |                                                   (void (*)  | 
2004  | 0  |                                                    (void *, const void *,  | 
2005  | 0  |                                                     const void *))  | 
2006  | 0  |                                                   smallfelem_mul_contract,  | 
2007  | 0  |                                                   (void (*)(void *, const void *))  | 
2008  | 0  |                                                   smallfelem_inv_contract,  | 
2009  |  |                                                   /* nothing to contract */  | 
2010  | 0  |                                                   (void (*)(void *, const void *))  | 
2011  | 0  |                                                   smallfelem_assign);  | 
2012  | 0  | }  | 
2013  |  |  | 
2014  |  | /*  | 
2015  |  |  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL  | 
2016  |  |  * values Result is stored in r (r can equal one of the inputs).  | 
2017  |  |  */  | 
2018  |  | int ossl_ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,  | 
2019  |  |                                     const BIGNUM *scalar, size_t num,  | 
2020  |  |                                     const EC_POINT *points[],  | 
2021  |  |                                     const BIGNUM *scalars[], BN_CTX *ctx)  | 
2022  | 0  | { | 
2023  | 0  |     int ret = 0;  | 
2024  | 0  |     int j;  | 
2025  | 0  |     int mixed = 0;  | 
2026  | 0  |     BIGNUM *x, *y, *z, *tmp_scalar;  | 
2027  | 0  |     felem_bytearray g_secret;  | 
2028  | 0  |     felem_bytearray *secrets = NULL;  | 
2029  | 0  |     smallfelem (*pre_comp)[17][3] = NULL;  | 
2030  | 0  |     smallfelem *tmp_smallfelems = NULL;  | 
2031  | 0  |     unsigned i;  | 
2032  | 0  |     int num_bytes;  | 
2033  | 0  |     int have_pre_comp = 0;  | 
2034  | 0  |     size_t num_points = num;  | 
2035  | 0  |     smallfelem x_in, y_in, z_in;  | 
2036  | 0  |     felem x_out, y_out, z_out;  | 
2037  | 0  |     NISTP256_PRE_COMP *pre = NULL;  | 
2038  | 0  |     const smallfelem(*g_pre_comp)[16][3] = NULL;  | 
2039  | 0  |     EC_POINT *generator = NULL;  | 
2040  | 0  |     const EC_POINT *p = NULL;  | 
2041  | 0  |     const BIGNUM *p_scalar = NULL;  | 
2042  |  | 
  | 
2043  | 0  |     BN_CTX_start(ctx);  | 
2044  | 0  |     x = BN_CTX_get(ctx);  | 
2045  | 0  |     y = BN_CTX_get(ctx);  | 
2046  | 0  |     z = BN_CTX_get(ctx);  | 
2047  | 0  |     tmp_scalar = BN_CTX_get(ctx);  | 
2048  | 0  |     if (tmp_scalar == NULL)  | 
2049  | 0  |         goto err;  | 
2050  |  |  | 
2051  | 0  |     if (scalar != NULL) { | 
2052  | 0  |         pre = group->pre_comp.nistp256;  | 
2053  | 0  |         if (pre)  | 
2054  |  |             /* we have precomputation, try to use it */  | 
2055  | 0  |             g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;  | 
2056  | 0  |         else  | 
2057  |  |             /* try to use the standard precomputation */  | 
2058  | 0  |             g_pre_comp = &gmul[0];  | 
2059  | 0  |         generator = EC_POINT_new(group);  | 
2060  | 0  |         if (generator == NULL)  | 
2061  | 0  |             goto err;  | 
2062  |  |         /* get the generator from precomputation */  | 
2063  | 0  |         if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||  | 
2064  | 0  |             !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||  | 
2065  | 0  |             !smallfelem_to_BN(z, g_pre_comp[0][1][2])) { | 
2066  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2067  | 0  |             goto err;  | 
2068  | 0  |         }  | 
2069  | 0  |         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,  | 
2070  | 0  |                                                                 generator,  | 
2071  | 0  |                                                                 x, y, z, ctx))  | 
2072  | 0  |             goto err;  | 
2073  | 0  |         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))  | 
2074  |  |             /* precomputation matches generator */  | 
2075  | 0  |             have_pre_comp = 1;  | 
2076  | 0  |         else  | 
2077  |  |             /*  | 
2078  |  |              * we don't have valid precomputation: treat the generator as a  | 
2079  |  |              * random point  | 
2080  |  |              */  | 
2081  | 0  |             num_points++;  | 
2082  | 0  |     }  | 
2083  | 0  |     if (num_points > 0) { | 
2084  | 0  |         if (num_points >= 3) { | 
2085  |  |             /*  | 
2086  |  |              * unless we precompute multiples for just one or two points,  | 
2087  |  |              * converting those into affine form is time well spent  | 
2088  |  |              */  | 
2089  | 0  |             mixed = 1;  | 
2090  | 0  |         }  | 
2091  | 0  |         secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);  | 
2092  | 0  |         pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);  | 
2093  | 0  |         if (mixed)  | 
2094  | 0  |             tmp_smallfelems =  | 
2095  | 0  |               OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));  | 
2096  | 0  |         if ((secrets == NULL) || (pre_comp == NULL)  | 
2097  | 0  |             || (mixed && (tmp_smallfelems == NULL)))  | 
2098  | 0  |             goto err;  | 
2099  |  |  | 
2100  |  |         /*  | 
2101  |  |          * we treat NULL scalars as 0, and NULL points as points at infinity,  | 
2102  |  |          * i.e., they contribute nothing to the linear combination  | 
2103  |  |          */  | 
2104  | 0  |         memset(secrets, 0, sizeof(*secrets) * num_points);  | 
2105  | 0  |         memset(pre_comp, 0, sizeof(*pre_comp) * num_points);  | 
2106  | 0  |         for (i = 0; i < num_points; ++i) { | 
2107  | 0  |             if (i == num) { | 
2108  |  |                 /*  | 
2109  |  |                  * we didn't have a valid precomputation, so we pick the  | 
2110  |  |                  * generator  | 
2111  |  |                  */  | 
2112  | 0  |                 p = EC_GROUP_get0_generator(group);  | 
2113  | 0  |                 p_scalar = scalar;  | 
2114  | 0  |             } else { | 
2115  |  |                 /* the i^th point */  | 
2116  | 0  |                 p = points[i];  | 
2117  | 0  |                 p_scalar = scalars[i];  | 
2118  | 0  |             }  | 
2119  | 0  |             if ((p_scalar != NULL) && (p != NULL)) { | 
2120  |  |                 /* reduce scalar to 0 <= scalar < 2^256 */  | 
2121  | 0  |                 if ((BN_num_bits(p_scalar) > 256)  | 
2122  | 0  |                     || (BN_is_negative(p_scalar))) { | 
2123  |  |                     /*  | 
2124  |  |                      * this is an unusual input, and we don't guarantee  | 
2125  |  |                      * constant-timeness  | 
2126  |  |                      */  | 
2127  | 0  |                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) { | 
2128  | 0  |                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2129  | 0  |                         goto err;  | 
2130  | 0  |                     }  | 
2131  | 0  |                     num_bytes = BN_bn2lebinpad(tmp_scalar,  | 
2132  | 0  |                                                secrets[i], sizeof(secrets[i]));  | 
2133  | 0  |                 } else { | 
2134  | 0  |                     num_bytes = BN_bn2lebinpad(p_scalar,  | 
2135  | 0  |                                                secrets[i], sizeof(secrets[i]));  | 
2136  | 0  |                 }  | 
2137  | 0  |                 if (num_bytes < 0) { | 
2138  | 0  |                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2139  | 0  |                     goto err;  | 
2140  | 0  |                 }  | 
2141  |  |                 /* precompute multiples */  | 
2142  | 0  |                 if ((!BN_to_felem(x_out, p->X)) ||  | 
2143  | 0  |                     (!BN_to_felem(y_out, p->Y)) ||  | 
2144  | 0  |                     (!BN_to_felem(z_out, p->Z)))  | 
2145  | 0  |                     goto err;  | 
2146  | 0  |                 felem_shrink(pre_comp[i][1][0], x_out);  | 
2147  | 0  |                 felem_shrink(pre_comp[i][1][1], y_out);  | 
2148  | 0  |                 felem_shrink(pre_comp[i][1][2], z_out);  | 
2149  | 0  |                 for (j = 2; j <= 16; ++j) { | 
2150  | 0  |                     if (j & 1) { | 
2151  | 0  |                         point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],  | 
2152  | 0  |                                         pre_comp[i][j][2], pre_comp[i][1][0],  | 
2153  | 0  |                                         pre_comp[i][1][1], pre_comp[i][1][2],  | 
2154  | 0  |                                         pre_comp[i][j - 1][0],  | 
2155  | 0  |                                         pre_comp[i][j - 1][1],  | 
2156  | 0  |                                         pre_comp[i][j - 1][2]);  | 
2157  | 0  |                     } else { | 
2158  | 0  |                         point_double_small(pre_comp[i][j][0],  | 
2159  | 0  |                                            pre_comp[i][j][1],  | 
2160  | 0  |                                            pre_comp[i][j][2],  | 
2161  | 0  |                                            pre_comp[i][j / 2][0],  | 
2162  | 0  |                                            pre_comp[i][j / 2][1],  | 
2163  | 0  |                                            pre_comp[i][j / 2][2]);  | 
2164  | 0  |                     }  | 
2165  | 0  |                 }  | 
2166  | 0  |             }  | 
2167  | 0  |         }  | 
2168  | 0  |         if (mixed)  | 
2169  | 0  |             make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);  | 
2170  | 0  |     }  | 
2171  |  |  | 
2172  |  |     /* the scalar for the generator */  | 
2173  | 0  |     if ((scalar != NULL) && (have_pre_comp)) { | 
2174  | 0  |         memset(g_secret, 0, sizeof(g_secret));  | 
2175  |  |         /* reduce scalar to 0 <= scalar < 2^256 */  | 
2176  | 0  |         if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) { | 
2177  |  |             /*  | 
2178  |  |              * this is an unusual input, and we don't guarantee  | 
2179  |  |              * constant-timeness  | 
2180  |  |              */  | 
2181  | 0  |             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) { | 
2182  | 0  |                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2183  | 0  |                 goto err;  | 
2184  | 0  |             }  | 
2185  | 0  |             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));  | 
2186  | 0  |         } else { | 
2187  | 0  |             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));  | 
2188  | 0  |         }  | 
2189  |  |         /* do the multiplication with generator precomputation */  | 
2190  | 0  |         batch_mul(x_out, y_out, z_out,  | 
2191  | 0  |                   (const felem_bytearray(*))secrets, num_points,  | 
2192  | 0  |                   g_secret,  | 
2193  | 0  |                   mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);  | 
2194  | 0  |     } else { | 
2195  |  |         /* do the multiplication without generator precomputation */  | 
2196  | 0  |         batch_mul(x_out, y_out, z_out,  | 
2197  | 0  |                   (const felem_bytearray(*))secrets, num_points,  | 
2198  | 0  |                   NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);  | 
2199  | 0  |     }  | 
2200  |  |     /* reduce the output to its unique minimal representation */  | 
2201  | 0  |     felem_contract(x_in, x_out);  | 
2202  | 0  |     felem_contract(y_in, y_out);  | 
2203  | 0  |     felem_contract(z_in, z_out);  | 
2204  | 0  |     if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||  | 
2205  | 0  |         (!smallfelem_to_BN(z, z_in))) { | 
2206  | 0  |         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
2207  | 0  |         goto err;  | 
2208  | 0  |     }  | 
2209  | 0  |     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,  | 
2210  | 0  |                                                              ctx);  | 
2211  |  | 
  | 
2212  | 0  |  err:  | 
2213  | 0  |     BN_CTX_end(ctx);  | 
2214  | 0  |     EC_POINT_free(generator);  | 
2215  | 0  |     OPENSSL_free(secrets);  | 
2216  | 0  |     OPENSSL_free(pre_comp);  | 
2217  | 0  |     OPENSSL_free(tmp_smallfelems);  | 
2218  | 0  |     return ret;  | 
2219  | 0  | }  | 
2220  |  |  | 
2221  |  | int ossl_ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)  | 
2222  | 0  | { | 
2223  | 0  |     int ret = 0;  | 
2224  | 0  |     NISTP256_PRE_COMP *pre = NULL;  | 
2225  | 0  |     int i, j;  | 
2226  | 0  |     BIGNUM *x, *y;  | 
2227  | 0  |     EC_POINT *generator = NULL;  | 
2228  | 0  |     smallfelem tmp_smallfelems[32];  | 
2229  | 0  |     felem x_tmp, y_tmp, z_tmp;  | 
2230  | 0  | #ifndef FIPS_MODULE  | 
2231  | 0  |     BN_CTX *new_ctx = NULL;  | 
2232  | 0  | #endif  | 
2233  |  |  | 
2234  |  |     /* throw away old precomputation */  | 
2235  | 0  |     EC_pre_comp_free(group);  | 
2236  |  | 
  | 
2237  | 0  | #ifndef FIPS_MODULE  | 
2238  | 0  |     if (ctx == NULL)  | 
2239  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
2240  | 0  | #endif  | 
2241  | 0  |     if (ctx == NULL)  | 
2242  | 0  |         return 0;  | 
2243  |  |  | 
2244  | 0  |     BN_CTX_start(ctx);  | 
2245  | 0  |     x = BN_CTX_get(ctx);  | 
2246  | 0  |     y = BN_CTX_get(ctx);  | 
2247  | 0  |     if (y == NULL)  | 
2248  | 0  |         goto err;  | 
2249  |  |     /* get the generator */  | 
2250  | 0  |     if (group->generator == NULL)  | 
2251  | 0  |         goto err;  | 
2252  | 0  |     generator = EC_POINT_new(group);  | 
2253  | 0  |     if (generator == NULL)  | 
2254  | 0  |         goto err;  | 
2255  | 0  |     BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);  | 
2256  | 0  |     BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);  | 
2257  | 0  |     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))  | 
2258  | 0  |         goto err;  | 
2259  | 0  |     if ((pre = nistp256_pre_comp_new()) == NULL)  | 
2260  | 0  |         goto err;  | 
2261  |  |     /*  | 
2262  |  |      * if the generator is the standard one, use built-in precomputation  | 
2263  |  |      */  | 
2264  | 0  |     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { | 
2265  | 0  |         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));  | 
2266  | 0  |         goto done;  | 
2267  | 0  |     }  | 
2268  | 0  |     if ((!BN_to_felem(x_tmp, group->generator->X)) ||  | 
2269  | 0  |         (!BN_to_felem(y_tmp, group->generator->Y)) ||  | 
2270  | 0  |         (!BN_to_felem(z_tmp, group->generator->Z)))  | 
2271  | 0  |         goto err;  | 
2272  | 0  |     felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);  | 
2273  | 0  |     felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);  | 
2274  | 0  |     felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);  | 
2275  |  |     /*  | 
2276  |  |      * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,  | 
2277  |  |      * 2^160*G, 2^224*G for the second one  | 
2278  |  |      */  | 
2279  | 0  |     for (i = 1; i <= 8; i <<= 1) { | 
2280  | 0  |         point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],  | 
2281  | 0  |                            pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],  | 
2282  | 0  |                            pre->g_pre_comp[0][i][1],  | 
2283  | 0  |                            pre->g_pre_comp[0][i][2]);  | 
2284  | 0  |         for (j = 0; j < 31; ++j) { | 
2285  | 0  |             point_double_small(pre->g_pre_comp[1][i][0],  | 
2286  | 0  |                                pre->g_pre_comp[1][i][1],  | 
2287  | 0  |                                pre->g_pre_comp[1][i][2],  | 
2288  | 0  |                                pre->g_pre_comp[1][i][0],  | 
2289  | 0  |                                pre->g_pre_comp[1][i][1],  | 
2290  | 0  |                                pre->g_pre_comp[1][i][2]);  | 
2291  | 0  |         }  | 
2292  | 0  |         if (i == 8)  | 
2293  | 0  |             break;  | 
2294  | 0  |         point_double_small(pre->g_pre_comp[0][2 * i][0],  | 
2295  | 0  |                            pre->g_pre_comp[0][2 * i][1],  | 
2296  | 0  |                            pre->g_pre_comp[0][2 * i][2],  | 
2297  | 0  |                            pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],  | 
2298  | 0  |                            pre->g_pre_comp[1][i][2]);  | 
2299  | 0  |         for (j = 0; j < 31; ++j) { | 
2300  | 0  |             point_double_small(pre->g_pre_comp[0][2 * i][0],  | 
2301  | 0  |                                pre->g_pre_comp[0][2 * i][1],  | 
2302  | 0  |                                pre->g_pre_comp[0][2 * i][2],  | 
2303  | 0  |                                pre->g_pre_comp[0][2 * i][0],  | 
2304  | 0  |                                pre->g_pre_comp[0][2 * i][1],  | 
2305  | 0  |                                pre->g_pre_comp[0][2 * i][2]);  | 
2306  | 0  |         }  | 
2307  | 0  |     }  | 
2308  | 0  |     for (i = 0; i < 2; i++) { | 
2309  |  |         /* g_pre_comp[i][0] is the point at infinity */  | 
2310  | 0  |         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));  | 
2311  |  |         /* the remaining multiples */  | 
2312  |  |         /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */  | 
2313  | 0  |         point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],  | 
2314  | 0  |                         pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],  | 
2315  | 0  |                         pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],  | 
2316  | 0  |                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],  | 
2317  | 0  |                         pre->g_pre_comp[i][2][2]);  | 
2318  |  |         /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */  | 
2319  | 0  |         point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],  | 
2320  | 0  |                         pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],  | 
2321  | 0  |                         pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],  | 
2322  | 0  |                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],  | 
2323  | 0  |                         pre->g_pre_comp[i][2][2]);  | 
2324  |  |         /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */  | 
2325  | 0  |         point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],  | 
2326  | 0  |                         pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],  | 
2327  | 0  |                         pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],  | 
2328  | 0  |                         pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],  | 
2329  | 0  |                         pre->g_pre_comp[i][4][2]);  | 
2330  |  |         /*  | 
2331  |  |          * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G  | 
2332  |  |          */  | 
2333  | 0  |         point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],  | 
2334  | 0  |                         pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],  | 
2335  | 0  |                         pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],  | 
2336  | 0  |                         pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],  | 
2337  | 0  |                         pre->g_pre_comp[i][2][2]);  | 
2338  | 0  |         for (j = 1; j < 8; ++j) { | 
2339  |  |             /* odd multiples: add G resp. 2^32*G */  | 
2340  | 0  |             point_add_small(pre->g_pre_comp[i][2 * j + 1][0],  | 
2341  | 0  |                             pre->g_pre_comp[i][2 * j + 1][1],  | 
2342  | 0  |                             pre->g_pre_comp[i][2 * j + 1][2],  | 
2343  | 0  |                             pre->g_pre_comp[i][2 * j][0],  | 
2344  | 0  |                             pre->g_pre_comp[i][2 * j][1],  | 
2345  | 0  |                             pre->g_pre_comp[i][2 * j][2],  | 
2346  | 0  |                             pre->g_pre_comp[i][1][0],  | 
2347  | 0  |                             pre->g_pre_comp[i][1][1],  | 
2348  | 0  |                             pre->g_pre_comp[i][1][2]);  | 
2349  | 0  |         }  | 
2350  | 0  |     }  | 
2351  | 0  |     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);  | 
2352  |  | 
  | 
2353  | 0  |  done:  | 
2354  | 0  |     SETPRECOMP(group, nistp256, pre);  | 
2355  | 0  |     pre = NULL;  | 
2356  | 0  |     ret = 1;  | 
2357  |  | 
  | 
2358  | 0  |  err:  | 
2359  | 0  |     BN_CTX_end(ctx);  | 
2360  | 0  |     EC_POINT_free(generator);  | 
2361  | 0  | #ifndef FIPS_MODULE  | 
2362  | 0  |     BN_CTX_free(new_ctx);  | 
2363  | 0  | #endif  | 
2364  | 0  |     EC_nistp256_pre_comp_free(pre);  | 
2365  | 0  |     return ret;  | 
2366  | 0  | }  | 
2367  |  |  | 
2368  |  | int ossl_ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)  | 
2369  | 0  | { | 
2370  | 0  |     return HAVEPRECOMP(group, nistp256);  | 
2371  | 0  | }  |