/src/openssl/crypto/ec/ecp_nistp384.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.  | 
3  |  |  *  | 
4  |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use  | 
5  |  |  * this file except in compliance with the License.  You can obtain a copy  | 
6  |  |  * in the file LICENSE in the source distribution or at  | 
7  |  |  * https://www.openssl.org/source/license.html  | 
8  |  |  */  | 
9  |  |  | 
10  |  | /* Copyright 2023 IBM Corp.  | 
11  |  |  *  | 
12  |  |  * Licensed under the Apache License, Version 2.0 (the "License");  | 
13  |  |  *  | 
14  |  |  * you may not use this file except in compliance with the License.  | 
15  |  |  * You may obtain a copy of the License at  | 
16  |  |  *  | 
17  |  |  *     http://www.apache.org/licenses/LICENSE-2.0  | 
18  |  |  *  | 
19  |  |  *  Unless required by applicable law or agreed to in writing, software  | 
20  |  |  *  distributed under the License is distributed on an "AS IS" BASIS,  | 
21  |  |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
22  |  |  *  See the License for the specific language governing permissions and  | 
23  |  |  *  limitations under the License.  | 
24  |  |  */  | 
25  |  |  | 
26  |  | /*  | 
27  |  |  * Designed for 56-bit limbs by Rohan McLure <rohan.mclure@linux.ibm.com>.  | 
28  |  |  * The layout is based on that of ecp_nistp{224,521}.c, allowing even for asm | 
29  |  |  * acceleration of felem_{square,mul} as supported in these files. | 
30  |  |  */  | 
31  |  |  | 
32  |  | #include <openssl/e_os2.h>  | 
33  |  |  | 
34  |  | #include <string.h>  | 
35  |  | #include <openssl/err.h>  | 
36  |  | #include "ec_local.h"  | 
37  |  |  | 
38  |  | #include "internal/numbers.h"  | 
39  |  |  | 
40  |  | #ifndef INT128_MAX  | 
41  |  | # error "Your compiler doesn't appear to support 128-bit integer types"  | 
42  |  | #endif  | 
43  |  |  | 
44  |  | typedef uint8_t u8;  | 
45  |  | typedef uint64_t u64;  | 
46  |  |  | 
47  |  | /*  | 
48  |  |  * The underlying field. P384 operates over GF(2^384-2^128-2^96+2^32-1). We  | 
49  |  |  * can serialize an element of this field into 48 bytes. We call this an  | 
50  |  |  * felem_bytearray.  | 
51  |  |  */  | 
52  |  |  | 
53  |  | typedef u8 felem_bytearray[48];  | 
54  |  |  | 
55  |  | /*  | 
56  |  |  * These are the parameters of P384, taken from FIPS 186-3, section D.1.2.4.  | 
57  |  |  * These values are big-endian.  | 
58  |  |  */  | 
59  |  | static const felem_bytearray nistp384_curve_params[5] = { | 
60  |  |   {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */ | 
61  |  |    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  | 
62  |  |    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,  | 
63  |  |    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF},  | 
64  |  |   {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a = -3 */ | 
65  |  |    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  | 
66  |  |    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,  | 
67  |  |    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC},  | 
68  |  |   {0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B, /* b */ | 
69  |  |    0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12,  | 
70  |  |    0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D,  | 
71  |  |    0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF},  | 
72  |  |   {0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, /* x */ | 
73  |  |    0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,  | 
74  |  |    0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,  | 
75  |  |    0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7},  | 
76  |  |   {0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, /* y */ | 
77  |  |    0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,  | 
78  |  |    0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,  | 
79  |  |    0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F},  | 
80  |  | };  | 
81  |  |  | 
82  |  | /*-  | 
83  |  |  * The representation of field elements.  | 
84  |  |  * ------------------------------------  | 
85  |  |  *  | 
86  |  |  * We represent field elements with seven values. These values are either 64 or  | 
87  |  |  * 128 bits and the field element represented is:  | 
88  |  |  *   v[0]*2^0 + v[1]*2^56 + v[2]*2^112 + ... + v[6]*2^336  (mod p)  | 
89  |  |  * Each of the seven values is called a 'limb'. Since the limbs are spaced only  | 
90  |  |  * 56 bits apart, but are greater than 56 bits in length, the most significant  | 
91  |  |  * bits of each limb overlap with the least significant bits of the next  | 
92  |  |  *  | 
93  |  |  * This representation is considered to be 'redundant' in the sense that  | 
94  |  |  * intermediate values can each contain more than a 56-bit value in each limb.  | 
95  |  |  * Reduction causes all but the final limb to be reduced to contain a value less  | 
96  |  |  * than 2^56, with the final value represented allowed to be larger than 2^384,  | 
97  |  |  * inasmuch as we can be sure that arithmetic overflow remains impossible. The  | 
98  |  |  * reduced value must of course be congruent to the unreduced value.  | 
99  |  |  *  | 
100  |  |  * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a  | 
101  |  |  * 'widefelem', featuring enough bits to store the result of a multiplication  | 
102  |  |  * and even some further arithmetic without need for immediate reduction.  | 
103  |  |  */  | 
104  |  |  | 
105  | 0  | #define NLIMBS 7  | 
106  |  |  | 
107  |  | typedef uint64_t limb;  | 
108  |  | typedef uint128_t widelimb;  | 
109  |  | typedef limb limb_aX __attribute((__aligned__(1)));  | 
110  |  | typedef limb felem[NLIMBS];  | 
111  |  | typedef widelimb widefelem[2*NLIMBS-1];  | 
112  |  |  | 
113  |  | static const limb bottom56bits = 0xffffffffffffff;  | 
114  |  |  | 
115  |  | /* Helper functions (de)serialising reduced field elements in little endian */  | 
116  |  | static void bin48_to_felem(felem out, const u8 in[48])  | 
117  | 0  | { | 
118  | 0  |     memset(out, 0, 56);  | 
119  | 0  |     out[0] = (*((limb *) & in[0])) & bottom56bits;  | 
120  | 0  |     out[1] = (*((limb_aX *) & in[7])) & bottom56bits;  | 
121  | 0  |     out[2] = (*((limb_aX *) & in[14])) & bottom56bits;  | 
122  | 0  |     out[3] = (*((limb_aX *) & in[21])) & bottom56bits;  | 
123  | 0  |     out[4] = (*((limb_aX *) & in[28])) & bottom56bits;  | 
124  | 0  |     out[5] = (*((limb_aX *) & in[35])) & bottom56bits;  | 
125  | 0  |     memmove(&out[6], &in[42], 6);  | 
126  | 0  | }  | 
127  |  |  | 
128  |  | static void felem_to_bin48(u8 out[48], const felem in)  | 
129  | 0  | { | 
130  | 0  |     memset(out, 0, 48);  | 
131  | 0  |     (*((limb *) & out[0]))     |= (in[0] & bottom56bits);  | 
132  | 0  |     (*((limb_aX *) & out[7]))  |= (in[1] & bottom56bits);  | 
133  | 0  |     (*((limb_aX *) & out[14])) |= (in[2] & bottom56bits);  | 
134  | 0  |     (*((limb_aX *) & out[21])) |= (in[3] & bottom56bits);  | 
135  | 0  |     (*((limb_aX *) & out[28])) |= (in[4] & bottom56bits);  | 
136  | 0  |     (*((limb_aX *) & out[35])) |= (in[5] & bottom56bits);  | 
137  | 0  |     memmove(&out[42], &in[6], 6);  | 
138  | 0  | }  | 
139  |  |  | 
140  |  | /* BN_to_felem converts an OpenSSL BIGNUM into an felem */  | 
141  |  | static int BN_to_felem(felem out, const BIGNUM *bn)  | 
142  | 0  | { | 
143  | 0  |     felem_bytearray b_out;  | 
144  | 0  |     int num_bytes;  | 
145  |  | 
  | 
146  | 0  |     if (BN_is_negative(bn)) { | 
147  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
148  | 0  |         return 0;  | 
149  | 0  |     }  | 
150  | 0  |     num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));  | 
151  | 0  |     if (num_bytes < 0) { | 
152  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);  | 
153  | 0  |         return 0;  | 
154  | 0  |     }  | 
155  | 0  |     bin48_to_felem(out, b_out);  | 
156  | 0  |     return 1;  | 
157  | 0  | }  | 
158  |  |  | 
159  |  | /* felem_to_BN converts an felem into an OpenSSL BIGNUM */  | 
160  |  | static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)  | 
161  | 0  | { | 
162  | 0  |     felem_bytearray b_out;  | 
163  |  | 
  | 
164  | 0  |     felem_to_bin48(b_out, in);  | 
165  | 0  |     return BN_lebin2bn(b_out, sizeof(b_out), out);  | 
166  | 0  | }  | 
167  |  |  | 
168  |  | /*-  | 
169  |  |  * Field operations  | 
170  |  |  * ----------------  | 
171  |  |  */  | 
172  |  |  | 
173  |  | static void felem_one(felem out)  | 
174  | 0  | { | 
175  | 0  |     out[0] = 1;  | 
176  | 0  |     memset(&out[1], 0, sizeof(limb) * (NLIMBS-1));  | 
177  | 0  | }  | 
178  |  |  | 
179  |  | static void felem_assign(felem out, const felem in)  | 
180  | 0  | { | 
181  | 0  |     memcpy(out, in, sizeof(felem));  | 
182  | 0  | }  | 
183  |  |  | 
184  |  | /* felem_sum64 sets out = out + in. */  | 
185  |  | static void felem_sum64(felem out, const felem in)  | 
186  | 0  | { | 
187  | 0  |     unsigned int i;  | 
188  |  | 
  | 
189  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
190  | 0  |         out[i] += in[i];  | 
191  | 0  | }  | 
192  |  |  | 
193  |  | /* felem_scalar sets out = in * scalar */  | 
194  |  | static void felem_scalar(felem out, const felem in, limb scalar)  | 
195  | 0  | { | 
196  | 0  |     unsigned int i;  | 
197  |  | 
  | 
198  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
199  | 0  |         out[i] = in[i] * scalar;  | 
200  | 0  | }  | 
201  |  |  | 
202  |  | /* felem_scalar64 sets out = out * scalar */  | 
203  |  | static void felem_scalar64(felem out, limb scalar)  | 
204  | 0  | { | 
205  | 0  |     unsigned int i;  | 
206  |  | 
  | 
207  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
208  | 0  |         out[i] *= scalar;  | 
209  | 0  | }  | 
210  |  |  | 
211  |  | /* felem_scalar128 sets out = out * scalar */  | 
212  |  | static void felem_scalar128(widefelem out, limb scalar)  | 
213  | 0  | { | 
214  | 0  |     unsigned int i;  | 
215  |  | 
  | 
216  | 0  |     for (i = 0; i < 2*NLIMBS-1; i++)  | 
217  | 0  |         out[i] *= scalar;  | 
218  | 0  | }  | 
219  |  |  | 
220  |  | /*-  | 
221  |  |  * felem_neg sets |out| to |-in|  | 
222  |  |  * On entry:  | 
223  |  |  *   in[i] < 2^60 - 2^29  | 
224  |  |  * On exit:  | 
225  |  |  *   out[i] < 2^60  | 
226  |  |  */  | 
227  |  | static void felem_neg(felem out, const felem in)  | 
228  | 0  | { | 
229  |  |     /*  | 
230  |  |      * In order to prevent underflow, we add a multiple of p before subtracting.  | 
231  |  |      * Use telescopic sums to represent 2^12 * p redundantly with each limb  | 
232  |  |      * of the form 2^60 + ...  | 
233  |  |      */  | 
234  | 0  |     static const limb two60m52m4 = (((limb) 1) << 60)  | 
235  | 0  |                                  - (((limb) 1) << 52)  | 
236  | 0  |                                  - (((limb) 1) << 4);  | 
237  | 0  |     static const limb two60p44m12 = (((limb) 1) << 60)  | 
238  | 0  |                                   + (((limb) 1) << 44)  | 
239  | 0  |                                   - (((limb) 1) << 12);  | 
240  | 0  |     static const limb two60m28m4 = (((limb) 1) << 60)  | 
241  | 0  |                                  - (((limb) 1) << 28)  | 
242  | 0  |                                  - (((limb) 1) << 4);  | 
243  | 0  |     static const limb two60m4 = (((limb) 1) << 60)  | 
244  | 0  |                               - (((limb) 1) << 4);  | 
245  |  | 
  | 
246  | 0  |     out[0] = two60p44m12 - in[0];  | 
247  | 0  |     out[1] = two60m52m4 - in[1];  | 
248  | 0  |     out[2] = two60m28m4 - in[2];  | 
249  | 0  |     out[3] = two60m4 - in[3];  | 
250  | 0  |     out[4] = two60m4 - in[4];  | 
251  | 0  |     out[5] = two60m4 - in[5];  | 
252  | 0  |     out[6] = two60m4 - in[6];  | 
253  | 0  | }  | 
254  |  |  | 
255  |  | /*-  | 
256  |  |  * felem_diff64 subtracts |in| from |out|  | 
257  |  |  * On entry:  | 
258  |  |  *   in[i] < 2^60 - 2^52 - 2^4  | 
259  |  |  * On exit:  | 
260  |  |  *   out[i] < out_orig[i] + 2^60 + 2^44  | 
261  |  |  */  | 
262  |  | static void felem_diff64(felem out, const felem in)  | 
263  | 0  | { | 
264  |  |     /*  | 
265  |  |      * In order to prevent underflow, we add a multiple of p before subtracting.  | 
266  |  |      * Use telescopic sums to represent 2^12 * p redundantly with each limb  | 
267  |  |      * of the form 2^60 + ...  | 
268  |  |      */  | 
269  |  | 
  | 
270  | 0  |     static const limb two60m52m4 = (((limb) 1) << 60)  | 
271  | 0  |                                  - (((limb) 1) << 52)  | 
272  | 0  |                                  - (((limb) 1) << 4);  | 
273  | 0  |     static const limb two60p44m12 = (((limb) 1) << 60)  | 
274  | 0  |                                   + (((limb) 1) << 44)  | 
275  | 0  |                                   - (((limb) 1) << 12);  | 
276  | 0  |     static const limb two60m28m4 = (((limb) 1) << 60)  | 
277  | 0  |                                  - (((limb) 1) << 28)  | 
278  | 0  |                                  - (((limb) 1) << 4);  | 
279  | 0  |     static const limb two60m4 = (((limb) 1) << 60)  | 
280  | 0  |                               - (((limb) 1) << 4);  | 
281  |  | 
  | 
282  | 0  |     out[0] += two60p44m12 - in[0];  | 
283  | 0  |     out[1] += two60m52m4 - in[1];  | 
284  | 0  |     out[2] += two60m28m4 - in[2];  | 
285  | 0  |     out[3] += two60m4 - in[3];  | 
286  | 0  |     out[4] += two60m4 - in[4];  | 
287  | 0  |     out[5] += two60m4 - in[5];  | 
288  | 0  |     out[6] += two60m4 - in[6];  | 
289  | 0  | }  | 
290  |  |  | 
291  |  | /*  | 
292  |  |  * in[i] < 2^63  | 
293  |  |  * out[i] < out_orig[i] + 2^64 + 2^48  | 
294  |  |  */  | 
295  |  | static void felem_diff_128_64(widefelem out, const felem in)  | 
296  | 0  | { | 
297  |  |     /*  | 
298  |  |      * In order to prevent underflow, we add a multiple of p before subtracting.  | 
299  |  |      * Use telescopic sums to represent 2^16 * p redundantly with each limb  | 
300  |  |      * of the form 2^64 + ...  | 
301  |  |      */  | 
302  |  | 
  | 
303  | 0  |     static const widelimb two64m56m8 = (((widelimb) 1) << 64)  | 
304  | 0  |                                      - (((widelimb) 1) << 56)  | 
305  | 0  |                                      - (((widelimb) 1) << 8);  | 
306  | 0  |     static const widelimb two64m32m8 = (((widelimb) 1) << 64)  | 
307  | 0  |                                      - (((widelimb) 1) << 32)  | 
308  | 0  |                                      - (((widelimb) 1) << 8);  | 
309  | 0  |     static const widelimb two64m8 = (((widelimb) 1) << 64)  | 
310  | 0  |                                   - (((widelimb) 1) << 8);  | 
311  | 0  |     static const widelimb two64p48m16 = (((widelimb) 1) << 64)  | 
312  | 0  |                                       + (((widelimb) 1) << 48)  | 
313  | 0  |                                       - (((widelimb) 1) << 16);  | 
314  | 0  |     unsigned int i;  | 
315  |  | 
  | 
316  | 0  |     out[0] += two64p48m16;  | 
317  | 0  |     out[1] += two64m56m8;  | 
318  | 0  |     out[2] += two64m32m8;  | 
319  | 0  |     out[3] += two64m8;  | 
320  | 0  |     out[4] += two64m8;  | 
321  | 0  |     out[5] += two64m8;  | 
322  | 0  |     out[6] += two64m8;  | 
323  |  | 
  | 
324  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
325  | 0  |         out[i] -= in[i];  | 
326  | 0  | }  | 
327  |  |  | 
328  |  | /*  | 
329  |  |  * in[i] < 2^127 - 2^119 - 2^71  | 
330  |  |  * out[i] < out_orig[i] + 2^127 + 2^111  | 
331  |  |  */  | 
332  |  | static void felem_diff128(widefelem out, const widefelem in)  | 
333  | 0  | { | 
334  |  |     /*  | 
335  |  |      * In order to prevent underflow, we add a multiple of p before subtracting.  | 
336  |  |      * Use telescopic sums to represent 2^415 * p redundantly with each limb  | 
337  |  |      * of the form 2^127 + ...  | 
338  |  |      */  | 
339  |  | 
  | 
340  | 0  |     static const widelimb two127 = ((widelimb) 1) << 127;  | 
341  | 0  |     static const widelimb two127m71 = (((widelimb) 1) << 127)  | 
342  | 0  |                                     - (((widelimb) 1) << 71);  | 
343  | 0  |     static const widelimb two127p111m79m71 = (((widelimb) 1) << 127)  | 
344  | 0  |                                            + (((widelimb) 1) << 111)  | 
345  | 0  |                                            - (((widelimb) 1) << 79)  | 
346  | 0  |                                            - (((widelimb) 1) << 71);  | 
347  | 0  |     static const widelimb two127m119m71 = (((widelimb) 1) << 127)  | 
348  | 0  |                                         - (((widelimb) 1) << 119)  | 
349  | 0  |                                         - (((widelimb) 1) << 71);  | 
350  | 0  |     static const widelimb two127m95m71 = (((widelimb) 1) << 127)  | 
351  | 0  |                                        - (((widelimb) 1) << 95)  | 
352  | 0  |                                        - (((widelimb) 1) << 71);  | 
353  | 0  |     unsigned int i;  | 
354  |  | 
  | 
355  | 0  |     out[0]  += two127;  | 
356  | 0  |     out[1]  += two127m71;  | 
357  | 0  |     out[2]  += two127m71;  | 
358  | 0  |     out[3]  += two127m71;  | 
359  | 0  |     out[4]  += two127m71;  | 
360  | 0  |     out[5]  += two127m71;  | 
361  | 0  |     out[6]  += two127p111m79m71;  | 
362  | 0  |     out[7]  += two127m119m71;  | 
363  | 0  |     out[8]  += two127m95m71;  | 
364  | 0  |     out[9]  += two127m71;  | 
365  | 0  |     out[10] += two127m71;  | 
366  | 0  |     out[11] += two127m71;  | 
367  | 0  |     out[12] += two127m71;  | 
368  |  | 
  | 
369  | 0  |     for (i = 0; i < 2*NLIMBS-1; i++)  | 
370  | 0  |         out[i] -= in[i];  | 
371  | 0  | }  | 
372  |  |  | 
373  |  | static void felem_square_ref(widefelem out, const felem in)  | 
374  | 0  | { | 
375  | 0  |     felem inx2;  | 
376  | 0  |     felem_scalar(inx2, in, 2);  | 
377  |  | 
  | 
378  | 0  |     out[0] = ((uint128_t) in[0]) * in[0];  | 
379  |  | 
  | 
380  | 0  |     out[1] = ((uint128_t) in[0]) * inx2[1];  | 
381  |  | 
  | 
382  | 0  |     out[2] = ((uint128_t) in[0]) * inx2[2]  | 
383  | 0  |            + ((uint128_t) in[1]) * in[1];  | 
384  |  | 
  | 
385  | 0  |     out[3] = ((uint128_t) in[0]) * inx2[3]  | 
386  | 0  |            + ((uint128_t) in[1]) * inx2[2];  | 
387  |  | 
  | 
388  | 0  |     out[4] = ((uint128_t) in[0]) * inx2[4]  | 
389  | 0  |            + ((uint128_t) in[1]) * inx2[3]  | 
390  | 0  |            + ((uint128_t) in[2]) * in[2];  | 
391  |  | 
  | 
392  | 0  |     out[5] = ((uint128_t) in[0]) * inx2[5]  | 
393  | 0  |            + ((uint128_t) in[1]) * inx2[4]  | 
394  | 0  |            + ((uint128_t) in[2]) * inx2[3];  | 
395  |  | 
  | 
396  | 0  |     out[6] = ((uint128_t) in[0]) * inx2[6]  | 
397  | 0  |            + ((uint128_t) in[1]) * inx2[5]  | 
398  | 0  |            + ((uint128_t) in[2]) * inx2[4]  | 
399  | 0  |            + ((uint128_t) in[3]) * in[3];  | 
400  |  | 
  | 
401  | 0  |     out[7] = ((uint128_t) in[1]) * inx2[6]  | 
402  | 0  |            + ((uint128_t) in[2]) * inx2[5]  | 
403  | 0  |            + ((uint128_t) in[3]) * inx2[4];  | 
404  |  | 
  | 
405  | 0  |     out[8] = ((uint128_t) in[2]) * inx2[6]  | 
406  | 0  |            + ((uint128_t) in[3]) * inx2[5]  | 
407  | 0  |            + ((uint128_t) in[4]) * in[4];  | 
408  |  | 
  | 
409  | 0  |     out[9] = ((uint128_t) in[3]) * inx2[6]  | 
410  | 0  |            + ((uint128_t) in[4]) * inx2[5];  | 
411  |  | 
  | 
412  | 0  |     out[10] = ((uint128_t) in[4]) * inx2[6]  | 
413  | 0  |             + ((uint128_t) in[5]) * in[5];  | 
414  |  | 
  | 
415  | 0  |     out[11] = ((uint128_t) in[5]) * inx2[6];  | 
416  |  | 
  | 
417  | 0  |     out[12] = ((uint128_t) in[6]) * in[6];  | 
418  | 0  | }  | 
419  |  |  | 
420  |  | static void felem_mul_ref(widefelem out, const felem in1, const felem in2)  | 
421  | 0  | { | 
422  | 0  |     out[0] = ((uint128_t) in1[0]) * in2[0];  | 
423  |  | 
  | 
424  | 0  |     out[1] = ((uint128_t) in1[0]) * in2[1]  | 
425  | 0  |            + ((uint128_t) in1[1]) * in2[0];  | 
426  |  | 
  | 
427  | 0  |     out[2] = ((uint128_t) in1[0]) * in2[2]  | 
428  | 0  |            + ((uint128_t) in1[1]) * in2[1]  | 
429  | 0  |            + ((uint128_t) in1[2]) * in2[0];  | 
430  |  | 
  | 
431  | 0  |     out[3] = ((uint128_t) in1[0]) * in2[3]  | 
432  | 0  |            + ((uint128_t) in1[1]) * in2[2]  | 
433  | 0  |            + ((uint128_t) in1[2]) * in2[1]  | 
434  | 0  |            + ((uint128_t) in1[3]) * in2[0];  | 
435  |  | 
  | 
436  | 0  |     out[4] = ((uint128_t) in1[0]) * in2[4]  | 
437  | 0  |            + ((uint128_t) in1[1]) * in2[3]  | 
438  | 0  |            + ((uint128_t) in1[2]) * in2[2]  | 
439  | 0  |            + ((uint128_t) in1[3]) * in2[1]  | 
440  | 0  |            + ((uint128_t) in1[4]) * in2[0];  | 
441  |  | 
  | 
442  | 0  |     out[5] = ((uint128_t) in1[0]) * in2[5]  | 
443  | 0  |            + ((uint128_t) in1[1]) * in2[4]  | 
444  | 0  |            + ((uint128_t) in1[2]) * in2[3]  | 
445  | 0  |            + ((uint128_t) in1[3]) * in2[2]  | 
446  | 0  |            + ((uint128_t) in1[4]) * in2[1]  | 
447  | 0  |            + ((uint128_t) in1[5]) * in2[0];  | 
448  |  | 
  | 
449  | 0  |     out[6] = ((uint128_t) in1[0]) * in2[6]  | 
450  | 0  |            + ((uint128_t) in1[1]) * in2[5]  | 
451  | 0  |            + ((uint128_t) in1[2]) * in2[4]  | 
452  | 0  |            + ((uint128_t) in1[3]) * in2[3]  | 
453  | 0  |            + ((uint128_t) in1[4]) * in2[2]  | 
454  | 0  |            + ((uint128_t) in1[5]) * in2[1]  | 
455  | 0  |            + ((uint128_t) in1[6]) * in2[0];  | 
456  |  | 
  | 
457  | 0  |     out[7] = ((uint128_t) in1[1]) * in2[6]  | 
458  | 0  |            + ((uint128_t) in1[2]) * in2[5]  | 
459  | 0  |            + ((uint128_t) in1[3]) * in2[4]  | 
460  | 0  |            + ((uint128_t) in1[4]) * in2[3]  | 
461  | 0  |            + ((uint128_t) in1[5]) * in2[2]  | 
462  | 0  |            + ((uint128_t) in1[6]) * in2[1];  | 
463  |  | 
  | 
464  | 0  |     out[8] = ((uint128_t) in1[2]) * in2[6]  | 
465  | 0  |            + ((uint128_t) in1[3]) * in2[5]  | 
466  | 0  |            + ((uint128_t) in1[4]) * in2[4]  | 
467  | 0  |            + ((uint128_t) in1[5]) * in2[3]  | 
468  | 0  |            + ((uint128_t) in1[6]) * in2[2];  | 
469  |  | 
  | 
470  | 0  |     out[9] = ((uint128_t) in1[3]) * in2[6]  | 
471  | 0  |            + ((uint128_t) in1[4]) * in2[5]  | 
472  | 0  |            + ((uint128_t) in1[5]) * in2[4]  | 
473  | 0  |            + ((uint128_t) in1[6]) * in2[3];  | 
474  |  | 
  | 
475  | 0  |     out[10] = ((uint128_t) in1[4]) * in2[6]  | 
476  | 0  |             + ((uint128_t) in1[5]) * in2[5]  | 
477  | 0  |             + ((uint128_t) in1[6]) * in2[4];  | 
478  |  | 
  | 
479  | 0  |     out[11] = ((uint128_t) in1[5]) * in2[6]  | 
480  | 0  |             + ((uint128_t) in1[6]) * in2[5];  | 
481  |  | 
  | 
482  | 0  |     out[12] = ((uint128_t) in1[6]) * in2[6];  | 
483  | 0  | }  | 
484  |  |  | 
485  |  | /*-  | 
486  |  |  * Reduce thirteen 128-bit coefficients to seven 64-bit coefficients.  | 
487  |  |  * in[i] < 2^128 - 2^125  | 
488  |  |  * out[i] < 2^56 for i < 6,  | 
489  |  |  * out[6] <= 2^48  | 
490  |  |  *  | 
491  |  |  * The technique in use here stems from the format of the prime modulus:  | 
492  |  |  * P384 = 2^384 - delta  | 
493  |  |  *  | 
494  |  |  * Thus we can reduce numbers of the form (X + 2^384 * Y) by substituting  | 
495  |  |  * them with (X + delta Y), with delta = 2^128 + 2^96 + (-2^32 + 1). These  | 
496  |  |  * coefficients are still quite large, and so we repeatedly apply this  | 
497  |  |  * technique on high-order bits in order to guarantee the desired bounds on  | 
498  |  |  * the size of our output.  | 
499  |  |  *  | 
500  |  |  * The three phases of elimination are as follows:  | 
501  |  |  * [1]: Y = 2^120 (in[12] | in[11] | in[10] | in[9])  | 
502  |  |  * [2]: Y = 2^8 (acc[8] | acc[7])  | 
503  |  |  * [3]: Y = 2^48 (acc[6] >> 48)  | 
504  |  |  * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d)  | 
505  |  |  */  | 
506  |  | static void felem_reduce(felem out, const widefelem in)  | 
507  | 0  | { | 
508  |  |     /*  | 
509  |  |      * In order to prevent underflow, we add a multiple of p before subtracting.  | 
510  |  |      * Use telescopic sums to represent 2^76 * p redundantly with each limb  | 
511  |  |      * of the form 2^124 + ...  | 
512  |  |      */  | 
513  | 0  |     static const widelimb two124m68 = (((widelimb) 1) << 124)  | 
514  | 0  |                                     - (((widelimb) 1) << 68);  | 
515  | 0  |     static const widelimb two124m116m68 = (((widelimb) 1) << 124)  | 
516  | 0  |                                         - (((widelimb) 1) << 116)  | 
517  | 0  |                                         - (((widelimb) 1) << 68);  | 
518  | 0  |     static const widelimb two124p108m76 = (((widelimb) 1) << 124)  | 
519  | 0  |                                         + (((widelimb) 1) << 108)  | 
520  | 0  |                                         - (((widelimb) 1) << 76);  | 
521  | 0  |     static const widelimb two124m92m68 = (((widelimb) 1) << 124)  | 
522  | 0  |                                        - (((widelimb) 1) << 92)  | 
523  | 0  |                                        - (((widelimb) 1) << 68);  | 
524  | 0  |     widelimb temp, acc[9];  | 
525  | 0  |     unsigned int i;  | 
526  |  | 
  | 
527  | 0  |     memcpy(acc, in, sizeof(widelimb) * 9);  | 
528  |  | 
  | 
529  | 0  |     acc[0] += two124p108m76;  | 
530  | 0  |     acc[1] += two124m116m68;  | 
531  | 0  |     acc[2] += two124m92m68;  | 
532  | 0  |     acc[3] += two124m68;  | 
533  | 0  |     acc[4] += two124m68;  | 
534  | 0  |     acc[5] += two124m68;  | 
535  | 0  |     acc[6] += two124m68;  | 
536  |  |  | 
537  |  |     /* [1]: Eliminate in[9], ..., in[12] */  | 
538  | 0  |     acc[8] += in[12] >> 32;  | 
539  | 0  |     acc[7] += (in[12] & 0xffffffff) << 24;  | 
540  | 0  |     acc[7] += in[12] >> 8;  | 
541  | 0  |     acc[6] += (in[12] & 0xff) << 48;  | 
542  | 0  |     acc[6] -= in[12] >> 16;  | 
543  | 0  |     acc[5] -= (in[12] & 0xffff) << 40;  | 
544  | 0  |     acc[6] += in[12] >> 48;  | 
545  | 0  |     acc[5] += (in[12] & 0xffffffffffff) << 8;  | 
546  |  | 
  | 
547  | 0  |     acc[7] += in[11] >> 32;  | 
548  | 0  |     acc[6] += (in[11] & 0xffffffff) << 24;  | 
549  | 0  |     acc[6] += in[11] >> 8;  | 
550  | 0  |     acc[5] += (in[11] & 0xff) << 48;  | 
551  | 0  |     acc[5] -= in[11] >> 16;  | 
552  | 0  |     acc[4] -= (in[11] & 0xffff) << 40;  | 
553  | 0  |     acc[5] += in[11] >> 48;  | 
554  | 0  |     acc[4] += (in[11] & 0xffffffffffff) << 8;  | 
555  |  | 
  | 
556  | 0  |     acc[6] += in[10] >> 32;  | 
557  | 0  |     acc[5] += (in[10] & 0xffffffff) << 24;  | 
558  | 0  |     acc[5] += in[10] >> 8;  | 
559  | 0  |     acc[4] += (in[10] & 0xff) << 48;  | 
560  | 0  |     acc[4] -= in[10] >> 16;  | 
561  | 0  |     acc[3] -= (in[10] & 0xffff) << 40;  | 
562  | 0  |     acc[4] += in[10] >> 48;  | 
563  | 0  |     acc[3] += (in[10] & 0xffffffffffff) << 8;  | 
564  |  | 
  | 
565  | 0  |     acc[5] += in[9] >> 32;  | 
566  | 0  |     acc[4] += (in[9] & 0xffffffff) << 24;  | 
567  | 0  |     acc[4] += in[9] >> 8;  | 
568  | 0  |     acc[3] += (in[9] & 0xff) << 48;  | 
569  | 0  |     acc[3] -= in[9] >> 16;  | 
570  | 0  |     acc[2] -= (in[9] & 0xffff) << 40;  | 
571  | 0  |     acc[3] += in[9] >> 48;  | 
572  | 0  |     acc[2] += (in[9] & 0xffffffffffff) << 8;  | 
573  |  |  | 
574  |  |     /*  | 
575  |  |      * [2]: Eliminate acc[7], acc[8], that is the 7 and eighth limbs, as  | 
576  |  |      * well as the contributions made from eliminating higher limbs.  | 
577  |  |      * acc[7] < in[7] + 2^120 + 2^56 < in[7] + 2^121  | 
578  |  |      * acc[8] < in[8] + 2^96  | 
579  |  |      */  | 
580  | 0  |     acc[4] += acc[8] >> 32;  | 
581  | 0  |     acc[3] += (acc[8] & 0xffffffff) << 24;  | 
582  | 0  |     acc[3] += acc[8] >> 8;  | 
583  | 0  |     acc[2] += (acc[8] & 0xff) << 48;  | 
584  | 0  |     acc[2] -= acc[8] >> 16;  | 
585  | 0  |     acc[1] -= (acc[8] & 0xffff) << 40;  | 
586  | 0  |     acc[2] += acc[8] >> 48;  | 
587  | 0  |     acc[1] += (acc[8] & 0xffffffffffff) << 8;  | 
588  |  | 
  | 
589  | 0  |     acc[3] += acc[7] >> 32;  | 
590  | 0  |     acc[2] += (acc[7] & 0xffffffff) << 24;  | 
591  | 0  |     acc[2] += acc[7] >> 8;  | 
592  | 0  |     acc[1] += (acc[7] & 0xff) << 48;  | 
593  | 0  |     acc[1] -= acc[7] >> 16;  | 
594  | 0  |     acc[0] -= (acc[7] & 0xffff) << 40;  | 
595  | 0  |     acc[1] += acc[7] >> 48;  | 
596  | 0  |     acc[0] += (acc[7] & 0xffffffffffff) << 8;  | 
597  |  |  | 
598  |  |     /*-  | 
599  |  |      * acc[k] < in[k] + 2^124 + 2^121   | 
600  |  |      *        < in[k] + 2^125  | 
601  |  |      *        < 2^128, for k <= 6  | 
602  |  |      */  | 
603  |  |  | 
604  |  |     /*  | 
605  |  |      * Carry 4 -> 5 -> 6  | 
606  |  |      * This has the effect of ensuring that these more significant limbs  | 
607  |  |      * will be small in value after eliminating high bits from acc[6].  | 
608  |  |      */  | 
609  | 0  |     acc[5] += acc[4] >> 56;  | 
610  | 0  |     acc[4] &= 0x00ffffffffffffff;  | 
611  |  | 
  | 
612  | 0  |     acc[6] += acc[5] >> 56;  | 
613  | 0  |     acc[5] &= 0x00ffffffffffffff;  | 
614  |  |  | 
615  |  |     /*-  | 
616  |  |      * acc[6] < in[6] + 2^124 + 2^121 + 2^72 + 2^16  | 
617  |  |      *        < in[6] + 2^125  | 
618  |  |      *        < 2^128  | 
619  |  |      */  | 
620  |  |  | 
621  |  |     /* [3]: Eliminate high bits of acc[6] */  | 
622  | 0  |     temp = acc[6] >> 48;  | 
623  | 0  |     acc[6] &= 0x0000ffffffffffff;  | 
624  |  |       | 
625  |  |     /* temp < 2^80 */  | 
626  |  | 
  | 
627  | 0  |     acc[3] += temp >> 40;  | 
628  | 0  |     acc[2] += (temp & 0xffffffffff) << 16;  | 
629  | 0  |     acc[2] += temp >> 16;  | 
630  | 0  |     acc[1] += (temp & 0xffff) << 40;  | 
631  | 0  |     acc[1] -= temp >> 24;  | 
632  | 0  |     acc[0] -= (temp & 0xffffff) << 32;  | 
633  | 0  |     acc[0] += temp;  | 
634  |  |  | 
635  |  |     /*-  | 
636  |  |      * acc[k] < acc_old[k] + 2^64 + 2^56  | 
637  |  |      *        < in[k] + 2^124 + 2^121 + 2^72 + 2^64 + 2^56 + 2^16 , k < 4  | 
638  |  |      */  | 
639  |  |  | 
640  |  |     /* Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 */  | 
641  | 0  |     acc[1] += acc[0] >> 56;   /* acc[1] < acc_old[1] + 2^72 */  | 
642  | 0  |     acc[0] &= 0x00ffffffffffffff;  | 
643  |  | 
  | 
644  | 0  |     acc[2] += acc[1] >> 56;   /* acc[2] < acc_old[2] + 2^72 + 2^16 */  | 
645  | 0  |     acc[1] &= 0x00ffffffffffffff;  | 
646  |  | 
  | 
647  | 0  |     acc[3] += acc[2] >> 56;   /* acc[3] < acc_old[3] + 2^72 + 2^16 */  | 
648  | 0  |     acc[2] &= 0x00ffffffffffffff;  | 
649  |  |  | 
650  |  |     /*-  | 
651  |  |      * acc[k] < acc_old[k] + 2^72 + 2^16  | 
652  |  |      *        < in[k] + 2^124 + 2^121 + 2^73 + 2^64 + 2^56 + 2^17  | 
653  |  |      *        < in[k] + 2^125  | 
654  |  |      *        < 2^128 , k < 4  | 
655  |  |      */  | 
656  |  | 
  | 
657  | 0  |     acc[4] += acc[3] >> 56;   /*-  | 
658  |  |                                * acc[4] < acc_old[4] + 2^72 + 2^16  | 
659  |  |                                *        < 2^72 + 2^56 + 2^16  | 
660  |  |                                */  | 
661  | 0  |     acc[3] &= 0x00ffffffffffffff;  | 
662  |  | 
  | 
663  | 0  |     acc[5] += acc[4] >> 56;   /*-  | 
664  |  |                                * acc[5] < acc_old[5] + 2^16 + 1  | 
665  |  |                                *        < 2^56 + 2^16 + 1  | 
666  |  |                                */  | 
667  | 0  |     acc[4] &= 0x00ffffffffffffff;  | 
668  |  | 
  | 
669  | 0  |     acc[6] += acc[5] >> 56;   /* acc[6] < 2^48 + 1 <= 2^48 */  | 
670  | 0  |     acc[5] &= 0x00ffffffffffffff;  | 
671  |  | 
  | 
672  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
673  | 0  |         out[i] = acc[i];  | 
674  | 0  | }  | 
675  |  |  | 
676  |  | #if defined(ECP_NISTP384_ASM)  | 
677  |  | static void felem_square_wrapper(widefelem out, const felem in);  | 
678  |  | static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2);  | 
679  |  |  | 
680  |  | static void (*felem_square_p)(widefelem out, const felem in) =  | 
681  |  |     felem_square_wrapper;  | 
682  |  | static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =  | 
683  |  |     felem_mul_wrapper;  | 
684  |  |  | 
685  |  | void p384_felem_square(widefelem out, const felem in);  | 
686  |  | void p384_felem_mul(widefelem out, const felem in1, const felem in2);  | 
687  |  |  | 
688  |  | # if defined(_ARCH_PPC64)  | 
689  |  | #  include "crypto/ppc_arch.h"  | 
690  |  | # endif  | 
691  |  |  | 
692  |  | static void felem_select(void)  | 
693  |  | { | 
694  |  | # if defined(_ARCH_PPC64)  | 
695  |  |     if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) { | 
696  |  |         felem_square_p = p384_felem_square;  | 
697  |  |         felem_mul_p = p384_felem_mul;  | 
698  |  |  | 
699  |  |         return;  | 
700  |  |     }  | 
701  |  | # endif  | 
702  |  |  | 
703  |  |     /* Default */  | 
704  |  |     felem_square_p = felem_square_ref;  | 
705  |  |     felem_mul_p = felem_mul_ref;  | 
706  |  | }  | 
707  |  |  | 
708  |  | static void felem_square_wrapper(widefelem out, const felem in)  | 
709  |  | { | 
710  |  |     felem_select();  | 
711  |  |     felem_square_p(out, in);  | 
712  |  | }  | 
713  |  |  | 
714  |  | static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)  | 
715  |  | { | 
716  |  |     felem_select();  | 
717  |  |     felem_mul_p(out, in1, in2);  | 
718  |  | }  | 
719  |  |  | 
720  |  | # define felem_square felem_square_p  | 
721  |  | # define felem_mul felem_mul_p  | 
722  |  | #else  | 
723  | 0  | # define felem_square felem_square_ref  | 
724  | 0  | # define felem_mul felem_mul_ref  | 
725  |  | #endif  | 
726  |  |  | 
727  |  | static ossl_inline void felem_square_reduce(felem out, const felem in)  | 
728  | 0  | { | 
729  | 0  |     widefelem tmp;  | 
730  |  | 
  | 
731  | 0  |     felem_square(tmp, in);  | 
732  | 0  |     felem_reduce(out, tmp);  | 
733  | 0  | }  | 
734  |  |  | 
735  |  | static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2)  | 
736  | 0  | { | 
737  | 0  |     widefelem tmp;  | 
738  |  | 
  | 
739  | 0  |     felem_mul(tmp, in1, in2);  | 
740  | 0  |     felem_reduce(out, tmp);  | 
741  | 0  | }  | 
742  |  |  | 
743  |  | /*-  | 
744  |  |  * felem_inv calculates |out| = |in|^{-1} | 
745  |  |  *  | 
746  |  |  * Based on Fermat's Little Theorem:  | 
747  |  |  *   a^p = a (mod p)  | 
748  |  |  *   a^{p-1} = 1 (mod p) | 
749  |  |  *   a^{p-2} = a^{-1} (mod p) | 
750  |  |  */  | 
751  |  | static void felem_inv(felem out, const felem in)  | 
752  | 0  | { | 
753  | 0  |     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6;  | 
754  | 0  |     unsigned int i = 0;  | 
755  |  | 
  | 
756  | 0  |     felem_square_reduce(ftmp, in);      /* 2^1 */  | 
757  | 0  |     felem_mul_reduce(ftmp, ftmp, in);   /* 2^1 + 2^0 */  | 
758  | 0  |     felem_assign(ftmp2, ftmp);  | 
759  |  | 
  | 
760  | 0  |     felem_square_reduce(ftmp, ftmp);    /* 2^2 + 2^1 */  | 
761  | 0  |     felem_mul_reduce(ftmp, ftmp, in);   /* 2^2 + 2^1 * 2^0 */  | 
762  | 0  |     felem_assign(ftmp3, ftmp);  | 
763  |  | 
  | 
764  | 0  |     for (i = 0; i < 3; i++)  | 
765  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^5 + 2^4 + 2^3 */  | 
766  | 0  |     felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 */  | 
767  | 0  |     felem_assign(ftmp4, ftmp);  | 
768  |  | 
  | 
769  | 0  |     for (i = 0; i < 6; i++)  | 
770  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^11 + ... + 2^6 */  | 
771  | 0  |     felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^11 + ... + 2^0 */  | 
772  |  | 
  | 
773  | 0  |     for (i = 0; i < 3; i++)  | 
774  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^14 + ... + 2^3 */  | 
775  | 0  |     felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^14 + ... + 2^0 */  | 
776  | 0  |     felem_assign(ftmp5, ftmp);  | 
777  |  | 
  | 
778  | 0  |     for (i = 0; i < 15; i++)  | 
779  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^29 + ... + 2^15 */  | 
780  | 0  |     felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^29 + ... + 2^0 */  | 
781  | 0  |     felem_assign(ftmp6, ftmp);  | 
782  |  | 
  | 
783  | 0  |     for (i = 0; i < 30; i++)  | 
784  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^59 + ... + 2^30 */  | 
785  | 0  |     felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^59 + ... + 2^0 */  | 
786  | 0  |     felem_assign(ftmp4, ftmp);  | 
787  |  | 
  | 
788  | 0  |     for (i = 0; i < 60; i++)  | 
789  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^119 + ... + 2^60 */  | 
790  | 0  |     felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^119 + ... + 2^0 */  | 
791  | 0  |     felem_assign(ftmp4, ftmp);  | 
792  |  | 
  | 
793  | 0  |     for (i = 0; i < 120; i++)  | 
794  | 0  |       felem_square_reduce(ftmp, ftmp);   /* 2^239 + ... + 2^120 */  | 
795  | 0  |     felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^239 + ... + 2^0 */  | 
796  |  | 
  | 
797  | 0  |     for (i = 0; i < 15; i++)  | 
798  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^254 + ... + 2^15 */  | 
799  | 0  |     felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^254 + ... + 2^0 */  | 
800  |  | 
  | 
801  | 0  |     for (i = 0; i < 31; i++)  | 
802  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^285 + ... + 2^31 */  | 
803  | 0  |     felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^285 + ... + 2^31 + 2^29 + ... + 2^0 */  | 
804  |  | 
  | 
805  | 0  |     for (i = 0; i < 2; i++)  | 
806  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^2 */  | 
807  | 0  |     felem_mul_reduce(ftmp, ftmp2, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^0 */  | 
808  |  | 
  | 
809  | 0  |     for (i = 0; i < 94; i++)  | 
810  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 */  | 
811  | 0  |     felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 + 2^29 + ... + 2^0 */  | 
812  |  | 
  | 
813  | 0  |     for (i = 0; i < 2; i++)  | 
814  | 0  |         felem_square_reduce(ftmp, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 */  | 
815  | 0  |     felem_mul_reduce(ftmp, in, ftmp);    /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 + 2^0 */  | 
816  |  | 
  | 
817  | 0  |     memcpy(out, ftmp, sizeof(felem));  | 
818  | 0  | }  | 
819  |  |  | 
820  |  | /*  | 
821  |  |  * Zero-check: returns a limb with all bits set if |in| == 0 (mod p)  | 
822  |  |  * and 0 otherwise. We know that field elements are reduced to  | 
823  |  |  * 0 < in < 2p, so we only need to check two cases:  | 
824  |  |  * 0 and 2^384 - 2^128 - 2^96 + 2^32 - 1  | 
825  |  |  *   in[k] < 2^56, k < 6  | 
826  |  |  *   in[6] <= 2^48  | 
827  |  |  */  | 
828  |  | static limb felem_is_zero(const felem in)  | 
829  | 0  | { | 
830  | 0  |     limb zero, p384;  | 
831  |  | 
  | 
832  | 0  |     zero = in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6];  | 
833  | 0  |     zero = ((int64_t) (zero) - 1) >> 63;  | 
834  | 0  |     p384 = (in[0] ^ 0x000000ffffffff) | (in[1] ^ 0xffff0000000000)  | 
835  | 0  |          | (in[2] ^ 0xfffffffffeffff) | (in[3] ^ 0xffffffffffffff)  | 
836  | 0  |          | (in[4] ^ 0xffffffffffffff) | (in[5] ^ 0xffffffffffffff)  | 
837  | 0  |          | (in[6] ^ 0xffffffffffff);  | 
838  | 0  |     p384 = ((int64_t) (p384) - 1) >> 63;  | 
839  |  | 
  | 
840  | 0  |     return (zero | p384);  | 
841  | 0  | }  | 
842  |  |  | 
843  |  | static int felem_is_zero_int(const void *in)  | 
844  | 0  | { | 
845  | 0  |     return (int)(felem_is_zero(in) & ((limb) 1));  | 
846  | 0  | }  | 
847  |  |  | 
848  |  | /*-  | 
849  |  |  * felem_contract converts |in| to its unique, minimal representation.  | 
850  |  |  * Assume we've removed all redundant bits.  | 
851  |  |  * On entry:  | 
852  |  |  *   in[k] < 2^56, k < 6  | 
853  |  |  *   in[6] <= 2^48  | 
854  |  |  */  | 
855  |  | static void felem_contract(felem out, const felem in)  | 
856  | 0  | { | 
857  | 0  |     static const int64_t two56 = ((limb) 1) << 56;  | 
858  |  |  | 
859  |  |     /*  | 
860  |  |      * We know for a fact that 0 <= |in| < 2*p, for p = 2^384 - 2^128 - 2^96 + 2^32 - 1  | 
861  |  |      * Perform two successive, idempotent subtractions to reduce if |in| >= p.  | 
862  |  |      */  | 
863  |  | 
  | 
864  | 0  |     int64_t tmp[NLIMBS], cond[5], a;  | 
865  | 0  |     unsigned int i;  | 
866  |  | 
  | 
867  | 0  |     memcpy(tmp, in, sizeof(felem));  | 
868  |  |    | 
869  |  |     /* Case 1: a = 1 iff |in| >= 2^384 */  | 
870  | 0  |     a = (in[6] >> 48);  | 
871  | 0  |     tmp[0] += a;  | 
872  | 0  |     tmp[0] -= a << 32;  | 
873  | 0  |     tmp[1] += a << 40;  | 
874  | 0  |     tmp[2] += a << 16;  | 
875  | 0  |     tmp[6] &= 0x0000ffffffffffff;  | 
876  |  |  | 
877  |  |     /*  | 
878  |  |      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be  | 
879  |  |      * non-zero, so we only need one step  | 
880  |  |      */  | 
881  |  | 
  | 
882  | 0  |     a = tmp[0] >> 63;  | 
883  | 0  |     tmp[0] += a & two56;  | 
884  | 0  |     tmp[1] -= a & 1;  | 
885  |  |  | 
886  |  |     /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */  | 
887  | 0  |     tmp[2] += tmp[1] >> 56;  | 
888  | 0  |     tmp[1] &= 0x00ffffffffffffff;  | 
889  |  | 
  | 
890  | 0  |     tmp[3] += tmp[2] >> 56;  | 
891  | 0  |     tmp[2] &= 0x00ffffffffffffff;  | 
892  |  | 
  | 
893  | 0  |     tmp[4] += tmp[3] >> 56;  | 
894  | 0  |     tmp[3] &= 0x00ffffffffffffff;  | 
895  |  | 
  | 
896  | 0  |     tmp[5] += tmp[4] >> 56;  | 
897  | 0  |     tmp[4] &= 0x00ffffffffffffff;  | 
898  |  | 
  | 
899  | 0  |     tmp[6] += tmp[5] >> 56; /* tmp[6] < 2^48 */  | 
900  | 0  |     tmp[5] &= 0x00ffffffffffffff;  | 
901  |  |  | 
902  |  |     /*  | 
903  |  |      * Case 2: a = all ones if p <= |in| < 2^384, 0 otherwise  | 
904  |  |      */  | 
905  |  |  | 
906  |  |     /* 0 iff (2^129..2^383) are all one */  | 
907  | 0  |     cond[0] = ((tmp[6] | 0xff000000000000) & tmp[5] & tmp[4] & tmp[3] & (tmp[2] | 0x0000000001ffff)) + 1;  | 
908  |  |     /* 0 iff 2^128 bit is one */  | 
909  | 0  |     cond[1] = (tmp[2] | ~0x00000000010000) + 1;  | 
910  |  |     /* 0 iff (2^96..2^127) bits are all one */  | 
911  | 0  |     cond[2] = ((tmp[2] | 0xffffffffff0000) & (tmp[1] | 0x0000ffffffffff)) + 1;  | 
912  |  |     /* 0 iff (2^32..2^95) bits are all zero */  | 
913  | 0  |     cond[3] = (tmp[1] & ~0xffff0000000000) | (tmp[0] & ~((int64_t) 0x000000ffffffff));  | 
914  |  |     /* 0 iff (2^0..2^31) bits are all one */  | 
915  | 0  |     cond[4] = (tmp[0] | 0xffffff00000000) + 1;  | 
916  |  |  | 
917  |  |     /*  | 
918  |  |      * In effect, invert our conditions, so that 0 values become all 1's,  | 
919  |  |      * any non-zero value in the low-order 56 bits becomes all 0's  | 
920  |  |      */  | 
921  | 0  |     for (i = 0; i < 5; i++)  | 
922  | 0  |        cond[i] = ((cond[i] & 0x00ffffffffffffff) - 1) >> 63;  | 
923  |  |  | 
924  |  |     /*  | 
925  |  |      * The condition for determining whether in is greater than our  | 
926  |  |      * prime is given by the following condition.  | 
927  |  |      */  | 
928  |  |  | 
929  |  |     /* First subtract 2^384 - 2^129 cheaply */  | 
930  | 0  |     a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));  | 
931  | 0  |     tmp[6] &= ~a;  | 
932  | 0  |     tmp[5] &= ~a;  | 
933  | 0  |     tmp[4] &= ~a;  | 
934  | 0  |     tmp[3] &= ~a;  | 
935  | 0  |     tmp[2] &= ~a | 0x0000000001ffff;  | 
936  |  |  | 
937  |  |     /*  | 
938  |  |      * Subtract 2^128 - 2^96 by  | 
939  |  |      * means of disjoint cases.  | 
940  |  |      */  | 
941  |  |  | 
942  |  |     /* subtract 2^128 if that bit is present, and add 2^96 */  | 
943  | 0  |     a = cond[0] & cond[1];  | 
944  | 0  |     tmp[2] &= ~a | 0xfffffffffeffff;  | 
945  | 0  |     tmp[1] += a & ((int64_t) 1 << 40);  | 
946  |  |  | 
947  |  |     /* otherwise, clear bits 2^127 .. 2^96  */  | 
948  | 0  |     a = cond[0] & ~cond[1] & (cond[2] & (~cond[3] | cond[4]));  | 
949  | 0  |     tmp[2] &= ~a | 0xffffffffff0000;  | 
950  | 0  |     tmp[1] &= ~a | 0x0000ffffffffff;  | 
951  |  |  | 
952  |  |     /* finally, subtract the last 2^32 - 1 */  | 
953  | 0  |     a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));  | 
954  | 0  |     tmp[0] += a & (-((int64_t) 1 << 32) + 1);  | 
955  |  |  | 
956  |  |     /*  | 
957  |  |      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be  | 
958  |  |      * non-zero, so we only need one step  | 
959  |  |      */  | 
960  | 0  |     a = tmp[0] >> 63;  | 
961  | 0  |     tmp[0] += a & two56;  | 
962  | 0  |     tmp[1] -= a & 1;  | 
963  |  |  | 
964  |  |     /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */  | 
965  | 0  |     tmp[2] += tmp[1] >> 56;  | 
966  | 0  |     tmp[1] &= 0x00ffffffffffffff;  | 
967  |  | 
  | 
968  | 0  |     tmp[3] += tmp[2] >> 56;  | 
969  | 0  |     tmp[2] &= 0x00ffffffffffffff;  | 
970  |  | 
  | 
971  | 0  |     tmp[4] += tmp[3] >> 56;  | 
972  | 0  |     tmp[3] &= 0x00ffffffffffffff;  | 
973  |  | 
  | 
974  | 0  |     tmp[5] += tmp[4] >> 56;  | 
975  | 0  |     tmp[4] &= 0x00ffffffffffffff;  | 
976  |  | 
  | 
977  | 0  |     tmp[6] += tmp[5] >> 56;  | 
978  | 0  |     tmp[5] &= 0x00ffffffffffffff;  | 
979  |  | 
  | 
980  | 0  |     memcpy(out, tmp, sizeof(felem));  | 
981  | 0  | }  | 
982  |  |  | 
983  |  | /*-  | 
984  |  |  * Group operations  | 
985  |  |  * ----------------  | 
986  |  |  *  | 
987  |  |  * Building on top of the field operations we have the operations on the  | 
988  |  |  * elliptic curve group itself. Points on the curve are represented in Jacobian  | 
989  |  |  * coordinates  | 
990  |  |  */  | 
991  |  |  | 
992  |  | /*-  | 
993  |  |  * point_double calculates 2*(x_in, y_in, z_in)  | 
994  |  |  *  | 
995  |  |  * The method is taken from:  | 
996  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b  | 
997  |  |  *  | 
998  |  |  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.  | 
999  |  |  * while x_out == y_in is not (maybe this works, but it's not tested).  | 
1000  |  |  */  | 
1001  |  | static void  | 
1002  |  | point_double(felem x_out, felem y_out, felem z_out,  | 
1003  |  |              const felem x_in, const felem y_in, const felem z_in)  | 
1004  | 0  | { | 
1005  | 0  |     widefelem tmp, tmp2;  | 
1006  | 0  |     felem delta, gamma, beta, alpha, ftmp, ftmp2;  | 
1007  |  | 
  | 
1008  | 0  |     felem_assign(ftmp, x_in);  | 
1009  | 0  |     felem_assign(ftmp2, x_in);  | 
1010  |  |  | 
1011  |  |     /* delta = z^2 */  | 
1012  | 0  |     felem_square_reduce(delta, z_in);     /* delta[i] < 2^56 */  | 
1013  |  |  | 
1014  |  |     /* gamma = y^2 */  | 
1015  | 0  |     felem_square_reduce(gamma, y_in);     /* gamma[i] < 2^56 */  | 
1016  |  |  | 
1017  |  |     /* beta = x*gamma */  | 
1018  | 0  |     felem_mul_reduce(beta, x_in, gamma);  /* beta[i] < 2^56 */  | 
1019  |  |  | 
1020  |  |     /* alpha = 3*(x-delta)*(x+delta) */  | 
1021  | 0  |     felem_diff64(ftmp, delta);            /* ftmp[i] < 2^60 + 2^58 + 2^44 */  | 
1022  | 0  |     felem_sum64(ftmp2, delta);            /* ftmp2[i] < 2^59 */  | 
1023  | 0  |     felem_scalar64(ftmp2, 3);             /* ftmp2[i] < 2^61 */  | 
1024  | 0  |     felem_mul_reduce(alpha, ftmp, ftmp2); /* alpha[i] < 2^56 */  | 
1025  |  |  | 
1026  |  |     /* x' = alpha^2 - 8*beta */  | 
1027  | 0  |     felem_square(tmp, alpha);             /* tmp[i] < 2^115 */  | 
1028  | 0  |     felem_assign(ftmp, beta);             /* ftmp[i] < 2^56 */  | 
1029  | 0  |     felem_scalar64(ftmp, 8);              /* ftmp[i] < 2^59 */  | 
1030  | 0  |     felem_diff_128_64(tmp, ftmp);         /* tmp[i] < 2^115 + 2^64 + 2^48 */  | 
1031  | 0  |     felem_reduce(x_out, tmp);             /* x_out[i] < 2^56 */  | 
1032  |  |  | 
1033  |  |     /* z' = (y + z)^2 - gamma - delta */  | 
1034  | 0  |     felem_sum64(delta, gamma);     /* delta[i] < 2^57 */  | 
1035  | 0  |     felem_assign(ftmp, y_in);      /* ftmp[i] < 2^56 */  | 
1036  | 0  |     felem_sum64(ftmp, z_in);       /* ftmp[i] < 2^56 */  | 
1037  | 0  |     felem_square(tmp, ftmp);       /* tmp[i] < 2^115 */  | 
1038  | 0  |     felem_diff_128_64(tmp, delta); /* tmp[i] < 2^115 + 2^64 + 2^48 */  | 
1039  | 0  |     felem_reduce(z_out, tmp);      /* z_out[i] < 2^56 */  | 
1040  |  |  | 
1041  |  |     /* y' = alpha*(4*beta - x') - 8*gamma^2 */  | 
1042  | 0  |     felem_scalar64(beta, 4);       /* beta[i] < 2^58 */  | 
1043  | 0  |     felem_diff64(beta, x_out);     /* beta[i] < 2^60 + 2^58 + 2^44 */  | 
1044  | 0  |     felem_mul(tmp, alpha, beta);   /* tmp[i] < 2^119 */  | 
1045  | 0  |     felem_square(tmp2, gamma);     /* tmp2[i] < 2^115 */  | 
1046  | 0  |     felem_scalar128(tmp2, 8);      /* tmp2[i] < 2^118 */  | 
1047  | 0  |     felem_diff128(tmp, tmp2);      /* tmp[i] < 2^127 + 2^119 + 2^111 */  | 
1048  | 0  |     felem_reduce(y_out, tmp);      /* tmp[i] < 2^56 */  | 
1049  | 0  | }  | 
1050  |  |  | 
1051  |  | /* copy_conditional copies in to out iff mask is all ones. */  | 
1052  |  | static void copy_conditional(felem out, const felem in, limb mask)  | 
1053  | 0  | { | 
1054  | 0  |     unsigned int i;  | 
1055  |  | 
  | 
1056  | 0  |     for (i = 0; i < NLIMBS; i++)  | 
1057  | 0  |         out[i] ^= mask & (in[i] ^ out[i]);  | 
1058  | 0  | }  | 
1059  |  |  | 
1060  |  | /*-  | 
1061  |  |  * point_add calculates (x1, y1, z1) + (x2, y2, z2)  | 
1062  |  |  *  | 
1063  |  |  * The method is taken from  | 
1064  |  |  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,  | 
1065  |  |  * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).  | 
1066  |  |  *  | 
1067  |  |  * This function includes a branch for checking whether the two input points  | 
1068  |  |  * are equal (while not equal to the point at infinity). See comment below  | 
1069  |  |  * on constant-time.  | 
1070  |  |  */  | 
1071  |  | static void point_add(felem x3, felem y3, felem z3,  | 
1072  |  |                       const felem x1, const felem y1, const felem z1,  | 
1073  |  |                       const int mixed, const felem x2, const felem y2,  | 
1074  |  |                       const felem z2)  | 
1075  | 0  | { | 
1076  | 0  |     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;  | 
1077  | 0  |     widefelem tmp, tmp2;  | 
1078  | 0  |     limb x_equal, y_equal, z1_is_zero, z2_is_zero;  | 
1079  | 0  |     limb points_equal;  | 
1080  |  | 
  | 
1081  | 0  |     z1_is_zero = felem_is_zero(z1);  | 
1082  | 0  |     z2_is_zero = felem_is_zero(z2);  | 
1083  |  |  | 
1084  |  |     /* ftmp = z1z1 = z1**2 */  | 
1085  | 0  |     felem_square_reduce(ftmp, z1);      /* ftmp[i] < 2^56 */  | 
1086  |  | 
  | 
1087  | 0  |     if (!mixed) { | 
1088  |  |         /* ftmp2 = z2z2 = z2**2 */  | 
1089  | 0  |         felem_square_reduce(ftmp2, z2); /* ftmp2[i] < 2^56 */  | 
1090  |  |  | 
1091  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1092  | 0  |         felem_mul_reduce(ftmp3, x1, ftmp2); /* ftmp3[i] < 2^56 */  | 
1093  |  |  | 
1094  |  |         /* ftmp5 = z1 + z2 */  | 
1095  | 0  |         felem_assign(ftmp5, z1);       /* ftmp5[i] < 2^56 */  | 
1096  | 0  |         felem_sum64(ftmp5, z2);        /* ftmp5[i] < 2^57 */  | 
1097  |  |  | 
1098  |  |         /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */  | 
1099  | 0  |         felem_square(tmp, ftmp5);      /* tmp[i] < 2^117 */  | 
1100  | 0  |         felem_diff_128_64(tmp, ftmp);  /* tmp[i] < 2^117 + 2^64 + 2^48 */  | 
1101  | 0  |         felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^65 + 2^49 */  | 
1102  | 0  |         felem_reduce(ftmp5, tmp);      /* ftmp5[i] < 2^56 */  | 
1103  |  |  | 
1104  |  |         /* ftmp2 = z2 * z2z2 */  | 
1105  | 0  |         felem_mul_reduce(ftmp2, ftmp2, z2); /* ftmp2[i] < 2^56 */  | 
1106  |  |  | 
1107  |  |         /* s1 = ftmp6 = y1 * z2**3 */  | 
1108  | 0  |         felem_mul_reduce(ftmp6, y1, ftmp2); /* ftmp6[i] < 2^56 */  | 
1109  | 0  |     } else { | 
1110  |  |         /*  | 
1111  |  |          * We'll assume z2 = 1 (special case z2 = 0 is handled later)  | 
1112  |  |          */  | 
1113  |  |  | 
1114  |  |         /* u1 = ftmp3 = x1*z2z2 */  | 
1115  | 0  |         felem_assign(ftmp3, x1);     /* ftmp3[i] < 2^56 */  | 
1116  |  |  | 
1117  |  |         /* ftmp5 = 2*z1z2 */  | 
1118  | 0  |         felem_scalar(ftmp5, z1, 2);  /* ftmp5[i] < 2^57 */  | 
1119  |  |  | 
1120  |  |         /* s1 = ftmp6 = y1 * z2**3 */  | 
1121  | 0  |         felem_assign(ftmp6, y1);     /* ftmp6[i] < 2^56 */  | 
1122  | 0  |     }  | 
1123  |  |     /* ftmp3[i] < 2^56, ftmp5[i] < 2^57, ftmp6[i] < 2^56 */  | 
1124  |  |  | 
1125  |  |     /* u2 = x2*z1z1 */  | 
1126  | 0  |     felem_mul(tmp, x2, ftmp);        /* tmp[i] < 2^115 */  | 
1127  |  |  | 
1128  |  |     /* h = ftmp4 = u2 - u1 */  | 
1129  | 0  |     felem_diff_128_64(tmp, ftmp3);   /* tmp[i] < 2^115 + 2^64 + 2^48 */  | 
1130  | 0  |     felem_reduce(ftmp4, tmp);        /* ftmp[4] < 2^56 */  | 
1131  |  | 
  | 
1132  | 0  |     x_equal = felem_is_zero(ftmp4);  | 
1133  |  |  | 
1134  |  |     /* z_out = ftmp5 * h */  | 
1135  | 0  |     felem_mul_reduce(z_out, ftmp5, ftmp4);  /* z_out[i] < 2^56 */  | 
1136  |  |  | 
1137  |  |     /* ftmp = z1 * z1z1 */  | 
1138  | 0  |     felem_mul_reduce(ftmp, ftmp, z1);  /* ftmp[i] < 2^56 */  | 
1139  |  |  | 
1140  |  |     /* s2 = tmp = y2 * z1**3 */  | 
1141  | 0  |     felem_mul(tmp, y2, ftmp);      /* tmp[i] < 2^115 */  | 
1142  |  |  | 
1143  |  |     /* r = ftmp5 = (s2 - s1)*2 */  | 
1144  | 0  |     felem_diff_128_64(tmp, ftmp6); /* tmp[i] < 2^115 + 2^64 + 2^48 */  | 
1145  | 0  |     felem_reduce(ftmp5, tmp);      /* ftmp5[i] < 2^56 */  | 
1146  | 0  |     y_equal = felem_is_zero(ftmp5);  | 
1147  | 0  |     felem_scalar64(ftmp5, 2);      /* ftmp5[i] < 2^57 */  | 
1148  |  |  | 
1149  |  |     /*  | 
1150  |  |      * The formulae are incorrect if the points are equal, in affine coordinates  | 
1151  |  |      * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this  | 
1152  |  |      * happens.  | 
1153  |  |      *  | 
1154  |  |      * We use bitwise operations to avoid potential side-channels introduced by  | 
1155  |  |      * the short-circuiting behaviour of boolean operators.  | 
1156  |  |      *  | 
1157  |  |      * The special case of either point being the point at infinity (z1 and/or  | 
1158  |  |      * z2 are zero), is handled separately later on in this function, so we  | 
1159  |  |      * avoid jumping to point_double here in those special cases.  | 
1160  |  |      *  | 
1161  |  |      * Notice the comment below on the implications of this branching for timing  | 
1162  |  |      * leaks and why it is considered practically irrelevant.  | 
1163  |  |      */  | 
1164  | 0  |     points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));  | 
1165  |  | 
  | 
1166  | 0  |     if (points_equal) { | 
1167  |  |         /*  | 
1168  |  |          * This is obviously not constant-time but it will almost-never happen  | 
1169  |  |          * for ECDH / ECDSA.  | 
1170  |  |          */  | 
1171  | 0  |         point_double(x3, y3, z3, x1, y1, z1);  | 
1172  | 0  |         return;  | 
1173  | 0  |     }  | 
1174  |  |  | 
1175  |  |     /* I = ftmp = (2h)**2 */  | 
1176  | 0  |     felem_assign(ftmp, ftmp4);        /* ftmp[i] < 2^56 */  | 
1177  | 0  |     felem_scalar64(ftmp, 2);          /* ftmp[i] < 2^57 */  | 
1178  | 0  |     felem_square_reduce(ftmp, ftmp);  /* ftmp[i] < 2^56 */  | 
1179  |  |  | 
1180  |  |     /* J = ftmp2 = h * I */  | 
1181  | 0  |     felem_mul_reduce(ftmp2, ftmp4, ftmp); /* ftmp2[i] < 2^56 */  | 
1182  |  |  | 
1183  |  |     /* V = ftmp4 = U1 * I */  | 
1184  | 0  |     felem_mul_reduce(ftmp4, ftmp3, ftmp); /* ftmp4[i] < 2^56 */  | 
1185  |  |  | 
1186  |  |     /* x_out = r**2 - J - 2V */  | 
1187  | 0  |     felem_square(tmp, ftmp5);      /* tmp[i] < 2^117 */  | 
1188  | 0  |     felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^64 + 2^48 */  | 
1189  | 0  |     felem_assign(ftmp3, ftmp4);    /* ftmp3[i] < 2^56 */  | 
1190  | 0  |     felem_scalar64(ftmp4, 2);      /* ftmp4[i] < 2^57 */  | 
1191  | 0  |     felem_diff_128_64(tmp, ftmp4); /* tmp[i] < 2^117 + 2^65 + 2^49 */  | 
1192  | 0  |     felem_reduce(x_out, tmp);      /* x_out[i] < 2^56 */  | 
1193  |  |  | 
1194  |  |     /* y_out = r(V-x_out) - 2 * s1 * J */  | 
1195  | 0  |     felem_diff64(ftmp3, x_out);    /* ftmp3[i] < 2^60 + 2^56 + 2^44 */  | 
1196  | 0  |     felem_mul(tmp, ftmp5, ftmp3);  /* tmp[i] < 2^116 */  | 
1197  | 0  |     felem_mul(tmp2, ftmp6, ftmp2); /* tmp2[i] < 2^115 */  | 
1198  | 0  |     felem_scalar128(tmp2, 2);      /* tmp2[i] < 2^116 */  | 
1199  | 0  |     felem_diff128(tmp, tmp2);      /* tmp[i] < 2^127 + 2^116 + 2^111 */  | 
1200  | 0  |     felem_reduce(y_out, tmp);      /* y_out[i] < 2^56 */  | 
1201  |  | 
  | 
1202  | 0  |     copy_conditional(x_out, x2, z1_is_zero);  | 
1203  | 0  |     copy_conditional(x_out, x1, z2_is_zero);  | 
1204  | 0  |     copy_conditional(y_out, y2, z1_is_zero);  | 
1205  | 0  |     copy_conditional(y_out, y1, z2_is_zero);  | 
1206  | 0  |     copy_conditional(z_out, z2, z1_is_zero);  | 
1207  | 0  |     copy_conditional(z_out, z1, z2_is_zero);  | 
1208  | 0  |     felem_assign(x3, x_out);  | 
1209  | 0  |     felem_assign(y3, y_out);  | 
1210  | 0  |     felem_assign(z3, z_out);  | 
1211  | 0  | }  | 
1212  |  |  | 
1213  |  | /*-  | 
1214  |  |  * Base point pre computation  | 
1215  |  |  * --------------------------  | 
1216  |  |  *  | 
1217  |  |  * Two different sorts of precomputed tables are used in the following code.  | 
1218  |  |  * Each contain various points on the curve, where each point is three field  | 
1219  |  |  * elements (x, y, z).  | 
1220  |  |  *  | 
1221  |  |  * For the base point table, z is usually 1 (0 for the point at infinity).  | 
1222  |  |  * This table has 16 elements:  | 
1223  |  |  * index | bits    | point  | 
1224  |  |  * ------+---------+------------------------------  | 
1225  |  |  *     0 | 0 0 0 0 | 0G  | 
1226  |  |  *     1 | 0 0 0 1 | 1G  | 
1227  |  |  *     2 | 0 0 1 0 | 2^95G  | 
1228  |  |  *     3 | 0 0 1 1 | (2^95 + 1)G  | 
1229  |  |  *     4 | 0 1 0 0 | 2^190G  | 
1230  |  |  *     5 | 0 1 0 1 | (2^190 + 1)G  | 
1231  |  |  *     6 | 0 1 1 0 | (2^190 + 2^95)G  | 
1232  |  |  *     7 | 0 1 1 1 | (2^190 + 2^95 + 1)G  | 
1233  |  |  *     8 | 1 0 0 0 | 2^285G  | 
1234  |  |  *     9 | 1 0 0 1 | (2^285 + 1)G  | 
1235  |  |  *    10 | 1 0 1 0 | (2^285 + 2^95)G  | 
1236  |  |  *    11 | 1 0 1 1 | (2^285 + 2^95 + 1)G  | 
1237  |  |  *    12 | 1 1 0 0 | (2^285 + 2^190)G  | 
1238  |  |  *    13 | 1 1 0 1 | (2^285 + 2^190 + 1)G  | 
1239  |  |  *    14 | 1 1 1 0 | (2^285 + 2^190 + 2^95)G  | 
1240  |  |  *    15 | 1 1 1 1 | (2^285 + 2^190 + 2^95 + 1)G  | 
1241  |  |  *  | 
1242  |  |  * The reason for this is so that we can clock bits into four different  | 
1243  |  |  * locations when doing simple scalar multiplies against the base point.  | 
1244  |  |  *  | 
1245  |  |  * Tables for other points have table[i] = iG for i in 0 .. 16.  | 
1246  |  |  */  | 
1247  |  |  | 
1248  |  | /* gmul is the table of precomputed base points */  | 
1249  |  | static const felem gmul[16][3] = { | 
1250  |  | {{0, 0, 0, 0, 0, 0, 0}, | 
1251  |  |  {0, 0, 0, 0, 0, 0, 0}, | 
1252  |  |  {0, 0, 0, 0, 0, 0, 0}}, | 
1253  |  | {{0x00545e3872760ab7, 0x00f25dbf55296c3a, 0x00e082542a385502, 0x008ba79b9859f741, | 
1254  |  |   0x0020ad746e1d3b62, 0x0005378eb1c71ef3, 0x0000aa87ca22be8b},  | 
1255  |  |  {0x00431d7c90ea0e5f, 0x00b1ce1d7e819d7a, 0x0013b5f0b8c00a60, 0x00289a147ce9da31, | 
1256  |  |   0x0092dc29f8f41dbd, 0x002c6f5d9e98bf92, 0x00003617de4a9626},  | 
1257  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1258  |  | {{0x00024711cc902a90, 0x00acb2e579ab4fe1, 0x00af818a4b4d57b1, 0x00a17c7bec49c3de, | 
1259  |  |   0x004280482d726a8b, 0x00128dd0f0a90f3b, 0x00004387c1c3fa3c},  | 
1260  |  |  {0x002ce76543cf5c3a, 0x00de6cee5ef58f0a, 0x00403e42fa561ca6, 0x00bc54d6f9cb9731, | 
1261  |  |   0x007155f925fb4ff1, 0x004a9ce731b7b9bc, 0x00002609076bd7b2},  | 
1262  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1263  |  | {{0x00e74c9182f0251d, 0x0039bf54bb111974, 0x00b9d2f2eec511d2, 0x0036b1594eb3a6a4, | 
1264  |  |   0x00ac3bb82d9d564b, 0x00f9313f4615a100, 0x00006716a9a91b10},  | 
1265  |  |  {0x0046698116e2f15c, 0x00f34347067d3d33, 0x008de4ccfdebd002, 0x00e838c6b8e8c97b, | 
1266  |  |   0x006faf0798def346, 0x007349794a57563c, 0x00002629e7e6ad84},  | 
1267  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1268  |  | {{0x0075300e34fd163b, 0x0092e9db4e8d0ad3, 0x00254be9f625f760, 0x00512c518c72ae68, | 
1269  |  |   0x009bfcf162bede5a, 0x00bf9341566ce311, 0x0000cd6175bd41cf},  | 
1270  |  |  {0x007dfe52af4ac70f, 0x0002159d2d5c4880, 0x00b504d16f0af8d0, 0x0014585e11f5e64c, | 
1271  |  |   0x0089c6388e030967, 0x00ffb270cbfa5f71, 0x00009a15d92c3947},  | 
1272  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1273  |  | {{0x0033fc1278dc4fe5, 0x00d53088c2caa043, 0x0085558827e2db66, 0x00c192bef387b736, | 
1274  |  |   0x00df6405a2225f2c, 0x0075205aa90fd91a, 0x0000137e3f12349d},  | 
1275  |  |  {0x00ce5b115efcb07e, 0x00abc3308410deeb, 0x005dc6fc1de39904, 0x00907c1c496f36b4, | 
1276  |  |   0x0008e6ad3926cbe1, 0x00110747b787928c, 0x0000021b9162eb7e},  | 
1277  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1278  |  | {{0x008180042cfa26e1, 0x007b826a96254967, 0x0082473694d6b194, 0x007bd6880a45b589, | 
1279  |  |   0x00c0a5097072d1a3, 0x0019186555e18b4e, 0x000020278190e5ca},  | 
1280  |  |  {0x00b4bef17de61ac0, 0x009535e3c38ed348, 0x002d4aa8e468ceab, 0x00ef40b431036ad3, | 
1281  |  |   0x00defd52f4542857, 0x0086edbf98234266, 0x00002025b3a7814d},  | 
1282  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1283  |  | {{0x00b238aa97b886be, 0x00ef3192d6dd3a32, 0x0079f9e01fd62df8, 0x00742e890daba6c5, | 
1284  |  |   0x008e5289144408ce, 0x0073bbcc8e0171a5, 0x0000c4fd329d3b52},  | 
1285  |  |  {0x00c6f64a15ee23e7, 0x00dcfb7b171cad8b, 0x00039f6cbd805867, 0x00de024e428d4562, | 
1286  |  |   0x00be6a594d7c64c5, 0x0078467b70dbcd64, 0x0000251f2ed7079b},  | 
1287  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1288  |  | {{0x000e5cc25fc4b872, 0x005ebf10d31ef4e1, 0x0061e0ebd11e8256, 0x0076e026096f5a27, | 
1289  |  |   0x0013e6fc44662e9a, 0x0042b00289d3597e, 0x000024f089170d88},  | 
1290  |  |  {0x001604d7e0effbe6, 0x0048d77cba64ec2c, 0x008166b16da19e36, 0x006b0d1a0f28c088, | 
1291  |  |   0x000259fcd47754fd, 0x00cc643e4d725f9a, 0x00007b10f3c79c14},  | 
1292  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1293  |  | {{0x00430155e3b908af, 0x00b801e4fec25226, 0x00b0d4bcfe806d26, 0x009fc4014eb13d37, | 
1294  |  |   0x0066c94e44ec07e8, 0x00d16adc03874ba2, 0x000030c917a0d2a7},  | 
1295  |  |  {0x00edac9e21eb891c, 0x00ef0fb768102eff, 0x00c088cef272a5f3, 0x00cbf782134e2964, | 
1296  |  |   0x0001044a7ba9a0e3, 0x00e363f5b194cf3c, 0x00009ce85249e372},  | 
1297  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1298  |  | {{0x001dd492dda5a7eb, 0x008fd577be539fd1, 0x002ff4b25a5fc3f1, 0x0074a8a1b64df72f, | 
1299  |  |   0x002ba3d8c204a76c, 0x009d5cff95c8235a, 0x0000e014b9406e0f},  | 
1300  |  |  {0x008c2e4dbfc98aba, 0x00f30bb89f1a1436, 0x00b46f7aea3e259c, 0x009224454ac02f54, | 
1301  |  |   0x00906401f5645fa2, 0x003a1d1940eabc77, 0x00007c9351d680e6},  | 
1302  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1303  |  | {{0x005a35d872ef967c, 0x0049f1b7884e1987, 0x0059d46d7e31f552, 0x00ceb4869d2d0fb6, | 
1304  |  |   0x00e8e89eee56802a, 0x0049d806a774aaf2, 0x0000147e2af0ae24},  | 
1305  |  |  {0x005fd1bd852c6e5e, 0x00b674b7b3de6885, 0x003b9ea5eb9b6c08, 0x005c9f03babf3ef7, | 
1306  |  |   0x00605337fecab3c7, 0x009a3f85b11bbcc8, 0x0000455470f330ec},  | 
1307  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1308  |  | {{0x002197ff4d55498d, 0x00383e8916c2d8af, 0x00eb203f34d1c6d2, 0x0080367cbd11b542, | 
1309  |  |   0x00769b3be864e4f5, 0x0081a8458521c7bb, 0x0000c531b34d3539},  | 
1310  |  |  {0x00e2a3d775fa2e13, 0x00534fc379573844, 0x00ff237d2a8db54a, 0x00d301b2335a8882, | 
1311  |  |   0x000f75ea96103a80, 0x0018fecb3cdd96fa, 0x0000304bf61e94eb},  | 
1312  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1313  |  | {{0x00b2afc332a73dbd, 0x0029a0d5bb007bc5, 0x002d628eb210f577, 0x009f59a36dd05f50, | 
1314  |  |   0x006d339de4eca613, 0x00c75a71addc86bc, 0x000060384c5ea93c},  | 
1315  |  |  {0x00aa9641c32a30b4, 0x00cc73ae8cce565d, 0x00ec911a4df07f61, 0x00aa4b762ea4b264, | 
1316  |  |   0x0096d395bb393629, 0x004efacfb7632fe0, 0x00006f252f46fa3f},  | 
1317  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1318  |  | {{0x00567eec597c7af6, 0x0059ba6795204413, 0x00816d4e6f01196f, 0x004ae6b3eb57951d, | 
1319  |  |   0x00420f5abdda2108, 0x003401d1f57ca9d9, 0x0000cf5837b0b67a},  | 
1320  |  |  {0x00eaa64b8aeeabf9, 0x00246ddf16bcb4de, 0x000e7e3c3aecd751, 0x0008449f04fed72e, | 
1321  |  |   0x00307b67ccf09183, 0x0017108c3556b7b1, 0x0000229b2483b3bf},  | 
1322  |  |  {1, 0, 0, 0, 0, 0, 0}}, | 
1323  |  | {{0x00e7c491a7bb78a1, 0x00eafddd1d3049ab, 0x00352c05e2bc7c98, 0x003d6880c165fa5c, | 
1324  |  |   0x00b6ac61cc11c97d, 0x00beeb54fcf90ce5, 0x0000dc1f0b455edc},  | 
1325  |  |  {0x002db2e7aee34d60, 0x0073b5f415a2d8c0, 0x00dd84e4193e9a0c, 0x00d02d873467c572, | 
1326  |  |   0x0018baaeda60aee5, 0x0013fb11d697c61e, 0x000083aafcc3a973},  | 
1327  |  |  {1, 0, 0, 0, 0, 0, 0}} | 
1328  |  | };  | 
1329  |  |  | 
1330  |  | /*  | 
1331  |  |  * select_point selects the |idx|th point from a precomputation table and  | 
1332  |  |  * copies it to out.  | 
1333  |  |  *  | 
1334  |  |  * pre_comp below is of the size provided in |size|.  | 
1335  |  |  */  | 
1336  |  | static void select_point(const limb idx, unsigned int size,  | 
1337  |  |                          const felem pre_comp[][3], felem out[3])  | 
1338  | 0  | { | 
1339  | 0  |     unsigned int i, j;  | 
1340  | 0  |     limb *outlimbs = &out[0][0];  | 
1341  |  | 
  | 
1342  | 0  |     memset(out, 0, sizeof(*out) * 3);  | 
1343  |  | 
  | 
1344  | 0  |     for (i = 0; i < size; i++) { | 
1345  | 0  |         const limb *inlimbs = &pre_comp[i][0][0];  | 
1346  | 0  |         limb mask = i ^ idx;  | 
1347  |  | 
  | 
1348  | 0  |         mask |= mask >> 4;  | 
1349  | 0  |         mask |= mask >> 2;  | 
1350  | 0  |         mask |= mask >> 1;  | 
1351  | 0  |         mask &= 1;  | 
1352  | 0  |         mask--;  | 
1353  | 0  |         for (j = 0; j < NLIMBS * 3; j++)  | 
1354  | 0  |             outlimbs[j] |= inlimbs[j] & mask;  | 
1355  | 0  |     }  | 
1356  | 0  | }  | 
1357  |  |  | 
1358  |  | /* get_bit returns the |i|th bit in |in| */  | 
1359  |  | static char get_bit(const felem_bytearray in, int i)  | 
1360  | 0  | { | 
1361  | 0  |     if (i < 0 || i >= 384)  | 
1362  | 0  |         return 0;  | 
1363  | 0  |     return (in[i >> 3] >> (i & 7)) & 1;  | 
1364  | 0  | }  | 
1365  |  |  | 
1366  |  | /*  | 
1367  |  |  * Interleaved point multiplication using precomputed point multiples: The  | 
1368  |  |  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars  | 
1369  |  |  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the  | 
1370  |  |  * generator, using certain (large) precomputed multiples in g_pre_comp.  | 
1371  |  |  * Output point (X, Y, Z) is stored in x_out, y_out, z_out  | 
1372  |  |  */  | 
1373  |  | static void batch_mul(felem x_out, felem y_out, felem z_out,  | 
1374  |  |                       const felem_bytearray scalars[],  | 
1375  |  |                       const unsigned int num_points, const u8 *g_scalar,  | 
1376  |  |                       const int mixed, const felem pre_comp[][17][3],  | 
1377  |  |                       const felem g_pre_comp[16][3])  | 
1378  | 0  | { | 
1379  | 0  |     int i, skip;  | 
1380  | 0  |     unsigned int num, gen_mul = (g_scalar != NULL);  | 
1381  | 0  |     felem nq[3], tmp[4];  | 
1382  | 0  |     limb bits;  | 
1383  | 0  |     u8 sign, digit;  | 
1384  |  |  | 
1385  |  |     /* set nq to the point at infinity */  | 
1386  | 0  |     memset(nq, 0, sizeof(nq));  | 
1387  |  |  | 
1388  |  |     /*  | 
1389  |  |      * Loop over all scalars msb-to-lsb, interleaving additions of multiples  | 
1390  |  |      * of the generator (last quarter of rounds) and additions of other  | 
1391  |  |      * points multiples (every 5th round).  | 
1392  |  |      */  | 
1393  | 0  |     skip = 1;                   /* save two point operations in the first  | 
1394  |  |                                  * round */  | 
1395  | 0  |     for (i = (num_points ? 380 : 98); i >= 0; --i) { | 
1396  |  |         /* double */  | 
1397  | 0  |         if (!skip)  | 
1398  | 0  |             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);  | 
1399  |  |  | 
1400  |  |         /* add multiples of the generator */  | 
1401  | 0  |         if (gen_mul && (i <= 98)) { | 
1402  | 0  |             bits = get_bit(g_scalar, i + 285) << 3;  | 
1403  | 0  |             if (i < 95) { | 
1404  | 0  |                 bits |= get_bit(g_scalar, i + 190) << 2;  | 
1405  | 0  |                 bits |= get_bit(g_scalar, i + 95) << 1;  | 
1406  | 0  |                 bits |= get_bit(g_scalar, i);  | 
1407  | 0  |             }  | 
1408  |  |             /* select the point to add, in constant time */  | 
1409  | 0  |             select_point(bits, 16, g_pre_comp, tmp);  | 
1410  | 0  |             if (!skip) { | 
1411  |  |                 /* The 1 argument below is for "mixed" */  | 
1412  | 0  |                 point_add(nq[0],  nq[1],  nq[2],  | 
1413  | 0  |                           nq[0],  nq[1],  nq[2], 1,  | 
1414  | 0  |                           tmp[0], tmp[1], tmp[2]);  | 
1415  | 0  |             } else { | 
1416  | 0  |                 memcpy(nq, tmp, 3 * sizeof(felem));  | 
1417  | 0  |                 skip = 0;  | 
1418  | 0  |             }  | 
1419  | 0  |         }  | 
1420  |  |  | 
1421  |  |         /* do other additions every 5 doublings */  | 
1422  | 0  |         if (num_points && (i % 5 == 0)) { | 
1423  |  |             /* loop over all scalars */  | 
1424  | 0  |             for (num = 0; num < num_points; ++num) { | 
1425  | 0  |                 bits = get_bit(scalars[num], i + 4) << 5;  | 
1426  | 0  |                 bits |= get_bit(scalars[num], i + 3) << 4;  | 
1427  | 0  |                 bits |= get_bit(scalars[num], i + 2) << 3;  | 
1428  | 0  |                 bits |= get_bit(scalars[num], i + 1) << 2;  | 
1429  | 0  |                 bits |= get_bit(scalars[num], i) << 1;  | 
1430  | 0  |                 bits |= get_bit(scalars[num], i - 1);  | 
1431  | 0  |                 ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);  | 
1432  |  |  | 
1433  |  |                 /*  | 
1434  |  |                  * select the point to add or subtract, in constant time  | 
1435  |  |                  */  | 
1436  | 0  |                 select_point(digit, 17, pre_comp[num], tmp);  | 
1437  | 0  |                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative  | 
1438  |  |                                             * point */  | 
1439  | 0  |                 copy_conditional(tmp[1], tmp[3], (-(limb) sign));  | 
1440  |  | 
  | 
1441  | 0  |                 if (!skip) { | 
1442  | 0  |                     point_add(nq[0],  nq[1],  nq[2],  | 
1443  | 0  |                               nq[0],  nq[1],  nq[2], mixed,  | 
1444  | 0  |                               tmp[0], tmp[1], tmp[2]);  | 
1445  | 0  |                 } else { | 
1446  | 0  |                     memcpy(nq, tmp, 3 * sizeof(felem));  | 
1447  | 0  |                     skip = 0;  | 
1448  | 0  |                 }  | 
1449  | 0  |             }  | 
1450  | 0  |         }  | 
1451  | 0  |     }  | 
1452  | 0  |     felem_assign(x_out, nq[0]);  | 
1453  | 0  |     felem_assign(y_out, nq[1]);  | 
1454  | 0  |     felem_assign(z_out, nq[2]);  | 
1455  | 0  | }  | 
1456  |  |  | 
1457  |  | /* Precomputation for the group generator. */  | 
1458  |  | struct nistp384_pre_comp_st { | 
1459  |  |     felem g_pre_comp[16][3];  | 
1460  |  |     CRYPTO_REF_COUNT references;  | 
1461  |  | };  | 
1462  |  |  | 
1463  |  | const EC_METHOD *ossl_ec_GFp_nistp384_method(void)  | 
1464  | 0  | { | 
1465  | 0  |     static const EC_METHOD ret = { | 
1466  | 0  |         EC_FLAGS_DEFAULT_OCT,  | 
1467  | 0  |         NID_X9_62_prime_field,  | 
1468  | 0  |         ossl_ec_GFp_nistp384_group_init,  | 
1469  | 0  |         ossl_ec_GFp_simple_group_finish,  | 
1470  | 0  |         ossl_ec_GFp_simple_group_clear_finish,  | 
1471  | 0  |         ossl_ec_GFp_nist_group_copy,  | 
1472  | 0  |         ossl_ec_GFp_nistp384_group_set_curve,  | 
1473  | 0  |         ossl_ec_GFp_simple_group_get_curve,  | 
1474  | 0  |         ossl_ec_GFp_simple_group_get_degree,  | 
1475  | 0  |         ossl_ec_group_simple_order_bits,  | 
1476  | 0  |         ossl_ec_GFp_simple_group_check_discriminant,  | 
1477  | 0  |         ossl_ec_GFp_simple_point_init,  | 
1478  | 0  |         ossl_ec_GFp_simple_point_finish,  | 
1479  | 0  |         ossl_ec_GFp_simple_point_clear_finish,  | 
1480  | 0  |         ossl_ec_GFp_simple_point_copy,  | 
1481  | 0  |         ossl_ec_GFp_simple_point_set_to_infinity,  | 
1482  | 0  |         ossl_ec_GFp_simple_point_set_affine_coordinates,  | 
1483  | 0  |         ossl_ec_GFp_nistp384_point_get_affine_coordinates,  | 
1484  | 0  |         0, /* point_set_compressed_coordinates */  | 
1485  | 0  |         0, /* point2oct */  | 
1486  | 0  |         0, /* oct2point */  | 
1487  | 0  |         ossl_ec_GFp_simple_add,  | 
1488  | 0  |         ossl_ec_GFp_simple_dbl,  | 
1489  | 0  |         ossl_ec_GFp_simple_invert,  | 
1490  | 0  |         ossl_ec_GFp_simple_is_at_infinity,  | 
1491  | 0  |         ossl_ec_GFp_simple_is_on_curve,  | 
1492  | 0  |         ossl_ec_GFp_simple_cmp,  | 
1493  | 0  |         ossl_ec_GFp_simple_make_affine,  | 
1494  | 0  |         ossl_ec_GFp_simple_points_make_affine,  | 
1495  | 0  |         ossl_ec_GFp_nistp384_points_mul,  | 
1496  | 0  |         ossl_ec_GFp_nistp384_precompute_mult,  | 
1497  | 0  |         ossl_ec_GFp_nistp384_have_precompute_mult,  | 
1498  | 0  |         ossl_ec_GFp_nist_field_mul,  | 
1499  | 0  |         ossl_ec_GFp_nist_field_sqr,  | 
1500  | 0  |         0, /* field_div */  | 
1501  | 0  |         ossl_ec_GFp_simple_field_inv,  | 
1502  | 0  |         0, /* field_encode */  | 
1503  | 0  |         0, /* field_decode */  | 
1504  | 0  |         0, /* field_set_to_one */  | 
1505  | 0  |         ossl_ec_key_simple_priv2oct,  | 
1506  | 0  |         ossl_ec_key_simple_oct2priv,  | 
1507  | 0  |         0, /* set private */  | 
1508  | 0  |         ossl_ec_key_simple_generate_key,  | 
1509  | 0  |         ossl_ec_key_simple_check_key,  | 
1510  | 0  |         ossl_ec_key_simple_generate_public_key,  | 
1511  | 0  |         0, /* keycopy */  | 
1512  | 0  |         0, /* keyfinish */  | 
1513  | 0  |         ossl_ecdh_simple_compute_key,  | 
1514  | 0  |         ossl_ecdsa_simple_sign_setup,  | 
1515  | 0  |         ossl_ecdsa_simple_sign_sig,  | 
1516  | 0  |         ossl_ecdsa_simple_verify_sig,  | 
1517  | 0  |         0, /* field_inverse_mod_ord */  | 
1518  | 0  |         0, /* blind_coordinates */  | 
1519  | 0  |         0, /* ladder_pre */  | 
1520  | 0  |         0, /* ladder_step */  | 
1521  | 0  |         0  /* ladder_post */  | 
1522  | 0  |     };  | 
1523  |  | 
  | 
1524  | 0  |     return &ret;  | 
1525  | 0  | }  | 
1526  |  |  | 
1527  |  | /******************************************************************************/  | 
1528  |  | /*  | 
1529  |  |  * FUNCTIONS TO MANAGE PRECOMPUTATION  | 
1530  |  |  */  | 
1531  |  |  | 
1532  |  | static NISTP384_PRE_COMP *nistp384_pre_comp_new(void)  | 
1533  | 0  | { | 
1534  | 0  |     NISTP384_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));  | 
1535  |  | 
  | 
1536  | 0  |     if (ret == NULL)  | 
1537  | 0  |         return ret;  | 
1538  |  |  | 
1539  | 0  |     if (!CRYPTO_NEW_REF(&ret->references, 1)) { | 
1540  | 0  |         OPENSSL_free(ret);  | 
1541  | 0  |         return NULL;  | 
1542  | 0  |     }  | 
1543  | 0  |     return ret;  | 
1544  | 0  | }  | 
1545  |  |  | 
1546  |  | NISTP384_PRE_COMP *ossl_ec_nistp384_pre_comp_dup(NISTP384_PRE_COMP *p)  | 
1547  | 0  | { | 
1548  | 0  |     int i;  | 
1549  |  | 
  | 
1550  | 0  |     if (p != NULL)  | 
1551  | 0  |         CRYPTO_UP_REF(&p->references, &i);  | 
1552  | 0  |     return p;  | 
1553  | 0  | }  | 
1554  |  |  | 
1555  |  | void ossl_ec_nistp384_pre_comp_free(NISTP384_PRE_COMP *p)  | 
1556  | 0  | { | 
1557  | 0  |     int i;  | 
1558  |  | 
  | 
1559  | 0  |     if (p == NULL)  | 
1560  | 0  |         return;  | 
1561  |  |  | 
1562  | 0  |     CRYPTO_DOWN_REF(&p->references, &i);  | 
1563  | 0  |     REF_PRINT_COUNT("ossl_ec_nistp384", p); | 
1564  | 0  |     if (i > 0)  | 
1565  | 0  |         return;  | 
1566  | 0  |     REF_ASSERT_ISNT(i < 0);  | 
1567  |  | 
  | 
1568  | 0  |     CRYPTO_FREE_REF(&p->references);  | 
1569  | 0  |     OPENSSL_free(p);  | 
1570  | 0  | }  | 
1571  |  |  | 
1572  |  | /******************************************************************************/  | 
1573  |  | /*  | 
1574  |  |  * OPENSSL EC_METHOD FUNCTIONS  | 
1575  |  |  */  | 
1576  |  |  | 
1577  |  | int ossl_ec_GFp_nistp384_group_init(EC_GROUP *group)  | 
1578  | 0  | { | 
1579  | 0  |     int ret;  | 
1580  |  | 
  | 
1581  | 0  |     ret = ossl_ec_GFp_simple_group_init(group);  | 
1582  | 0  |     group->a_is_minus3 = 1;  | 
1583  | 0  |     return ret;  | 
1584  | 0  | }  | 
1585  |  |  | 
1586  |  | int ossl_ec_GFp_nistp384_group_set_curve(EC_GROUP *group, const BIGNUM *p,  | 
1587  |  |                                          const BIGNUM *a, const BIGNUM *b,  | 
1588  |  |                                          BN_CTX *ctx)  | 
1589  | 0  | { | 
1590  | 0  |     int ret = 0;  | 
1591  | 0  |     BIGNUM *curve_p, *curve_a, *curve_b;  | 
1592  | 0  | #ifndef FIPS_MODULE  | 
1593  | 0  |     BN_CTX *new_ctx = NULL;  | 
1594  |  | 
  | 
1595  | 0  |     if (ctx == NULL)  | 
1596  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
1597  | 0  | #endif  | 
1598  | 0  |     if (ctx == NULL)  | 
1599  | 0  |         return 0;  | 
1600  |  |  | 
1601  | 0  |     BN_CTX_start(ctx);  | 
1602  | 0  |     curve_p = BN_CTX_get(ctx);  | 
1603  | 0  |     curve_a = BN_CTX_get(ctx);  | 
1604  | 0  |     curve_b = BN_CTX_get(ctx);  | 
1605  | 0  |     if (curve_b == NULL)  | 
1606  | 0  |         goto err;  | 
1607  | 0  |     BN_bin2bn(nistp384_curve_params[0], sizeof(felem_bytearray), curve_p);  | 
1608  | 0  |     BN_bin2bn(nistp384_curve_params[1], sizeof(felem_bytearray), curve_a);  | 
1609  | 0  |     BN_bin2bn(nistp384_curve_params[2], sizeof(felem_bytearray), curve_b);  | 
1610  | 0  |     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) { | 
1611  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);  | 
1612  | 0  |         goto err;  | 
1613  | 0  |     }  | 
1614  | 0  |     group->field_mod_func = BN_nist_mod_384;  | 
1615  | 0  |     ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);  | 
1616  | 0  |  err:  | 
1617  | 0  |     BN_CTX_end(ctx);  | 
1618  | 0  | #ifndef FIPS_MODULE  | 
1619  | 0  |     BN_CTX_free(new_ctx);  | 
1620  | 0  | #endif  | 
1621  | 0  |     return ret;  | 
1622  | 0  | }  | 
1623  |  |  | 
1624  |  | /*  | 
1625  |  |  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =  | 
1626  |  |  * (X/Z^2, Y/Z^3)  | 
1627  |  |  */  | 
1628  |  | int ossl_ec_GFp_nistp384_point_get_affine_coordinates(const EC_GROUP *group,  | 
1629  |  |                                                       const EC_POINT *point,  | 
1630  |  |                                                       BIGNUM *x, BIGNUM *y,  | 
1631  |  |                                                       BN_CTX *ctx)  | 
1632  | 0  | { | 
1633  | 0  |     felem z1, z2, x_in, y_in, x_out, y_out;  | 
1634  | 0  |     widefelem tmp;  | 
1635  |  | 
  | 
1636  | 0  |     if (EC_POINT_is_at_infinity(group, point)) { | 
1637  | 0  |         ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);  | 
1638  | 0  |         return 0;  | 
1639  | 0  |     }  | 
1640  | 0  |     if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||  | 
1641  | 0  |         (!BN_to_felem(z1, point->Z)))  | 
1642  | 0  |         return 0;  | 
1643  | 0  |     felem_inv(z2, z1);  | 
1644  | 0  |     felem_square(tmp, z2);  | 
1645  | 0  |     felem_reduce(z1, tmp);  | 
1646  | 0  |     felem_mul(tmp, x_in, z1);  | 
1647  | 0  |     felem_reduce(x_in, tmp);  | 
1648  | 0  |     felem_contract(x_out, x_in);  | 
1649  | 0  |     if (x != NULL) { | 
1650  | 0  |         if (!felem_to_BN(x, x_out)) { | 
1651  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1652  | 0  |             return 0;  | 
1653  | 0  |         }  | 
1654  | 0  |     }  | 
1655  | 0  |     felem_mul(tmp, z1, z2);  | 
1656  | 0  |     felem_reduce(z1, tmp);  | 
1657  | 0  |     felem_mul(tmp, y_in, z1);  | 
1658  | 0  |     felem_reduce(y_in, tmp);  | 
1659  | 0  |     felem_contract(y_out, y_in);  | 
1660  | 0  |     if (y != NULL) { | 
1661  | 0  |         if (!felem_to_BN(y, y_out)) { | 
1662  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1663  | 0  |             return 0;  | 
1664  | 0  |         }  | 
1665  | 0  |     }  | 
1666  | 0  |     return 1;  | 
1667  | 0  | }  | 
1668  |  |  | 
1669  |  | /* points below is of size |num|, and tmp_felems is of size |num+1/ */  | 
1670  |  | static void make_points_affine(size_t num, felem points[][3],  | 
1671  |  |                                felem tmp_felems[])  | 
1672  | 0  | { | 
1673  |  |     /*  | 
1674  |  |      * Runs in constant time, unless an input is the point at infinity (which  | 
1675  |  |      * normally shouldn't happen).  | 
1676  |  |      */  | 
1677  | 0  |     ossl_ec_GFp_nistp_points_make_affine_internal(num,  | 
1678  | 0  |                                                   points,  | 
1679  | 0  |                                                   sizeof(felem),  | 
1680  | 0  |                                                   tmp_felems,  | 
1681  | 0  |                                                   (void (*)(void *))felem_one,  | 
1682  | 0  |                                                   felem_is_zero_int,  | 
1683  | 0  |                                                   (void (*)(void *, const void *))  | 
1684  | 0  |                                                   felem_assign,  | 
1685  | 0  |                                                   (void (*)(void *, const void *))  | 
1686  | 0  |                                                   felem_square_reduce,  | 
1687  | 0  |                                                   (void (*)(void *, const void *, const void*))  | 
1688  | 0  |                                                   felem_mul_reduce,  | 
1689  | 0  |                                                   (void (*)(void *, const void *))  | 
1690  | 0  |                                                   felem_inv,  | 
1691  | 0  |                                                   (void (*)(void *, const void *))  | 
1692  | 0  |                                                   felem_contract);  | 
1693  | 0  | }  | 
1694  |  |  | 
1695  |  | /*  | 
1696  |  |  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL  | 
1697  |  |  * values Result is stored in r (r can equal one of the inputs).  | 
1698  |  |  */  | 
1699  |  | int ossl_ec_GFp_nistp384_points_mul(const EC_GROUP *group, EC_POINT *r,  | 
1700  |  |                                     const BIGNUM *scalar, size_t num,  | 
1701  |  |                                     const EC_POINT *points[],  | 
1702  |  |                                     const BIGNUM *scalars[], BN_CTX *ctx)  | 
1703  | 0  | { | 
1704  | 0  |     int ret = 0;  | 
1705  | 0  |     int j;  | 
1706  | 0  |     int mixed = 0;  | 
1707  | 0  |     BIGNUM *x, *y, *z, *tmp_scalar;  | 
1708  | 0  |     felem_bytearray g_secret;  | 
1709  | 0  |     felem_bytearray *secrets = NULL;  | 
1710  | 0  |     felem (*pre_comp)[17][3] = NULL;  | 
1711  | 0  |     felem *tmp_felems = NULL;  | 
1712  | 0  |     unsigned int i;  | 
1713  | 0  |     int num_bytes;  | 
1714  | 0  |     int have_pre_comp = 0;  | 
1715  | 0  |     size_t num_points = num;  | 
1716  | 0  |     felem x_in, y_in, z_in, x_out, y_out, z_out;  | 
1717  | 0  |     NISTP384_PRE_COMP *pre = NULL;  | 
1718  | 0  |     felem(*g_pre_comp)[3] = NULL;  | 
1719  | 0  |     EC_POINT *generator = NULL;  | 
1720  | 0  |     const EC_POINT *p = NULL;  | 
1721  | 0  |     const BIGNUM *p_scalar = NULL;  | 
1722  |  | 
  | 
1723  | 0  |     BN_CTX_start(ctx);  | 
1724  | 0  |     x = BN_CTX_get(ctx);  | 
1725  | 0  |     y = BN_CTX_get(ctx);  | 
1726  | 0  |     z = BN_CTX_get(ctx);  | 
1727  | 0  |     tmp_scalar = BN_CTX_get(ctx);  | 
1728  | 0  |     if (tmp_scalar == NULL)  | 
1729  | 0  |         goto err;  | 
1730  |  |  | 
1731  | 0  |     if (scalar != NULL) { | 
1732  | 0  |         pre = group->pre_comp.nistp384;  | 
1733  | 0  |         if (pre)  | 
1734  |  |             /* we have precomputation, try to use it */  | 
1735  | 0  |             g_pre_comp = &pre->g_pre_comp[0];  | 
1736  | 0  |         else  | 
1737  |  |             /* try to use the standard precomputation */  | 
1738  | 0  |             g_pre_comp = (felem(*)[3]) gmul;  | 
1739  | 0  |         generator = EC_POINT_new(group);  | 
1740  | 0  |         if (generator == NULL)  | 
1741  | 0  |             goto err;  | 
1742  |  |         /* get the generator from precomputation */  | 
1743  | 0  |         if (!felem_to_BN(x, g_pre_comp[1][0]) ||  | 
1744  | 0  |             !felem_to_BN(y, g_pre_comp[1][1]) ||  | 
1745  | 0  |             !felem_to_BN(z, g_pre_comp[1][2])) { | 
1746  | 0  |             ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1747  | 0  |             goto err;  | 
1748  | 0  |         }  | 
1749  | 0  |         if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,  | 
1750  | 0  |                                                                 generator,  | 
1751  | 0  |                                                                 x, y, z, ctx))  | 
1752  | 0  |             goto err;  | 
1753  | 0  |         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))  | 
1754  |  |             /* precomputation matches generator */  | 
1755  | 0  |             have_pre_comp = 1;  | 
1756  | 0  |         else  | 
1757  |  |             /*  | 
1758  |  |              * we don't have valid precomputation: treat the generator as a  | 
1759  |  |              * random point  | 
1760  |  |              */  | 
1761  | 0  |             num_points++;  | 
1762  | 0  |     }  | 
1763  |  |  | 
1764  | 0  |     if (num_points > 0) { | 
1765  | 0  |         if (num_points >= 2) { | 
1766  |  |             /*  | 
1767  |  |              * unless we precompute multiples for just one point, converting  | 
1768  |  |              * those into affine form is time well spent  | 
1769  |  |              */  | 
1770  | 0  |             mixed = 1;  | 
1771  | 0  |         }  | 
1772  | 0  |         secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);  | 
1773  | 0  |         pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);  | 
1774  | 0  |         if (mixed)  | 
1775  | 0  |             tmp_felems =  | 
1776  | 0  |                 OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));  | 
1777  | 0  |         if ((secrets == NULL) || (pre_comp == NULL)  | 
1778  | 0  |             || (mixed && (tmp_felems == NULL)))  | 
1779  | 0  |             goto err;  | 
1780  |  |  | 
1781  |  |         /*  | 
1782  |  |          * we treat NULL scalars as 0, and NULL points as points at infinity,  | 
1783  |  |          * i.e., they contribute nothing to the linear combination  | 
1784  |  |          */  | 
1785  | 0  |         for (i = 0; i < num_points; ++i) { | 
1786  | 0  |             if (i == num) { | 
1787  |  |                 /*  | 
1788  |  |                  * we didn't have a valid precomputation, so we pick the  | 
1789  |  |                  * generator  | 
1790  |  |                  */  | 
1791  | 0  |                 p = EC_GROUP_get0_generator(group);  | 
1792  | 0  |                 p_scalar = scalar;  | 
1793  | 0  |             } else { | 
1794  |  |                 /* the i^th point */  | 
1795  | 0  |                 p = points[i];  | 
1796  | 0  |                 p_scalar = scalars[i];  | 
1797  | 0  |             }  | 
1798  | 0  |             if (p_scalar != NULL && p != NULL) { | 
1799  |  |                 /* reduce scalar to 0 <= scalar < 2^384 */  | 
1800  | 0  |                 if ((BN_num_bits(p_scalar) > 384)  | 
1801  | 0  |                     || (BN_is_negative(p_scalar))) { | 
1802  |  |                     /*  | 
1803  |  |                      * this is an unusual input, and we don't guarantee  | 
1804  |  |                      * constant-timeness  | 
1805  |  |                      */  | 
1806  | 0  |                     if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) { | 
1807  | 0  |                         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1808  | 0  |                         goto err;  | 
1809  | 0  |                     }  | 
1810  | 0  |                     num_bytes = BN_bn2lebinpad(tmp_scalar,  | 
1811  | 0  |                                                secrets[i], sizeof(secrets[i]));  | 
1812  | 0  |                 } else { | 
1813  | 0  |                     num_bytes = BN_bn2lebinpad(p_scalar,  | 
1814  | 0  |                                                secrets[i], sizeof(secrets[i]));  | 
1815  | 0  |                 }  | 
1816  | 0  |                 if (num_bytes < 0) { | 
1817  | 0  |                     ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1818  | 0  |                     goto err;  | 
1819  | 0  |                 }  | 
1820  |  |                 /* precompute multiples */  | 
1821  | 0  |                 if ((!BN_to_felem(x_out, p->X)) ||  | 
1822  | 0  |                     (!BN_to_felem(y_out, p->Y)) ||  | 
1823  | 0  |                     (!BN_to_felem(z_out, p->Z)))  | 
1824  | 0  |                     goto err;  | 
1825  | 0  |                 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));  | 
1826  | 0  |                 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));  | 
1827  | 0  |                 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));  | 
1828  | 0  |                 for (j = 2; j <= 16; ++j) { | 
1829  | 0  |                     if (j & 1) { | 
1830  | 0  |                         point_add(pre_comp[i][j][0],     pre_comp[i][j][1],     pre_comp[i][j][2],  | 
1831  | 0  |                                   pre_comp[i][1][0],     pre_comp[i][1][1],     pre_comp[i][1][2], 0,  | 
1832  | 0  |                                   pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);  | 
1833  | 0  |                     } else { | 
1834  | 0  |                         point_double(pre_comp[i][j][0],     pre_comp[i][j][1],     pre_comp[i][j][2],  | 
1835  | 0  |                                      pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);  | 
1836  | 0  |                     }  | 
1837  | 0  |                 }  | 
1838  | 0  |             }  | 
1839  | 0  |         }  | 
1840  | 0  |         if (mixed)  | 
1841  | 0  |             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);  | 
1842  | 0  |     }  | 
1843  |  |  | 
1844  |  |     /* the scalar for the generator */  | 
1845  | 0  |     if (scalar != NULL && have_pre_comp) { | 
1846  | 0  |         memset(g_secret, 0, sizeof(g_secret));  | 
1847  |  |         /* reduce scalar to 0 <= scalar < 2^384 */  | 
1848  | 0  |         if ((BN_num_bits(scalar) > 384) || (BN_is_negative(scalar))) { | 
1849  |  |             /*  | 
1850  |  |              * this is an unusual input, and we don't guarantee  | 
1851  |  |              * constant-timeness  | 
1852  |  |              */  | 
1853  | 0  |             if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) { | 
1854  | 0  |                 ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1855  | 0  |                 goto err;  | 
1856  | 0  |             }  | 
1857  | 0  |             num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));  | 
1858  | 0  |         } else { | 
1859  | 0  |             num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));  | 
1860  | 0  |         }  | 
1861  |  |         /* do the multiplication with generator precomputation */  | 
1862  | 0  |         batch_mul(x_out, y_out, z_out,  | 
1863  | 0  |                   (const felem_bytearray(*))secrets, num_points,  | 
1864  | 0  |                   g_secret,  | 
1865  | 0  |                   mixed, (const felem(*)[17][3])pre_comp,  | 
1866  | 0  |                   (const felem(*)[3])g_pre_comp);  | 
1867  | 0  |     } else { | 
1868  |  |         /* do the multiplication without generator precomputation */  | 
1869  | 0  |         batch_mul(x_out, y_out, z_out,  | 
1870  | 0  |                   (const felem_bytearray(*))secrets, num_points,  | 
1871  | 0  |                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);  | 
1872  | 0  |     }  | 
1873  |  |     /* reduce the output to its unique minimal representation */  | 
1874  | 0  |     felem_contract(x_in, x_out);  | 
1875  | 0  |     felem_contract(y_in, y_out);  | 
1876  | 0  |     felem_contract(z_in, z_out);  | 
1877  | 0  |     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||  | 
1878  | 0  |         (!felem_to_BN(z, z_in))) { | 
1879  | 0  |         ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);  | 
1880  | 0  |         goto err;  | 
1881  | 0  |     }  | 
1882  | 0  |     ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,  | 
1883  | 0  |                                                              ctx);  | 
1884  |  | 
  | 
1885  | 0  |  err:  | 
1886  | 0  |     BN_CTX_end(ctx);  | 
1887  | 0  |     EC_POINT_free(generator);  | 
1888  | 0  |     OPENSSL_free(secrets);  | 
1889  | 0  |     OPENSSL_free(pre_comp);  | 
1890  | 0  |     OPENSSL_free(tmp_felems);  | 
1891  | 0  |     return ret;  | 
1892  | 0  | }  | 
1893  |  |  | 
1894  |  | int ossl_ec_GFp_nistp384_precompute_mult(EC_GROUP *group, BN_CTX *ctx)  | 
1895  | 0  | { | 
1896  | 0  |     int ret = 0;  | 
1897  | 0  |     NISTP384_PRE_COMP *pre = NULL;  | 
1898  | 0  |     int i, j;  | 
1899  | 0  |     BIGNUM *x, *y;  | 
1900  | 0  |     EC_POINT *generator = NULL;  | 
1901  | 0  |     felem tmp_felems[16];  | 
1902  | 0  | #ifndef FIPS_MODULE  | 
1903  | 0  |     BN_CTX *new_ctx = NULL;  | 
1904  | 0  | #endif  | 
1905  |  |  | 
1906  |  |     /* throw away old precomputation */  | 
1907  | 0  |     EC_pre_comp_free(group);  | 
1908  |  | 
  | 
1909  | 0  | #ifndef FIPS_MODULE  | 
1910  | 0  |     if (ctx == NULL)  | 
1911  | 0  |         ctx = new_ctx = BN_CTX_new();  | 
1912  | 0  | #endif  | 
1913  | 0  |     if (ctx == NULL)  | 
1914  | 0  |         return 0;  | 
1915  |  |  | 
1916  | 0  |     BN_CTX_start(ctx);  | 
1917  | 0  |     x = BN_CTX_get(ctx);  | 
1918  | 0  |     y = BN_CTX_get(ctx);  | 
1919  | 0  |     if (y == NULL)  | 
1920  | 0  |         goto err;  | 
1921  |  |     /* get the generator */  | 
1922  | 0  |     if (group->generator == NULL)  | 
1923  | 0  |         goto err;  | 
1924  | 0  |     generator = EC_POINT_new(group);  | 
1925  | 0  |     if (generator == NULL)  | 
1926  | 0  |         goto err;  | 
1927  | 0  |     BN_bin2bn(nistp384_curve_params[3], sizeof(felem_bytearray), x);  | 
1928  | 0  |     BN_bin2bn(nistp384_curve_params[4], sizeof(felem_bytearray), y);  | 
1929  | 0  |     if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))  | 
1930  | 0  |         goto err;  | 
1931  | 0  |     if ((pre = nistp384_pre_comp_new()) == NULL)  | 
1932  | 0  |         goto err;  | 
1933  |  |     /*  | 
1934  |  |      * if the generator is the standard one, use built-in precomputation  | 
1935  |  |      */  | 
1936  | 0  |     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) { | 
1937  | 0  |         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));  | 
1938  | 0  |         goto done;  | 
1939  | 0  |     }  | 
1940  | 0  |     if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||  | 
1941  | 0  |         (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||  | 
1942  | 0  |         (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))  | 
1943  | 0  |         goto err;  | 
1944  |  |     /* compute 2^95*G, 2^190*G, 2^285*G */  | 
1945  | 0  |     for (i = 1; i <= 4; i <<= 1) { | 
1946  | 0  |         point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],  | 
1947  | 0  |                      pre->g_pre_comp[i][0],  pre->g_pre_comp[i][1],    pre->g_pre_comp[i][2]);  | 
1948  | 0  |         for (j = 0; j < 94; ++j) { | 
1949  | 0  |             point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],  | 
1950  | 0  |                          pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2]);  | 
1951  | 0  |         }  | 
1952  | 0  |     }  | 
1953  |  |     /* g_pre_comp[0] is the point at infinity */  | 
1954  | 0  |     memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));  | 
1955  |  |     /* the remaining multiples */  | 
1956  |  |     /* 2^95*G + 2^190*G */  | 
1957  | 0  |     point_add(pre->g_pre_comp[6][0],  pre->g_pre_comp[6][1],  pre->g_pre_comp[6][2],  | 
1958  | 0  |               pre->g_pre_comp[4][0],  pre->g_pre_comp[4][1],  pre->g_pre_comp[4][2], 0,  | 
1959  | 0  |               pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);  | 
1960  |  |     /* 2^95*G + 2^285*G */  | 
1961  | 0  |     point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], pre->g_pre_comp[10][2],  | 
1962  | 0  |               pre->g_pre_comp[8][0],  pre->g_pre_comp[8][1],  pre->g_pre_comp[8][2], 0,  | 
1963  | 0  |               pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);  | 
1964  |  |     /* 2^190*G + 2^285*G */  | 
1965  | 0  |     point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],  | 
1966  | 0  |               pre->g_pre_comp[8][0],  pre->g_pre_comp[8][1],  pre->g_pre_comp[8][2], 0,  | 
1967  | 0  |               pre->g_pre_comp[4][0],  pre->g_pre_comp[4][1],  pre->g_pre_comp[4][2]);  | 
1968  |  |     /* 2^95*G + 2^190*G + 2^285*G */  | 
1969  | 0  |     point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], pre->g_pre_comp[14][2],  | 
1970  | 0  |               pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 0,  | 
1971  | 0  |               pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);  | 
1972  | 0  |     for (i = 1; i < 8; ++i) { | 
1973  |  |         /* odd multiples: add G */  | 
1974  | 0  |         point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1], pre->g_pre_comp[2 * i + 1][2],  | 
1975  | 0  |                   pre->g_pre_comp[2 * i][0],     pre->g_pre_comp[2 * i][1],     pre->g_pre_comp[2 * i][2], 0,  | 
1976  | 0  |                   pre->g_pre_comp[1][0],         pre->g_pre_comp[1][1],         pre->g_pre_comp[1][2]);  | 
1977  | 0  |     }  | 
1978  | 0  |     make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);  | 
1979  |  | 
  | 
1980  | 0  |  done:  | 
1981  | 0  |     SETPRECOMP(group, nistp384, pre);  | 
1982  | 0  |     ret = 1;  | 
1983  | 0  |     pre = NULL;  | 
1984  | 0  |  err:  | 
1985  | 0  |     BN_CTX_end(ctx);  | 
1986  | 0  |     EC_POINT_free(generator);  | 
1987  | 0  | #ifndef FIPS_MODULE  | 
1988  | 0  |     BN_CTX_free(new_ctx);  | 
1989  | 0  | #endif  | 
1990  | 0  |     ossl_ec_nistp384_pre_comp_free(pre);  | 
1991  | 0  |     return ret;  | 
1992  | 0  | }  | 
1993  |  |  | 
1994  |  | int ossl_ec_GFp_nistp384_have_precompute_mult(const EC_GROUP *group)  | 
1995  | 0  | { | 
1996  | 0  |     return HAVEPRECOMP(group, nistp384);  | 
1997  | 0  | }  |