Coverage Report

Created: 2023-09-25 06:41

/src/openssl/crypto/ec/ecp_nistp384.c
Line
Count
Source
1
/*
2
 * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2023 IBM Corp.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * Designed for 56-bit limbs by Rohan McLure <rohan.mclure@linux.ibm.com>.
28
 * The layout is based on that of ecp_nistp{224,521}.c, allowing even for asm
29
 * acceleration of felem_{square,mul} as supported in these files.
30
 */
31
32
#include <openssl/e_os2.h>
33
34
#include <string.h>
35
#include <openssl/err.h>
36
#include "ec_local.h"
37
38
#include "internal/numbers.h"
39
40
#ifndef INT128_MAX
41
# error "Your compiler doesn't appear to support 128-bit integer types"
42
#endif
43
44
typedef uint8_t u8;
45
typedef uint64_t u64;
46
47
/*
48
 * The underlying field. P384 operates over GF(2^384-2^128-2^96+2^32-1). We
49
 * can serialize an element of this field into 48 bytes. We call this an
50
 * felem_bytearray.
51
 */
52
53
typedef u8 felem_bytearray[48];
54
55
/*
56
 * These are the parameters of P384, taken from FIPS 186-3, section D.1.2.4.
57
 * These values are big-endian.
58
 */
59
static const felem_bytearray nistp384_curve_params[5] = {
60
  {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
61
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
62
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
63
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF},
64
  {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a = -3 */
65
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
66
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
67
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC},
68
  {0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B, /* b */
69
   0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12,
70
   0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D,
71
   0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF},
72
  {0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, /* x */
73
   0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
74
   0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,
75
   0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7},
76
  {0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, /* y */
77
   0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
78
   0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,
79
   0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F},
80
};
81
82
/*-
83
 * The representation of field elements.
84
 * ------------------------------------
85
 *
86
 * We represent field elements with seven values. These values are either 64 or
87
 * 128 bits and the field element represented is:
88
 *   v[0]*2^0 + v[1]*2^56 + v[2]*2^112 + ... + v[6]*2^336  (mod p)
89
 * Each of the seven values is called a 'limb'. Since the limbs are spaced only
90
 * 56 bits apart, but are greater than 56 bits in length, the most significant
91
 * bits of each limb overlap with the least significant bits of the next.
92
 *
93
 * This representation is considered to be 'redundant' in the sense that
94
 * intermediate values can each contain more than a 56-bit value in each limb.
95
 * Reduction causes all but the final limb to be reduced to contain a value less
96
 * than 2^56, with the final value represented allowed to be larger than 2^384,
97
 * inasmuch as we can be sure that arithmetic overflow remains impossible. The
98
 * reduced value must of course be congruent to the unreduced value.
99
 *
100
 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
101
 * 'widefelem', featuring enough bits to store the result of a multiplication
102
 * and even some further arithmetic without need for immediate reduction.
103
 */
104
105
0
#define NLIMBS 7
106
107
typedef uint64_t limb;
108
typedef uint128_t widelimb;
109
typedef limb limb_aX __attribute((__aligned__(1)));
110
typedef limb felem[NLIMBS];
111
typedef widelimb widefelem[2*NLIMBS-1];
112
113
static const limb bottom56bits = 0xffffffffffffff;
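A minimal sketch (not part of the original source) of the radix-2^56 representation described above: the value contributed by the two least significant limbs can be reconstructed with the same uint128_t type the file already relies on (the helper name below is hypothetical).

static uint128_t low_two_limbs_value(const felem v)
{
    /* v[0]*2^0 + v[1]*2^56, exactly as in the representation comment above */
    return (uint128_t) v[0] + (((uint128_t) v[1]) << 56);
}

The same 56-bit spacing is why the (de)serialisation helpers below read and write the 48-byte array at 7-byte offsets (0, 7, 14, ..., 42).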
114
115
/* Helper functions (de)serialising reduced field elements in little endian */
116
static void bin48_to_felem(felem out, const u8 in[48])
117
0
{
118
0
    memset(out, 0, 56);
119
0
    out[0] = (*((limb *) & in[0])) & bottom56bits;
120
0
    out[1] = (*((limb_aX *) & in[7])) & bottom56bits;
121
0
    out[2] = (*((limb_aX *) & in[14])) & bottom56bits;
122
0
    out[3] = (*((limb_aX *) & in[21])) & bottom56bits;
123
0
    out[4] = (*((limb_aX *) & in[28])) & bottom56bits;
124
0
    out[5] = (*((limb_aX *) & in[35])) & bottom56bits;
125
0
    memmove(&out[6], &in[42], 6);
126
0
}
127
128
static void felem_to_bin48(u8 out[48], const felem in)
129
0
{
130
0
    memset(out, 0, 48);
131
0
    (*((limb *) & out[0]))     |= (in[0] & bottom56bits);
132
0
    (*((limb_aX *) & out[7]))  |= (in[1] & bottom56bits);
133
0
    (*((limb_aX *) & out[14])) |= (in[2] & bottom56bits);
134
0
    (*((limb_aX *) & out[21])) |= (in[3] & bottom56bits);
135
0
    (*((limb_aX *) & out[28])) |= (in[4] & bottom56bits);
136
0
    (*((limb_aX *) & out[35])) |= (in[5] & bottom56bits);
137
0
    memmove(&out[42], &in[6], 6);
138
0
}
139
140
/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
141
static int BN_to_felem(felem out, const BIGNUM *bn)
142
0
{
143
0
    felem_bytearray b_out;
144
0
    int num_bytes;
145
146
0
    if (BN_is_negative(bn)) {
147
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
148
0
        return 0;
149
0
    }
150
0
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
151
0
    if (num_bytes < 0) {
152
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
153
0
        return 0;
154
0
    }
155
0
    bin48_to_felem(out, b_out);
156
0
    return 1;
157
0
}
158
159
/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
160
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
161
0
{
162
0
    felem_bytearray b_out;
163
164
0
    felem_to_bin48(b_out, in);
165
0
    return BN_lebin2bn(b_out, sizeof(b_out), out);
166
0
}
167
168
/*-
169
 * Field operations
170
 * ----------------
171
 */
172
173
static void felem_one(felem out)
174
0
{
175
0
    out[0] = 1;
176
0
    memset(&out[1], 0, sizeof(limb) * (NLIMBS-1));
177
0
}
178
179
static void felem_assign(felem out, const felem in)
180
0
{
181
0
    memcpy(out, in, sizeof(felem));
182
0
}
183
184
/* felem_sum64 sets out = out + in. */
185
static void felem_sum64(felem out, const felem in)
186
0
{
187
0
    unsigned int i;
188
189
0
    for (i = 0; i < NLIMBS; i++)
190
0
        out[i] += in[i];
191
0
}
192
193
/* felem_scalar sets out = in * scalar */
194
static void felem_scalar(felem out, const felem in, limb scalar)
195
0
{
196
0
    unsigned int i;
197
198
0
    for (i = 0; i < NLIMBS; i++)
199
0
        out[i] = in[i] * scalar;
200
0
}
201
202
/* felem_scalar64 sets out = out * scalar */
203
static void felem_scalar64(felem out, limb scalar)
204
0
{
205
0
    unsigned int i;
206
207
0
    for (i = 0; i < NLIMBS; i++)
208
0
        out[i] *= scalar;
209
0
}
210
211
/* felem_scalar128 sets out = out * scalar */
212
static void felem_scalar128(widefelem out, limb scalar)
213
0
{
214
0
    unsigned int i;
215
216
0
    for (i = 0; i < 2*NLIMBS-1; i++)
217
0
        out[i] *= scalar;
218
0
}
219
220
/*-
221
 * felem_neg sets |out| to |-in|
222
 * On entry:
223
 *   in[i] < 2^60 - 2^29
224
 * On exit:
225
 *   out[i] < 2^60
226
 */
227
static void felem_neg(felem out, const felem in)
228
0
{
229
    /*
230
     * In order to prevent underflow, we add a multiple of p before subtracting.
231
     * Use telescopic sums to represent 2^12 * p redundantly with each limb
232
     * of the form 2^60 + ...
233
     */
234
0
    static const limb two60m52m4 = (((limb) 1) << 60)
235
0
                                 - (((limb) 1) << 52)
236
0
                                 - (((limb) 1) << 4);
237
0
    static const limb two60p44m12 = (((limb) 1) << 60)
238
0
                                  + (((limb) 1) << 44)
239
0
                                  - (((limb) 1) << 12);
240
0
    static const limb two60m28m4 = (((limb) 1) << 60)
241
0
                                 - (((limb) 1) << 28)
242
0
                                 - (((limb) 1) << 4);
243
0
    static const limb two60m4 = (((limb) 1) << 60)
244
0
                              - (((limb) 1) << 4);
245
246
0
    out[0] = two60p44m12 - in[0];
247
0
    out[1] = two60m52m4 - in[1];
248
0
    out[2] = two60m28m4 - in[2];
249
0
    out[3] = two60m4 - in[3];
250
0
    out[4] = two60m4 - in[4];
251
0
    out[5] = two60m4 - in[5];
252
0
    out[6] = two60m4 - in[6];
253
0
}
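As a quick check (not from the original source), the four constants above really do add up to 2^12 * p once each is placed at its limb position; the intermediate powers of two telescope away:

  limb 0:    2^60 + 2^44 - 2^12
  limb 1:   (2^60 - 2^52 - 2^4) * 2^56  = 2^116 - 2^108 - 2^60
  limb 2:   (2^60 - 2^28 - 2^4) * 2^112 = 2^172 - 2^140 - 2^116
  limb 3-6: (2^60 - 2^4) * 2^(56*k)     = 2^(56*k + 60) - 2^(56*k + 4)
  -------------------------------------------------------------------
  total:     2^396 - 2^140 - 2^108 + 2^44 - 2^12
           = 2^12 * (2^384 - 2^128 - 2^96 + 2^32 - 1) = 2^12 * p

felem_diff64 below adds the same constants, and felem_diff_128_64 and felem_diff128 follow the same pattern with 2^16 * p and 2^415 * p respectively, as their comments state.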
254
255
/*-
256
 * felem_diff64 subtracts |in| from |out|
257
 * On entry:
258
 *   in[i] < 2^60 - 2^52 - 2^4
259
 * On exit:
260
 *   out[i] < out_orig[i] + 2^60 + 2^44
261
 */
262
static void felem_diff64(felem out, const felem in)
263
0
{
264
    /*
265
     * In order to prevent underflow, we add a multiple of p before subtracting.
266
     * Use telescopic sums to represent 2^12 * p redundantly with each limb
267
     * of the form 2^60 + ...
268
     */
269
270
0
    static const limb two60m52m4 = (((limb) 1) << 60)
271
0
                                 - (((limb) 1) << 52)
272
0
                                 - (((limb) 1) << 4);
273
0
    static const limb two60p44m12 = (((limb) 1) << 60)
274
0
                                  + (((limb) 1) << 44)
275
0
                                  - (((limb) 1) << 12);
276
0
    static const limb two60m28m4 = (((limb) 1) << 60)
277
0
                                 - (((limb) 1) << 28)
278
0
                                 - (((limb) 1) << 4);
279
0
    static const limb two60m4 = (((limb) 1) << 60)
280
0
                              - (((limb) 1) << 4);
281
282
0
    out[0] += two60p44m12 - in[0];
283
0
    out[1] += two60m52m4 - in[1];
284
0
    out[2] += two60m28m4 - in[2];
285
0
    out[3] += two60m4 - in[3];
286
0
    out[4] += two60m4 - in[4];
287
0
    out[5] += two60m4 - in[5];
288
0
    out[6] += two60m4 - in[6];
289
0
}
290
291
/*
292
 * in[i] < 2^63
293
 * out[i] < out_orig[i] + 2^64 + 2^48
294
 */
295
static void felem_diff_128_64(widefelem out, const felem in)
296
0
{
297
    /*
298
     * In order to prevent underflow, we add a multiple of p before subtracting.
299
     * Use telescopic sums to represent 2^16 * p redundantly with each limb
300
     * of the form 2^64 + ...
301
     */
302
303
0
    static const widelimb two64m56m8 = (((widelimb) 1) << 64)
304
0
                                     - (((widelimb) 1) << 56)
305
0
                                     - (((widelimb) 1) << 8);
306
0
    static const widelimb two64m32m8 = (((widelimb) 1) << 64)
307
0
                                     - (((widelimb) 1) << 32)
308
0
                                     - (((widelimb) 1) << 8);
309
0
    static const widelimb two64m8 = (((widelimb) 1) << 64)
310
0
                                  - (((widelimb) 1) << 8);
311
0
    static const widelimb two64p48m16 = (((widelimb) 1) << 64)
312
0
                                      + (((widelimb) 1) << 48)
313
0
                                      - (((widelimb) 1) << 16);
314
0
    unsigned int i;
315
316
0
    out[0] += two64p48m16;
317
0
    out[1] += two64m56m8;
318
0
    out[2] += two64m32m8;
319
0
    out[3] += two64m8;
320
0
    out[4] += two64m8;
321
0
    out[5] += two64m8;
322
0
    out[6] += two64m8;
323
324
0
    for (i = 0; i < NLIMBS; i++)
325
0
        out[i] -= in[i];
326
0
}
327
328
/*
329
 * in[i] < 2^127 - 2^119 - 2^71
330
 * out[i] < out_orig[i] + 2^127 + 2^111
331
 */
332
static void felem_diff128(widefelem out, const widefelem in)
333
0
{
334
    /*
335
     * In order to prevent underflow, we add a multiple of p before subtracting.
336
     * Use telescopic sums to represent 2^415 * p redundantly with each limb
337
     * of the form 2^127 + ...
338
     */
339
340
0
    static const widelimb two127 = ((widelimb) 1) << 127;
341
0
    static const widelimb two127m71 = (((widelimb) 1) << 127)
342
0
                                    - (((widelimb) 1) << 71);
343
0
    static const widelimb two127p111m79m71 = (((widelimb) 1) << 127)
344
0
                                           + (((widelimb) 1) << 111)
345
0
                                           - (((widelimb) 1) << 79)
346
0
                                           - (((widelimb) 1) << 71);
347
0
    static const widelimb two127m119m71 = (((widelimb) 1) << 127)
348
0
                                        - (((widelimb) 1) << 119)
349
0
                                        - (((widelimb) 1) << 71);
350
0
    static const widelimb two127m95m71 = (((widelimb) 1) << 127)
351
0
                                       - (((widelimb) 1) << 95)
352
0
                                       - (((widelimb) 1) << 71);
353
0
    unsigned int i;
354
355
0
    out[0]  += two127;
356
0
    out[1]  += two127m71;
357
0
    out[2]  += two127m71;
358
0
    out[3]  += two127m71;
359
0
    out[4]  += two127m71;
360
0
    out[5]  += two127m71;
361
0
    out[6]  += two127p111m79m71;
362
0
    out[7]  += two127m119m71;
363
0
    out[8]  += two127m95m71;
364
0
    out[9]  += two127m71;
365
0
    out[10] += two127m71;
366
0
    out[11] += two127m71;
367
0
    out[12] += two127m71;
368
369
0
    for (i = 0; i < 2*NLIMBS-1; i++)
370
0
        out[i] -= in[i];
371
0
}
372
373
static void felem_square_ref(widefelem out, const felem in)
374
0
{
375
0
    felem inx2;
376
0
    felem_scalar(inx2, in, 2);
377
378
0
    out[0] = ((uint128_t) in[0]) * in[0];
379
380
0
    out[1] = ((uint128_t) in[0]) * inx2[1];
381
382
0
    out[2] = ((uint128_t) in[0]) * inx2[2]
383
0
           + ((uint128_t) in[1]) * in[1];
384
385
0
    out[3] = ((uint128_t) in[0]) * inx2[3]
386
0
           + ((uint128_t) in[1]) * inx2[2];
387
388
0
    out[4] = ((uint128_t) in[0]) * inx2[4]
389
0
           + ((uint128_t) in[1]) * inx2[3]
390
0
           + ((uint128_t) in[2]) * in[2];
391
392
0
    out[5] = ((uint128_t) in[0]) * inx2[5]
393
0
           + ((uint128_t) in[1]) * inx2[4]
394
0
           + ((uint128_t) in[2]) * inx2[3];
395
396
0
    out[6] = ((uint128_t) in[0]) * inx2[6]
397
0
           + ((uint128_t) in[1]) * inx2[5]
398
0
           + ((uint128_t) in[2]) * inx2[4]
399
0
           + ((uint128_t) in[3]) * in[3];
400
401
0
    out[7] = ((uint128_t) in[1]) * inx2[6]
402
0
           + ((uint128_t) in[2]) * inx2[5]
403
0
           + ((uint128_t) in[3]) * inx2[4];
404
405
0
    out[8] = ((uint128_t) in[2]) * inx2[6]
406
0
           + ((uint128_t) in[3]) * inx2[5]
407
0
           + ((uint128_t) in[4]) * in[4];
408
409
0
    out[9] = ((uint128_t) in[3]) * inx2[6]
410
0
           + ((uint128_t) in[4]) * inx2[5];
411
412
0
    out[10] = ((uint128_t) in[4]) * inx2[6]
413
0
            + ((uint128_t) in[5]) * in[5];
414
415
0
    out[11] = ((uint128_t) in[5]) * inx2[6];
416
417
0
    out[12] = ((uint128_t) in[6]) * in[6];
418
0
}
419
420
static void felem_mul_ref(widefelem out, const felem in1, const felem in2)
421
0
{
422
0
    out[0] = ((uint128_t) in1[0]) * in2[0];
423
424
0
    out[1] = ((uint128_t) in1[0]) * in2[1]
425
0
           + ((uint128_t) in1[1]) * in2[0];
426
427
0
    out[2] = ((uint128_t) in1[0]) * in2[2]
428
0
           + ((uint128_t) in1[1]) * in2[1]
429
0
           + ((uint128_t) in1[2]) * in2[0];
430
431
0
    out[3] = ((uint128_t) in1[0]) * in2[3]
432
0
           + ((uint128_t) in1[1]) * in2[2]
433
0
           + ((uint128_t) in1[2]) * in2[1]
434
0
           + ((uint128_t) in1[3]) * in2[0];
435
436
0
    out[4] = ((uint128_t) in1[0]) * in2[4]
437
0
           + ((uint128_t) in1[1]) * in2[3]
438
0
           + ((uint128_t) in1[2]) * in2[2]
439
0
           + ((uint128_t) in1[3]) * in2[1]
440
0
           + ((uint128_t) in1[4]) * in2[0];
441
442
0
    out[5] = ((uint128_t) in1[0]) * in2[5]
443
0
           + ((uint128_t) in1[1]) * in2[4]
444
0
           + ((uint128_t) in1[2]) * in2[3]
445
0
           + ((uint128_t) in1[3]) * in2[2]
446
0
           + ((uint128_t) in1[4]) * in2[1]
447
0
           + ((uint128_t) in1[5]) * in2[0];
448
449
0
    out[6] = ((uint128_t) in1[0]) * in2[6]
450
0
           + ((uint128_t) in1[1]) * in2[5]
451
0
           + ((uint128_t) in1[2]) * in2[4]
452
0
           + ((uint128_t) in1[3]) * in2[3]
453
0
           + ((uint128_t) in1[4]) * in2[2]
454
0
           + ((uint128_t) in1[5]) * in2[1]
455
0
           + ((uint128_t) in1[6]) * in2[0];
456
457
0
    out[7] = ((uint128_t) in1[1]) * in2[6]
458
0
           + ((uint128_t) in1[2]) * in2[5]
459
0
           + ((uint128_t) in1[3]) * in2[4]
460
0
           + ((uint128_t) in1[4]) * in2[3]
461
0
           + ((uint128_t) in1[5]) * in2[2]
462
0
           + ((uint128_t) in1[6]) * in2[1];
463
464
0
    out[8] = ((uint128_t) in1[2]) * in2[6]
465
0
           + ((uint128_t) in1[3]) * in2[5]
466
0
           + ((uint128_t) in1[4]) * in2[4]
467
0
           + ((uint128_t) in1[5]) * in2[3]
468
0
           + ((uint128_t) in1[6]) * in2[2];
469
470
0
    out[9] = ((uint128_t) in1[3]) * in2[6]
471
0
           + ((uint128_t) in1[4]) * in2[5]
472
0
           + ((uint128_t) in1[5]) * in2[4]
473
0
           + ((uint128_t) in1[6]) * in2[3];
474
475
0
    out[10] = ((uint128_t) in1[4]) * in2[6]
476
0
            + ((uint128_t) in1[5]) * in2[5]
477
0
            + ((uint128_t) in1[6]) * in2[4];
478
479
0
    out[11] = ((uint128_t) in1[5]) * in2[6]
480
0
            + ((uint128_t) in1[6]) * in2[5];
481
482
0
    out[12] = ((uint128_t) in1[6]) * in2[6];
483
0
}
484
485
/*-
486
 * Reduce thirteen 128-bit coefficients to seven 64-bit coefficients.
487
 * in[i] < 2^128 - 2^125
488
 * out[i] < 2^56 for i < 6,
489
 * out[6] <= 2^48
490
 *
491
 * The technique in use here stems from the format of the prime modulus:
492
 * P384 = 2^384 - delta
493
 *
494
 * Thus we can reduce numbers of the form (X + 2^384 * Y) by substituting
495
 * them with (X + delta Y), with delta = 2^128 + 2^96 + (-2^32 + 1). These
496
 * coefficients are still quite large, and so we repeatedly apply this
497
 * technique on high-order bits in order to guarantee the desired bounds on
498
 * the size of our output.
499
 *
500
 * The three phases of elimination are as follows:
501
 * [1]: Y = 2^120 (in[12] | in[11] | in[10] | in[9])
502
 * [2]: Y = 2^8 (acc[8] | acc[7])
503
 * [3]: Y = 2^48 (acc[6] >> 48)
504
 * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d)
505
 */
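As a worked example (not from the original source) of this substitution applied to the highest limb: in[12] sits at bit position 56*12 = 672, so

  in[12] * 2^672 = (in[12] * 2^288) * 2^384
                 = in[12] * 2^288 * (2^128 + 2^96 - 2^32 + 1)   (mod p)
                 = in[12] * (2^416 + 2^384 - 2^320 + 2^288)

Each of these four terms straddles a 56-bit limb boundary, which is exactly what the four add/subtract pairs touching acc[8] down to acc[5] in phase [1] implement.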
506
static void felem_reduce(felem out, const widefelem in)
507
0
{
508
    /*
509
     * In order to prevent underflow, we add a multiple of p before subtracting.
510
     * Use telescopic sums to represent 2^76 * p redundantly with each limb
511
     * of the form 2^124 + ...
512
     */
513
0
    static const widelimb two124m68 = (((widelimb) 1) << 124)
514
0
                                    - (((widelimb) 1) << 68);
515
0
    static const widelimb two124m116m68 = (((widelimb) 1) << 124)
516
0
                                        - (((widelimb) 1) << 116)
517
0
                                        - (((widelimb) 1) << 68);
518
0
    static const widelimb two124p108m76 = (((widelimb) 1) << 124)
519
0
                                        + (((widelimb) 1) << 108)
520
0
                                        - (((widelimb) 1) << 76);
521
0
    static const widelimb two124m92m68 = (((widelimb) 1) << 124)
522
0
                                       - (((widelimb) 1) << 92)
523
0
                                       - (((widelimb) 1) << 68);
524
0
    widelimb temp, acc[9];
525
0
    unsigned int i;
526
527
0
    memcpy(acc, in, sizeof(widelimb) * 9);
528
529
0
    acc[0] += two124p108m76;
530
0
    acc[1] += two124m116m68;
531
0
    acc[2] += two124m92m68;
532
0
    acc[3] += two124m68;
533
0
    acc[4] += two124m68;
534
0
    acc[5] += two124m68;
535
0
    acc[6] += two124m68;
536
537
    /* [1]: Eliminate in[9], ..., in[12] */
538
0
    acc[8] += in[12] >> 32;
539
0
    acc[7] += (in[12] & 0xffffffff) << 24;
540
0
    acc[7] += in[12] >> 8;
541
0
    acc[6] += (in[12] & 0xff) << 48;
542
0
    acc[6] -= in[12] >> 16;
543
0
    acc[5] -= (in[12] & 0xffff) << 40;
544
0
    acc[6] += in[12] >> 48;
545
0
    acc[5] += (in[12] & 0xffffffffffff) << 8;
546
547
0
    acc[7] += in[11] >> 32;
548
0
    acc[6] += (in[11] & 0xffffffff) << 24;
549
0
    acc[6] += in[11] >> 8;
550
0
    acc[5] += (in[11] & 0xff) << 48;
551
0
    acc[5] -= in[11] >> 16;
552
0
    acc[4] -= (in[11] & 0xffff) << 40;
553
0
    acc[5] += in[11] >> 48;
554
0
    acc[4] += (in[11] & 0xffffffffffff) << 8;
555
556
0
    acc[6] += in[10] >> 32;
557
0
    acc[5] += (in[10] & 0xffffffff) << 24;
558
0
    acc[5] += in[10] >> 8;
559
0
    acc[4] += (in[10] & 0xff) << 48;
560
0
    acc[4] -= in[10] >> 16;
561
0
    acc[3] -= (in[10] & 0xffff) << 40;
562
0
    acc[4] += in[10] >> 48;
563
0
    acc[3] += (in[10] & 0xffffffffffff) << 8;
564
565
0
    acc[5] += in[9] >> 32;
566
0
    acc[4] += (in[9] & 0xffffffff) << 24;
567
0
    acc[4] += in[9] >> 8;
568
0
    acc[3] += (in[9] & 0xff) << 48;
569
0
    acc[3] -= in[9] >> 16;
570
0
    acc[2] -= (in[9] & 0xffff) << 40;
571
0
    acc[3] += in[9] >> 48;
572
0
    acc[2] += (in[9] & 0xffffffffffff) << 8;
573
574
    /*
575
     * [2]: Eliminate acc[7], acc[8], that is, limbs 7 and 8, as
576
     * well as the contributions made from eliminating higher limbs.
577
     * acc[7] < in[7] + 2^120 + 2^56 < in[7] + 2^121
578
     * acc[8] < in[8] + 2^96
579
     */
580
0
    acc[4] += acc[8] >> 32;
581
0
    acc[3] += (acc[8] & 0xffffffff) << 24;
582
0
    acc[3] += acc[8] >> 8;
583
0
    acc[2] += (acc[8] & 0xff) << 48;
584
0
    acc[2] -= acc[8] >> 16;
585
0
    acc[1] -= (acc[8] & 0xffff) << 40;
586
0
    acc[2] += acc[8] >> 48;
587
0
    acc[1] += (acc[8] & 0xffffffffffff) << 8;
588
589
0
    acc[3] += acc[7] >> 32;
590
0
    acc[2] += (acc[7] & 0xffffffff) << 24;
591
0
    acc[2] += acc[7] >> 8;
592
0
    acc[1] += (acc[7] & 0xff) << 48;
593
0
    acc[1] -= acc[7] >> 16;
594
0
    acc[0] -= (acc[7] & 0xffff) << 40;
595
0
    acc[1] += acc[7] >> 48;
596
0
    acc[0] += (acc[7] & 0xffffffffffff) << 8;
597
598
    /*-
599
     * acc[k] < in[k] + 2^124 + 2^121 
600
     *        < in[k] + 2^125
601
     *        < 2^128, for k <= 6
602
     */
603
604
    /*
605
     * Carry 4 -> 5 -> 6
606
     * This has the effect of ensuring that these more significant limbs
607
     * will be small in value after eliminating high bits from acc[6].
608
     */
609
0
    acc[5] += acc[4] >> 56;
610
0
    acc[4] &= 0x00ffffffffffffff;
611
612
0
    acc[6] += acc[5] >> 56;
613
0
    acc[5] &= 0x00ffffffffffffff;
614
615
    /*-
616
     * acc[6] < in[6] + 2^124 + 2^121 + 2^72 + 2^16
617
     *        < in[6] + 2^125
618
     *        < 2^128
619
     */
620
621
    /* [3]: Eliminate high bits of acc[6] */
622
0
    temp = acc[6] >> 48;
623
0
    acc[6] &= 0x0000ffffffffffff;
624
    
625
    /* temp < 2^80 */
626
627
0
    acc[3] += temp >> 40;
628
0
    acc[2] += (temp & 0xffffffffff) << 16;
629
0
    acc[2] += temp >> 16;
630
0
    acc[1] += (temp & 0xffff) << 40;
631
0
    acc[1] -= temp >> 24;
632
0
    acc[0] -= (temp & 0xffffff) << 32;
633
0
    acc[0] += temp;
634
635
    /*-
636
     * acc[k] < acc_old[k] + 2^64 + 2^56
637
     *        < in[k] + 2^124 + 2^121 + 2^72 + 2^64 + 2^56 + 2^16 , k < 4
638
     */
639
640
    /* Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
641
0
    acc[1] += acc[0] >> 56;   /* acc[1] < acc_old[1] + 2^72 */
642
0
    acc[0] &= 0x00ffffffffffffff;
643
644
0
    acc[2] += acc[1] >> 56;   /* acc[2] < acc_old[2] + 2^72 + 2^16 */
645
0
    acc[1] &= 0x00ffffffffffffff;
646
647
0
    acc[3] += acc[2] >> 56;   /* acc[3] < acc_old[3] + 2^72 + 2^16 */
648
0
    acc[2] &= 0x00ffffffffffffff;
649
650
    /*-
651
     * acc[k] < acc_old[k] + 2^72 + 2^16
652
     *        < in[k] + 2^124 + 2^121 + 2^73 + 2^64 + 2^56 + 2^17
653
     *        < in[k] + 2^125
654
     *        < 2^128 , k < 4
655
     */
656
657
0
    acc[4] += acc[3] >> 56;   /*-
658
                               * acc[4] < acc_old[4] + 2^72 + 2^16
659
                               *        < 2^72 + 2^56 + 2^16
660
                               */
661
0
    acc[3] &= 0x00ffffffffffffff;
662
663
0
    acc[5] += acc[4] >> 56;   /*-
664
                               * acc[5] < acc_old[5] + 2^16 + 1
665
                               *        < 2^56 + 2^16 + 1
666
                               */
667
0
    acc[4] &= 0x00ffffffffffffff;
668
669
0
    acc[6] += acc[5] >> 56;   /* acc[6] < 2^48 + 1, i.e. acc[6] <= 2^48 */
670
0
    acc[5] &= 0x00ffffffffffffff;
671
672
0
    for (i = 0; i < NLIMBS; i++)
673
0
        out[i] = acc[i];
674
0
}
675
676
#if defined(ECP_NISTP384_ASM)
677
static void felem_square_wrapper(widefelem out, const felem in);
678
static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2);
679
680
static void (*felem_square_p)(widefelem out, const felem in) =
681
    felem_square_wrapper;
682
static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =
683
    felem_mul_wrapper;
684
685
void p384_felem_square(widefelem out, const felem in);
686
void p384_felem_mul(widefelem out, const felem in1, const felem in2);
687
688
# if defined(_ARCH_PPC64)
689
#  include "crypto/ppc_arch.h"
690
# endif
691
692
static void felem_select(void)
693
{
694
# if defined(_ARCH_PPC64)
695
    if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
696
        felem_square_p = p384_felem_square;
697
        felem_mul_p = p384_felem_mul;
698
699
        return;
700
    }
701
# endif
702
703
    /* Default */
704
    felem_square_p = felem_square_ref;
705
    felem_mul_p = felem_mul_ref;
706
}
707
708
static void felem_square_wrapper(widefelem out, const felem in)
709
{
710
    felem_select();
711
    felem_square_p(out, in);
712
}
713
714
static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)
715
{
716
    felem_select();
717
    felem_mul_p(out, in1, in2);
718
}
719
720
# define felem_square felem_square_p
721
# define felem_mul felem_mul_p
722
#else
723
0
# define felem_square felem_square_ref
724
0
# define felem_mul felem_mul_ref
725
#endif
726
727
static ossl_inline void felem_square_reduce(felem out, const felem in)
728
0
{
729
0
    widefelem tmp;
730
731
0
    felem_square(tmp, in);
732
0
    felem_reduce(out, tmp);
733
0
}
734
735
static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2)
736
0
{
737
0
    widefelem tmp;
738
739
0
    felem_mul(tmp, in1, in2);
740
0
    felem_reduce(out, tmp);
741
0
}
742
743
/*-
744
 * felem_inv calculates |out| = |in|^{-1}
745
 *
746
 * Based on Fermat's Little Theorem:
747
 *   a^p = a (mod p)
748
 *   a^{p-1} = 1 (mod p)
749
 *   a^{p-2} = a^{-1} (mod p)
750
 */
751
static void felem_inv(felem out, const felem in)
752
0
{
753
0
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6;
754
0
    unsigned int i = 0;
755
756
0
    felem_square_reduce(ftmp, in);      /* 2^1 */
757
0
    felem_mul_reduce(ftmp, ftmp, in);   /* 2^1 + 2^0 */
758
0
    felem_assign(ftmp2, ftmp);
759
760
0
    felem_square_reduce(ftmp, ftmp);    /* 2^2 + 2^1 */
761
0
    felem_mul_reduce(ftmp, ftmp, in);   /* 2^2 + 2^1 * 2^0 */
762
0
    felem_assign(ftmp3, ftmp);
763
764
0
    for (i = 0; i < 3; i++)
765
0
        felem_square_reduce(ftmp, ftmp); /* 2^5 + 2^4 + 2^3 */
766
0
    felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 */
767
0
    felem_assign(ftmp4, ftmp);
768
769
0
    for (i = 0; i < 6; i++)
770
0
        felem_square_reduce(ftmp, ftmp); /* 2^11 + ... + 2^6 */
771
0
    felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^11 + ... + 2^0 */
772
773
0
    for (i = 0; i < 3; i++)
774
0
        felem_square_reduce(ftmp, ftmp); /* 2^14 + ... + 2^3 */
775
0
    felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^14 + ... + 2^0 */
776
0
    felem_assign(ftmp5, ftmp);
777
778
0
    for (i = 0; i < 15; i++)
779
0
        felem_square_reduce(ftmp, ftmp); /* 2^29 + ... + 2^15 */
780
0
    felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^29 + ... + 2^0 */
781
0
    felem_assign(ftmp6, ftmp);
782
783
0
    for (i = 0; i < 30; i++)
784
0
        felem_square_reduce(ftmp, ftmp); /* 2^59 + ... + 2^30 */
785
0
    felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^59 + ... + 2^0 */
786
0
    felem_assign(ftmp4, ftmp);
787
788
0
    for (i = 0; i < 60; i++)
789
0
        felem_square_reduce(ftmp, ftmp); /* 2^119 + ... + 2^60 */
790
0
    felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^119 + ... + 2^0 */
791
0
    felem_assign(ftmp4, ftmp);
792
793
0
    for (i = 0; i < 120; i++)
794
0
      felem_square_reduce(ftmp, ftmp);   /* 2^239 + ... + 2^120 */
795
0
    felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^239 + ... + 2^0 */
796
797
0
    for (i = 0; i < 15; i++)
798
0
        felem_square_reduce(ftmp, ftmp); /* 2^254 + ... + 2^15 */
799
0
    felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^254 + ... + 2^0 */
800
801
0
    for (i = 0; i < 31; i++)
802
0
        felem_square_reduce(ftmp, ftmp); /* 2^285 + ... + 2^31 */
803
0
    felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^285 + ... + 2^31 + 2^29 + ... + 2^0 */
804
805
0
    for (i = 0; i < 2; i++)
806
0
        felem_square_reduce(ftmp, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^2 */
807
0
    felem_mul_reduce(ftmp, ftmp2, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^0 */
808
809
0
    for (i = 0; i < 94; i++)
810
0
        felem_square_reduce(ftmp, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 */
811
0
    felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 + 2^29 + ... + 2^0 */
812
813
0
    for (i = 0; i < 2; i++)
814
0
        felem_square_reduce(ftmp, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 */
815
0
    felem_mul_reduce(ftmp, in, ftmp);    /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 + 2^0 */
816
817
0
    memcpy(out, ftmp, sizeof(felem));
818
0
}
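The chain above is a fixed sequence of squarings and multiplications tailored to the bit pattern of p - 2. As a hypothetical illustration (not part of this file), the same Fermat-inversion idea for a word-sized prime, written as generic square-and-multiply; modmul64 and modinv_fermat64 are names invented for this sketch:

static uint64_t modmul64(uint64_t a, uint64_t b, uint64_t p)
{
    return (uint64_t)(((uint128_t) a * b) % p);
}

static uint64_t modinv_fermat64(uint64_t a, uint64_t p) /* p prime, a % p != 0 */
{
    uint64_t result = 1, base = a % p, e = p - 2;

    while (e != 0) {
        if (e & 1)
            result = modmul64(result, base, p); /* multiply where a bit of e is set */
        base = modmul64(base, base, p);         /* square once per exponent bit */
        e >>= 1;
    }
    return result;                              /* a^(p-2) == a^(-1) (mod p) */
}

A fixed chain is preferred in the real code because the exponent is a known constant, so the sequence of operations never depends on secret data.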
819
820
/*
821
 * Zero-check: returns a limb with all bits set if |in| == 0 (mod p)
822
 * and 0 otherwise. We know that field elements are reduced to
823
 * 0 < in < 2p, so we only need to check two cases:
824
 * 0 and 2^384 - 2^128 - 2^96 + 2^32 - 1
825
 *   in[k] < 2^56, k < 6
826
 *   in[6] <= 2^48
827
 */
828
static limb felem_is_zero(const felem in)
829
0
{
830
0
    limb zero, p384;
831
832
0
    zero = in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6];
833
0
    zero = ((int64_t) (zero) - 1) >> 63;
834
0
    p384 = (in[0] ^ 0x000000ffffffff) | (in[1] ^ 0xffff0000000000)
835
0
         | (in[2] ^ 0xfffffffffeffff) | (in[3] ^ 0xffffffffffffff)
836
0
         | (in[4] ^ 0xffffffffffffff) | (in[5] ^ 0xffffffffffffff)
837
0
         | (in[6] ^ 0xffffffffffff);
838
0
    p384 = ((int64_t) (p384) - 1) >> 63;
839
840
0
    return (zero | p384);
841
0
}
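As a side note (not in the original), the ((int64_t)(x) - 1) >> 63 idiom above relies on arithmetic right shift of a signed value (as the surrounding code already does) and produces an all-ones mask exactly when x == 0, provided x < 2^63, which holds for the OR of reduced limbs. A hypothetical stand-alone form:

static limb is_zero_mask(limb x)    /* requires x < 2^63 */
{
    /* x == 0: x - 1 == -1, the sign bit smears into all ones;
     * x != 0: x - 1 is non-negative and below 2^63, the shift gives 0 */
    return (limb)(((int64_t) x - 1) >> 63);
}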
842
843
static int felem_is_zero_int(const void *in)
844
0
{
845
0
    return (int)(felem_is_zero(in) & ((limb) 1));
846
0
}
847
848
/*-
849
 * felem_contract converts |in| to its unique, minimal representation.
850
 * Assume we've removed all redundant bits.
851
 * On entry:
852
 *   in[k] < 2^56, k < 6
853
 *   in[6] <= 2^48
854
 */
855
static void felem_contract(felem out, const felem in)
856
0
{
857
0
    static const int64_t two56 = ((limb) 1) << 56;
858
859
    /*
860
     * We know for a fact that 0 <= |in| < 2*p, for p = 2^384 - 2^128 - 2^96 + 2^32 - 1
861
     * Perform two successive, idempotent subtractions to reduce if |in| >= p.
862
     */
863
864
0
    int64_t tmp[NLIMBS], cond[5], a;
865
0
    unsigned int i;
866
867
0
    memcpy(tmp, in, sizeof(felem));
868
 
869
    /* Case 1: a = 1 iff |in| >= 2^384 */
870
0
    a = (in[6] >> 48);
871
0
    tmp[0] += a;
872
0
    tmp[0] -= a << 32;
873
0
    tmp[1] += a << 40;
874
0
    tmp[2] += a << 16;
875
0
    tmp[6] &= 0x0000ffffffffffff;
876
877
    /*
878
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
879
     * non-zero, so we only need one step
880
     */
881
882
0
    a = tmp[0] >> 63;
883
0
    tmp[0] += a & two56;
884
0
    tmp[1] -= a & 1;
885
886
    /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
887
0
    tmp[2] += tmp[1] >> 56;
888
0
    tmp[1] &= 0x00ffffffffffffff;
889
890
0
    tmp[3] += tmp[2] >> 56;
891
0
    tmp[2] &= 0x00ffffffffffffff;
892
893
0
    tmp[4] += tmp[3] >> 56;
894
0
    tmp[3] &= 0x00ffffffffffffff;
895
896
0
    tmp[5] += tmp[4] >> 56;
897
0
    tmp[4] &= 0x00ffffffffffffff;
898
899
0
    tmp[6] += tmp[5] >> 56; /* tmp[6] < 2^48 */
900
0
    tmp[5] &= 0x00ffffffffffffff;
901
902
    /*
903
     * Case 2: a = all ones if p <= |in| < 2^384, 0 otherwise
904
     */
905
906
    /* 0 iff (2^129..2^383) are all one */
907
0
    cond[0] = ((tmp[6] | 0xff000000000000) & tmp[5] & tmp[4] & tmp[3] & (tmp[2] | 0x0000000001ffff)) + 1;
908
    /* 0 iff 2^128 bit is one */
909
0
    cond[1] = (tmp[2] | ~0x00000000010000) + 1;
910
    /* 0 iff (2^96..2^127) bits are all one */
911
0
    cond[2] = ((tmp[2] | 0xffffffffff0000) & (tmp[1] | 0x0000ffffffffff)) + 1;
912
    /* 0 iff (2^32..2^95) bits are all zero */
913
0
    cond[3] = (tmp[1] & ~0xffff0000000000) | (tmp[0] & ~((int64_t) 0x000000ffffffff));
914
    /* 0 iff (2^0..2^31) bits are all one */
915
0
    cond[4] = (tmp[0] | 0xffffff00000000) + 1;
916
917
    /*
918
     * In effect, invert our conditions, so that 0 values become all 1's,
919
     * and any non-zero value in the low-order 56 bits becomes all 0's
920
     */
921
0
    for (i = 0; i < 5; i++)
922
0
       cond[i] = ((cond[i] & 0x00ffffffffffffff) - 1) >> 63;
923
924
    /*
925
     * The condition for determining whether in is greater than our
926
     * prime is given by the following condition.
927
     */
928
929
    /* First subtract 2^384 - 2^129 cheaply */
930
0
    a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
931
0
    tmp[6] &= ~a;
932
0
    tmp[5] &= ~a;
933
0
    tmp[4] &= ~a;
934
0
    tmp[3] &= ~a;
935
0
    tmp[2] &= ~a | 0x0000000001ffff;
936
937
    /*
938
     * Subtract 2^128 - 2^96 by
939
     * means of disjoint cases.
940
     */
941
942
    /* subtract 2^128 if that bit is present, and add 2^96 */
943
0
    a = cond[0] & cond[1];
944
0
    tmp[2] &= ~a | 0xfffffffffeffff;
945
0
    tmp[1] += a & ((int64_t) 1 << 40);
946
947
    /* otherwise, clear bits 2^127 .. 2^96  */
948
0
    a = cond[0] & ~cond[1] & (cond[2] & (~cond[3] | cond[4]));
949
0
    tmp[2] &= ~a | 0xffffffffff0000;
950
0
    tmp[1] &= ~a | 0x0000ffffffffff;
951
952
    /* finally, subtract the last 2^32 - 1 */
953
0
    a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
954
0
    tmp[0] += a & (-((int64_t) 1 << 32) + 1);
955
956
    /*
957
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
958
     * non-zero, so we only need one step
959
     */
960
0
    a = tmp[0] >> 63;
961
0
    tmp[0] += a & two56;
962
0
    tmp[1] -= a & 1;
963
964
    /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
965
0
    tmp[2] += tmp[1] >> 56;
966
0
    tmp[1] &= 0x00ffffffffffffff;
967
968
0
    tmp[3] += tmp[2] >> 56;
969
0
    tmp[2] &= 0x00ffffffffffffff;
970
971
0
    tmp[4] += tmp[3] >> 56;
972
0
    tmp[3] &= 0x00ffffffffffffff;
973
974
0
    tmp[5] += tmp[4] >> 56;
975
0
    tmp[4] &= 0x00ffffffffffffff;
976
977
0
    tmp[6] += tmp[5] >> 56;
978
0
    tmp[5] &= 0x00ffffffffffffff;
979
980
0
    memcpy(out, tmp, sizeof(felem));
981
0
}
982
983
/*-
984
 * Group operations
985
 * ----------------
986
 *
987
 * Building on top of the field operations we have the operations on the
988
 * elliptic curve group itself. Points on the curve are represented in Jacobian
989
 * coordinates
990
 */
991
992
/*-
993
 * point_double calculates 2*(x_in, y_in, z_in)
994
 *
995
 * The method is taken from:
996
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
997
 *
998
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
999
 * while x_out == y_in is not (maybe this works, but it's not tested).
1000
 */
1001
static void
1002
point_double(felem x_out, felem y_out, felem z_out,
1003
             const felem x_in, const felem y_in, const felem z_in)
1004
0
{
1005
0
    widefelem tmp, tmp2;
1006
0
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
1007
1008
0
    felem_assign(ftmp, x_in);
1009
0
    felem_assign(ftmp2, x_in);
1010
1011
    /* delta = z^2 */
1012
0
    felem_square_reduce(delta, z_in);     /* delta[i] < 2^56 */
1013
1014
    /* gamma = y^2 */
1015
0
    felem_square_reduce(gamma, y_in);     /* gamma[i] < 2^56 */
1016
1017
    /* beta = x*gamma */
1018
0
    felem_mul_reduce(beta, x_in, gamma);  /* beta[i] < 2^56 */
1019
1020
    /* alpha = 3*(x-delta)*(x+delta) */
1021
0
    felem_diff64(ftmp, delta);            /* ftmp[i] < 2^60 + 2^58 + 2^44 */
1022
0
    felem_sum64(ftmp2, delta);            /* ftmp2[i] < 2^59 */
1023
0
    felem_scalar64(ftmp2, 3);             /* ftmp2[i] < 2^61 */
1024
0
    felem_mul_reduce(alpha, ftmp, ftmp2); /* alpha[i] < 2^56 */
1025
1026
    /* x' = alpha^2 - 8*beta */
1027
0
    felem_square(tmp, alpha);             /* tmp[i] < 2^115 */
1028
0
    felem_assign(ftmp, beta);             /* ftmp[i] < 2^56 */
1029
0
    felem_scalar64(ftmp, 8);              /* ftmp[i] < 2^59 */
1030
0
    felem_diff_128_64(tmp, ftmp);         /* tmp[i] < 2^115 + 2^64 + 2^48 */
1031
0
    felem_reduce(x_out, tmp);             /* x_out[i] < 2^56 */
1032
1033
    /* z' = (y + z)^2 - gamma - delta */
1034
0
    felem_sum64(delta, gamma);     /* delta[i] < 2^57 */
1035
0
    felem_assign(ftmp, y_in);      /* ftmp[i] < 2^56 */
1036
0
    felem_sum64(ftmp, z_in);       /* ftmp[i] < 2^56 */
1037
0
    felem_square(tmp, ftmp);       /* tmp[i] < 2^115 */
1038
0
    felem_diff_128_64(tmp, delta); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1039
0
    felem_reduce(z_out, tmp);      /* z_out[i] < 2^56 */
1040
1041
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1042
0
    felem_scalar64(beta, 4);       /* beta[i] < 2^58 */
1043
0
    felem_diff64(beta, x_out);     /* beta[i] < 2^60 + 2^58 + 2^44 */
1044
0
    felem_mul(tmp, alpha, beta);   /* tmp[i] < 2^119 */
1045
0
    felem_square(tmp2, gamma);     /* tmp2[i] < 2^115 */
1046
0
    felem_scalar128(tmp2, 8);      /* tmp2[i] < 2^118 */
1047
0
    felem_diff128(tmp, tmp2);      /* tmp[i] < 2^127 + 2^119 + 2^111 */
1048
0
    felem_reduce(y_out, tmp);      /* tmp[i] < 2^56 */
1049
0
}
1050
1051
/* copy_conditional copies in to out iff mask is all ones. */
1052
static void copy_conditional(felem out, const felem in, limb mask)
1053
0
{
1054
0
    unsigned int i;
1055
1056
0
    for (i = 0; i < NLIMBS; i++)
1057
0
        out[i] ^= mask & (in[i] ^ out[i]);
1058
0
}
1059
1060
/*-
1061
 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1062
 *
1063
 * The method is taken from
1064
 *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1065
 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1066
 *
1067
 * This function includes a branch for checking whether the two input points
1068
 * are equal (while not equal to the point at infinity). See comment below
1069
 * on constant-time.
1070
 */
1071
static void point_add(felem x3, felem y3, felem z3,
1072
                      const felem x1, const felem y1, const felem z1,
1073
                      const int mixed, const felem x2, const felem y2,
1074
                      const felem z2)
1075
0
{
1076
0
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1077
0
    widefelem tmp, tmp2;
1078
0
    limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1079
0
    limb points_equal;
1080
1081
0
    z1_is_zero = felem_is_zero(z1);
1082
0
    z2_is_zero = felem_is_zero(z2);
1083
1084
    /* ftmp = z1z1 = z1**2 */
1085
0
    felem_square_reduce(ftmp, z1);      /* ftmp[i] < 2^56 */
1086
1087
0
    if (!mixed) {
1088
        /* ftmp2 = z2z2 = z2**2 */
1089
0
        felem_square_reduce(ftmp2, z2); /* ftmp2[i] < 2^56 */
1090
1091
        /* u1 = ftmp3 = x1*z2z2 */
1092
0
        felem_mul_reduce(ftmp3, x1, ftmp2); /* ftmp3[i] < 2^56 */
1093
1094
        /* ftmp5 = z1 + z2 */
1095
0
        felem_assign(ftmp5, z1);       /* ftmp5[i] < 2^56 */
1096
0
        felem_sum64(ftmp5, z2);        /* ftmp5[i] < 2^57 */
1097
1098
        /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1099
0
        felem_square(tmp, ftmp5);      /* tmp[i] < 2^117 */
1100
0
        felem_diff_128_64(tmp, ftmp);  /* tmp[i] < 2^117 + 2^64 + 2^48 */
1101
0
        felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^65 + 2^49 */
1102
0
        felem_reduce(ftmp5, tmp);      /* ftmp5[i] < 2^56 */
1103
1104
        /* ftmp2 = z2 * z2z2 */
1105
0
        felem_mul_reduce(ftmp2, ftmp2, z2); /* ftmp2[i] < 2^56 */
1106
1107
        /* s1 = ftmp6 = y1 * z2**3 */
1108
0
        felem_mul_reduce(ftmp6, y1, ftmp2); /* ftmp6[i] < 2^56 */
1109
0
    } else {
1110
        /*
1111
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1112
         */
1113
1114
        /* u1 = ftmp3 = x1*z2z2 */
1115
0
        felem_assign(ftmp3, x1);     /* ftmp3[i] < 2^56 */
1116
1117
        /* ftmp5 = 2*z1z2 */
1118
0
        felem_scalar(ftmp5, z1, 2);  /* ftmp5[i] < 2^57 */
1119
1120
        /* s1 = ftmp6 = y1 * z2**3 */
1121
0
        felem_assign(ftmp6, y1);     /* ftmp6[i] < 2^56 */
1122
0
    }
1123
    /* ftmp3[i] < 2^56, ftmp5[i] < 2^57, ftmp6[i] < 2^56 */
1124
1125
    /* u2 = x2*z1z1 */
1126
0
    felem_mul(tmp, x2, ftmp);        /* tmp[i] < 2^115 */
1127
1128
    /* h = ftmp4 = u2 - u1 */
1129
0
    felem_diff_128_64(tmp, ftmp3);   /* tmp[i] < 2^115 + 2^64 + 2^48 */
1130
0
    felem_reduce(ftmp4, tmp);        /* ftmp[4] < 2^56 */
1131
1132
0
    x_equal = felem_is_zero(ftmp4);
1133
1134
    /* z_out = ftmp5 * h */
1135
0
    felem_mul_reduce(z_out, ftmp5, ftmp4);  /* z_out[i] < 2^56 */
1136
1137
    /* ftmp = z1 * z1z1 */
1138
0
    felem_mul_reduce(ftmp, ftmp, z1);  /* ftmp[i] < 2^56 */
1139
1140
    /* s2 = tmp = y2 * z1**3 */
1141
0
    felem_mul(tmp, y2, ftmp);      /* tmp[i] < 2^115 */
1142
1143
    /* r = ftmp5 = (s2 - s1)*2 */
1144
0
    felem_diff_128_64(tmp, ftmp6); /* tmp[i] < 2^115 + 2^64 + 2^48 */
1145
0
    felem_reduce(ftmp5, tmp);      /* ftmp5[i] < 2^56 */
1146
0
    y_equal = felem_is_zero(ftmp5);
1147
0
    felem_scalar64(ftmp5, 2);      /* ftmp5[i] < 2^57 */
1148
1149
    /*
1150
     * The formulae are incorrect if the points are equal, in affine coordinates
1151
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
1152
     * happens.
1153
     *
1154
     * We use bitwise operations to avoid potential side-channels introduced by
1155
     * the short-circuiting behaviour of boolean operators.
1156
     *
1157
     * The special case of either point being the point at infinity (z1 and/or
1158
     * z2 are zero), is handled separately later on in this function, so we
1159
     * avoid jumping to point_double here in those special cases.
1160
     *
1161
     * Notice the comment below on the implications of this branching for timing
1162
     * leaks and why it is considered practically irrelevant.
1163
     */
1164
0
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
1165
1166
0
    if (points_equal) {
1167
        /*
1168
         * This is obviously not constant-time but it will almost never happen
1169
         * for ECDH / ECDSA.
1170
         */
1171
0
        point_double(x3, y3, z3, x1, y1, z1);
1172
0
        return;
1173
0
    }
1174
1175
    /* I = ftmp = (2h)**2 */
1176
0
    felem_assign(ftmp, ftmp4);        /* ftmp[i] < 2^56 */
1177
0
    felem_scalar64(ftmp, 2);          /* ftmp[i] < 2^57 */
1178
0
    felem_square_reduce(ftmp, ftmp);  /* ftmp[i] < 2^56 */
1179
1180
    /* J = ftmp2 = h * I */
1181
0
    felem_mul_reduce(ftmp2, ftmp4, ftmp); /* ftmp2[i] < 2^56 */
1182
1183
    /* V = ftmp4 = U1 * I */
1184
0
    felem_mul_reduce(ftmp4, ftmp3, ftmp); /* ftmp4[i] < 2^56 */
1185
1186
    /* x_out = r**2 - J - 2V */
1187
0
    felem_square(tmp, ftmp5);      /* tmp[i] < 2^117 */
1188
0
    felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^64 + 2^48 */
1189
0
    felem_assign(ftmp3, ftmp4);    /* ftmp3[i] < 2^56 */
1190
0
    felem_scalar64(ftmp4, 2);      /* ftmp4[i] < 2^57 */
1191
0
    felem_diff_128_64(tmp, ftmp4); /* tmp[i] < 2^117 + 2^65 + 2^49 */
1192
0
    felem_reduce(x_out, tmp);      /* x_out[i] < 2^56 */
1193
1194
    /* y_out = r(V-x_out) - 2 * s1 * J */
1195
0
    felem_diff64(ftmp3, x_out);    /* ftmp3[i] < 2^60 + 2^56 + 2^44 */
1196
0
    felem_mul(tmp, ftmp5, ftmp3);  /* tmp[i] < 2^116 */
1197
0
    felem_mul(tmp2, ftmp6, ftmp2); /* tmp2[i] < 2^115 */
1198
0
    felem_scalar128(tmp2, 2);      /* tmp2[i] < 2^116 */
1199
0
    felem_diff128(tmp, tmp2);      /* tmp[i] < 2^127 + 2^116 + 2^111 */
1200
0
    felem_reduce(y_out, tmp);      /* y_out[i] < 2^56 */
1201
1202
0
    copy_conditional(x_out, x2, z1_is_zero);
1203
0
    copy_conditional(x_out, x1, z2_is_zero);
1204
0
    copy_conditional(y_out, y2, z1_is_zero);
1205
0
    copy_conditional(y_out, y1, z2_is_zero);
1206
0
    copy_conditional(z_out, z2, z1_is_zero);
1207
0
    copy_conditional(z_out, z1, z2_is_zero);
1208
0
    felem_assign(x3, x_out);
1209
0
    felem_assign(y3, y_out);
1210
0
    felem_assign(z3, z_out);
1211
0
}
1212
1213
/*-
1214
 * Base point pre computation
1215
 * --------------------------
1216
 *
1217
 * Two different sorts of precomputed tables are used in the following code.
1218
 * Each contains various points on the curve, where each point is three field
1219
 * elements (x, y, z).
1220
 *
1221
 * For the base point table, z is usually 1 (0 for the point at infinity).
1222
 * This table has 16 elements:
1223
 * index | bits    | point
1224
 * ------+---------+------------------------------
1225
 *     0 | 0 0 0 0 | 0G
1226
 *     1 | 0 0 0 1 | 1G
1227
 *     2 | 0 0 1 0 | 2^95G
1228
 *     3 | 0 0 1 1 | (2^95 + 1)G
1229
 *     4 | 0 1 0 0 | 2^190G
1230
 *     5 | 0 1 0 1 | (2^190 + 1)G
1231
 *     6 | 0 1 1 0 | (2^190 + 2^95)G
1232
 *     7 | 0 1 1 1 | (2^190 + 2^95 + 1)G
1233
 *     8 | 1 0 0 0 | 2^285G
1234
 *     9 | 1 0 0 1 | (2^285 + 1)G
1235
 *    10 | 1 0 1 0 | (2^285 + 2^95)G
1236
 *    11 | 1 0 1 1 | (2^285 + 2^95 + 1)G
1237
 *    12 | 1 1 0 0 | (2^285 + 2^190)G
1238
 *    13 | 1 1 0 1 | (2^285 + 2^190 + 1)G
1239
 *    14 | 1 1 1 0 | (2^285 + 2^190 + 2^95)G
1240
 *    15 | 1 1 1 1 | (2^285 + 2^190 + 2^95 + 1)G
1241
 *
1242
 * The reason for this is so that we can clock bits into four different
1243
 * locations when doing simple scalar multiplies against the base point.
1244
 *
1245
 * Tables for other points have table[i] = iG for i in 0 .. 16.
1246
 */
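As a hypothetical illustration (not part of the original source), the row of this table used in round i of the generator multiplication is selected by four scalar bits taken 95 positions apart; base_table_index is a name invented for this sketch:

static unsigned int base_table_index(int b0, int b1, int b2, int b3)
{
    /* b0 = bit i, b1 = bit i+95, b2 = bit i+190, b3 = bit i+285 of the scalar */
    return (unsigned int)(b3 << 3 | b2 << 2 | b1 << 1 | b0);
}

For example, b3 = 0, b2 = 1, b1 = 1, b0 = 0 gives index 6, the (2^190 + 2^95)G entry in the table above; batch_mul below assembles the same index with get_bit.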
1247
1248
/* gmul is the table of precomputed base points */
1249
static const felem gmul[16][3] = {
1250
{{0, 0, 0, 0, 0, 0, 0},
1251
 {0, 0, 0, 0, 0, 0, 0},
1252
 {0, 0, 0, 0, 0, 0, 0}},
1253
{{0x00545e3872760ab7, 0x00f25dbf55296c3a, 0x00e082542a385502, 0x008ba79b9859f741,
1254
  0x0020ad746e1d3b62, 0x0005378eb1c71ef3, 0x0000aa87ca22be8b},
1255
 {0x00431d7c90ea0e5f, 0x00b1ce1d7e819d7a, 0x0013b5f0b8c00a60, 0x00289a147ce9da31,
1256
  0x0092dc29f8f41dbd, 0x002c6f5d9e98bf92, 0x00003617de4a9626},
1257
 {1, 0, 0, 0, 0, 0, 0}},
1258
{{0x00024711cc902a90, 0x00acb2e579ab4fe1, 0x00af818a4b4d57b1, 0x00a17c7bec49c3de,
1259
  0x004280482d726a8b, 0x00128dd0f0a90f3b, 0x00004387c1c3fa3c},
1260
 {0x002ce76543cf5c3a, 0x00de6cee5ef58f0a, 0x00403e42fa561ca6, 0x00bc54d6f9cb9731,
1261
  0x007155f925fb4ff1, 0x004a9ce731b7b9bc, 0x00002609076bd7b2},
1262
 {1, 0, 0, 0, 0, 0, 0}},
1263
{{0x00e74c9182f0251d, 0x0039bf54bb111974, 0x00b9d2f2eec511d2, 0x0036b1594eb3a6a4,
1264
  0x00ac3bb82d9d564b, 0x00f9313f4615a100, 0x00006716a9a91b10},
1265
 {0x0046698116e2f15c, 0x00f34347067d3d33, 0x008de4ccfdebd002, 0x00e838c6b8e8c97b,
1266
  0x006faf0798def346, 0x007349794a57563c, 0x00002629e7e6ad84},
1267
 {1, 0, 0, 0, 0, 0, 0}},
1268
{{0x0075300e34fd163b, 0x0092e9db4e8d0ad3, 0x00254be9f625f760, 0x00512c518c72ae68,
1269
  0x009bfcf162bede5a, 0x00bf9341566ce311, 0x0000cd6175bd41cf},
1270
 {0x007dfe52af4ac70f, 0x0002159d2d5c4880, 0x00b504d16f0af8d0, 0x0014585e11f5e64c,
1271
  0x0089c6388e030967, 0x00ffb270cbfa5f71, 0x00009a15d92c3947},
1272
 {1, 0, 0, 0, 0, 0, 0}},
1273
{{0x0033fc1278dc4fe5, 0x00d53088c2caa043, 0x0085558827e2db66, 0x00c192bef387b736,
1274
  0x00df6405a2225f2c, 0x0075205aa90fd91a, 0x0000137e3f12349d},
1275
 {0x00ce5b115efcb07e, 0x00abc3308410deeb, 0x005dc6fc1de39904, 0x00907c1c496f36b4,
1276
  0x0008e6ad3926cbe1, 0x00110747b787928c, 0x0000021b9162eb7e},
1277
 {1, 0, 0, 0, 0, 0, 0}},
1278
{{0x008180042cfa26e1, 0x007b826a96254967, 0x0082473694d6b194, 0x007bd6880a45b589,
1279
  0x00c0a5097072d1a3, 0x0019186555e18b4e, 0x000020278190e5ca},
1280
 {0x00b4bef17de61ac0, 0x009535e3c38ed348, 0x002d4aa8e468ceab, 0x00ef40b431036ad3,
1281
  0x00defd52f4542857, 0x0086edbf98234266, 0x00002025b3a7814d},
1282
 {1, 0, 0, 0, 0, 0, 0}},
1283
{{0x00b238aa97b886be, 0x00ef3192d6dd3a32, 0x0079f9e01fd62df8, 0x00742e890daba6c5,
1284
  0x008e5289144408ce, 0x0073bbcc8e0171a5, 0x0000c4fd329d3b52},
1285
 {0x00c6f64a15ee23e7, 0x00dcfb7b171cad8b, 0x00039f6cbd805867, 0x00de024e428d4562,
1286
  0x00be6a594d7c64c5, 0x0078467b70dbcd64, 0x0000251f2ed7079b},
1287
 {1, 0, 0, 0, 0, 0, 0}},
1288
{{0x000e5cc25fc4b872, 0x005ebf10d31ef4e1, 0x0061e0ebd11e8256, 0x0076e026096f5a27,
1289
  0x0013e6fc44662e9a, 0x0042b00289d3597e, 0x000024f089170d88},
1290
 {0x001604d7e0effbe6, 0x0048d77cba64ec2c, 0x008166b16da19e36, 0x006b0d1a0f28c088,
1291
  0x000259fcd47754fd, 0x00cc643e4d725f9a, 0x00007b10f3c79c14},
1292
 {1, 0, 0, 0, 0, 0, 0}},
1293
{{0x00430155e3b908af, 0x00b801e4fec25226, 0x00b0d4bcfe806d26, 0x009fc4014eb13d37,
1294
  0x0066c94e44ec07e8, 0x00d16adc03874ba2, 0x000030c917a0d2a7},
1295
 {0x00edac9e21eb891c, 0x00ef0fb768102eff, 0x00c088cef272a5f3, 0x00cbf782134e2964,
1296
  0x0001044a7ba9a0e3, 0x00e363f5b194cf3c, 0x00009ce85249e372},
1297
 {1, 0, 0, 0, 0, 0, 0}},
1298
{{0x001dd492dda5a7eb, 0x008fd577be539fd1, 0x002ff4b25a5fc3f1, 0x0074a8a1b64df72f,
1299
  0x002ba3d8c204a76c, 0x009d5cff95c8235a, 0x0000e014b9406e0f},
1300
 {0x008c2e4dbfc98aba, 0x00f30bb89f1a1436, 0x00b46f7aea3e259c, 0x009224454ac02f54,
1301
  0x00906401f5645fa2, 0x003a1d1940eabc77, 0x00007c9351d680e6},
1302
 {1, 0, 0, 0, 0, 0, 0}},
1303
{{0x005a35d872ef967c, 0x0049f1b7884e1987, 0x0059d46d7e31f552, 0x00ceb4869d2d0fb6,
1304
  0x00e8e89eee56802a, 0x0049d806a774aaf2, 0x0000147e2af0ae24},
1305
 {0x005fd1bd852c6e5e, 0x00b674b7b3de6885, 0x003b9ea5eb9b6c08, 0x005c9f03babf3ef7,
1306
  0x00605337fecab3c7, 0x009a3f85b11bbcc8, 0x0000455470f330ec},
1307
 {1, 0, 0, 0, 0, 0, 0}},
1308
{{0x002197ff4d55498d, 0x00383e8916c2d8af, 0x00eb203f34d1c6d2, 0x0080367cbd11b542,
1309
  0x00769b3be864e4f5, 0x0081a8458521c7bb, 0x0000c531b34d3539},
1310
 {0x00e2a3d775fa2e13, 0x00534fc379573844, 0x00ff237d2a8db54a, 0x00d301b2335a8882,
1311
  0x000f75ea96103a80, 0x0018fecb3cdd96fa, 0x0000304bf61e94eb},
1312
 {1, 0, 0, 0, 0, 0, 0}},
1313
{{0x00b2afc332a73dbd, 0x0029a0d5bb007bc5, 0x002d628eb210f577, 0x009f59a36dd05f50,
1314
  0x006d339de4eca613, 0x00c75a71addc86bc, 0x000060384c5ea93c},
1315
 {0x00aa9641c32a30b4, 0x00cc73ae8cce565d, 0x00ec911a4df07f61, 0x00aa4b762ea4b264,
1316
  0x0096d395bb393629, 0x004efacfb7632fe0, 0x00006f252f46fa3f},
1317
 {1, 0, 0, 0, 0, 0, 0}},
1318
{{0x00567eec597c7af6, 0x0059ba6795204413, 0x00816d4e6f01196f, 0x004ae6b3eb57951d,
1319
  0x00420f5abdda2108, 0x003401d1f57ca9d9, 0x0000cf5837b0b67a},
1320
 {0x00eaa64b8aeeabf9, 0x00246ddf16bcb4de, 0x000e7e3c3aecd751, 0x0008449f04fed72e,
1321
  0x00307b67ccf09183, 0x0017108c3556b7b1, 0x0000229b2483b3bf},
1322
 {1, 0, 0, 0, 0, 0, 0}},
1323
{{0x00e7c491a7bb78a1, 0x00eafddd1d3049ab, 0x00352c05e2bc7c98, 0x003d6880c165fa5c,
1324
  0x00b6ac61cc11c97d, 0x00beeb54fcf90ce5, 0x0000dc1f0b455edc},
1325
 {0x002db2e7aee34d60, 0x0073b5f415a2d8c0, 0x00dd84e4193e9a0c, 0x00d02d873467c572,
1326
  0x0018baaeda60aee5, 0x0013fb11d697c61e, 0x000083aafcc3a973},
1327
 {1, 0, 0, 0, 0, 0, 0}}
1328
};
1329
1330
/*
1331
 * select_point selects the |idx|th point from a precomputation table and
1332
 * copies it to out.
1333
 *
1334
 * pre_comp below is of the size provided in |size|.
1335
 */
1336
static void select_point(const limb idx, unsigned int size,
1337
                         const felem pre_comp[][3], felem out[3])
1338
0
{
1339
0
    unsigned int i, j;
1340
0
    limb *outlimbs = &out[0][0];
1341
1342
0
    memset(out, 0, sizeof(*out) * 3);
1343
1344
0
    for (i = 0; i < size; i++) {
1345
0
        const limb *inlimbs = &pre_comp[i][0][0];
1346
0
        limb mask = i ^ idx;
1347
1348
0
        mask |= mask >> 4;
1349
0
        mask |= mask >> 2;
1350
0
        mask |= mask >> 1;
1351
0
        mask &= 1;
1352
0
        mask--;
1353
0
        for (j = 0; j < NLIMBS * 3; j++)
1354
0
            outlimbs[j] |= inlimbs[j] & mask;
1355
0
    }
1356
0
}
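As a side note (not in the original), the mask folding in the loop above is a constant-time equality test: for the small table indices used here, i ^ idx fits in well under 8 bits, the three OR-folds collapse any set bit into bit 0, and the final decrement turns 0 into an all-ones mask. A hypothetical stand-alone form:

static limb index_eq_mask(limb i, limb idx)   /* i, idx < 2^8 */
{
    limb mask = i ^ idx;

    mask |= mask >> 4;
    mask |= mask >> 2;
    mask |= mask >> 1;
    mask &= 1;                 /* 1 iff i != idx */
    return mask - 1;           /* 0 iff i != idx, all ones iff i == idx */
}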
1357
1358
/* get_bit returns the |i|th bit in |in| */
1359
static char get_bit(const felem_bytearray in, int i)
1360
0
{
1361
0
    if (i < 0 || i >= 384)
1362
0
        return 0;
1363
0
    return (in[i >> 3] >> (i & 7)) & 1;
1364
0
}
1365
1366
/*
1367
 * Interleaved point multiplication using precomputed point multiples: The
1368
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1369
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1370
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1371
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1372
 */
1373
static void batch_mul(felem x_out, felem y_out, felem z_out,
1374
                      const felem_bytearray scalars[],
1375
                      const unsigned int num_points, const u8 *g_scalar,
1376
                      const int mixed, const felem pre_comp[][17][3],
1377
                      const felem g_pre_comp[16][3])
1378
0
{
1379
0
    int i, skip;
1380
0
    unsigned int num, gen_mul = (g_scalar != NULL);
1381
0
    felem nq[3], tmp[4];
1382
0
    limb bits;
1383
0
    u8 sign, digit;
1384
1385
    /* set nq to the point at infinity */
1386
0
    memset(nq, 0, sizeof(nq));
1387
1388
    /*
1389
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1390
     * of the generator (last quarter of rounds) and additions of other
1391
     * points' multiples (every 5th round).
1392
     */
1393
0
    skip = 1;                   /* save two point operations in the first
1394
                                 * round */
1395
0
    for (i = (num_points ? 380 : 98); i >= 0; --i) {
1396
        /* double */
1397
0
        if (!skip)
1398
0
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1399
1400
        /* add multiples of the generator */
1401
0
        if (gen_mul && (i <= 98)) {
1402
0
            bits = get_bit(g_scalar, i + 285) << 3;
1403
0
            if (i < 95) {
1404
0
                bits |= get_bit(g_scalar, i + 190) << 2;
1405
0
                bits |= get_bit(g_scalar, i + 95) << 1;
1406
0
                bits |= get_bit(g_scalar, i);
1407
0
            }
1408
            /* select the point to add, in constant time */
1409
0
            select_point(bits, 16, g_pre_comp, tmp);
1410
0
            if (!skip) {
1411
                /* The 1 argument below is for "mixed" */
1412
0
                point_add(nq[0],  nq[1],  nq[2],
1413
0
                          nq[0],  nq[1],  nq[2], 1,
1414
0
                          tmp[0], tmp[1], tmp[2]);
1415
0
            } else {
1416
0
                memcpy(nq, tmp, 3 * sizeof(felem));
1417
0
                skip = 0;
1418
0
            }
1419
0
        }
1420
1421
        /* do other additions every 5 doublings */
1422
0
        if (num_points && (i % 5 == 0)) {
1423
            /* loop over all scalars */
1424
0
            for (num = 0; num < num_points; ++num) {
1425
0
                bits = get_bit(scalars[num], i + 4) << 5;
1426
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1427
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1428
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1429
0
                bits |= get_bit(scalars[num], i) << 1;
1430
0
                bits |= get_bit(scalars[num], i - 1);
1431
0
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1432
1433
                /*
1434
                 * select the point to add or subtract, in constant time
1435
                 */
1436
0
                select_point(digit, 17, pre_comp[num], tmp);
1437
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1438
                                            * point */
1439
0
                copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1440
1441
0
                if (!skip) {
1442
0
                    point_add(nq[0],  nq[1],  nq[2],
1443
0
                              nq[0],  nq[1],  nq[2], mixed,
1444
0
                              tmp[0], tmp[1], tmp[2]);
1445
0
                } else {
1446
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1447
0
                    skip = 0;
1448
0
                }
1449
0
            }
1450
0
        }
1451
0
    }
1452
0
    felem_assign(x_out, nq[0]);
1453
0
    felem_assign(y_out, nq[1]);
1454
0
    felem_assign(z_out, nq[2]);
1455
0
}
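/*
 * Sketch of the schedule used by batch_mul above.  When g_scalar is
 * non-NULL (rounds i <= 98), a 4-bit comb index is assembled from scalar
 * bits i, i + 95, i + 190 and i + 285; with the generator table layout used
 * here (see the note after ossl_ec_GFp_nistp384_precompute_mult below),
 * g_pre_comp[bits] is already the sum of the corresponding multiples of G,
 * so one addition per round suffices.  For the other points, every fifth
 * round the window of bits i + 4 .. i - 1 is recoded by
 * ossl_ec_GFp_nistp_recode_scalar_bits() into a sign and a digit selecting
 * one of the 17 multiples 0*P .. 16*P; when the sign is set, the negated
 * point (X, -Y, Z) is used instead.
 */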
1456
1457
/* Precomputation for the group generator. */
1458
struct nistp384_pre_comp_st {
1459
    felem g_pre_comp[16][3];
1460
    CRYPTO_REF_COUNT references;
1461
};
1462
1463
const EC_METHOD *ossl_ec_GFp_nistp384_method(void)
1464
0
{
1465
0
    static const EC_METHOD ret = {
1466
0
        EC_FLAGS_DEFAULT_OCT,
1467
0
        NID_X9_62_prime_field,
1468
0
        ossl_ec_GFp_nistp384_group_init,
1469
0
        ossl_ec_GFp_simple_group_finish,
1470
0
        ossl_ec_GFp_simple_group_clear_finish,
1471
0
        ossl_ec_GFp_nist_group_copy,
1472
0
        ossl_ec_GFp_nistp384_group_set_curve,
1473
0
        ossl_ec_GFp_simple_group_get_curve,
1474
0
        ossl_ec_GFp_simple_group_get_degree,
1475
0
        ossl_ec_group_simple_order_bits,
1476
0
        ossl_ec_GFp_simple_group_check_discriminant,
1477
0
        ossl_ec_GFp_simple_point_init,
1478
0
        ossl_ec_GFp_simple_point_finish,
1479
0
        ossl_ec_GFp_simple_point_clear_finish,
1480
0
        ossl_ec_GFp_simple_point_copy,
1481
0
        ossl_ec_GFp_simple_point_set_to_infinity,
1482
0
        ossl_ec_GFp_simple_point_set_affine_coordinates,
1483
0
        ossl_ec_GFp_nistp384_point_get_affine_coordinates,
1484
0
        0, /* point_set_compressed_coordinates */
1485
0
        0, /* point2oct */
1486
0
        0, /* oct2point */
1487
0
        ossl_ec_GFp_simple_add,
1488
0
        ossl_ec_GFp_simple_dbl,
1489
0
        ossl_ec_GFp_simple_invert,
1490
0
        ossl_ec_GFp_simple_is_at_infinity,
1491
0
        ossl_ec_GFp_simple_is_on_curve,
1492
0
        ossl_ec_GFp_simple_cmp,
1493
0
        ossl_ec_GFp_simple_make_affine,
1494
0
        ossl_ec_GFp_simple_points_make_affine,
1495
0
        ossl_ec_GFp_nistp384_points_mul,
1496
0
        ossl_ec_GFp_nistp384_precompute_mult,
1497
0
        ossl_ec_GFp_nistp384_have_precompute_mult,
1498
0
        ossl_ec_GFp_nist_field_mul,
1499
0
        ossl_ec_GFp_nist_field_sqr,
1500
0
        0, /* field_div */
1501
0
        ossl_ec_GFp_simple_field_inv,
1502
0
        0, /* field_encode */
1503
0
        0, /* field_decode */
1504
0
        0, /* field_set_to_one */
1505
0
        ossl_ec_key_simple_priv2oct,
1506
0
        ossl_ec_key_simple_oct2priv,
1507
0
        0, /* set private */
1508
0
        ossl_ec_key_simple_generate_key,
1509
0
        ossl_ec_key_simple_check_key,
1510
0
        ossl_ec_key_simple_generate_public_key,
1511
0
        0, /* keycopy */
1512
0
        0, /* keyfinish */
1513
0
        ossl_ecdh_simple_compute_key,
1514
0
        ossl_ecdsa_simple_sign_setup,
1515
0
        ossl_ecdsa_simple_sign_sig,
1516
0
        ossl_ecdsa_simple_verify_sig,
1517
0
        0, /* field_inverse_mod_ord */
1518
0
        0, /* blind_coordinates */
1519
0
        0, /* ladder_pre */
1520
0
        0, /* ladder_step */
1521
0
        0  /* ladder_post */
1522
0
    };
1523
1524
0
    return &ret;
1525
0
}
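/*
 * Minimal usage sketch (a hypothetical helper, not part of this file),
 * assuming a build in which this EC_METHOD backs P-384, e.g. one configured
 * with enable-ec_nistp_64_gcc_128.  The method is not called directly; it
 * is reached through the generic EC_GROUP/EC_POINT API:
 *
 *   #include <openssl/ec.h>
 *   #include <openssl/bn.h>
 *   #include <openssl/obj_mac.h>
 *
 *   static int p384_scalar_mul_demo(void)
 *   {
 *       int ok = 0;
 *       EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
 *       EC_POINT *r = NULL;
 *       BIGNUM *k = NULL;
 *
 *       if (group == NULL)
 *           goto end;
 *       if ((r = EC_POINT_new(group)) == NULL || (k = BN_new()) == NULL)
 *           goto end;
 *       if (!BN_rand_range(k, EC_GROUP_get0_order(group)))
 *           goto end;
 *       // r = k*G; dispatches to ossl_ec_GFp_nistp384_points_mul when
 *       // this method is in use
 *       ok = EC_POINT_mul(group, r, k, NULL, NULL, NULL);
 *    end:
 *       BN_free(k);
 *       EC_POINT_free(r);
 *       EC_GROUP_free(group);
 *       return ok;
 *   }
 */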
1526
1527
/******************************************************************************/
1528
/*
1529
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1530
 */
1531
1532
static NISTP384_PRE_COMP *nistp384_pre_comp_new(void)
1533
0
{
1534
0
    NISTP384_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1535
1536
0
    if (ret == NULL)
1537
0
        return ret;
1538
1539
0
    if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1540
0
        OPENSSL_free(ret);
1541
0
        return NULL;
1542
0
    }
1543
0
    return ret;
1544
0
}
1545
1546
NISTP384_PRE_COMP *ossl_ec_nistp384_pre_comp_dup(NISTP384_PRE_COMP *p)
1547
0
{
1548
0
    int i;
1549
1550
0
    if (p != NULL)
1551
0
        CRYPTO_UP_REF(&p->references, &i);
1552
0
    return p;
1553
0
}
1554
1555
void ossl_ec_nistp384_pre_comp_free(NISTP384_PRE_COMP *p)
1556
0
{
1557
0
    int i;
1558
1559
0
    if (p == NULL)
1560
0
        return;
1561
1562
0
    CRYPTO_DOWN_REF(&p->references, &i);
1563
0
    REF_PRINT_COUNT("ossl_ec_nistp384", p);
1564
0
    if (i > 0)
1565
0
        return;
1566
0
    REF_ASSERT_ISNT(i < 0);
1567
1568
0
    CRYPTO_FREE_REF(&p->references);
1569
0
    OPENSSL_free(p);
1570
0
}
1571
1572
/******************************************************************************/
1573
/*
1574
 * OPENSSL EC_METHOD FUNCTIONS
1575
 */
1576
1577
int ossl_ec_GFp_nistp384_group_init(EC_GROUP *group)
1578
0
{
1579
0
    int ret;
1580
1581
0
    ret = ossl_ec_GFp_simple_group_init(group);
1582
0
    group->a_is_minus3 = 1;
1583
0
    return ret;
1584
0
}
1585
1586
int ossl_ec_GFp_nistp384_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1587
                                         const BIGNUM *a, const BIGNUM *b,
1588
                                         BN_CTX *ctx)
1589
0
{
1590
0
    int ret = 0;
1591
0
    BIGNUM *curve_p, *curve_a, *curve_b;
1592
0
#ifndef FIPS_MODULE
1593
0
    BN_CTX *new_ctx = NULL;
1594
1595
0
    if (ctx == NULL)
1596
0
        ctx = new_ctx = BN_CTX_new();
1597
0
#endif
1598
0
    if (ctx == NULL)
1599
0
        return 0;
1600
1601
0
    BN_CTX_start(ctx);
1602
0
    curve_p = BN_CTX_get(ctx);
1603
0
    curve_a = BN_CTX_get(ctx);
1604
0
    curve_b = BN_CTX_get(ctx);
1605
0
    if (curve_b == NULL)
1606
0
        goto err;
1607
0
    BN_bin2bn(nistp384_curve_params[0], sizeof(felem_bytearray), curve_p);
1608
0
    BN_bin2bn(nistp384_curve_params[1], sizeof(felem_bytearray), curve_a);
1609
0
    BN_bin2bn(nistp384_curve_params[2], sizeof(felem_bytearray), curve_b);
1610
0
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1611
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1612
0
        goto err;
1613
0
    }
1614
0
    group->field_mod_func = BN_nist_mod_384;
1615
0
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1616
0
 err:
1617
0
    BN_CTX_end(ctx);
1618
0
#ifndef FIPS_MODULE
1619
0
    BN_CTX_free(new_ctx);
1620
0
#endif
1621
0
    return ret;
1622
0
}
1623
1624
/*
1625
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1626
 * (X/Z^2, Y/Z^3)
1627
 */
1628
int ossl_ec_GFp_nistp384_point_get_affine_coordinates(const EC_GROUP *group,
1629
                                                      const EC_POINT *point,
1630
                                                      BIGNUM *x, BIGNUM *y,
1631
                                                      BN_CTX *ctx)
1632
0
{
1633
0
    felem z1, z2, x_in, y_in, x_out, y_out;
1634
0
    widefelem tmp;
1635
1636
0
    if (EC_POINT_is_at_infinity(group, point)) {
1637
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1638
0
        return 0;
1639
0
    }
1640
0
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1641
0
        (!BN_to_felem(z1, point->Z)))
1642
0
        return 0;
1643
0
    felem_inv(z2, z1);
1644
0
    felem_square(tmp, z2);
1645
0
    felem_reduce(z1, tmp);
1646
0
    felem_mul(tmp, x_in, z1);
1647
0
    felem_reduce(x_in, tmp);
1648
0
    felem_contract(x_out, x_in);
1649
0
    if (x != NULL) {
1650
0
        if (!felem_to_BN(x, x_out)) {
1651
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1652
0
            return 0;
1653
0
        }
1654
0
    }
1655
0
    felem_mul(tmp, z1, z2);
1656
0
    felem_reduce(z1, tmp);
1657
0
    felem_mul(tmp, y_in, z1);
1658
0
    felem_reduce(y_in, tmp);
1659
0
    felem_contract(y_out, y_in);
1660
0
    if (y != NULL) {
1661
0
        if (!felem_to_BN(y, y_out)) {
1662
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1663
0
            return 0;
1664
0
        }
1665
0
    }
1666
0
    return 1;
1667
0
}
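/*
 * Cost note on the conversion above: a single field inversion (felem_inv on
 * Z) is performed; Z^-2 then costs one squaring, Z^-3 one further
 * multiplication, and two more multiplications give X' = X*Z^-2 and
 * Y' = Y*Z^-3.  felem_contract reduces each result to its unique
 * representative below p before felem_to_BN converts it back to a BIGNUM.
 */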
1668
1669
/* points below is of size |num|, and tmp_felems is of size |num+1| */
1670
static void make_points_affine(size_t num, felem points[][3],
1671
                               felem tmp_felems[])
1672
0
{
1673
    /*
1674
     * Runs in constant time, unless an input is the point at infinity (which
1675
     * normally shouldn't happen).
1676
     */
1677
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1678
0
                                                  points,
1679
0
                                                  sizeof(felem),
1680
0
                                                  tmp_felems,
1681
0
                                                  (void (*)(void *))felem_one,
1682
0
                                                  felem_is_zero_int,
1683
0
                                                  (void (*)(void *, const void *))
1684
0
                                                  felem_assign,
1685
0
                                                  (void (*)(void *, const void *))
1686
0
                                                  felem_square_reduce,
1687
0
                                                  (void (*)(void *, const void *, const void*))
1688
0
                                                  felem_mul_reduce,
1689
0
                                                  (void (*)(void *, const void *))
1690
0
                                                  felem_inv,
1691
0
                                                  (void (*)(void *, const void *))
1692
0
                                                  felem_contract);
1693
0
}
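/*
 * The shared nistp helper above batches the field inversions (Montgomery's
 * trick), so converting all |num| points to affine form costs roughly one
 * felem_inv plus a linear number of multiplications, rather than one
 * inversion per point.
 */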
1694
1695
/*
1696
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1697
 * values. Result is stored in r (r can equal one of the inputs).
1698
 */
1699
int ossl_ec_GFp_nistp384_points_mul(const EC_GROUP *group, EC_POINT *r,
1700
                                    const BIGNUM *scalar, size_t num,
1701
                                    const EC_POINT *points[],
1702
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1703
0
{
1704
0
    int ret = 0;
1705
0
    int j;
1706
0
    int mixed = 0;
1707
0
    BIGNUM *x, *y, *z, *tmp_scalar;
1708
0
    felem_bytearray g_secret;
1709
0
    felem_bytearray *secrets = NULL;
1710
0
    felem (*pre_comp)[17][3] = NULL;
1711
0
    felem *tmp_felems = NULL;
1712
0
    unsigned int i;
1713
0
    int num_bytes;
1714
0
    int have_pre_comp = 0;
1715
0
    size_t num_points = num;
1716
0
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1717
0
    NISTP384_PRE_COMP *pre = NULL;
1718
0
    felem(*g_pre_comp)[3] = NULL;
1719
0
    EC_POINT *generator = NULL;
1720
0
    const EC_POINT *p = NULL;
1721
0
    const BIGNUM *p_scalar = NULL;
1722
1723
0
    BN_CTX_start(ctx);
1724
0
    x = BN_CTX_get(ctx);
1725
0
    y = BN_CTX_get(ctx);
1726
0
    z = BN_CTX_get(ctx);
1727
0
    tmp_scalar = BN_CTX_get(ctx);
1728
0
    if (tmp_scalar == NULL)
1729
0
        goto err;
1730
1731
0
    if (scalar != NULL) {
1732
0
        pre = group->pre_comp.nistp384;
1733
0
        if (pre)
1734
            /* we have precomputation, try to use it */
1735
0
            g_pre_comp = &pre->g_pre_comp[0];
1736
0
        else
1737
            /* try to use the standard precomputation */
1738
0
            g_pre_comp = (felem(*)[3]) gmul;
1739
0
        generator = EC_POINT_new(group);
1740
0
        if (generator == NULL)
1741
0
            goto err;
1742
        /* get the generator from precomputation */
1743
0
        if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1744
0
            !felem_to_BN(y, g_pre_comp[1][1]) ||
1745
0
            !felem_to_BN(z, g_pre_comp[1][2])) {
1746
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1747
0
            goto err;
1748
0
        }
1749
0
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1750
0
                                                                generator,
1751
0
                                                                x, y, z, ctx))
1752
0
            goto err;
1753
0
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1754
            /* precomputation matches generator */
1755
0
            have_pre_comp = 1;
1756
0
        else
1757
            /*
1758
             * we don't have valid precomputation: treat the generator as a
1759
             * random point
1760
             */
1761
0
            num_points++;
1762
0
    }
1763
1764
0
    if (num_points > 0) {
1765
0
        if (num_points >= 2) {
1766
            /*
1767
             * unless we precompute multiples for just one point, converting
1768
             * those into affine form is time well spent
1769
             */
1770
0
            mixed = 1;
1771
0
        }
1772
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1773
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1774
0
        if (mixed)
1775
0
            tmp_felems =
1776
0
                OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));
1777
0
        if ((secrets == NULL) || (pre_comp == NULL)
1778
0
            || (mixed && (tmp_felems == NULL)))
1779
0
            goto err;
1780
1781
        /*
1782
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1783
         * i.e., they contribute nothing to the linear combination
1784
         */
1785
0
        for (i = 0; i < num_points; ++i) {
1786
0
            if (i == num) {
1787
                /*
1788
                 * we didn't have a valid precomputation, so we pick the
1789
                 * generator
1790
                 */
1791
0
                p = EC_GROUP_get0_generator(group);
1792
0
                p_scalar = scalar;
1793
0
            } else {
1794
                /* the i^th point */
1795
0
                p = points[i];
1796
0
                p_scalar = scalars[i];
1797
0
            }
1798
0
            if (p_scalar != NULL && p != NULL) {
1799
                /* reduce scalar to 0 <= scalar < 2^384 */
1800
0
                if ((BN_num_bits(p_scalar) > 384)
1801
0
                    || (BN_is_negative(p_scalar))) {
1802
                    /*
1803
                     * this is an unusual input, and we don't guarantee
1804
                     * constant-timeness
1805
                     */
1806
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1807
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1808
0
                        goto err;
1809
0
                    }
1810
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1811
0
                                               secrets[i], sizeof(secrets[i]));
1812
0
                } else {
1813
0
                    num_bytes = BN_bn2lebinpad(p_scalar,
1814
0
                                               secrets[i], sizeof(secrets[i]));
1815
0
                }
1816
0
                if (num_bytes < 0) {
1817
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1818
0
                    goto err;
1819
0
                }
1820
                /* precompute multiples */
1821
0
                if ((!BN_to_felem(x_out, p->X)) ||
1822
0
                    (!BN_to_felem(y_out, p->Y)) ||
1823
0
                    (!BN_to_felem(z_out, p->Z)))
1824
0
                    goto err;
1825
0
                memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1826
0
                memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1827
0
                memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1828
0
                for (j = 2; j <= 16; ++j) {
1829
0
                    if (j & 1) {
1830
0
                        point_add(pre_comp[i][j][0],     pre_comp[i][j][1],     pre_comp[i][j][2],
1831
0
                                  pre_comp[i][1][0],     pre_comp[i][1][1],     pre_comp[i][1][2], 0,
1832
0
                                  pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1833
0
                    } else {
1834
0
                        point_double(pre_comp[i][j][0],     pre_comp[i][j][1],     pre_comp[i][j][2],
1835
0
                                     pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1836
0
                    }
1837
0
                }
1838
0
            }
1839
0
        }
1840
0
        if (mixed)
1841
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1842
0
    }
1843
1844
    /* the scalar for the generator */
1845
0
    if (scalar != NULL && have_pre_comp) {
1846
0
        memset(g_secret, 0, sizeof(g_secret));
1847
        /* reduce scalar to 0 <= scalar < 2^384 */
1848
0
        if ((BN_num_bits(scalar) > 384) || (BN_is_negative(scalar))) {
1849
            /*
1850
             * this is an unusual input, and we don't guarantee
1851
             * constant-timeness
1852
             */
1853
0
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1854
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1855
0
                goto err;
1856
0
            }
1857
0
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1858
0
        } else {
1859
0
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1860
0
        }
1861
        /* do the multiplication with generator precomputation */
1862
0
        batch_mul(x_out, y_out, z_out,
1863
0
                  (const felem_bytearray(*))secrets, num_points,
1864
0
                  g_secret,
1865
0
                  mixed, (const felem(*)[17][3])pre_comp,
1866
0
                  (const felem(*)[3])g_pre_comp);
1867
0
    } else {
1868
        /* do the multiplication without generator precomputation */
1869
0
        batch_mul(x_out, y_out, z_out,
1870
0
                  (const felem_bytearray(*))secrets, num_points,
1871
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1872
0
    }
1873
    /* reduce the output to its unique minimal representation */
1874
0
    felem_contract(x_in, x_out);
1875
0
    felem_contract(y_in, y_out);
1876
0
    felem_contract(z_in, z_out);
1877
0
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1878
0
        (!felem_to_BN(z, z_in))) {
1879
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1880
0
        goto err;
1881
0
    }
1882
0
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1883
0
                                                             ctx);
1884
1885
0
 err:
1886
0
    BN_CTX_end(ctx);
1887
0
    EC_POINT_free(generator);
1888
0
    OPENSSL_free(secrets);
1889
0
    OPENSSL_free(pre_comp);
1890
0
    OPENSSL_free(tmp_felems);
1891
0
    return ret;
1892
0
}
1893
1894
int ossl_ec_GFp_nistp384_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1895
0
{
1896
0
    int ret = 0;
1897
0
    NISTP384_PRE_COMP *pre = NULL;
1898
0
    int i, j;
1899
0
    BIGNUM *x, *y;
1900
0
    EC_POINT *generator = NULL;
1901
0
    felem tmp_felems[16];
1902
0
#ifndef FIPS_MODULE
1903
0
    BN_CTX *new_ctx = NULL;
1904
0
#endif
1905
1906
    /* throw away old precomputation */
1907
0
    EC_pre_comp_free(group);
1908
1909
0
#ifndef FIPS_MODULE
1910
0
    if (ctx == NULL)
1911
0
        ctx = new_ctx = BN_CTX_new();
1912
0
#endif
1913
0
    if (ctx == NULL)
1914
0
        return 0;
1915
1916
0
    BN_CTX_start(ctx);
1917
0
    x = BN_CTX_get(ctx);
1918
0
    y = BN_CTX_get(ctx);
1919
0
    if (y == NULL)
1920
0
        goto err;
1921
    /* get the generator */
1922
0
    if (group->generator == NULL)
1923
0
        goto err;
1924
0
    generator = EC_POINT_new(group);
1925
0
    if (generator == NULL)
1926
0
        goto err;
1927
0
    BN_bin2bn(nistp384_curve_params[3], sizeof(felem_bytearray), x);
1928
0
    BN_bin2bn(nistp384_curve_params[4], sizeof(felem_bytearray), y);
1929
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1930
0
        goto err;
1931
0
    if ((pre = nistp384_pre_comp_new()) == NULL)
1932
0
        goto err;
1933
    /*
1934
     * if the generator is the standard one, use built-in precomputation
1935
     */
1936
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1937
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1938
0
        goto done;
1939
0
    }
1940
0
    if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||
1941
0
        (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||
1942
0
        (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))
1943
0
        goto err;
1944
    /* compute 2^95*G, 2^190*G, 2^285*G */
1945
0
    for (i = 1; i <= 4; i <<= 1) {
1946
0
        point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
1947
0
                     pre->g_pre_comp[i][0],  pre->g_pre_comp[i][1],    pre->g_pre_comp[i][2]);
1948
0
        for (j = 0; j < 94; ++j) {
1949
0
            point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
1950
0
                         pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2]);
1951
0
        }
1952
0
    }
1953
    /* g_pre_comp[0] is the point at infinity */
1954
0
    memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
1955
    /* the remaining multiples */
1956
    /* 2^95*G + 2^190*G */
1957
0
    point_add(pre->g_pre_comp[6][0],  pre->g_pre_comp[6][1],  pre->g_pre_comp[6][2],
1958
0
              pre->g_pre_comp[4][0],  pre->g_pre_comp[4][1],  pre->g_pre_comp[4][2], 0,
1959
0
              pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);
1960
    /* 2^95*G + 2^285*G */
1961
0
    point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], pre->g_pre_comp[10][2],
1962
0
              pre->g_pre_comp[8][0],  pre->g_pre_comp[8][1],  pre->g_pre_comp[8][2], 0,
1963
0
              pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);
1964
    /* 2^190*G + 2^285*G */
1965
0
    point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
1966
0
              pre->g_pre_comp[8][0],  pre->g_pre_comp[8][1],  pre->g_pre_comp[8][2], 0,
1967
0
              pre->g_pre_comp[4][0],  pre->g_pre_comp[4][1],  pre->g_pre_comp[4][2]);
1968
    /* 2^95*G + 2^190*G + 2^285*G */
1969
0
    point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], pre->g_pre_comp[14][2],
1970
0
              pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 0,
1971
0
              pre->g_pre_comp[2][0],  pre->g_pre_comp[2][1],  pre->g_pre_comp[2][2]);
1972
0
    for (i = 1; i < 8; ++i) {
1973
        /* odd multiples: add G */
1974
0
        point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1], pre->g_pre_comp[2 * i + 1][2],
1975
0
                  pre->g_pre_comp[2 * i][0],     pre->g_pre_comp[2 * i][1],     pre->g_pre_comp[2 * i][2], 0,
1976
0
                  pre->g_pre_comp[1][0],         pre->g_pre_comp[1][1],         pre->g_pre_comp[1][2]);
1977
0
    }
1978
0
    make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
1979
1980
0
 done:
1981
0
    SETPRECOMP(group, nistp384, pre);
1982
0
    ret = 1;
1983
0
    pre = NULL;
1984
0
 err:
1985
0
    BN_CTX_end(ctx);
1986
0
    EC_POINT_free(generator);
1987
0
#ifndef FIPS_MODULE
1988
0
    BN_CTX_free(new_ctx);
1989
0
#endif
1990
0
    ossl_ec_nistp384_pre_comp_free(pre);
1991
0
    return ret;
1992
0
}
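/*
 * Layout of the generator table built above (and of the built-in gmul table
 * used when no precomputation is installed): writing
 * b = b0 + 2*b1 + 4*b2 + 8*b3 with 0 <= b <= 15,
 *
 *   g_pre_comp[b] = b0*G + b1*(2^95*G) + b2*(2^190*G) + b3*(2^285*G).
 *
 * Entry 0 is the point at infinity, entries 1, 2, 4 and 8 are the pure
 * powers produced by the doubling loops, the remaining entries are their
 * sums, and make_points_affine() converts entries 1..15 to affine form so
 * that batch_mul can use mixed additions.
 */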
1993
1994
int ossl_ec_GFp_nistp384_have_precompute_mult(const EC_GROUP *group)
1995
0
{
1996
0
    return HAVEPRECOMP(group, nistp384);
1997
0
}