Coverage Report

Created: 2022-11-30 06:20

/src/openssl/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/* crypto/ec/ecp_nistp224.c */
2
/*
3
 * Written by Emilia Kasper (Google) for the OpenSSL project.
4
 */
5
/* Copyright 2011 Google Inc.
6
 *
7
 * Licensed under the Apache License, Version 2.0 (the "License");
8
 *
9
 * you may not use this file except in compliance with the License.
10
 * You may obtain a copy of the License at
11
 *
12
 *     http://www.apache.org/licenses/LICENSE-2.0
13
 *
14
 *  Unless required by applicable law or agreed to in writing, software
15
 *  distributed under the License is distributed on an "AS IS" BASIS,
16
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
 *  See the License for the specific language governing permissions and
18
 *  limitations under the License.
19
 */
20
21
/*
22
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23
 *
24
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25
 * and Adam Langley's public domain 64-bit C implementation of curve25519
26
 */
27
28
#include <openssl/opensslconf.h>
29
#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30
31
# ifndef OPENSSL_SYS_VMS
32
#  include <stdint.h>
33
# else
34
#  include <inttypes.h>
35
# endif
36
37
# include <string.h>
38
# include <openssl/err.h>
39
# include "ec_lcl.h"
40
41
# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42
  /* even with gcc, the typedef won't work for 32-bit platforms */
43
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44
                                 * platforms */
45
# else
46
#  error "Need GCC 3.1 or later to define type uint128_t"
47
# endif
48
49
typedef uint8_t u8;
50
typedef uint64_t u64;
51
typedef int64_t s64;
52
53
/******************************************************************************/
54
/*-
55
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
56
 *
57
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58
 * using 64-bit coefficients called 'limbs',
59
 * and sometimes (for multiplication results) as
60
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61
 * using 128-bit coefficients called 'widelimbs'.
62
 * A 4-limb representation is an 'felem';
63
 * a 7-widelimb representation is a 'widefelem'.
64
 * Even within felems, bits of adjacent limbs overlap, and we don't always
65
 * reduce the representations: we ensure that inputs to each felem
66
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67
 * and fit into a 128-bit word without overflow. The coefficients are then
68
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69
 * We only reduce to the unique minimal representation at the end of the
70
 * computation.
71
 */
72
73
typedef uint64_t limb;
74
typedef uint128_t widelimb;
75
76
typedef limb felem[4];
77
typedef widelimb widefelem[7];
78
79
/*
80
 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81
 * group order size for the elliptic curve, and we also use this type for
82
 * scalars for point multiplication.
83
 */
84
typedef u8 felem_bytearray[28];
85
86
static const felem_bytearray nistp224_curve_params[5] = {
87
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
102
};
103
104
/*-
105
 * Precomputed multiples of the standard generator
106
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
107
 * (0 for the point at infinity).
108
 * For each field element, slice a_0 is word 0, etc.
109
 *
110
 * The table has 2 * 16 elements, starting with the following:
111
 * index | bits    | point
112
 * ------+---------+------------------------------
113
 *     0 | 0 0 0 0 | 0G
114
 *     1 | 0 0 0 1 | 1G
115
 *     2 | 0 0 1 0 | 2^56G
116
 *     3 | 0 0 1 1 | (2^56 + 1)G
117
 *     4 | 0 1 0 0 | 2^112G
118
 *     5 | 0 1 0 1 | (2^112 + 1)G
119
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
120
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121
 *     8 | 1 0 0 0 | 2^168G
122
 *     9 | 1 0 0 1 | (2^168 + 1)G
123
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
124
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
126
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129
 * followed by a copy of this with each element multiplied by 2^28.
130
 *
131
 * The reason for this is so that we can clock bits into four different
132
 * locations when doing simple scalar multiplies against the base point,
133
 * and then another four locations using the second 16 elements.
134
 */
135
static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136
                                        {0, 0, 0, 0},
137
                                        {0, 0, 0, 0}},
138
                                       {{0x3280d6115c1d21, 0xc1d356c2112234,
139
                                         0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140
                                        {0xd5819985007e34, 0x75a05a07476444,
141
                                         0xfb4c22dfe6cd43, 0xbd376388b5f723},
142
                                        {1, 0, 0, 0}},
143
                                       {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144
                                         0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145
                                        {0x29e0b892dc9c43, 0xece8608436e662,
146
                                         0xdc858f185310d0, 0x9812dd4eb8d321},
147
                                        {1, 0, 0, 0}},
148
                                       {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149
                                         0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150
                                        {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151
                                         0x313865468fafec, 0x5cb379ba910a17},
152
                                        {1, 0, 0, 0}},
153
                                       {{0x0641966cab26e3, 0x91fb2991fab0a0,
154
                                         0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155
                                        {0x7510407766af5d, 0x84d929610d5450,
156
                                         0x81d77aae82f706, 0x6916f6d4338c5b},
157
                                        {1, 0, 0, 0}},
158
                                       {{0xea95ac3b1f15c6, 0x086000905e82d4,
159
                                         0xdd323ae4d1c8b1, 0x932b56be7685a3},
160
                                        {0x9ef93dea25dbbf, 0x41665960f390f0,
161
                                         0xfdec76dbe2a8a7, 0x523e80f019062a},
162
                                        {1, 0, 0, 0}},
163
                                       {{0x822fdd26732c73, 0xa01c83531b5d0f,
164
                                         0x363f37347c1ba4, 0xc391b45c84725c},
165
                                        {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166
                                         0xc393da7e222a7f, 0x1efb7890ede244},
167
                                        {1, 0, 0, 0}},
168
                                       {{0x4c9e90ca217da1, 0xd11beca79159bb,
169
                                         0xff8d33c2c98b7c, 0x2610b39409f849},
170
                                        {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171
                                         0x966c079b753c89, 0xfe67e4e820b112},
172
                                        {1, 0, 0, 0}},
173
                                       {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174
                                         0x79b7619a3e7c4c, 0x05c73240899b47},
175
                                        {0x9f7f6382c73e3a, 0x18615165c56bda,
176
                                         0x641fab2116fd56, 0x72855882b08394},
177
                                        {1, 0, 0, 0}},
178
                                       {{0x0469182f161c09, 0x74a98ca8d00fb5,
179
                                         0xb89da93489a3e0, 0x41c98768fb0c1d},
180
                                        {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181
                                         0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182
                                        {1, 0, 0, 0}},
183
                                       {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184
                                         0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185
                                        {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186
                                         0x9281d688bcbe50, 0xc40331df893881},
187
                                        {1, 0, 0, 0}},
188
                                       {{0xb89530796f0f60, 0xade92bd26909a3,
189
                                         0x1a0c83fb4884da, 0x1765bf22a5a984},
190
                                        {0x772a9ee75db09e, 0x23bc6c67cec16f,
191
                                         0x4c1edba8b14e2f, 0xe2a215d9611369},
192
                                        {1, 0, 0, 0}},
193
                                       {{0x571e509fb5efb3, 0xade88696410552,
194
                                         0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195
                                        {0xff9f51160f4652, 0xb47ce2495a6539,
196
                                         0xa2946c53b582f4, 0x286d2db3ee9a60},
197
                                        {1, 0, 0, 0}},
198
                                       {{0x40bbd5081a44af, 0x0995183b13926c,
199
                                         0xbcefba6f47f6d0, 0x215619e9cc0057},
200
                                        {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201
                                         0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202
                                        {1, 0, 0, 0}},
203
                                       {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204
                                         0x1c29819435d2c6, 0xc813132f4c07e9},
205
                                        {0x2891425503b11f, 0x08781030579fea,
206
                                         0xf5426ba5cc9674, 0x1e28ebf18562bc},
207
                                        {1, 0, 0, 0}},
208
                                       {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209
                                         0xff17036691a973, 0xf1aef351497c58},
210
                                        {0xdd1f2d600564ff, 0xdead073b1402db,
211
                                         0x74a684435bd693, 0xeea7471f962558},
212
                                        {1, 0, 0, 0}}},
213
{{{0, 0, 0, 0},
214
  {0, 0, 0, 0},
215
  {0, 0, 0, 0}},
216
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218
  {1, 0, 0, 0}},
219
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221
  {1, 0, 0, 0}},
222
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224
  {1, 0, 0, 0}},
225
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227
  {1, 0, 0, 0}},
228
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230
  {1, 0, 0, 0}},
231
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233
  {1, 0, 0, 0}},
234
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236
  {1, 0, 0, 0}},
237
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239
  {1, 0, 0, 0}},
240
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242
  {1, 0, 0, 0}},
243
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245
  {1, 0, 0, 0}},
246
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248
  {1, 0, 0, 0}},
249
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251
  {1, 0, 0, 0}},
252
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254
  {1, 0, 0, 0}},
255
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257
  {1, 0, 0, 0}},
258
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260
  {1, 0, 0, 0}}}
261
};
262
263
/* Precomputation for the group generator. */
264
typedef struct {
265
    felem g_pre_comp[2][16][3];
266
    int references;
267
} NISTP224_PRE_COMP;
268
269
const EC_METHOD *EC_GFp_nistp224_method(void)
270
0
{
271
0
    static const EC_METHOD ret = {
272
0
        EC_FLAGS_DEFAULT_OCT,
273
0
        NID_X9_62_prime_field,
274
0
        ec_GFp_nistp224_group_init,
275
0
        ec_GFp_simple_group_finish,
276
0
        ec_GFp_simple_group_clear_finish,
277
0
        ec_GFp_nist_group_copy,
278
0
        ec_GFp_nistp224_group_set_curve,
279
0
        ec_GFp_simple_group_get_curve,
280
0
        ec_GFp_simple_group_get_degree,
281
0
        ec_GFp_simple_group_check_discriminant,
282
0
        ec_GFp_simple_point_init,
283
0
        ec_GFp_simple_point_finish,
284
0
        ec_GFp_simple_point_clear_finish,
285
0
        ec_GFp_simple_point_copy,
286
0
        ec_GFp_simple_point_set_to_infinity,
287
0
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
288
0
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
289
0
        ec_GFp_simple_point_set_affine_coordinates,
290
0
        ec_GFp_nistp224_point_get_affine_coordinates,
291
0
        0 /* point_set_compressed_coordinates */ ,
292
0
        0 /* point2oct */ ,
293
0
        0 /* oct2point */ ,
294
0
        ec_GFp_simple_add,
295
0
        ec_GFp_simple_dbl,
296
0
        ec_GFp_simple_invert,
297
0
        ec_GFp_simple_is_at_infinity,
298
0
        ec_GFp_simple_is_on_curve,
299
0
        ec_GFp_simple_cmp,
300
0
        ec_GFp_simple_make_affine,
301
0
        ec_GFp_simple_points_make_affine,
302
0
        ec_GFp_nistp224_points_mul,
303
0
        ec_GFp_nistp224_precompute_mult,
304
0
        ec_GFp_nistp224_have_precompute_mult,
305
0
        ec_GFp_nist_field_mul,
306
0
        ec_GFp_nist_field_sqr,
307
0
        0 /* field_div */ ,
308
0
        0 /* field_encode */ ,
309
0
        0 /* field_decode */ ,
310
0
        0                       /* field_set_to_one */
311
0
    };
312
313
0
    return &ret;
314
0
}
315
316
/*
317
 * Helper functions to convert field elements to/from internal representation
318
 */
319
static void bin28_to_felem(felem out, const u8 in[28])
320
0
{
321
0
    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322
0
    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323
0
    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324
0
    out[3] = (*((const uint64_t *)(in+20))) >> 8;
325
0
}
326
327
static void felem_to_bin28(u8 out[28], const felem in)
328
0
{
329
0
    unsigned i;
330
0
    for (i = 0; i < 7; ++i) {
331
0
        out[i] = in[0] >> (8 * i);
332
0
        out[i + 7] = in[1] >> (8 * i);
333
0
        out[i + 14] = in[2] >> (8 * i);
334
0
        out[i + 21] = in[3] >> (8 * i);
335
0
    }
336
0
}
337
338
/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339
static void flip_endian(u8 *out, const u8 *in, unsigned len)
340
0
{
341
0
    unsigned i;
342
0
    for (i = 0; i < len; ++i)
343
0
        out[i] = in[len - 1 - i];
344
0
}
345
346
/* From OpenSSL BIGNUM to internal representation */
347
static int BN_to_felem(felem out, const BIGNUM *bn)
348
0
{
349
0
    felem_bytearray b_in;
350
0
    felem_bytearray b_out;
351
0
    unsigned num_bytes;
352
353
    /* BN_bn2bin eats leading zeroes */
354
0
    memset(b_out, 0, sizeof b_out);
355
0
    num_bytes = BN_num_bytes(bn);
356
0
    if (num_bytes > sizeof b_out) {
357
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358
0
        return 0;
359
0
    }
360
0
    if (BN_is_negative(bn)) {
361
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362
0
        return 0;
363
0
    }
364
0
    num_bytes = BN_bn2bin(bn, b_in);
365
0
    flip_endian(b_out, b_in, num_bytes);
366
0
    bin28_to_felem(out, b_out);
367
0
    return 1;
368
0
}
369
370
/* From internal representation to OpenSSL BIGNUM */
371
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
372
0
{
373
0
    felem_bytearray b_in, b_out;
374
0
    felem_to_bin28(b_in, in);
375
0
    flip_endian(b_out, b_in, sizeof b_out);
376
0
    return BN_bin2bn(b_out, sizeof b_out, out);
377
0
}
378
379
/******************************************************************************/
380
/*-
381
 *                              FIELD OPERATIONS
382
 *
383
 * Field operations, using the internal representation of field elements.
384
 * NB! These operations are specific to our point multiplication and cannot be
385
 * expected to be correct in general - e.g., multiplication with a large scalar
386
 * will cause an overflow.
387
 *
388
 */
389
390
static void felem_one(felem out)
391
0
{
392
0
    out[0] = 1;
393
0
    out[1] = 0;
394
0
    out[2] = 0;
395
0
    out[3] = 0;
396
0
}
397
398
static void felem_assign(felem out, const felem in)
399
0
{
400
0
    out[0] = in[0];
401
0
    out[1] = in[1];
402
0
    out[2] = in[2];
403
0
    out[3] = in[3];
404
0
}
405
406
/* Sum two field elements: out += in */
407
static void felem_sum(felem out, const felem in)
408
0
{
409
0
    out[0] += in[0];
410
0
    out[1] += in[1];
411
0
    out[2] += in[2];
412
0
    out[3] += in[3];
413
0
}
414
415
/* Get negative value: out = -in */
416
/* Assumes in[i] < 2^57 */
417
static void felem_neg(felem out, const felem in)
418
0
{
419
0
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420
0
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421
0
    static const limb two58m42m2 = (((limb) 1) << 58) -
422
0
        (((limb) 1) << 42) - (((limb) 1) << 2);
423
424
    /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425
0
    out[0] = two58p2 - in[0];
426
0
    out[1] = two58m42m2 - in[1];
427
0
    out[2] = two58m2 - in[2];
428
0
    out[3] = two58m2 - in[3];
429
0
}
430
431
/* Subtract field elements: out -= in */
432
/* Assumes in[i] < 2^57 */
433
static void felem_diff(felem out, const felem in)
434
0
{
435
0
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436
0
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437
0
    static const limb two58m42m2 = (((limb) 1) << 58) -
438
0
        (((limb) 1) << 42) - (((limb) 1) << 2);
439
440
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441
0
    out[0] += two58p2;
442
0
    out[1] += two58m42m2;
443
0
    out[2] += two58m2;
444
0
    out[3] += two58m2;
445
446
0
    out[0] -= in[0];
447
0
    out[1] -= in[1];
448
0
    out[2] -= in[2];
449
0
    out[3] -= in[3];
450
0
}
451
452
/* Subtract in unreduced 128-bit mode: out -= in */
453
/* Assumes in[i] < 2^119 */
454
static void widefelem_diff(widefelem out, const widefelem in)
455
0
{
456
0
    static const widelimb two120 = ((widelimb) 1) << 120;
457
0
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
458
0
        (((widelimb) 1) << 64);
459
0
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460
0
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
461
462
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463
0
    out[0] += two120;
464
0
    out[1] += two120m64;
465
0
    out[2] += two120m64;
466
0
    out[3] += two120;
467
0
    out[4] += two120m104m64;
468
0
    out[5] += two120m64;
469
0
    out[6] += two120m64;
470
471
0
    out[0] -= in[0];
472
0
    out[1] -= in[1];
473
0
    out[2] -= in[2];
474
0
    out[3] -= in[3];
475
0
    out[4] -= in[4];
476
0
    out[5] -= in[5];
477
0
    out[6] -= in[6];
478
0
}
479
480
/* Subtract in mixed mode: out128 -= in64 */
481
/* in[i] < 2^63 */
482
static void felem_diff_128_64(widefelem out, const felem in)
483
0
{
484
0
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
485
0
        (((widelimb) 1) << 8);
486
0
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
487
0
        (((widelimb) 1) << 8);
488
0
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489
0
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
490
491
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492
0
    out[0] += two64p8;
493
0
    out[1] += two64m48m8;
494
0
    out[2] += two64m8;
495
0
    out[3] += two64m8;
496
497
0
    out[0] -= in[0];
498
0
    out[1] -= in[1];
499
0
    out[2] -= in[2];
500
0
    out[3] -= in[3];
501
0
}
502
503
/*
504
 * Multiply a field element by a scalar: out = out * scalar The scalars we
505
 * actually use are small, so results fit without overflow
506
 */
507
static void felem_scalar(felem out, const limb scalar)
508
0
{
509
0
    out[0] *= scalar;
510
0
    out[1] *= scalar;
511
0
    out[2] *= scalar;
512
0
    out[3] *= scalar;
513
0
}
514
515
/*
516
 * Multiply an unreduced field element by a scalar: out = out * scalar The
517
 * scalars we actually use are small, so results fit without overflow
518
 */
519
static void widefelem_scalar(widefelem out, const widelimb scalar)
520
0
{
521
0
    out[0] *= scalar;
522
0
    out[1] *= scalar;
523
0
    out[2] *= scalar;
524
0
    out[3] *= scalar;
525
0
    out[4] *= scalar;
526
0
    out[5] *= scalar;
527
0
    out[6] *= scalar;
528
0
}
529
530
/* Square a field element: out = in^2 */
531
static void felem_square(widefelem out, const felem in)
532
0
{
533
0
    limb tmp0, tmp1, tmp2;
534
0
    tmp0 = 2 * in[0];
535
0
    tmp1 = 2 * in[1];
536
0
    tmp2 = 2 * in[2];
537
0
    out[0] = ((widelimb) in[0]) * in[0];
538
0
    out[1] = ((widelimb) in[0]) * tmp1;
539
0
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540
0
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541
0
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542
0
    out[5] = ((widelimb) in[3]) * tmp2;
543
0
    out[6] = ((widelimb) in[3]) * in[3];
544
0
}
545
546
/* Multiply two field elements: out = in1 * in2 */
547
static void felem_mul(widefelem out, const felem in1, const felem in2)
548
0
{
549
0
    out[0] = ((widelimb) in1[0]) * in2[0];
550
0
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551
0
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552
0
        ((widelimb) in1[2]) * in2[0];
553
0
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554
0
        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555
0
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556
0
        ((widelimb) in1[3]) * in2[1];
557
0
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558
0
    out[6] = ((widelimb) in1[3]) * in2[3];
559
0
}
560
561
/*-
562
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563
 * Requires in[i] < 2^126,
564
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565
static void felem_reduce(felem out, const widefelem in)
566
0
{
567
0
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
568
0
        (((widelimb) 1) << 15);
569
0
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
570
0
        (((widelimb) 1) << 71);
571
0
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572
0
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573
0
    widelimb output[5];
574
575
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576
0
    output[0] = in[0] + two127p15;
577
0
    output[1] = in[1] + two127m71m55;
578
0
    output[2] = in[2] + two127m71;
579
0
    output[3] = in[3];
580
0
    output[4] = in[4];
581
582
    /* Eliminate in[4], in[5], in[6] */
583
0
    output[4] += in[6] >> 16;
584
0
    output[3] += (in[6] & 0xffff) << 40;
585
0
    output[2] -= in[6];
586
587
0
    output[3] += in[5] >> 16;
588
0
    output[2] += (in[5] & 0xffff) << 40;
589
0
    output[1] -= in[5];
590
591
0
    output[2] += output[4] >> 16;
592
0
    output[1] += (output[4] & 0xffff) << 40;
593
0
    output[0] -= output[4];
594
595
    /* Carry 2 -> 3 -> 4 */
596
0
    output[3] += output[2] >> 56;
597
0
    output[2] &= 0x00ffffffffffffff;
598
599
0
    output[4] = output[3] >> 56;
600
0
    output[3] &= 0x00ffffffffffffff;
601
602
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
603
604
    /* Eliminate output[4] */
605
0
    output[2] += output[4] >> 16;
606
    /* output[2] < 2^56 + 2^56 = 2^57 */
607
0
    output[1] += (output[4] & 0xffff) << 40;
608
0
    output[0] -= output[4];
609
610
    /* Carry 0 -> 1 -> 2 -> 3 */
611
0
    output[1] += output[0] >> 56;
612
0
    out[0] = output[0] & 0x00ffffffffffffff;
613
614
0
    output[2] += output[1] >> 56;
615
    /* output[2] < 2^57 + 2^72 */
616
0
    out[1] = output[1] & 0x00ffffffffffffff;
617
0
    output[3] += output[2] >> 56;
618
    /* output[3] <= 2^56 + 2^16 */
619
0
    out[2] = output[2] & 0x00ffffffffffffff;
620
621
    /*-
622
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623
     * out[3] <= 2^56 + 2^16 (due to final carry),
624
     * so out < 2*p
625
     */
626
0
    out[3] = output[3];
627
0
}
628
629
static void felem_square_reduce(felem out, const felem in)
630
0
{
631
0
    widefelem tmp;
632
0
    felem_square(tmp, in);
633
0
    felem_reduce(out, tmp);
634
0
}
635
636
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
637
0
{
638
0
    widefelem tmp;
639
0
    felem_mul(tmp, in1, in2);
640
0
    felem_reduce(out, tmp);
641
0
}
642
643
/*
644
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645
 * call felem_reduce first)
646
 */
647
static void felem_contract(felem out, const felem in)
648
0
{
649
0
    static const int64_t two56 = ((limb) 1) << 56;
650
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652
0
    int64_t tmp[4], a;
653
0
    tmp[0] = in[0];
654
0
    tmp[1] = in[1];
655
0
    tmp[2] = in[2];
656
0
    tmp[3] = in[3];
657
    /* Case 1: a = 1 iff in >= 2^224 */
658
0
    a = (in[3] >> 56);
659
0
    tmp[0] -= a;
660
0
    tmp[1] += a << 40;
661
0
    tmp[3] &= 0x00ffffffffffffff;
662
    /*
663
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664
     * and the lower part is non-zero
665
     */
666
0
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667
0
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668
0
    a &= 0x00ffffffffffffff;
669
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670
0
    a = (a - 1) >> 63;
671
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
672
0
    tmp[3] &= a ^ 0xffffffffffffffff;
673
0
    tmp[2] &= a ^ 0xffffffffffffffff;
674
0
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675
0
    tmp[0] -= 1 & a;
676
677
    /*
678
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679
     * non-zero, so we only need one step
680
     */
681
0
    a = tmp[0] >> 63;
682
0
    tmp[0] += two56 & a;
683
0
    tmp[1] -= 1 & a;
684
685
    /* carry 1 -> 2 -> 3 */
686
0
    tmp[2] += tmp[1] >> 56;
687
0
    tmp[1] &= 0x00ffffffffffffff;
688
689
0
    tmp[3] += tmp[2] >> 56;
690
0
    tmp[2] &= 0x00ffffffffffffff;
691
692
    /* Now 0 <= out < p */
693
0
    out[0] = tmp[0];
694
0
    out[1] = tmp[1];
695
0
    out[2] = tmp[2];
696
0
    out[3] = tmp[3];
697
0
}
698
699
/*
700
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701
 * elements are reduced to in < 2^225, so we only need to check three cases:
702
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
703
 */
704
static limb felem_is_zero(const felem in)
705
0
{
706
0
    limb zero, two224m96p1, two225m97p2;
707
708
0
    zero = in[0] | in[1] | in[2] | in[3];
709
0
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
710
0
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711
0
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712
0
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713
0
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714
0
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715
0
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716
0
    return (zero | two224m96p1 | two225m97p2);
717
0
}
718
719
static int felem_is_zero_int(const void *in)
720
0
{
721
0
    return (int)(felem_is_zero(in) & ((limb) 1));
722
0
}
723
724
/* Invert a field element */
725
/* Computation chain copied from djb's code */
726
static void felem_inv(felem out, const felem in)
727
0
{
728
0
    felem ftmp, ftmp2, ftmp3, ftmp4;
729
0
    widefelem tmp;
730
0
    unsigned i;
731
732
0
    felem_square(tmp, in);
733
0
    felem_reduce(ftmp, tmp);    /* 2 */
734
0
    felem_mul(tmp, in, ftmp);
735
0
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
736
0
    felem_square(tmp, ftmp);
737
0
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
738
0
    felem_mul(tmp, in, ftmp);
739
0
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
740
0
    felem_square(tmp, ftmp);
741
0
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
742
0
    felem_square(tmp, ftmp2);
743
0
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
744
0
    felem_square(tmp, ftmp2);
745
0
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
746
0
    felem_mul(tmp, ftmp2, ftmp);
747
0
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
748
0
    felem_square(tmp, ftmp);
749
0
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
750
0
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
751
0
        felem_square(tmp, ftmp2);
752
0
        felem_reduce(ftmp2, tmp);
753
0
    }
754
0
    felem_mul(tmp, ftmp2, ftmp);
755
0
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
756
0
    felem_square(tmp, ftmp2);
757
0
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
758
0
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
759
0
        felem_square(tmp, ftmp3);
760
0
        felem_reduce(ftmp3, tmp);
761
0
    }
762
0
    felem_mul(tmp, ftmp3, ftmp2);
763
0
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
764
0
    felem_square(tmp, ftmp2);
765
0
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
766
0
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
767
0
        felem_square(tmp, ftmp3);
768
0
        felem_reduce(ftmp3, tmp);
769
0
    }
770
0
    felem_mul(tmp, ftmp3, ftmp2);
771
0
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
772
0
    felem_square(tmp, ftmp3);
773
0
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
774
0
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
775
0
        felem_square(tmp, ftmp4);
776
0
        felem_reduce(ftmp4, tmp);
777
0
    }
778
0
    felem_mul(tmp, ftmp3, ftmp4);
779
0
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
780
0
    felem_square(tmp, ftmp3);
781
0
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
782
0
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
783
0
        felem_square(tmp, ftmp4);
784
0
        felem_reduce(ftmp4, tmp);
785
0
    }
786
0
    felem_mul(tmp, ftmp2, ftmp4);
787
0
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
788
0
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
789
0
        felem_square(tmp, ftmp2);
790
0
        felem_reduce(ftmp2, tmp);
791
0
    }
792
0
    felem_mul(tmp, ftmp2, ftmp);
793
0
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
794
0
    felem_square(tmp, ftmp);
795
0
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
796
0
    felem_mul(tmp, ftmp, in);
797
0
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
798
0
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
799
0
        felem_square(tmp, ftmp);
800
0
        felem_reduce(ftmp, tmp);
801
0
    }
802
0
    felem_mul(tmp, ftmp, ftmp3);
803
0
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
804
0
}
805
806
/*
807
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808
 * out to itself.
809
 */
810
static void copy_conditional(felem out, const felem in, limb icopy)
811
0
{
812
0
    unsigned i;
813
    /*
814
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
815
     */
816
0
    const limb copy = -icopy;
817
0
    for (i = 0; i < 4; ++i) {
818
0
        const limb tmp = copy & (in[i] ^ out[i]);
819
0
        out[i] ^= tmp;
820
0
    }
821
0
}
822
823
/******************************************************************************/
824
/*-
825
 *                       ELLIPTIC CURVE POINT OPERATIONS
826
 *
827
 * Points are represented in Jacobian projective coordinates:
828
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829
 * or to the point at infinity if Z == 0.
830
 *
831
 */
832
833
/*-
834
 * Double an elliptic curve point:
835
 * (X', Y', Z') = 2 * (X, Y, Z), where
836
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840
 * while x_out == y_in is not (maybe this works, but it's not tested).
841
 */
842
static void
843
point_double(felem x_out, felem y_out, felem z_out,
844
             const felem x_in, const felem y_in, const felem z_in)
845
0
{
846
0
    widefelem tmp, tmp2;
847
0
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
848
849
0
    felem_assign(ftmp, x_in);
850
0
    felem_assign(ftmp2, x_in);
851
852
    /* delta = z^2 */
853
0
    felem_square(tmp, z_in);
854
0
    felem_reduce(delta, tmp);
855
856
    /* gamma = y^2 */
857
0
    felem_square(tmp, y_in);
858
0
    felem_reduce(gamma, tmp);
859
860
    /* beta = x*gamma */
861
0
    felem_mul(tmp, x_in, gamma);
862
0
    felem_reduce(beta, tmp);
863
864
    /* alpha = 3*(x-delta)*(x+delta) */
865
0
    felem_diff(ftmp, delta);
866
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867
0
    felem_sum(ftmp2, delta);
868
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869
0
    felem_scalar(ftmp2, 3);
870
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
871
0
    felem_mul(tmp, ftmp, ftmp2);
872
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873
0
    felem_reduce(alpha, tmp);
874
875
    /* x' = alpha^2 - 8*beta */
876
0
    felem_square(tmp, alpha);
877
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878
0
    felem_assign(ftmp, beta);
879
0
    felem_scalar(ftmp, 8);
880
    /* ftmp[i] < 8 * 2^57 = 2^60 */
881
0
    felem_diff_128_64(tmp, ftmp);
882
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883
0
    felem_reduce(x_out, tmp);
884
885
    /* z' = (y + z)^2 - gamma - delta */
886
0
    felem_sum(delta, gamma);
887
    /* delta[i] < 2^57 + 2^57 = 2^58 */
888
0
    felem_assign(ftmp, y_in);
889
0
    felem_sum(ftmp, z_in);
890
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891
0
    felem_square(tmp, ftmp);
892
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893
0
    felem_diff_128_64(tmp, delta);
894
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895
0
    felem_reduce(z_out, tmp);
896
897
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898
0
    felem_scalar(beta, 4);
899
    /* beta[i] < 4 * 2^57 = 2^59 */
900
0
    felem_diff(beta, x_out);
901
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902
0
    felem_mul(tmp, alpha, beta);
903
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904
0
    felem_square(tmp2, gamma);
905
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906
0
    widefelem_scalar(tmp2, 8);
907
    /* tmp2[i] < 8 * 2^116 = 2^119 */
908
0
    widefelem_diff(tmp, tmp2);
909
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
910
0
    felem_reduce(y_out, tmp);
911
0
}
912
913
/*-
914
 * Add two elliptic curve points:
915
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
921
 *
922
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
923
 */
924
925
/*
926
 * This function is not entirely constant-time: it includes a branch for
927
 * checking whether the two input points are equal, (while not equal to the
928
 * point at infinity). This case never happens during single point
929
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
930
 */
931
static void point_add(felem x3, felem y3, felem z3,
932
                      const felem x1, const felem y1, const felem z1,
933
                      const int mixed, const felem x2, const felem y2,
934
                      const felem z2)
935
0
{
936
0
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937
0
    widefelem tmp, tmp2;
938
0
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
939
940
0
    if (!mixed) {
941
        /* ftmp2 = z2^2 */
942
0
        felem_square(tmp, z2);
943
0
        felem_reduce(ftmp2, tmp);
944
945
        /* ftmp4 = z2^3 */
946
0
        felem_mul(tmp, ftmp2, z2);
947
0
        felem_reduce(ftmp4, tmp);
948
949
        /* ftmp4 = z2^3*y1 */
950
0
        felem_mul(tmp2, ftmp4, y1);
951
0
        felem_reduce(ftmp4, tmp2);
952
953
        /* ftmp2 = z2^2*x1 */
954
0
        felem_mul(tmp2, ftmp2, x1);
955
0
        felem_reduce(ftmp2, tmp2);
956
0
    } else {
957
        /*
958
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
959
         */
960
961
        /* ftmp4 = z2^3*y1 */
962
0
        felem_assign(ftmp4, y1);
963
964
        /* ftmp2 = z2^2*x1 */
965
0
        felem_assign(ftmp2, x1);
966
0
    }
967
968
    /* ftmp = z1^2 */
969
0
    felem_square(tmp, z1);
970
0
    felem_reduce(ftmp, tmp);
971
972
    /* ftmp3 = z1^3 */
973
0
    felem_mul(tmp, ftmp, z1);
974
0
    felem_reduce(ftmp3, tmp);
975
976
    /* tmp = z1^3*y2 */
977
0
    felem_mul(tmp, ftmp3, y2);
978
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
979
980
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
981
0
    felem_diff_128_64(tmp, ftmp4);
982
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983
0
    felem_reduce(ftmp3, tmp);
984
985
    /* tmp = z1^2*x2 */
986
0
    felem_mul(tmp, ftmp, x2);
987
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
988
989
    /* ftmp = z1^2*x2 - z2^2*x1 */
990
0
    felem_diff_128_64(tmp, ftmp2);
991
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992
0
    felem_reduce(ftmp, tmp);
993
994
    /*
995
     * the formulae are incorrect if the points are equal so we check for
996
     * this and do doubling if this happens
997
     */
998
0
    x_equal = felem_is_zero(ftmp);
999
0
    y_equal = felem_is_zero(ftmp3);
1000
0
    z1_is_zero = felem_is_zero(z1);
1001
0
    z2_is_zero = felem_is_zero(z2);
1002
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003
0
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004
0
        point_double(x3, y3, z3, x1, y1, z1);
1005
0
        return;
1006
0
    }
1007
1008
    /* ftmp5 = z1*z2 */
1009
0
    if (!mixed) {
1010
0
        felem_mul(tmp, z1, z2);
1011
0
        felem_reduce(ftmp5, tmp);
1012
0
    } else {
1013
        /* special case z2 = 0 is handled later */
1014
0
        felem_assign(ftmp5, z1);
1015
0
    }
1016
1017
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018
0
    felem_mul(tmp, ftmp, ftmp5);
1019
0
    felem_reduce(z_out, tmp);
1020
1021
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022
0
    felem_assign(ftmp5, ftmp);
1023
0
    felem_square(tmp, ftmp);
1024
0
    felem_reduce(ftmp, tmp);
1025
1026
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027
0
    felem_mul(tmp, ftmp, ftmp5);
1028
0
    felem_reduce(ftmp5, tmp);
1029
1030
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031
0
    felem_mul(tmp, ftmp2, ftmp);
1032
0
    felem_reduce(ftmp2, tmp);
1033
1034
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035
0
    felem_mul(tmp, ftmp4, ftmp5);
1036
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1037
1038
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039
0
    felem_square(tmp2, ftmp3);
1040
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1041
1042
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043
0
    felem_diff_128_64(tmp2, ftmp5);
1044
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1045
1046
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047
0
    felem_assign(ftmp5, ftmp2);
1048
0
    felem_scalar(ftmp5, 2);
1049
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1050
1051
    /*-
1052
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1054
     */
1055
0
    felem_diff_128_64(tmp2, ftmp5);
1056
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057
0
    felem_reduce(x_out, tmp2);
1058
1059
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060
0
    felem_diff(ftmp2, x_out);
1061
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1062
1063
    /*
1064
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1065
     */
1066
0
    felem_mul(tmp2, ftmp3, ftmp2);
1067
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1068
1069
    /*-
1070
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1072
     */
1073
0
    widefelem_diff(tmp2, tmp);
1074
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075
0
    felem_reduce(y_out, tmp2);
1076
1077
    /*
1078
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079
     * the point at infinity, so we need to check for this separately
1080
     */
1081
1082
    /*
1083
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1084
     */
1085
0
    copy_conditional(x_out, x2, z1_is_zero);
1086
0
    copy_conditional(x_out, x1, z2_is_zero);
1087
0
    copy_conditional(y_out, y2, z1_is_zero);
1088
0
    copy_conditional(y_out, y1, z2_is_zero);
1089
0
    copy_conditional(z_out, z2, z1_is_zero);
1090
0
    copy_conditional(z_out, z1, z2_is_zero);
1091
0
    felem_assign(x3, x_out);
1092
0
    felem_assign(y3, y_out);
1093
0
    felem_assign(z3, z_out);
1094
0
}
1095
1096
/*
1097
 * select_point selects the |idx|th point from a precomputation table and
1098
 * copies it to out.
1099
 * The pre_comp array argument should be size of |size| argument
1100
 */
1101
static void select_point(const u64 idx, unsigned int size,
1102
                         const felem pre_comp[][3], felem out[3])
1103
0
{
1104
0
    unsigned i, j;
1105
0
    limb *outlimbs = &out[0][0];
1106
0
    memset(outlimbs, 0, 3 * sizeof(felem));
1107
1108
0
    for (i = 0; i < size; i++) {
1109
0
        const limb *inlimbs = &pre_comp[i][0][0];
1110
0
        u64 mask = i ^ idx;
1111
0
        mask |= mask >> 4;
1112
0
        mask |= mask >> 2;
1113
0
        mask |= mask >> 1;
1114
0
        mask &= 1;
1115
0
        mask--;
1116
0
        for (j = 0; j < 4 * 3; j++)
1117
0
            outlimbs[j] |= inlimbs[j] & mask;
1118
0
    }
1119
0
}
1120
1121
/* get_bit returns the |i|th bit in |in| */
1122
static char get_bit(const felem_bytearray in, unsigned i)
1123
0
{
1124
0
    if (i >= 224)
1125
0
        return 0;
1126
0
    return (in[i >> 3] >> (i & 7)) & 1;
1127
0
}
1128
1129
/*
1130
 * Interleaved point multiplication using precomputed point multiples: The
1131
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1134
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1135
 */
1136
static void batch_mul(felem x_out, felem y_out, felem z_out,
1137
                      const felem_bytearray scalars[],
1138
                      const unsigned num_points, const u8 *g_scalar,
1139
                      const int mixed, const felem pre_comp[][17][3],
1140
                      const felem g_pre_comp[2][16][3])
1141
0
{
1142
0
    int i, skip;
1143
0
    unsigned num;
1144
0
    unsigned gen_mul = (g_scalar != NULL);
1145
0
    felem nq[3], tmp[4];
1146
0
    u64 bits;
1147
0
    u8 sign, digit;
1148
1149
    /* set nq to the point at infinity */
1150
0
    memset(nq, 0, 3 * sizeof(felem));
1151
1152
    /*
1153
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154
     * of the generator (two in each of the last 28 rounds) and additions of
1155
     * other points multiples (every 5th round).
1156
     */
1157
0
    skip = 1;                   /* save two point operations in the first
1158
                                 * round */
1159
0
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160
        /* double */
1161
0
        if (!skip)
1162
0
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1163
1164
        /* add multiples of the generator */
1165
0
        if (gen_mul && (i <= 27)) {
1166
            /* first, look 28 bits upwards */
1167
0
            bits = get_bit(g_scalar, i + 196) << 3;
1168
0
            bits |= get_bit(g_scalar, i + 140) << 2;
1169
0
            bits |= get_bit(g_scalar, i + 84) << 1;
1170
0
            bits |= get_bit(g_scalar, i + 28);
1171
            /* select the point to add, in constant time */
1172
0
            select_point(bits, 16, g_pre_comp[1], tmp);
1173
1174
0
            if (!skip) {
1175
                /* value 1 below is argument for "mixed" */
1176
0
                point_add(nq[0], nq[1], nq[2],
1177
0
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178
0
            } else {
1179
0
                memcpy(nq, tmp, 3 * sizeof(felem));
1180
0
                skip = 0;
1181
0
            }
1182
1183
            /* second, look at the current position */
1184
0
            bits = get_bit(g_scalar, i + 168) << 3;
1185
0
            bits |= get_bit(g_scalar, i + 112) << 2;
1186
0
            bits |= get_bit(g_scalar, i + 56) << 1;
1187
0
            bits |= get_bit(g_scalar, i);
1188
            /* select the point to add, in constant time */
1189
0
            select_point(bits, 16, g_pre_comp[0], tmp);
1190
0
            point_add(nq[0], nq[1], nq[2],
1191
0
                      nq[0], nq[1], nq[2],
1192
0
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1193
0
        }
1194
1195
        /* do other additions every 5 doublings */
1196
0
        if (num_points && (i % 5 == 0)) {
1197
            /* loop over all scalars */
1198
0
            for (num = 0; num < num_points; ++num) {
1199
0
                bits = get_bit(scalars[num], i + 4) << 5;
1200
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1201
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1202
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1203
0
                bits |= get_bit(scalars[num], i) << 1;
1204
0
                bits |= get_bit(scalars[num], i - 1);
1205
0
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1206
1207
                /* select the point to add or subtract */
1208
0
                select_point(digit, 17, pre_comp[num], tmp);
1209
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210
                                            * point */
1211
0
                copy_conditional(tmp[1], tmp[3], sign);
1212
1213
0
                if (!skip) {
1214
0
                    point_add(nq[0], nq[1], nq[2],
1215
0
                              nq[0], nq[1], nq[2],
1216
0
                              mixed, tmp[0], tmp[1], tmp[2]);
1217
0
                } else {
1218
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1219
0
                    skip = 0;
1220
0
                }
1221
0
            }
1222
0
        }
1223
0
    }
1224
0
    felem_assign(x_out, nq[0]);
1225
0
    felem_assign(y_out, nq[1]);
1226
0
    felem_assign(z_out, nq[2]);
1227
0
}
1228
1229
/******************************************************************************/
1230
/*
1231
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1232
 */
1233
1234
static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1235
0
{
1236
0
    NISTP224_PRE_COMP *ret = NULL;
1237
0
    ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238
0
    if (!ret) {
1239
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240
0
        return ret;
1241
0
    }
1242
0
    memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243
0
    ret->references = 1;
1244
0
    return ret;
1245
0
}
1246
1247
static void *nistp224_pre_comp_dup(void *src_)
1248
0
{
1249
0
    NISTP224_PRE_COMP *src = src_;
1250
1251
    /* no need to actually copy, these objects never change! */
1252
0
    CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1253
1254
0
    return src_;
1255
0
}
1256
1257
static void nistp224_pre_comp_free(void *pre_)
1258
0
{
1259
0
    int i;
1260
0
    NISTP224_PRE_COMP *pre = pre_;
1261
1262
0
    if (!pre)
1263
0
        return;
1264
1265
0
    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266
0
    if (i > 0)
1267
0
        return;
1268
1269
0
    OPENSSL_free(pre);
1270
0
}
1271
1272
static void nistp224_pre_comp_clear_free(void *pre_)
1273
0
{
1274
0
    int i;
1275
0
    NISTP224_PRE_COMP *pre = pre_;
1276
1277
0
    if (!pre)
1278
0
        return;
1279
1280
0
    i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281
0
    if (i > 0)
1282
0
        return;
1283
1284
0
    OPENSSL_cleanse(pre, sizeof *pre);
1285
0
    OPENSSL_free(pre);
1286
0
}
1287
1288
/******************************************************************************/
1289
/*
1290
 * OPENSSL EC_METHOD FUNCTIONS
1291
 */
1292
1293
int ec_GFp_nistp224_group_init(EC_GROUP *group)
1294
0
{
1295
0
    int ret;
1296
0
    ret = ec_GFp_simple_group_init(group);
1297
0
    group->a_is_minus3 = 1;
1298
0
    return ret;
1299
0
}
1300
1301
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302
                                    const BIGNUM *a, const BIGNUM *b,
1303
                                    BN_CTX *ctx)
1304
0
{
1305
0
    int ret = 0;
1306
0
    BN_CTX *new_ctx = NULL;
1307
0
    BIGNUM *curve_p, *curve_a, *curve_b;
1308
1309
0
    if (ctx == NULL)
1310
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311
0
            return 0;
1312
0
    BN_CTX_start(ctx);
1313
0
    if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314
0
        ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315
0
        ((curve_b = BN_CTX_get(ctx)) == NULL))
1316
0
        goto err;
1317
0
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318
0
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319
0
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320
0
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321
0
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322
0
              EC_R_WRONG_CURVE_PARAMETERS);
1323
0
        goto err;
1324
0
    }
1325
0
    group->field_mod_func = BN_nist_mod_224;
1326
0
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327
0
 err:
1328
0
    BN_CTX_end(ctx);
1329
0
    if (new_ctx != NULL)
1330
0
        BN_CTX_free(new_ctx);
1331
0
    return ret;
1332
0
}
1333
1334
/*
1335
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336
 * (X/Z^2, Y/Z^3)
1337
 */
1338
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339
                                                 const EC_POINT *point,
1340
                                                 BIGNUM *x, BIGNUM *y,
1341
                                                 BN_CTX *ctx)
1342
0
{
1343
0
    felem z1, z2, x_in, y_in, x_out, y_out;
1344
0
    widefelem tmp;
1345
1346
0
    if (EC_POINT_is_at_infinity(group, point)) {
1347
0
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348
0
              EC_R_POINT_AT_INFINITY);
1349
0
        return 0;
1350
0
    }
1351
0
    if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352
0
        (!BN_to_felem(z1, &point->Z)))
1353
0
        return 0;
1354
0
    felem_inv(z2, z1);
1355
0
    felem_square(tmp, z2);
1356
0
    felem_reduce(z1, tmp);
1357
0
    felem_mul(tmp, x_in, z1);
1358
0
    felem_reduce(x_in, tmp);
1359
0
    felem_contract(x_out, x_in);
1360
0
    if (x != NULL) {
1361
0
        if (!felem_to_BN(x, x_out)) {
1362
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363
0
                  ERR_R_BN_LIB);
1364
0
            return 0;
1365
0
        }
1366
0
    }
1367
0
    felem_mul(tmp, z1, z2);
1368
0
    felem_reduce(z1, tmp);
1369
0
    felem_mul(tmp, y_in, z1);
1370
0
    felem_reduce(y_in, tmp);
1371
0
    felem_contract(y_out, y_in);
1372
0
    if (y != NULL) {
1373
0
        if (!felem_to_BN(y, y_out)) {
1374
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375
0
                  ERR_R_BN_LIB);
1376
0
            return 0;
1377
0
        }
1378
0
    }
1379
0
    return 1;
1380
0
}
1381
1382
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383
                               felem tmp_felems[ /* num+1 */ ])
1384
0
{
1385
    /*
1386
     * Runs in constant time, unless an input is the point at infinity (which
1387
     * normally shouldn't happen).
1388
     */
1389
0
    ec_GFp_nistp_points_make_affine_internal(num,
1390
0
                                             points,
1391
0
                                             sizeof(felem),
1392
0
                                             tmp_felems,
1393
0
                                             (void (*)(void *))felem_one,
1394
0
                                             felem_is_zero_int,
1395
0
                                             (void (*)(void *, const void *))
1396
0
                                             felem_assign,
1397
0
                                             (void (*)(void *, const void *))
1398
0
                                             felem_square_reduce, (void (*)
1399
0
                                                                   (void *,
1400
0
                                                                    const void
1401
0
                                                                    *,
1402
0
                                                                    const void
1403
0
                                                                    *))
1404
0
                                             felem_mul_reduce,
1405
0
                                             (void (*)(void *, const void *))
1406
0
                                             felem_inv,
1407
0
                                             (void (*)(void *, const void *))
1408
0
                                             felem_contract);
1409
0
}
1410
1411
/*
1412
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1413
 * values Result is stored in r (r can equal one of the inputs).
1414
 */
1415
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1416
                               const BIGNUM *scalar, size_t num,
1417
                               const EC_POINT *points[],
1418
                               const BIGNUM *scalars[], BN_CTX *ctx)
1419
0
{
1420
0
    int ret = 0;
1421
0
    int j;
1422
0
    unsigned i;
1423
0
    int mixed = 0;
1424
0
    BN_CTX *new_ctx = NULL;
1425
0
    BIGNUM *x, *y, *z, *tmp_scalar;
1426
0
    felem_bytearray g_secret;
1427
0
    felem_bytearray *secrets = NULL;
1428
0
    felem(*pre_comp)[17][3] = NULL;
1429
0
    felem *tmp_felems = NULL;
1430
0
    felem_bytearray tmp;
1431
0
    unsigned num_bytes;
1432
0
    int have_pre_comp = 0;
1433
0
    size_t num_points = num;
1434
0
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1435
0
    NISTP224_PRE_COMP *pre = NULL;
1436
0
    const felem(*g_pre_comp)[16][3] = NULL;
1437
0
    EC_POINT *generator = NULL;
1438
0
    const EC_POINT *p = NULL;
1439
0
    const BIGNUM *p_scalar = NULL;
1440
1441
0
    if (ctx == NULL)
1442
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1443
0
            return 0;
1444
0
    BN_CTX_start(ctx);
1445
0
    if (((x = BN_CTX_get(ctx)) == NULL) ||
1446
0
        ((y = BN_CTX_get(ctx)) == NULL) ||
1447
0
        ((z = BN_CTX_get(ctx)) == NULL) ||
1448
0
        ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1449
0
        goto err;
1450
1451
0
    if (scalar != NULL) {
1452
0
        pre = EC_EX_DATA_get_data(group->extra_data,
1453
0
                                  nistp224_pre_comp_dup,
1454
0
                                  nistp224_pre_comp_free,
1455
0
                                  nistp224_pre_comp_clear_free);
1456
0
        if (pre)
1457
            /* we have precomputation, try to use it */
1458
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1459
0
        else
1460
            /* try to use the standard precomputation */
1461
0
            g_pre_comp = &gmul[0];
1462
0
        generator = EC_POINT_new(group);
1463
0
        if (generator == NULL)
1464
0
            goto err;
1465
        /* get the generator from precomputation */
1466
0
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1467
0
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1468
0
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1469
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1470
0
            goto err;
1471
0
        }
1472
0
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1473
0
                                                      generator, x, y, z,
1474
0
                                                      ctx))
1475
0
            goto err;
1476
0
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1477
            /* precomputation matches generator */
1478
0
            have_pre_comp = 1;
1479
0
        else
1480
            /*
1481
             * we don't have valid precomputation: treat the generator as a
1482
             * random point
1483
             */
1484
0
            num_points = num_points + 1;
1485
0
    }
1486
1487
0
    if (num_points > 0) {
1488
0
        if (num_points >= 3) {
1489
            /*
1490
             * unless we precompute multiples for just one or two points,
1491
             * converting those into affine form is time well spent
1492
             */
1493
0
            mixed = 1;
1494
0
        }
1495
0
        secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1496
0
        pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1497
0
        if (mixed)
1498
0
            tmp_felems =
1499
0
                OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1500
0
        if ((secrets == NULL) || (pre_comp == NULL)
1501
0
            || (mixed && (tmp_felems == NULL))) {
1502
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1503
0
            goto err;
1504
0
        }
1505
1506
        /*
1507
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1508
         * i.e., they contribute nothing to the linear combination
1509
         */
1510
0
        memset(secrets, 0, num_points * sizeof(felem_bytearray));
1511
0
        memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1512
0
        for (i = 0; i < num_points; ++i) {
1513
0
            if (i == num)
1514
                /* the generator */
1515
0
            {
1516
0
                p = EC_GROUP_get0_generator(group);
1517
0
                p_scalar = scalar;
1518
0
            } else
1519
                /* the i^th point */
1520
0
            {
1521
0
                p = points[i];
1522
0
                p_scalar = scalars[i];
1523
0
            }
1524
0
            if ((p_scalar != NULL) && (p != NULL)) {
1525
                /* reduce scalar to 0 <= scalar < 2^224 */
1526
0
                if ((BN_num_bits(p_scalar) > 224)
1527
0
                    || (BN_is_negative(p_scalar))) {
1528
                    /*
1529
                     * this is an unusual input, and we don't guarantee
1530
                     * constant-timeness
1531
                     */
1532
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1533
0
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1534
0
                        goto err;
1535
0
                    }
1536
0
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1537
0
                } else
1538
0
                    num_bytes = BN_bn2bin(p_scalar, tmp);
1539
0
                flip_endian(secrets[i], tmp, num_bytes);
1540
                /* precompute multiples */
1541
0
                if ((!BN_to_felem(x_out, &p->X)) ||
1542
0
                    (!BN_to_felem(y_out, &p->Y)) ||
1543
0
                    (!BN_to_felem(z_out, &p->Z)))
1544
0
                    goto err;
1545
0
                felem_assign(pre_comp[i][1][0], x_out);
1546
0
                felem_assign(pre_comp[i][1][1], y_out);
1547
0
                felem_assign(pre_comp[i][1][2], z_out);
1548
0
                for (j = 2; j <= 16; ++j) {
1549
0
                    if (j & 1) {
1550
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1551
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1552
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1553
0
                                  pre_comp[i][j - 1][0],
1554
0
                                  pre_comp[i][j - 1][1],
1555
0
                                  pre_comp[i][j - 1][2]);
1556
0
                    } else {
1557
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1558
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1559
0
                                     pre_comp[i][j / 2][1],
1560
0
                                     pre_comp[i][j / 2][2]);
1561
0
                    }
1562
0
                }
1563
0
            }
1564
0
        }
1565
0
        if (mixed)
1566
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1567
0
    }
1568
1569
    /* the scalar for the generator */
1570
0
    if ((scalar != NULL) && (have_pre_comp)) {
1571
0
        memset(g_secret, 0, sizeof g_secret);
1572
        /* reduce scalar to 0 <= scalar < 2^224 */
1573
0
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1574
            /*
1575
             * this is an unusual input, and we don't guarantee
1576
             * constant-timeness
1577
             */
1578
0
            if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1579
0
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1580
0
                goto err;
1581
0
            }
1582
0
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1583
0
        } else
1584
0
            num_bytes = BN_bn2bin(scalar, tmp);
1585
0
        flip_endian(g_secret, tmp, num_bytes);
1586
        /* do the multiplication with generator precomputation */
1587
0
        batch_mul(x_out, y_out, z_out,
1588
0
                  (const felem_bytearray(*))secrets, num_points,
1589
0
                  g_secret,
1590
0
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1591
0
    } else
1592
        /* do the multiplication without generator precomputation */
1593
0
        batch_mul(x_out, y_out, z_out,
1594
0
                  (const felem_bytearray(*))secrets, num_points,
1595
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1596
    /* reduce the output to its unique minimal representation */
1597
0
    felem_contract(x_in, x_out);
1598
0
    felem_contract(y_in, y_out);
1599
0
    felem_contract(z_in, z_out);
1600
0
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1601
0
        (!felem_to_BN(z, z_in))) {
1602
0
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1603
0
        goto err;
1604
0
    }
1605
0
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1606
1607
0
 err:
1608
0
    BN_CTX_end(ctx);
1609
0
    if (generator != NULL)
1610
0
        EC_POINT_free(generator);
1611
0
    if (new_ctx != NULL)
1612
0
        BN_CTX_free(new_ctx);
1613
0
    if (secrets != NULL)
1614
0
        OPENSSL_free(secrets);
1615
0
    if (pre_comp != NULL)
1616
0
        OPENSSL_free(pre_comp);
1617
0
    if (tmp_felems != NULL)
1618
0
        OPENSSL_free(tmp_felems);
1619
0
    return ret;
1620
0
}
1621
1622
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1623
0
{
1624
0
    int ret = 0;
1625
0
    NISTP224_PRE_COMP *pre = NULL;
1626
0
    int i, j;
1627
0
    BN_CTX *new_ctx = NULL;
1628
0
    BIGNUM *x, *y;
1629
0
    EC_POINT *generator = NULL;
1630
0
    felem tmp_felems[32];
1631
1632
    /* throw away old precomputation */
1633
0
    EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1634
0
                         nistp224_pre_comp_free,
1635
0
                         nistp224_pre_comp_clear_free);
1636
0
    if (ctx == NULL)
1637
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1638
0
            return 0;
1639
0
    BN_CTX_start(ctx);
1640
0
    if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1641
0
        goto err;
1642
    /* get the generator */
1643
0
    if (group->generator == NULL)
1644
0
        goto err;
1645
0
    generator = EC_POINT_new(group);
1646
0
    if (generator == NULL)
1647
0
        goto err;
1648
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1649
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1650
0
    if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1651
0
        goto err;
1652
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1653
0
        goto err;
1654
    /*
1655
     * if the generator is the standard one, use built-in precomputation
1656
     */
1657
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1658
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1659
0
        goto done;
1660
0
    }
1661
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1662
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1663
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1664
0
        goto err;
1665
    /*
1666
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1667
     * 2^140*G, 2^196*G for the second one
1668
     */
1669
0
    for (i = 1; i <= 8; i <<= 1) {
1670
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1671
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1672
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1673
0
        for (j = 0; j < 27; ++j) {
1674
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1675
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1676
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1677
0
        }
1678
0
        if (i == 8)
1679
0
            break;
1680
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1681
0
                     pre->g_pre_comp[0][2 * i][1],
1682
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1683
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1684
0
        for (j = 0; j < 27; ++j) {
1685
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1686
0
                         pre->g_pre_comp[0][2 * i][1],
1687
0
                         pre->g_pre_comp[0][2 * i][2],
1688
0
                         pre->g_pre_comp[0][2 * i][0],
1689
0
                         pre->g_pre_comp[0][2 * i][1],
1690
0
                         pre->g_pre_comp[0][2 * i][2]);
1691
0
        }
1692
0
    }
1693
0
    for (i = 0; i < 2; i++) {
1694
        /* g_pre_comp[i][0] is the point at infinity */
1695
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1696
        /* the remaining multiples */
1697
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1698
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1699
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1700
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1704
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1705
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1706
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708
0
                  pre->g_pre_comp[i][2][2]);
1709
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1710
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1711
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1712
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1713
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1714
0
                  pre->g_pre_comp[i][4][2]);
1715
        /*
1716
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1717
         */
1718
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1719
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1720
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1721
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1722
0
                  pre->g_pre_comp[i][2][2]);
1723
0
        for (j = 1; j < 8; ++j) {
1724
            /* odd multiples: add G resp. 2^28*G */
1725
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1726
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1727
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1728
0
                      pre->g_pre_comp[i][2 * j][0],
1729
0
                      pre->g_pre_comp[i][2 * j][1],
1730
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1731
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1732
0
                      pre->g_pre_comp[i][1][2]);
1733
0
        }
1734
0
    }
1735
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1736
1737
0
 done:
1738
0
    if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1739
0
                             nistp224_pre_comp_free,
1740
0
                             nistp224_pre_comp_clear_free))
1741
0
        goto err;
1742
0
    ret = 1;
1743
0
    pre = NULL;
1744
0
 err:
1745
0
    BN_CTX_end(ctx);
1746
0
    if (generator != NULL)
1747
0
        EC_POINT_free(generator);
1748
0
    if (new_ctx != NULL)
1749
0
        BN_CTX_free(new_ctx);
1750
0
    if (pre)
1751
0
        nistp224_pre_comp_free(pre);
1752
0
    return ret;
1753
0
}
1754
1755
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1756
0
{
1757
0
    if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1758
0
                            nistp224_pre_comp_free,
1759
0
                            nistp224_pre_comp_clear_free)
1760
0
        != NULL)
1761
0
        return 1;
1762
0
    else
1763
0
        return 0;
1764
0
}
1765
1766
#else
1767
static void *dummy = &dummy;
1768
#endif