Coverage Report

Created: 2018-08-29 13:53

/src/openssl/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28
 *
29
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30
 * and Adam Langley's public domain 64-bit C implementation of curve25519
31
 */
32
33
#include <openssl/opensslconf.h>
34
#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35
NON_EMPTY_TRANSLATION_UNIT
36
#else
37
38
# include <stdint.h>
39
# include <string.h>
40
# include <openssl/err.h>
41
# include "ec_lcl.h"
42
43
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44
  /* even with gcc, the typedef won't work for 32-bit platforms */
45
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
46
                                 * platforms */
47
# else
48
#  error "Your compiler doesn't appear to support 128-bit integer types"
49
# endif
50
51
typedef uint8_t u8;
52
typedef uint64_t u64;
53
54
/******************************************************************************/
55
/*-
56
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57
 *
58
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59
 * using 64-bit coefficients called 'limbs',
60
 * and sometimes (for multiplication results) as
61
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62
 * using 128-bit coefficients called 'widelimbs'.
63
 * A 4-limb representation is an 'felem';
64
 * a 7-widelimb representation is a 'widefelem'.
65
 * Even within felems, bits of adjacent limbs overlap, and we don't always
66
 * reduce the representations: we ensure that inputs to each felem
67
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68
 * and fit into a 128-bit word without overflow. The coefficients are then
69
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70
 * We only reduce to the unique minimal representation at the end of the
71
 * computation.
72
 */
73
74
typedef uint64_t limb;
75
typedef uint128_t widelimb;
76
77
typedef limb felem[4];
78
typedef widelimb widefelem[7];
79
80
/*
81
 * Field element represented as a byte array. 28*8 = 224 bits is also the
82
 * group order size for the elliptic curve, and we also use this type for
83
 * scalars for point multiplication.
84
 */
85
typedef u8 felem_bytearray[28];
86
87
static const felem_bytearray nistp224_curve_params[5] = {
88
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
89
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
90
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
91
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
92
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
93
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
94
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
95
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
96
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
97
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
98
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
99
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
100
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
101
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
102
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
103
};
104
105
/*-
106
 * Precomputed multiples of the standard generator
107
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
108
 * (0 for the point at infinity).
109
 * For each field element, slice a_0 is word 0, etc.
110
 *
111
 * The table has 2 * 16 elements, starting with the following:
112
 * index | bits    | point
113
 * ------+---------+------------------------------
114
 *     0 | 0 0 0 0 | 0G
115
 *     1 | 0 0 0 1 | 1G
116
 *     2 | 0 0 1 0 | 2^56G
117
 *     3 | 0 0 1 1 | (2^56 + 1)G
118
 *     4 | 0 1 0 0 | 2^112G
119
 *     5 | 0 1 0 1 | (2^112 + 1)G
120
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
121
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
122
 *     8 | 1 0 0 0 | 2^168G
123
 *     9 | 1 0 0 1 | (2^168 + 1)G
124
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
125
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
126
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
127
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
128
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
129
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
130
 * followed by a copy of this with each element multiplied by 2^28.
131
 *
132
 * The reason for this is so that we can clock bits into four different
133
 * locations when doing simple scalar multiplies against the base point,
134
 * and then another four locations using the second 16 elements.
135
 */
136
static const felem gmul[2][16][3] = {
137
{{{0, 0, 0, 0},
138
  {0, 0, 0, 0},
139
  {0, 0, 0, 0}},
140
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
141
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142
  {1, 0, 0, 0}},
143
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
145
  {1, 0, 0, 0}},
146
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
147
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
148
  {1, 0, 0, 0}},
149
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
151
  {1, 0, 0, 0}},
152
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
153
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
154
  {1, 0, 0, 0}},
155
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
156
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
157
  {1, 0, 0, 0}},
158
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
159
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
160
  {1, 0, 0, 0}},
161
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
162
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
163
  {1, 0, 0, 0}},
164
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
165
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
166
  {1, 0, 0, 0}},
167
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
168
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
169
  {1, 0, 0, 0}},
170
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
171
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
172
  {1, 0, 0, 0}},
173
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
174
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
175
  {1, 0, 0, 0}},
176
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
177
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
178
  {1, 0, 0, 0}},
179
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
180
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
181
  {1, 0, 0, 0}},
182
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
183
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
184
  {1, 0, 0, 0}}},
185
{{{0, 0, 0, 0},
186
  {0, 0, 0, 0},
187
  {0, 0, 0, 0}},
188
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
189
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
190
  {1, 0, 0, 0}},
191
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
192
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
193
  {1, 0, 0, 0}},
194
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
195
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
196
  {1, 0, 0, 0}},
197
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
198
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
199
  {1, 0, 0, 0}},
200
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
201
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
202
  {1, 0, 0, 0}},
203
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
204
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
205
  {1, 0, 0, 0}},
206
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
207
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
208
  {1, 0, 0, 0}},
209
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
210
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
211
  {1, 0, 0, 0}},
212
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
213
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
214
  {1, 0, 0, 0}},
215
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
216
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
217
  {1, 0, 0, 0}},
218
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
219
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
220
  {1, 0, 0, 0}},
221
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
222
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
223
  {1, 0, 0, 0}},
224
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
225
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
226
  {1, 0, 0, 0}},
227
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
228
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
229
  {1, 0, 0, 0}},
230
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
231
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232
  {1, 0, 0, 0}}}
233
};
234
235
/* Precomputation for the group generator. */
236
struct nistp224_pre_comp_st {
237
    felem g_pre_comp[2][16][3];
238
    CRYPTO_REF_COUNT references;
239
    CRYPTO_RWLOCK *lock;
240
};
241
242
const EC_METHOD *EC_GFp_nistp224_method(void)
243
0
{
244
0
    static const EC_METHOD ret = {
245
0
        EC_FLAGS_DEFAULT_OCT,
246
0
        NID_X9_62_prime_field,
247
0
        ec_GFp_nistp224_group_init,
248
0
        ec_GFp_simple_group_finish,
249
0
        ec_GFp_simple_group_clear_finish,
250
0
        ec_GFp_nist_group_copy,
251
0
        ec_GFp_nistp224_group_set_curve,
252
0
        ec_GFp_simple_group_get_curve,
253
0
        ec_GFp_simple_group_get_degree,
254
0
        ec_group_simple_order_bits,
255
0
        ec_GFp_simple_group_check_discriminant,
256
0
        ec_GFp_simple_point_init,
257
0
        ec_GFp_simple_point_finish,
258
0
        ec_GFp_simple_point_clear_finish,
259
0
        ec_GFp_simple_point_copy,
260
0
        ec_GFp_simple_point_set_to_infinity,
261
0
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
262
0
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
263
0
        ec_GFp_simple_point_set_affine_coordinates,
264
0
        ec_GFp_nistp224_point_get_affine_coordinates,
265
0
        0 /* point_set_compressed_coordinates */ ,
266
0
        0 /* point2oct */ ,
267
0
        0 /* oct2point */ ,
268
0
        ec_GFp_simple_add,
269
0
        ec_GFp_simple_dbl,
270
0
        ec_GFp_simple_invert,
271
0
        ec_GFp_simple_is_at_infinity,
272
0
        ec_GFp_simple_is_on_curve,
273
0
        ec_GFp_simple_cmp,
274
0
        ec_GFp_simple_make_affine,
275
0
        ec_GFp_simple_points_make_affine,
276
0
        ec_GFp_nistp224_points_mul,
277
0
        ec_GFp_nistp224_precompute_mult,
278
0
        ec_GFp_nistp224_have_precompute_mult,
279
0
        ec_GFp_nist_field_mul,
280
0
        ec_GFp_nist_field_sqr,
281
0
        0 /* field_div */ ,
282
0
        0 /* field_encode */ ,
283
0
        0 /* field_decode */ ,
284
0
        0,                      /* field_set_to_one */
285
0
        ec_key_simple_priv2oct,
286
0
        ec_key_simple_oct2priv,
287
0
        0, /* set private */
288
0
        ec_key_simple_generate_key,
289
0
        ec_key_simple_check_key,
290
0
        ec_key_simple_generate_public_key,
291
0
        0, /* keycopy */
292
0
        0, /* keyfinish */
293
0
        ecdh_simple_compute_key,
294
0
        0, /* field_inverse_mod_ord */
295
0
        0, /* blind_coordinates */
296
0
        0, /* ladder_pre */
297
0
        0, /* ladder_step */
298
0
        0  /* ladder_post */
299
0
    };
300
0
301
0
    return &ret;
302
0
}
303
304
/*
305
 * Helper functions to convert field elements to/from internal representation
306
 */
307
static void bin28_to_felem(felem out, const u8 in[28])
308
0
{
309
0
    out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
310
0
    out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
311
0
    out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
312
0
    out[3] = (*((const uint64_t *)(in+20))) >> 8;
313
0
}
314
315
static void felem_to_bin28(u8 out[28], const felem in)
316
0
{
317
0
    unsigned i;
318
0
    for (i = 0; i < 7; ++i) {
319
0
        out[i] = in[0] >> (8 * i);
320
0
        out[i + 7] = in[1] >> (8 * i);
321
0
        out[i + 14] = in[2] >> (8 * i);
322
0
        out[i + 21] = in[3] >> (8 * i);
323
0
    }
324
0
}
325
326
/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
327
static void flip_endian(u8 *out, const u8 *in, unsigned len)
328
0
{
329
0
    unsigned i;
330
0
    for (i = 0; i < len; ++i)
331
0
        out[i] = in[len - 1 - i];
332
0
}
333
334
/* From OpenSSL BIGNUM to internal representation */
335
static int BN_to_felem(felem out, const BIGNUM *bn)
336
0
{
337
0
    felem_bytearray b_in;
338
0
    felem_bytearray b_out;
339
0
    unsigned num_bytes;
340
0
341
0
    /* BN_bn2bin eats leading zeroes */
342
0
    memset(b_out, 0, sizeof(b_out));
343
0
    num_bytes = BN_num_bytes(bn);
344
0
    if (num_bytes > sizeof(b_out)) {
345
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
346
0
        return 0;
347
0
    }
348
0
    if (BN_is_negative(bn)) {
349
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
350
0
        return 0;
351
0
    }
352
0
    num_bytes = BN_bn2bin(bn, b_in);
353
0
    flip_endian(b_out, b_in, num_bytes);
354
0
    bin28_to_felem(out, b_out);
355
0
    return 1;
356
0
}
357
358
/* From internal representation to OpenSSL BIGNUM */
359
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
360
0
{
361
0
    felem_bytearray b_in, b_out;
362
0
    felem_to_bin28(b_in, in);
363
0
    flip_endian(b_out, b_in, sizeof(b_out));
364
0
    return BN_bin2bn(b_out, sizeof(b_out), out);
365
0
}
366
367
/******************************************************************************/
368
/*-
369
 *                              FIELD OPERATIONS
370
 *
371
 * Field operations, using the internal representation of field elements.
372
 * NB! These operations are specific to our point multiplication and cannot be
373
 * expected to be correct in general - e.g., multiplication with a large scalar
374
 * will cause an overflow.
375
 *
376
 */
377
378
static void felem_one(felem out)
379
0
{
380
0
    out[0] = 1;
381
0
    out[1] = 0;
382
0
    out[2] = 0;
383
0
    out[3] = 0;
384
0
}
385
386
static void felem_assign(felem out, const felem in)
387
0
{
388
0
    out[0] = in[0];
389
0
    out[1] = in[1];
390
0
    out[2] = in[2];
391
0
    out[3] = in[3];
392
0
}
393
394
/* Sum two field elements: out += in */
395
static void felem_sum(felem out, const felem in)
396
0
{
397
0
    out[0] += in[0];
398
0
    out[1] += in[1];
399
0
    out[2] += in[2];
400
0
    out[3] += in[3];
401
0
}
402
403
/* Subtract field elements: out -= in */
404
/* Assumes in[i] < 2^57 */
405
static void felem_diff(felem out, const felem in)
406
0
{
407
0
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
408
0
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
409
0
    static const limb two58m42m2 = (((limb) 1) << 58) -
410
0
        (((limb) 1) << 42) - (((limb) 1) << 2);
411
0
412
0
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
413
0
    out[0] += two58p2;
414
0
    out[1] += two58m42m2;
415
0
    out[2] += two58m2;
416
0
    out[3] += two58m2;
417
0
418
0
    out[0] -= in[0];
419
0
    out[1] -= in[1];
420
0
    out[2] -= in[2];
421
0
    out[3] -= in[3];
422
0
}
423
424
/* Subtract in unreduced 128-bit mode: out -= in */
425
/* Assumes in[i] < 2^119 */
426
static void widefelem_diff(widefelem out, const widefelem in)
427
0
{
428
0
    static const widelimb two120 = ((widelimb) 1) << 120;
429
0
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
430
0
        (((widelimb) 1) << 64);
431
0
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
432
0
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
433
0
434
0
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
435
0
    out[0] += two120;
436
0
    out[1] += two120m64;
437
0
    out[2] += two120m64;
438
0
    out[3] += two120;
439
0
    out[4] += two120m104m64;
440
0
    out[5] += two120m64;
441
0
    out[6] += two120m64;
442
0
443
0
    out[0] -= in[0];
444
0
    out[1] -= in[1];
445
0
    out[2] -= in[2];
446
0
    out[3] -= in[3];
447
0
    out[4] -= in[4];
448
0
    out[5] -= in[5];
449
0
    out[6] -= in[6];
450
0
}
451
452
/* Subtract in mixed mode: out128 -= in64 */
453
/* in[i] < 2^63 */
454
static void felem_diff_128_64(widefelem out, const felem in)
455
0
{
456
0
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
457
0
        (((widelimb) 1) << 8);
458
0
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
459
0
        (((widelimb) 1) << 8);
460
0
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
461
0
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
462
0
463
0
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
464
0
    out[0] += two64p8;
465
0
    out[1] += two64m48m8;
466
0
    out[2] += two64m8;
467
0
    out[3] += two64m8;
468
0
469
0
    out[0] -= in[0];
470
0
    out[1] -= in[1];
471
0
    out[2] -= in[2];
472
0
    out[3] -= in[3];
473
0
}
474
475
/*
476
 * Multiply a field element by a scalar: out = out * scalar The scalars we
477
 * actually use are small, so results fit without overflow
478
 */
479
static void felem_scalar(felem out, const limb scalar)
480
0
{
481
0
    out[0] *= scalar;
482
0
    out[1] *= scalar;
483
0
    out[2] *= scalar;
484
0
    out[3] *= scalar;
485
0
}
486
487
/*
488
 * Multiply an unreduced field element by a scalar: out = out * scalar The
489
 * scalars we actually use are small, so results fit without overflow
490
 */
491
static void widefelem_scalar(widefelem out, const widelimb scalar)
492
0
{
493
0
    out[0] *= scalar;
494
0
    out[1] *= scalar;
495
0
    out[2] *= scalar;
496
0
    out[3] *= scalar;
497
0
    out[4] *= scalar;
498
0
    out[5] *= scalar;
499
0
    out[6] *= scalar;
500
0
}
501
502
/* Square a field element: out = in^2 */
503
static void felem_square(widefelem out, const felem in)
504
0
{
505
0
    limb tmp0, tmp1, tmp2;
506
0
    tmp0 = 2 * in[0];
507
0
    tmp1 = 2 * in[1];
508
0
    tmp2 = 2 * in[2];
509
0
    out[0] = ((widelimb) in[0]) * in[0];
510
0
    out[1] = ((widelimb) in[0]) * tmp1;
511
0
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
512
0
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
513
0
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
514
0
    out[5] = ((widelimb) in[3]) * tmp2;
515
0
    out[6] = ((widelimb) in[3]) * in[3];
516
0
}
517
518
/* Multiply two field elements: out = in1 * in2 */
519
static void felem_mul(widefelem out, const felem in1, const felem in2)
520
0
{
521
0
    out[0] = ((widelimb) in1[0]) * in2[0];
522
0
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
523
0
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
524
0
             ((widelimb) in1[2]) * in2[0];
525
0
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
526
0
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
527
0
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
528
0
             ((widelimb) in1[3]) * in2[1];
529
0
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
530
0
    out[6] = ((widelimb) in1[3]) * in2[3];
531
0
}
532
533
/*-
534
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
535
 * Requires in[i] < 2^126,
536
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
537
static void felem_reduce(felem out, const widefelem in)
538
0
{
539
0
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
540
0
        (((widelimb) 1) << 15);
541
0
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
542
0
        (((widelimb) 1) << 71);
543
0
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
544
0
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
545
0
    widelimb output[5];
546
0
547
0
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
548
0
    output[0] = in[0] + two127p15;
549
0
    output[1] = in[1] + two127m71m55;
550
0
    output[2] = in[2] + two127m71;
551
0
    output[3] = in[3];
552
0
    output[4] = in[4];
553
0
554
0
    /* Eliminate in[4], in[5], in[6] */
555
0
    output[4] += in[6] >> 16;
556
0
    output[3] += (in[6] & 0xffff) << 40;
557
0
    output[2] -= in[6];
558
0
559
0
    output[3] += in[5] >> 16;
560
0
    output[2] += (in[5] & 0xffff) << 40;
561
0
    output[1] -= in[5];
562
0
563
0
    output[2] += output[4] >> 16;
564
0
    output[1] += (output[4] & 0xffff) << 40;
565
0
    output[0] -= output[4];
566
0
567
0
    /* Carry 2 -> 3 -> 4 */
568
0
    output[3] += output[2] >> 56;
569
0
    output[2] &= 0x00ffffffffffffff;
570
0
571
0
    output[4] = output[3] >> 56;
572
0
    output[3] &= 0x00ffffffffffffff;
573
0
574
0
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
575
0
576
0
    /* Eliminate output[4] */
577
0
    output[2] += output[4] >> 16;
578
0
    /* output[2] < 2^56 + 2^56 = 2^57 */
579
0
    output[1] += (output[4] & 0xffff) << 40;
580
0
    output[0] -= output[4];
581
0
582
0
    /* Carry 0 -> 1 -> 2 -> 3 */
583
0
    output[1] += output[0] >> 56;
584
0
    out[0] = output[0] & 0x00ffffffffffffff;
585
0
586
0
    output[2] += output[1] >> 56;
587
0
    /* output[2] < 2^57 + 2^72 */
588
0
    out[1] = output[1] & 0x00ffffffffffffff;
589
0
    output[3] += output[2] >> 56;
590
0
    /* output[3] <= 2^56 + 2^16 */
591
0
    out[2] = output[2] & 0x00ffffffffffffff;
592
0
593
0
    /*-
594
0
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
595
0
     * out[3] <= 2^56 + 2^16 (due to final carry),
596
0
     * so out < 2*p
597
0
     */
598
0
    out[3] = output[3];
599
0
}
600
601
static void felem_square_reduce(felem out, const felem in)
602
0
{
603
0
    widefelem tmp;
604
0
    felem_square(tmp, in);
605
0
    felem_reduce(out, tmp);
606
0
}
607
608
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
609
0
{
610
0
    widefelem tmp;
611
0
    felem_mul(tmp, in1, in2);
612
0
    felem_reduce(out, tmp);
613
0
}
614
615
/*
616
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
617
 * call felem_reduce first)
618
 */
619
static void felem_contract(felem out, const felem in)
620
0
{
621
0
    static const int64_t two56 = ((limb) 1) << 56;
622
0
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
623
0
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
624
0
    int64_t tmp[4], a;
625
0
    tmp[0] = in[0];
626
0
    tmp[1] = in[1];
627
0
    tmp[2] = in[2];
628
0
    tmp[3] = in[3];
629
0
    /* Case 1: a = 1 iff in >= 2^224 */
630
0
    a = (in[3] >> 56);
631
0
    tmp[0] -= a;
632
0
    tmp[1] += a << 40;
633
0
    tmp[3] &= 0x00ffffffffffffff;
634
0
    /*
635
0
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
636
0
     * and the lower part is non-zero
637
0
     */
638
0
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
639
0
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
640
0
    a &= 0x00ffffffffffffff;
641
0
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
642
0
    a = (a - 1) >> 63;
643
0
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
644
0
    tmp[3] &= a ^ 0xffffffffffffffff;
645
0
    tmp[2] &= a ^ 0xffffffffffffffff;
646
0
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
647
0
    tmp[0] -= 1 & a;
648
0
649
0
    /*
650
0
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
651
0
     * non-zero, so we only need one step
652
0
     */
653
0
    a = tmp[0] >> 63;
654
0
    tmp[0] += two56 & a;
655
0
    tmp[1] -= 1 & a;
656
0
657
0
    /* carry 1 -> 2 -> 3 */
658
0
    tmp[2] += tmp[1] >> 56;
659
0
    tmp[1] &= 0x00ffffffffffffff;
660
0
661
0
    tmp[3] += tmp[2] >> 56;
662
0
    tmp[2] &= 0x00ffffffffffffff;
663
0
664
0
    /* Now 0 <= out < p */
665
0
    out[0] = tmp[0];
666
0
    out[1] = tmp[1];
667
0
    out[2] = tmp[2];
668
0
    out[3] = tmp[3];
669
0
}
670
671
/*
672
 * Get negative value: out = -in
673
 * Requires in[i] < 2^63,
674
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
675
 */
676
static void felem_neg(felem out, const felem in)
677
0
{
678
0
    widefelem tmp = {0};
679
0
    felem_diff_128_64(tmp, in);
680
0
    felem_reduce(out, tmp);
681
0
}
682
683
/*
684
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
685
 * elements are reduced to in < 2^225, so we only need to check three cases:
686
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
687
 */
688
static limb felem_is_zero(const felem in)
689
0
{
690
0
    limb zero, two224m96p1, two225m97p2;
691
0
692
0
    zero = in[0] | in[1] | in[2] | in[3];
693
0
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
694
0
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
695
0
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
696
0
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
697
0
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
698
0
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
699
0
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
700
0
    return (zero | two224m96p1 | two225m97p2);
701
0
}
702
703
static int felem_is_zero_int(const void *in)
704
0
{
705
0
    return (int)(felem_is_zero(in) & ((limb) 1));
706
0
}
707
708
/* Invert a field element */
709
/* Computation chain copied from djb's code */
710
static void felem_inv(felem out, const felem in)
711
0
{
712
0
    felem ftmp, ftmp2, ftmp3, ftmp4;
713
0
    widefelem tmp;
714
0
    unsigned i;
715
0
716
0
    felem_square(tmp, in);
717
0
    felem_reduce(ftmp, tmp);    /* 2 */
718
0
    felem_mul(tmp, in, ftmp);
719
0
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
720
0
    felem_square(tmp, ftmp);
721
0
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
722
0
    felem_mul(tmp, in, ftmp);
723
0
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
724
0
    felem_square(tmp, ftmp);
725
0
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
726
0
    felem_square(tmp, ftmp2);
727
0
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
728
0
    felem_square(tmp, ftmp2);
729
0
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
730
0
    felem_mul(tmp, ftmp2, ftmp);
731
0
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
732
0
    felem_square(tmp, ftmp);
733
0
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
734
0
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
735
0
        felem_square(tmp, ftmp2);
736
0
        felem_reduce(ftmp2, tmp);
737
0
    }
738
0
    felem_mul(tmp, ftmp2, ftmp);
739
0
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
740
0
    felem_square(tmp, ftmp2);
741
0
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
742
0
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
743
0
        felem_square(tmp, ftmp3);
744
0
        felem_reduce(ftmp3, tmp);
745
0
    }
746
0
    felem_mul(tmp, ftmp3, ftmp2);
747
0
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
748
0
    felem_square(tmp, ftmp2);
749
0
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
750
0
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
751
0
        felem_square(tmp, ftmp3);
752
0
        felem_reduce(ftmp3, tmp);
753
0
    }
754
0
    felem_mul(tmp, ftmp3, ftmp2);
755
0
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
756
0
    felem_square(tmp, ftmp3);
757
0
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
758
0
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
759
0
        felem_square(tmp, ftmp4);
760
0
        felem_reduce(ftmp4, tmp);
761
0
    }
762
0
    felem_mul(tmp, ftmp3, ftmp4);
763
0
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
764
0
    felem_square(tmp, ftmp3);
765
0
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
766
0
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
767
0
        felem_square(tmp, ftmp4);
768
0
        felem_reduce(ftmp4, tmp);
769
0
    }
770
0
    felem_mul(tmp, ftmp2, ftmp4);
771
0
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
772
0
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
773
0
        felem_square(tmp, ftmp2);
774
0
        felem_reduce(ftmp2, tmp);
775
0
    }
776
0
    felem_mul(tmp, ftmp2, ftmp);
777
0
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
778
0
    felem_square(tmp, ftmp);
779
0
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
780
0
    felem_mul(tmp, ftmp, in);
781
0
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
782
0
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
783
0
        felem_square(tmp, ftmp);
784
0
        felem_reduce(ftmp, tmp);
785
0
    }
786
0
    felem_mul(tmp, ftmp, ftmp3);
787
0
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
788
0
}
789
790
/*
791
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
792
 * out to itself.
793
 */
794
static void copy_conditional(felem out, const felem in, limb icopy)
795
0
{
796
0
    unsigned i;
797
0
    /*
798
0
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
799
0
     */
800
0
    const limb copy = -icopy;
801
0
    for (i = 0; i < 4; ++i) {
802
0
        const limb tmp = copy & (in[i] ^ out[i]);
803
0
        out[i] ^= tmp;
804
0
    }
805
0
}
806
807
/******************************************************************************/
808
/*-
809
 *                       ELLIPTIC CURVE POINT OPERATIONS
810
 *
811
 * Points are represented in Jacobian projective coordinates:
812
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
813
 * or to the point at infinity if Z == 0.
814
 *
815
 */
816
817
/*-
818
 * Double an elliptic curve point:
819
 * (X', Y', Z') = 2 * (X, Y, Z), where
820
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
821
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
822
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
823
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
824
 * while x_out == y_in is not (maybe this works, but it's not tested).
825
 */
826
static void
827
point_double(felem x_out, felem y_out, felem z_out,
828
             const felem x_in, const felem y_in, const felem z_in)
829
0
{
830
0
    widefelem tmp, tmp2;
831
0
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
832
0
833
0
    felem_assign(ftmp, x_in);
834
0
    felem_assign(ftmp2, x_in);
835
0
836
0
    /* delta = z^2 */
837
0
    felem_square(tmp, z_in);
838
0
    felem_reduce(delta, tmp);
839
0
840
0
    /* gamma = y^2 */
841
0
    felem_square(tmp, y_in);
842
0
    felem_reduce(gamma, tmp);
843
0
844
0
    /* beta = x*gamma */
845
0
    felem_mul(tmp, x_in, gamma);
846
0
    felem_reduce(beta, tmp);
847
0
848
0
    /* alpha = 3*(x-delta)*(x+delta) */
849
0
    felem_diff(ftmp, delta);
850
0
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
851
0
    felem_sum(ftmp2, delta);
852
0
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
853
0
    felem_scalar(ftmp2, 3);
854
0
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
855
0
    felem_mul(tmp, ftmp, ftmp2);
856
0
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
857
0
    felem_reduce(alpha, tmp);
858
0
859
0
    /* x' = alpha^2 - 8*beta */
860
0
    felem_square(tmp, alpha);
861
0
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
862
0
    felem_assign(ftmp, beta);
863
0
    felem_scalar(ftmp, 8);
864
0
    /* ftmp[i] < 8 * 2^57 = 2^60 */
865
0
    felem_diff_128_64(tmp, ftmp);
866
0
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
867
0
    felem_reduce(x_out, tmp);
868
0
869
0
    /* z' = (y + z)^2 - gamma - delta */
870
0
    felem_sum(delta, gamma);
871
0
    /* delta[i] < 2^57 + 2^57 = 2^58 */
872
0
    felem_assign(ftmp, y_in);
873
0
    felem_sum(ftmp, z_in);
874
0
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
875
0
    felem_square(tmp, ftmp);
876
0
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
877
0
    felem_diff_128_64(tmp, delta);
878
0
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
879
0
    felem_reduce(z_out, tmp);
880
0
881
0
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
882
0
    felem_scalar(beta, 4);
883
0
    /* beta[i] < 4 * 2^57 = 2^59 */
884
0
    felem_diff(beta, x_out);
885
0
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
886
0
    felem_mul(tmp, alpha, beta);
887
0
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
888
0
    felem_square(tmp2, gamma);
889
0
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
890
0
    widefelem_scalar(tmp2, 8);
891
0
    /* tmp2[i] < 8 * 2^116 = 2^119 */
892
0
    widefelem_diff(tmp, tmp2);
893
0
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
894
0
    felem_reduce(y_out, tmp);
895
0
}
896
897
/*-
898
 * Add two elliptic curve points:
899
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
900
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
901
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
902
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
903
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
904
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
905
 *
906
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
907
 */
908
909
/*
910
 * This function is not entirely constant-time: it includes a branch for
911
 * checking whether the two input points are equal, (while not equal to the
912
 * point at infinity). This case never happens during single point
913
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
914
 */
915
static void point_add(felem x3, felem y3, felem z3,
916
                      const felem x1, const felem y1, const felem z1,
917
                      const int mixed, const felem x2, const felem y2,
918
                      const felem z2)
919
0
{
920
0
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
921
0
    widefelem tmp, tmp2;
922
0
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
923
0
924
0
    if (!mixed) {
925
0
        /* ftmp2 = z2^2 */
926
0
        felem_square(tmp, z2);
927
0
        felem_reduce(ftmp2, tmp);
928
0
929
0
        /* ftmp4 = z2^3 */
930
0
        felem_mul(tmp, ftmp2, z2);
931
0
        felem_reduce(ftmp4, tmp);
932
0
933
0
        /* ftmp4 = z2^3*y1 */
934
0
        felem_mul(tmp2, ftmp4, y1);
935
0
        felem_reduce(ftmp4, tmp2);
936
0
937
0
        /* ftmp2 = z2^2*x1 */
938
0
        felem_mul(tmp2, ftmp2, x1);
939
0
        felem_reduce(ftmp2, tmp2);
940
0
    } else {
941
0
        /*
942
0
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
943
0
         */
944
0
945
0
        /* ftmp4 = z2^3*y1 */
946
0
        felem_assign(ftmp4, y1);
947
0
948
0
        /* ftmp2 = z2^2*x1 */
949
0
        felem_assign(ftmp2, x1);
950
0
    }
951
0
952
0
    /* ftmp = z1^2 */
953
0
    felem_square(tmp, z1);
954
0
    felem_reduce(ftmp, tmp);
955
0
956
0
    /* ftmp3 = z1^3 */
957
0
    felem_mul(tmp, ftmp, z1);
958
0
    felem_reduce(ftmp3, tmp);
959
0
960
0
    /* tmp = z1^3*y2 */
961
0
    felem_mul(tmp, ftmp3, y2);
962
0
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
963
0
964
0
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
965
0
    felem_diff_128_64(tmp, ftmp4);
966
0
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
967
0
    felem_reduce(ftmp3, tmp);
968
0
969
0
    /* tmp = z1^2*x2 */
970
0
    felem_mul(tmp, ftmp, x2);
971
0
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
972
0
973
0
    /* ftmp = z1^2*x2 - z2^2*x1 */
974
0
    felem_diff_128_64(tmp, ftmp2);
975
0
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
976
0
    felem_reduce(ftmp, tmp);
977
0
978
0
    /*
979
0
     * the formulae are incorrect if the points are equal so we check for
980
0
     * this and do doubling if this happens
981
0
     */
982
0
    x_equal = felem_is_zero(ftmp);
983
0
    y_equal = felem_is_zero(ftmp3);
984
0
    z1_is_zero = felem_is_zero(z1);
985
0
    z2_is_zero = felem_is_zero(z2);
986
0
    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
987
0
    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
988
0
        point_double(x3, y3, z3, x1, y1, z1);
989
0
        return;
990
0
    }
991
0
992
0
    /* ftmp5 = z1*z2 */
993
0
    if (!mixed) {
994
0
        felem_mul(tmp, z1, z2);
995
0
        felem_reduce(ftmp5, tmp);
996
0
    } else {
997
0
        /* special case z2 = 0 is handled later */
998
0
        felem_assign(ftmp5, z1);
999
0
    }
1000
0
1001
0
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1002
0
    felem_mul(tmp, ftmp, ftmp5);
1003
0
    felem_reduce(z_out, tmp);
1004
0
1005
0
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1006
0
    felem_assign(ftmp5, ftmp);
1007
0
    felem_square(tmp, ftmp);
1008
0
    felem_reduce(ftmp, tmp);
1009
0
1010
0
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1011
0
    felem_mul(tmp, ftmp, ftmp5);
1012
0
    felem_reduce(ftmp5, tmp);
1013
0
1014
0
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1015
0
    felem_mul(tmp, ftmp2, ftmp);
1016
0
    felem_reduce(ftmp2, tmp);
1017
0
1018
0
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1019
0
    felem_mul(tmp, ftmp4, ftmp5);
1020
0
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1021
0
1022
0
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1023
0
    felem_square(tmp2, ftmp3);
1024
0
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1025
0
1026
0
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1027
0
    felem_diff_128_64(tmp2, ftmp5);
1028
0
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1029
0
1030
0
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031
0
    felem_assign(ftmp5, ftmp2);
1032
0
    felem_scalar(ftmp5, 2);
1033
0
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1034
0
1035
0
    /*-
1036
0
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1037
0
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1038
0
     */
1039
0
    felem_diff_128_64(tmp2, ftmp5);
1040
0
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1041
0
    felem_reduce(x_out, tmp2);
1042
0
1043
0
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1044
0
    felem_diff(ftmp2, x_out);
1045
0
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1046
0
1047
0
    /*
1048
0
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1049
0
     */
1050
0
    felem_mul(tmp2, ftmp3, ftmp2);
1051
0
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1052
0
1053
0
    /*-
1054
0
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1055
0
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1056
0
     */
1057
0
    widefelem_diff(tmp2, tmp);
1058
0
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1059
0
    felem_reduce(y_out, tmp2);
1060
0
1061
0
    /*
1062
0
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1063
0
     * the point at infinity, so we need to check for this separately
1064
0
     */
1065
0
1066
0
    /*
1067
0
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1068
0
     */
1069
0
    copy_conditional(x_out, x2, z1_is_zero);
1070
0
    copy_conditional(x_out, x1, z2_is_zero);
1071
0
    copy_conditional(y_out, y2, z1_is_zero);
1072
0
    copy_conditional(y_out, y1, z2_is_zero);
1073
0
    copy_conditional(z_out, z2, z1_is_zero);
1074
0
    copy_conditional(z_out, z1, z2_is_zero);
1075
0
    felem_assign(x3, x_out);
1076
0
    felem_assign(y3, y_out);
1077
0
    felem_assign(z3, z_out);
1078
0
}
1079
1080
/*
1081
 * select_point selects the |idx|th point from a precomputation table and
1082
 * copies it to out.
1083
 * The pre_comp array argument should be size of |size| argument
1084
 */
1085
static void select_point(const u64 idx, unsigned int size,
1086
                         const felem pre_comp[][3], felem out[3])
1087
0
{
1088
0
    unsigned i, j;
1089
0
    limb *outlimbs = &out[0][0];
1090
0
1091
0
    memset(out, 0, sizeof(*out) * 3);
1092
0
    for (i = 0; i < size; i++) {
1093
0
        const limb *inlimbs = &pre_comp[i][0][0];
1094
0
        u64 mask = i ^ idx;
1095
0
        mask |= mask >> 4;
1096
0
        mask |= mask >> 2;
1097
0
        mask |= mask >> 1;
1098
0
        mask &= 1;
1099
0
        mask--;
1100
0
        for (j = 0; j < 4 * 3; j++)
1101
0
            outlimbs[j] |= inlimbs[j] & mask;
1102
0
    }
1103
0
}
1104
1105
/* get_bit returns the |i|th bit in |in| */
1106
static char get_bit(const felem_bytearray in, unsigned i)
1107
0
{
1108
0
    if (i >= 224)
1109
0
        return 0;
1110
0
    return (in[i >> 3] >> (i & 7)) & 1;
1111
0
}
1112
1113
/*
1114
 * Interleaved point multiplication using precomputed point multiples: The
1115
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1116
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1117
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1118
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1119
 */
1120
static void batch_mul(felem x_out, felem y_out, felem z_out,
1121
                      const felem_bytearray scalars[],
1122
                      const unsigned num_points, const u8 *g_scalar,
1123
                      const int mixed, const felem pre_comp[][17][3],
1124
                      const felem g_pre_comp[2][16][3])
1125
0
{
1126
0
    int i, skip;
1127
0
    unsigned num;
1128
0
    unsigned gen_mul = (g_scalar != NULL);
1129
0
    felem nq[3], tmp[4];
1130
0
    u64 bits;
1131
0
    u8 sign, digit;
1132
0
1133
0
    /* set nq to the point at infinity */
1134
0
    memset(nq, 0, sizeof(nq));
1135
0
1136
0
    /*
1137
0
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1138
0
     * of the generator (two in each of the last 28 rounds) and additions of
1139
0
     * other points multiples (every 5th round).
1140
0
     */
1141
0
    skip = 1;                   /* save two point operations in the first
1142
0
                                 * round */
1143
0
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1144
0
        /* double */
1145
0
        if (!skip)
1146
0
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1147
0
1148
0
        /* add multiples of the generator */
1149
0
        if (gen_mul && (i <= 27)) {
1150
0
            /* first, look 28 bits upwards */
1151
0
            bits = get_bit(g_scalar, i + 196) << 3;
1152
0
            bits |= get_bit(g_scalar, i + 140) << 2;
1153
0
            bits |= get_bit(g_scalar, i + 84) << 1;
1154
0
            bits |= get_bit(g_scalar, i + 28);
1155
0
            /* select the point to add, in constant time */
1156
0
            select_point(bits, 16, g_pre_comp[1], tmp);
1157
0
1158
0
            if (!skip) {
1159
0
                /* value 1 below is argument for "mixed" */
1160
0
                point_add(nq[0], nq[1], nq[2],
1161
0
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1162
0
            } else {
1163
0
                memcpy(nq, tmp, 3 * sizeof(felem));
1164
0
                skip = 0;
1165
0
            }
1166
0
1167
0
            /* second, look at the current position */
1168
0
            bits = get_bit(g_scalar, i + 168) << 3;
1169
0
            bits |= get_bit(g_scalar, i + 112) << 2;
1170
0
            bits |= get_bit(g_scalar, i + 56) << 1;
1171
0
            bits |= get_bit(g_scalar, i);
1172
0
            /* select the point to add, in constant time */
1173
0
            select_point(bits, 16, g_pre_comp[0], tmp);
1174
0
            point_add(nq[0], nq[1], nq[2],
1175
0
                      nq[0], nq[1], nq[2],
1176
0
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1177
0
        }
1178
0
1179
0
        /* do other additions every 5 doublings */
1180
0
        if (num_points && (i % 5 == 0)) {
1181
0
            /* loop over all scalars */
1182
0
            for (num = 0; num < num_points; ++num) {
1183
0
                bits = get_bit(scalars[num], i + 4) << 5;
1184
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1185
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1186
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1187
0
                bits |= get_bit(scalars[num], i) << 1;
1188
0
                bits |= get_bit(scalars[num], i - 1);
1189
0
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1190
0
1191
0
                /* select the point to add or subtract */
1192
0
                select_point(digit, 17, pre_comp[num], tmp);
1193
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1194
0
                                            * point */
1195
0
                copy_conditional(tmp[1], tmp[3], sign);
1196
0
1197
0
                if (!skip) {
1198
0
                    point_add(nq[0], nq[1], nq[2],
1199
0
                              nq[0], nq[1], nq[2],
1200
0
                              mixed, tmp[0], tmp[1], tmp[2]);
1201
0
                } else {
1202
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1203
0
                    skip = 0;
1204
0
                }
1205
0
            }
1206
0
        }
1207
0
    }
1208
0
    felem_assign(x_out, nq[0]);
1209
0
    felem_assign(y_out, nq[1]);
1210
0
    felem_assign(z_out, nq[2]);
1211
0
}
1212
1213
/******************************************************************************/
1214
/*
1215
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1216
 */
1217
1218
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1219
0
{
1220
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1221
0
1222
0
    if (!ret) {
1223
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1224
0
        return ret;
1225
0
    }
1226
0
1227
0
    ret->references = 1;
1228
0
1229
0
    ret->lock = CRYPTO_THREAD_lock_new();
1230
0
    if (ret->lock == NULL) {
1231
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1232
0
        OPENSSL_free(ret);
1233
0
        return NULL;
1234
0
    }
1235
0
    return ret;
1236
0
}
1237
1238
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1239
0
{
1240
0
    int i;
1241
0
    if (p != NULL)
1242
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1243
0
    return p;
1244
0
}
1245
1246
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1247
0
{
1248
0
    int i;
1249
0
1250
0
    if (p == NULL)
1251
0
        return;
1252
0
1253
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1254
0
    REF_PRINT_COUNT("EC_nistp224", x);
1255
0
    if (i > 0)
1256
0
        return;
1257
0
    REF_ASSERT_ISNT(i < 0);
1258
0
1259
0
    CRYPTO_THREAD_lock_free(p->lock);
1260
0
    OPENSSL_free(p);
1261
0
}
1262
1263
/******************************************************************************/
1264
/*
1265
 * OPENSSL EC_METHOD FUNCTIONS
1266
 */
1267
1268
int ec_GFp_nistp224_group_init(EC_GROUP *group)
1269
0
{
1270
0
    int ret;
1271
0
    ret = ec_GFp_simple_group_init(group);
1272
0
    group->a_is_minus3 = 1;
1273
0
    return ret;
1274
0
}
1275
1276
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1277
                                    const BIGNUM *a, const BIGNUM *b,
1278
                                    BN_CTX *ctx)
1279
0
{
1280
0
    int ret = 0;
1281
0
    BN_CTX *new_ctx = NULL;
1282
0
    BIGNUM *curve_p, *curve_a, *curve_b;
1283
0
1284
0
    if (ctx == NULL)
1285
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1286
0
            return 0;
1287
0
    BN_CTX_start(ctx);
1288
0
    curve_p = BN_CTX_get(ctx);
1289
0
    curve_a = BN_CTX_get(ctx);
1290
0
    curve_b = BN_CTX_get(ctx);
1291
0
    if (curve_b == NULL)
1292
0
        goto err;
1293
0
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1294
0
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1295
0
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1296
0
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1297
0
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1298
0
              EC_R_WRONG_CURVE_PARAMETERS);
1299
0
        goto err;
1300
0
    }
1301
0
    group->field_mod_func = BN_nist_mod_224;
1302
0
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1303
0
 err:
1304
0
    BN_CTX_end(ctx);
1305
0
    BN_CTX_free(new_ctx);
1306
0
    return ret;
1307
0
}
1308
1309
/*
1310
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1311
 * (X/Z^2, Y/Z^3)
1312
 */
1313
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1314
                                                 const EC_POINT *point,
1315
                                                 BIGNUM *x, BIGNUM *y,
1316
                                                 BN_CTX *ctx)
1317
0
{
1318
0
    felem z1, z2, x_in, y_in, x_out, y_out;
1319
0
    widefelem tmp;
1320
0
1321
0
    if (EC_POINT_is_at_infinity(group, point)) {
1322
0
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1323
0
              EC_R_POINT_AT_INFINITY);
1324
0
        return 0;
1325
0
    }
1326
0
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1327
0
        (!BN_to_felem(z1, point->Z)))
1328
0
        return 0;
1329
0
    felem_inv(z2, z1);
1330
0
    felem_square(tmp, z2);
1331
0
    felem_reduce(z1, tmp);
1332
0
    felem_mul(tmp, x_in, z1);
1333
0
    felem_reduce(x_in, tmp);
1334
0
    felem_contract(x_out, x_in);
1335
0
    if (x != NULL) {
1336
0
        if (!felem_to_BN(x, x_out)) {
1337
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1338
0
                  ERR_R_BN_LIB);
1339
0
            return 0;
1340
0
        }
1341
0
    }
1342
0
    felem_mul(tmp, z1, z2);
1343
0
    felem_reduce(z1, tmp);
1344
0
    felem_mul(tmp, y_in, z1);
1345
0
    felem_reduce(y_in, tmp);
1346
0
    felem_contract(y_out, y_in);
1347
0
    if (y != NULL) {
1348
0
        if (!felem_to_BN(y, y_out)) {
1349
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1350
0
                  ERR_R_BN_LIB);
1351
0
            return 0;
1352
0
        }
1353
0
    }
1354
0
    return 1;
1355
0
}
1356
1357
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1358
                               felem tmp_felems[ /* num+1 */ ])
1359
0
{
1360
0
    /*
1361
0
     * Runs in constant time, unless an input is the point at infinity (which
1362
0
     * normally shouldn't happen).
1363
0
     */
1364
0
    ec_GFp_nistp_points_make_affine_internal(num,
1365
0
                                             points,
1366
0
                                             sizeof(felem),
1367
0
                                             tmp_felems,
1368
0
                                             (void (*)(void *))felem_one,
1369
0
                                             felem_is_zero_int,
1370
0
                                             (void (*)(void *, const void *))
1371
0
                                             felem_assign,
1372
0
                                             (void (*)(void *, const void *))
1373
0
                                             felem_square_reduce, (void (*)
1374
0
                                                                   (void *,
1375
0
                                                                    const void
1376
0
                                                                    *,
1377
0
                                                                    const void
1378
0
                                                                    *))
1379
0
                                             felem_mul_reduce,
1380
0
                                             (void (*)(void *, const void *))
1381
0
                                             felem_inv,
1382
0
                                             (void (*)(void *, const void *))
1383
0
                                             felem_contract);
1384
0
}
1385
1386
/*
1387
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1388
 * values Result is stored in r (r can equal one of the inputs).
1389
 */
1390
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1391
                               const BIGNUM *scalar, size_t num,
1392
                               const EC_POINT *points[],
1393
                               const BIGNUM *scalars[], BN_CTX *ctx)
1394
0
{
1395
0
    int ret = 0;
1396
0
    int j;
1397
0
    unsigned i;
1398
0
    int mixed = 0;
1399
0
    BIGNUM *x, *y, *z, *tmp_scalar;
1400
0
    felem_bytearray g_secret;
1401
0
    felem_bytearray *secrets = NULL;
1402
0
    felem (*pre_comp)[17][3] = NULL;
1403
0
    felem *tmp_felems = NULL;
1404
0
    felem_bytearray tmp;
1405
0
    unsigned num_bytes;
1406
0
    int have_pre_comp = 0;
1407
0
    size_t num_points = num;
1408
0
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1409
0
    NISTP224_PRE_COMP *pre = NULL;
1410
0
    const felem(*g_pre_comp)[16][3] = NULL;
1411
0
    EC_POINT *generator = NULL;
1412
0
    const EC_POINT *p = NULL;
1413
0
    const BIGNUM *p_scalar = NULL;
1414
0
1415
0
    BN_CTX_start(ctx);
1416
0
    x = BN_CTX_get(ctx);
1417
0
    y = BN_CTX_get(ctx);
1418
0
    z = BN_CTX_get(ctx);
1419
0
    tmp_scalar = BN_CTX_get(ctx);
1420
0
    if (tmp_scalar == NULL)
1421
0
        goto err;
1422
0
1423
0
    if (scalar != NULL) {
1424
0
        pre = group->pre_comp.nistp224;
1425
0
        if (pre)
1426
0
            /* we have precomputation, try to use it */
1427
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1428
0
        else
1429
0
            /* try to use the standard precomputation */
1430
0
            g_pre_comp = &gmul[0];
1431
0
        generator = EC_POINT_new(group);
1432
0
        if (generator == NULL)
1433
0
            goto err;
1434
0
        /* get the generator from precomputation */
1435
0
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1436
0
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1437
0
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1438
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1439
0
            goto err;
1440
0
        }
1441
0
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1442
0
                                                      generator, x, y, z,
1443
0
                                                      ctx))
1444
0
            goto err;
1445
0
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1446
0
            /* precomputation matches generator */
1447
0
            have_pre_comp = 1;
1448
0
        else
1449
0
            /*
1450
0
             * we don't have valid precomputation: treat the generator as a
1451
0
             * random point
1452
0
             */
1453
0
            num_points = num_points + 1;
1454
0
    }
1455
0
1456
0
    if (num_points > 0) {
1457
0
        if (num_points >= 3) {
1458
0
            /*
1459
0
             * unless we precompute multiples for just one or two points,
1460
0
             * converting those into affine form is time well spent
1461
0
             */
1462
0
            mixed = 1;
1463
0
        }
1464
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1465
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1466
0
        if (mixed)
1467
0
            tmp_felems =
1468
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1469
0
        if ((secrets == NULL) || (pre_comp == NULL)
1470
0
            || (mixed && (tmp_felems == NULL))) {
1471
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1472
0
            goto err;
1473
0
        }
1474
0
1475
0
        /*
1476
0
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1477
0
         * i.e., they contribute nothing to the linear combination
1478
0
         */
1479
0
        for (i = 0; i < num_points; ++i) {
1480
0
            if (i == num)
1481
0
                /* the generator */
1482
0
            {
1483
0
                p = EC_GROUP_get0_generator(group);
1484
0
                p_scalar = scalar;
1485
0
            } else
1486
0
                /* the i^th point */
1487
0
            {
1488
0
                p = points[i];
1489
0
                p_scalar = scalars[i];
1490
0
            }
1491
0
            if ((p_scalar != NULL) && (p != NULL)) {
1492
0
                /* reduce scalar to 0 <= scalar < 2^224 */
1493
0
                if ((BN_num_bits(p_scalar) > 224)
1494
0
                    || (BN_is_negative(p_scalar))) {
1495
0
                    /*
1496
0
                     * this is an unusual input, and we don't guarantee
1497
0
                     * constant-timeness
1498
0
                     */
1499
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1500
0
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1501
0
                        goto err;
1502
0
                    }
1503
0
                    num_bytes = BN_bn2bin(tmp_scalar, tmp);
1504
0
                } else
1505
0
                    num_bytes = BN_bn2bin(p_scalar, tmp);
1506
0
                flip_endian(secrets[i], tmp, num_bytes);
1507
0
                /* precompute multiples */
1508
0
                if ((!BN_to_felem(x_out, p->X)) ||
1509
0
                    (!BN_to_felem(y_out, p->Y)) ||
1510
0
                    (!BN_to_felem(z_out, p->Z)))
1511
0
                    goto err;
1512
0
                felem_assign(pre_comp[i][1][0], x_out);
1513
0
                felem_assign(pre_comp[i][1][1], y_out);
1514
0
                felem_assign(pre_comp[i][1][2], z_out);
1515
0
                for (j = 2; j <= 16; ++j) {
1516
0
                    if (j & 1) {
1517
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1518
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1519
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1520
0
                                  pre_comp[i][j - 1][0],
1521
0
                                  pre_comp[i][j - 1][1],
1522
0
                                  pre_comp[i][j - 1][2]);
1523
0
                    } else {
1524
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1525
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1526
0
                                     pre_comp[i][j / 2][1],
1527
0
                                     pre_comp[i][j / 2][2]);
1528
0
                    }
1529
0
                }
1530
0
            }
1531
0
        }
1532
0
        if (mixed)
1533
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1534
0
    }
1535
0
1536
0
    /* the scalar for the generator */
1537
0
    if ((scalar != NULL) && (have_pre_comp)) {
1538
0
        memset(g_secret, 0, sizeof(g_secret));
1539
0
        /* reduce scalar to 0 <= scalar < 2^224 */
1540
0
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1541
0
            /*
1542
0
             * this is an unusual input, and we don't guarantee
1543
0
             * constant-timeness
1544
0
             */
1545
0
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1546
0
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1547
0
                goto err;
1548
0
            }
1549
0
            num_bytes = BN_bn2bin(tmp_scalar, tmp);
1550
0
        } else
1551
0
            num_bytes = BN_bn2bin(scalar, tmp);
1552
0
        flip_endian(g_secret, tmp, num_bytes);
1553
0
        /* do the multiplication with generator precomputation */
1554
0
        batch_mul(x_out, y_out, z_out,
1555
0
                  (const felem_bytearray(*))secrets, num_points,
1556
0
                  g_secret,
1557
0
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1558
0
    } else
1559
0
        /* do the multiplication without generator precomputation */
1560
0
        batch_mul(x_out, y_out, z_out,
1561
0
                  (const felem_bytearray(*))secrets, num_points,
1562
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1563
0
    /* reduce the output to its unique minimal representation */
1564
0
    felem_contract(x_in, x_out);
1565
0
    felem_contract(y_in, y_out);
1566
0
    felem_contract(z_in, z_out);
1567
0
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1568
0
        (!felem_to_BN(z, z_in))) {
1569
0
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1570
0
        goto err;
1571
0
    }
1572
0
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1573
0
1574
0
 err:
1575
0
    BN_CTX_end(ctx);
1576
0
    EC_POINT_free(generator);
1577
0
    OPENSSL_free(secrets);
1578
0
    OPENSSL_free(pre_comp);
1579
0
    OPENSSL_free(tmp_felems);
1580
0
    return ret;
1581
0
}
1582
1583
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1584
0
{
1585
0
    int ret = 0;
1586
0
    NISTP224_PRE_COMP *pre = NULL;
1587
0
    int i, j;
1588
0
    BN_CTX *new_ctx = NULL;
1589
0
    BIGNUM *x, *y;
1590
0
    EC_POINT *generator = NULL;
1591
0
    felem tmp_felems[32];
1592
0
1593
0
    /* throw away old precomputation */
1594
0
    EC_pre_comp_free(group);
1595
0
    if (ctx == NULL)
1596
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1597
0
            return 0;
1598
0
    BN_CTX_start(ctx);
1599
0
    x = BN_CTX_get(ctx);
1600
0
    y = BN_CTX_get(ctx);
1601
0
    if (y == NULL)
1602
0
        goto err;
1603
0
    /* get the generator */
1604
0
    if (group->generator == NULL)
1605
0
        goto err;
1606
0
    generator = EC_POINT_new(group);
1607
0
    if (generator == NULL)
1608
0
        goto err;
1609
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1610
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1611
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1612
0
        goto err;
1613
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1614
0
        goto err;
1615
0
    /*
1616
0
     * if the generator is the standard one, use built-in precomputation
1617
0
     */
1618
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1619
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1620
0
        goto done;
1621
0
    }
1622
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1623
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1624
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1625
0
        goto err;
1626
0
    /*
1627
0
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1628
0
     * 2^140*G, 2^196*G for the second one
1629
0
     */
1630
0
    for (i = 1; i <= 8; i <<= 1) {
1631
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1632
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1633
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1634
0
        for (j = 0; j < 27; ++j) {
1635
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1636
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1637
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1638
0
        }
1639
0
        if (i == 8)
1640
0
            break;
1641
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1642
0
                     pre->g_pre_comp[0][2 * i][1],
1643
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1644
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1645
0
        for (j = 0; j < 27; ++j) {
1646
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1647
0
                         pre->g_pre_comp[0][2 * i][1],
1648
0
                         pre->g_pre_comp[0][2 * i][2],
1649
0
                         pre->g_pre_comp[0][2 * i][0],
1650
0
                         pre->g_pre_comp[0][2 * i][1],
1651
0
                         pre->g_pre_comp[0][2 * i][2]);
1652
0
        }
1653
0
    }
1654
0
    for (i = 0; i < 2; i++) {
1655
0
        /* g_pre_comp[i][0] is the point at infinity */
1656
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1657
0
        /* the remaining multiples */
1658
0
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1659
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1660
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1661
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1662
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1663
0
                  pre->g_pre_comp[i][2][2]);
1664
0
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1665
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1666
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1667
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1668
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1669
0
                  pre->g_pre_comp[i][2][2]);
1670
0
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1671
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1672
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1673
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1674
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1675
0
                  pre->g_pre_comp[i][4][2]);
1676
0
        /*
1677
0
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1678
0
         */
1679
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1680
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1681
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1682
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1683
0
                  pre->g_pre_comp[i][2][2]);
1684
0
        for (j = 1; j < 8; ++j) {
1685
0
            /* odd multiples: add G resp. 2^28*G */
1686
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1687
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1688
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1689
0
                      pre->g_pre_comp[i][2 * j][0],
1690
0
                      pre->g_pre_comp[i][2 * j][1],
1691
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1692
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1693
0
                      pre->g_pre_comp[i][1][2]);
1694
0
        }
1695
0
    }
1696
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1697
0
1698
0
 done:
1699
0
    SETPRECOMP(group, nistp224, pre);
1700
0
    pre = NULL;
1701
0
    ret = 1;
1702
0
 err:
1703
0
    BN_CTX_end(ctx);
1704
0
    EC_POINT_free(generator);
1705
0
    BN_CTX_free(new_ctx);
1706
0
    EC_nistp224_pre_comp_free(pre);
1707
0
    return ret;
1708
0
}
1709
1710
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1711
0
{
1712
0
    return HAVEPRECOMP(group, nistp224);
1713
0
}
1714
1715
#endif