Coverage Report

Created: 2023-06-08 06:41

/src/openssl111/crypto/ec/ecp_nistp224.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28
 *
29
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30
 * and Adam Langley's public domain 64-bit C implementation of curve25519
31
 */
32
33
#include <openssl/opensslconf.h>
34
#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35
NON_EMPTY_TRANSLATION_UNIT
36
#else
37
38
# include <stdint.h>
39
# include <string.h>
40
# include <openssl/err.h>
41
# include "ec_local.h"
42
43
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44
  /* even with gcc, the typedef won't work for 32-bit platforms */
45
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
46
                                 * platforms */
47
# else
48
#  error "Your compiler doesn't appear to support 128-bit integer types"
49
# endif
50
51
typedef uint8_t u8;
52
typedef uint64_t u64;
53
54
/******************************************************************************/
55
/*-
56
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57
 *
58
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59
 * using 64-bit coefficients called 'limbs',
60
 * and sometimes (for multiplication results) as
61
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62
 * using 128-bit coefficients called 'widelimbs'.
63
 * A 4-limb representation is an 'felem';
64
 * a 7-widelimb representation is a 'widefelem'.
65
 * Even within felems, bits of adjacent limbs overlap, and we don't always
66
 * reduce the representations: we ensure that inputs to each felem
67
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68
 * and fit into a 128-bit word without overflow. The coefficients are then
69
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70
 * We only reduce to the unique minimal representation at the end of the
71
 * computation.
72
 */
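A small worked illustration of the redundancy mentioned above (added here, not taken from the file): because the limbs overlap, one field element has many unreduced encodings, e.g.

\[ \mathrm{value}(\{2^{56},\,0,\,0,\,0\}) \;=\; 2^{56} \;=\; \mathrm{value}(\{0,\,1,\,0,\,0\}), \]

and both are acceptable inputs to the arithmetic below as long as each limb respects the stated bounds; only felem_contract() at the very end of a computation produces the unique minimal form.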
73
74
typedef uint64_t limb;
75
typedef uint64_t limb_aX __attribute((__aligned__(1)));
76
typedef uint128_t widelimb;
77
78
typedef limb felem[4];
79
typedef widelimb widefelem[7];
80
81
/*
82
 * Field element represented as a byte array. 28*8 = 224 bits is also the
83
 * group order size for the elliptic curve, and we use this type for
84
 * scalars for point multiplication.
85
 */
86
typedef u8 felem_bytearray[28];
87
88
static const felem_bytearray nistp224_curve_params[5] = {
89
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
90
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
91
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
92
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
93
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
95
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
96
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
97
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
98
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
99
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
100
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
101
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
102
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
103
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104
};
105
106
/*-
107
 * Precomputed multiples of the standard generator
108
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
109
 * (0 for the point at infinity).
110
 * For each field element, slice a_0 is word 0, etc.
111
 *
112
 * The table has 2 * 16 elements, starting with the following:
113
 * index | bits    | point
114
 * ------+---------+------------------------------
115
 *     0 | 0 0 0 0 | 0G
116
 *     1 | 0 0 0 1 | 1G
117
 *     2 | 0 0 1 0 | 2^56G
118
 *     3 | 0 0 1 1 | (2^56 + 1)G
119
 *     4 | 0 1 0 0 | 2^112G
120
 *     5 | 0 1 0 1 | (2^112 + 1)G
121
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
122
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
123
 *     8 | 1 0 0 0 | 2^168G
124
 *     9 | 1 0 0 1 | (2^168 + 1)G
125
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
126
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
127
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
128
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
129
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
130
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
131
 * followed by a copy of this with each element multiplied by 2^28.
132
 *
133
 * The reason for this is so that we can clock bits into four different
134
 * locations when doing simple scalar multiplies against the base point,
135
 * and then another four locations using the second 16 elements.
136
 */
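A minimal sketch (not part of the original file) of how those four bit positions are clocked together; it mirrors the index computation that batch_mul() performs further down with get_bit(), and the helper names comb_bit/comb_index are illustrative only:

#include <stdint.h>

typedef uint8_t u8;
typedef u8 felem_bytearray[28];

/* Same bit-extraction rule as get_bit() later in this file. */
static int comb_bit(const felem_bytearray in, unsigned i)
{
    return i < 224 ? (in[i >> 3] >> (i & 7)) & 1 : 0;
}

/*
 * Table index for round i (27 >= i >= 0): offset 0 picks from the first 16
 * entries using scalar bits i, i+56, i+112, i+168; offset 28 picks from the
 * second 16 entries (the copy multiplied by 2^28) using bits i+28, i+84,
 * i+140, i+196.
 */
static unsigned comb_index(const felem_bytearray scalar, unsigned i,
                           unsigned offset)
{
    return (unsigned)((comb_bit(scalar, i + offset + 168) << 3) |
                      (comb_bit(scalar, i + offset + 112) << 2) |
                      (comb_bit(scalar, i + offset + 56) << 1) |
                       comb_bit(scalar, i + offset));
}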
137
static const felem gmul[2][16][3] = {
138
{{{0, 0, 0, 0},
139
  {0, 0, 0, 0},
140
  {0, 0, 0, 0}},
141
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
142
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143
  {1, 0, 0, 0}},
144
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146
  {1, 0, 0, 0}},
147
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
148
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149
  {1, 0, 0, 0}},
150
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
151
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152
  {1, 0, 0, 0}},
153
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
154
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155
  {1, 0, 0, 0}},
156
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
157
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158
  {1, 0, 0, 0}},
159
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
160
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161
  {1, 0, 0, 0}},
162
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
163
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164
  {1, 0, 0, 0}},
165
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
166
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167
  {1, 0, 0, 0}},
168
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
169
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170
  {1, 0, 0, 0}},
171
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
172
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173
  {1, 0, 0, 0}},
174
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
175
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176
  {1, 0, 0, 0}},
177
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
178
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179
  {1, 0, 0, 0}},
180
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
181
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182
  {1, 0, 0, 0}},
183
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
184
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
185
  {1, 0, 0, 0}}},
186
{{{0, 0, 0, 0},
187
  {0, 0, 0, 0},
188
  {0, 0, 0, 0}},
189
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
190
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191
  {1, 0, 0, 0}},
192
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
193
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194
  {1, 0, 0, 0}},
195
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
196
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197
  {1, 0, 0, 0}},
198
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
199
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200
  {1, 0, 0, 0}},
201
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
202
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203
  {1, 0, 0, 0}},
204
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
205
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206
  {1, 0, 0, 0}},
207
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
208
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209
  {1, 0, 0, 0}},
210
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
211
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212
  {1, 0, 0, 0}},
213
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
214
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215
  {1, 0, 0, 0}},
216
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
217
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218
  {1, 0, 0, 0}},
219
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
220
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221
  {1, 0, 0, 0}},
222
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
223
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224
  {1, 0, 0, 0}},
225
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
226
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227
  {1, 0, 0, 0}},
228
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
229
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230
  {1, 0, 0, 0}},
231
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
232
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
233
  {1, 0, 0, 0}}}
234
};
235
236
/* Precomputation for the group generator. */
237
struct nistp224_pre_comp_st {
238
    felem g_pre_comp[2][16][3];
239
    CRYPTO_REF_COUNT references;
240
    CRYPTO_RWLOCK *lock;
241
};
242
243
const EC_METHOD *EC_GFp_nistp224_method(void)
244
2.29k
{
245
2.29k
    static const EC_METHOD ret = {
246
2.29k
        EC_FLAGS_DEFAULT_OCT,
247
2.29k
        NID_X9_62_prime_field,
248
2.29k
        ec_GFp_nistp224_group_init,
249
2.29k
        ec_GFp_simple_group_finish,
250
2.29k
        ec_GFp_simple_group_clear_finish,
251
2.29k
        ec_GFp_nist_group_copy,
252
2.29k
        ec_GFp_nistp224_group_set_curve,
253
2.29k
        ec_GFp_simple_group_get_curve,
254
2.29k
        ec_GFp_simple_group_get_degree,
255
2.29k
        ec_group_simple_order_bits,
256
2.29k
        ec_GFp_simple_group_check_discriminant,
257
2.29k
        ec_GFp_simple_point_init,
258
2.29k
        ec_GFp_simple_point_finish,
259
2.29k
        ec_GFp_simple_point_clear_finish,
260
2.29k
        ec_GFp_simple_point_copy,
261
2.29k
        ec_GFp_simple_point_set_to_infinity,
262
2.29k
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
263
2.29k
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
264
2.29k
        ec_GFp_simple_point_set_affine_coordinates,
265
2.29k
        ec_GFp_nistp224_point_get_affine_coordinates,
266
2.29k
        0 /* point_set_compressed_coordinates */ ,
267
2.29k
        0 /* point2oct */ ,
268
2.29k
        0 /* oct2point */ ,
269
2.29k
        ec_GFp_simple_add,
270
2.29k
        ec_GFp_simple_dbl,
271
2.29k
        ec_GFp_simple_invert,
272
2.29k
        ec_GFp_simple_is_at_infinity,
273
2.29k
        ec_GFp_simple_is_on_curve,
274
2.29k
        ec_GFp_simple_cmp,
275
2.29k
        ec_GFp_simple_make_affine,
276
2.29k
        ec_GFp_simple_points_make_affine,
277
2.29k
        ec_GFp_nistp224_points_mul,
278
2.29k
        ec_GFp_nistp224_precompute_mult,
279
2.29k
        ec_GFp_nistp224_have_precompute_mult,
280
2.29k
        ec_GFp_nist_field_mul,
281
2.29k
        ec_GFp_nist_field_sqr,
282
2.29k
        0 /* field_div */ ,
283
2.29k
        ec_GFp_simple_field_inv,
284
2.29k
        0 /* field_encode */ ,
285
2.29k
        0 /* field_decode */ ,
286
2.29k
        0,                      /* field_set_to_one */
287
2.29k
        ec_key_simple_priv2oct,
288
2.29k
        ec_key_simple_oct2priv,
289
2.29k
        0, /* set private */
290
2.29k
        ec_key_simple_generate_key,
291
2.29k
        ec_key_simple_check_key,
292
2.29k
        ec_key_simple_generate_public_key,
293
2.29k
        0, /* keycopy */
294
2.29k
        0, /* keyfinish */
295
2.29k
        ecdh_simple_compute_key,
296
2.29k
        0, /* field_inverse_mod_ord */
297
2.29k
        0, /* blind_coordinates */
298
2.29k
        0, /* ladder_pre */
299
2.29k
        0, /* ladder_step */
300
2.29k
        0  /* ladder_post */
301
2.29k
    };
302
303
2.29k
    return &ret;
304
2.29k
}
305
306
/*
307
 * Helper functions to convert field elements to/from internal representation
308
 */
309
static void bin28_to_felem(felem out, const u8 in[28])
310
27
{
311
27
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
312
27
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
313
27
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
314
27
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
315
27
}
316
317
static void felem_to_bin28(u8 out[28], const felem in)
318
78
{
319
78
    unsigned i;
320
624
    for (i = 0; i < 7; ++i) {
321
546
        out[i] = in[0] >> (8 * i);
322
546
        out[i + 7] = in[1] >> (8 * i);
323
546
        out[i + 14] = in[2] >> (8 * i);
324
546
        out[i + 21] = in[3] >> (8 * i);
325
546
    }
326
78
}
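A self-contained sketch of the same packing (an illustration, not code from the file): it uses memcpy in place of the aligned(1) pointer casts, which is the portable way to express those unaligned 64-bit loads on a little-endian target, and it round-trips a 28-byte array through the 4 x 56-bit form.

#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef uint64_t limb;
typedef limb felem[4];

/*
 * Portable equivalent of bin28_to_felem(): memcpy expresses the same
 * unaligned 64-bit loads without the aligned(1) typedef.
 * (Like the original, this assumes a little-endian target.)
 */
static void bin28_to_felem_sketch(felem out, const uint8_t in[28])
{
    uint64_t w;

    memcpy(&w, in, 8);
    out[0] = w & 0x00ffffffffffffff;
    memcpy(&w, in + 7, 8);
    out[1] = w & 0x00ffffffffffffff;
    memcpy(&w, in + 14, 8);
    out[2] = w & 0x00ffffffffffffff;
    memcpy(&w, in + 20, 8);
    out[3] = w >> 8;
}

/* Same byte-by-byte store as felem_to_bin28() above. */
static void felem_to_bin28_sketch(uint8_t out[28], const felem in)
{
    unsigned i;

    for (i = 0; i < 7; ++i) {
        out[i] = (uint8_t)(in[0] >> (8 * i));
        out[i + 7] = (uint8_t)(in[1] >> (8 * i));
        out[i + 14] = (uint8_t)(in[2] >> (8 * i));
        out[i + 21] = (uint8_t)(in[3] >> (8 * i));
    }
}

int main(void)
{
    uint8_t b[28], b2[28];
    felem f;
    unsigned i;

    for (i = 0; i < 28; ++i)
        b[i] = (uint8_t)(i + 1);
    bin28_to_felem_sketch(f, b);
    felem_to_bin28_sketch(b2, f);
    assert(memcmp(b, b2, sizeof(b)) == 0); /* the two maps are inverses */
    return 0;
}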
327
328
/* From OpenSSL BIGNUM to internal representation */
329
static int BN_to_felem(felem out, const BIGNUM *bn)
330
27
{
331
27
    felem_bytearray b_out;
332
27
    int num_bytes;
333
334
27
    if (BN_is_negative(bn)) {
335
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
336
0
        return 0;
337
0
    }
338
27
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
339
27
    if (num_bytes < 0) {
340
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
341
0
        return 0;
342
0
    }
343
27
    bin28_to_felem(out, b_out);
344
27
    return 1;
345
27
}
346
347
/* From internal representation to OpenSSL BIGNUM */
348
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
349
78
{
350
78
    felem_bytearray b_out;
351
78
    felem_to_bin28(b_out, in);
352
78
    return BN_lebin2bn(b_out, sizeof(b_out), out);
353
78
}
354
355
/******************************************************************************/
356
/*-
357
 *                              FIELD OPERATIONS
358
 *
359
 * Field operations, using the internal representation of field elements.
360
 * NB! These operations are specific to our point multiplication and cannot be
361
 * expected to be correct in general - e.g., multiplication with a large scalar
362
 * will cause an overflow.
363
 *
364
 */
365
366
static void felem_one(felem out)
367
0
{
368
0
    out[0] = 1;
369
0
    out[1] = 0;
370
0
    out[2] = 0;
371
0
    out[3] = 0;
372
0
}
373
374
static void felem_assign(felem out, const felem in)
375
5.51k
{
376
5.51k
    out[0] = in[0];
377
5.51k
    out[1] = in[1];
378
5.51k
    out[2] = in[2];
379
5.51k
    out[3] = in[3];
380
5.51k
}
381
382
/* Sum two field elements: out += in */
383
static void felem_sum(felem out, const felem in)
384
810
{
385
810
    out[0] += in[0];
386
810
    out[1] += in[1];
387
810
    out[2] += in[2];
388
810
    out[3] += in[3];
389
810
}
390
391
/* Subtract field elements: out -= in */
392
/* Assumes in[i] < 2^57 */
393
static void felem_diff(felem out, const felem in)
394
1.09k
{
395
1.09k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
396
1.09k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
397
1.09k
    static const limb two58m42m2 = (((limb) 1) << 58) -
398
1.09k
        (((limb) 1) << 42) - (((limb) 1) << 2);
399
400
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
401
1.09k
    out[0] += two58p2;
402
1.09k
    out[1] += two58m42m2;
403
1.09k
    out[2] += two58m2;
404
1.09k
    out[3] += two58m2;
405
406
1.09k
    out[0] -= in[0];
407
1.09k
    out[1] -= in[1];
408
1.09k
    out[2] -= in[2];
409
1.09k
    out[3] -= in[3];
410
1.09k
}
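A quick check, added here as an aside, that the three constants above really add 0 modulo p = 2^224 - 2^96 + 1:

\[ (2^{58}+2^{2}) + (2^{58}-2^{42}-2^{2})\,2^{56} + (2^{58}-2^{2})\,2^{112} + (2^{58}-2^{2})\,2^{168} \;=\; 2^{226} - 2^{98} + 2^{2} \;=\; 4p, \]

so felem_diff() changes nothing mod p while making every limb large enough that the per-limb subtraction cannot underflow. The constants in felem_diff_128_64() below sum to 2^8 * p in the same way.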
411
412
/* Subtract in unreduced 128-bit mode: out -= in */
413
/* Assumes in[i] < 2^119 */
414
static void widefelem_diff(widefelem out, const widefelem in)
415
820
{
416
820
    static const widelimb two120 = ((widelimb) 1) << 120;
417
820
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
418
820
        (((widelimb) 1) << 64);
419
820
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
420
820
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
421
422
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
423
820
    out[0] += two120;
424
820
    out[1] += two120m64;
425
820
    out[2] += two120m64;
426
820
    out[3] += two120;
427
820
    out[4] += two120m104m64;
428
820
    out[5] += two120m64;
429
820
    out[6] += two120m64;
430
431
820
    out[0] -= in[0];
432
820
    out[1] -= in[1];
433
820
    out[2] -= in[2];
434
820
    out[3] -= in[3];
435
820
    out[4] -= in[4];
436
820
    out[5] -= in[5];
437
820
    out[6] -= in[6];
438
820
}
439
440
/* Subtract in mixed mode: out128 -= in64 */
441
/* in[i] < 2^63 */
442
static void felem_diff_128_64(widefelem out, const felem in)
443
2.74k
{
444
2.74k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
445
2.74k
        (((widelimb) 1) << 8);
446
2.74k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
447
2.74k
        (((widelimb) 1) << 8);
448
2.74k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
449
2.74k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
450
451
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
452
2.74k
    out[0] += two64p8;
453
2.74k
    out[1] += two64m48m8;
454
2.74k
    out[2] += two64m8;
455
2.74k
    out[3] += two64m8;
456
457
2.74k
    out[0] -= in[0];
458
2.74k
    out[1] -= in[1];
459
2.74k
    out[2] -= in[2];
460
2.74k
    out[3] -= in[3];
461
2.74k
}
462
463
/*
464
 * Multiply a field element by a scalar: out = out * scalar. The scalars we
465
 * actually use are small, so results fit without overflow
466
 */
467
static void felem_scalar(felem out, const limb scalar)
468
1.36k
{
469
1.36k
    out[0] *= scalar;
470
1.36k
    out[1] *= scalar;
471
1.36k
    out[2] *= scalar;
472
1.36k
    out[3] *= scalar;
473
1.36k
}
474
475
/*
476
 * Multiply an unreduced field element by a scalar: out = out * scalar. The
477
 * scalars we actually use are small, so results fit without overflow
478
 */
479
static void widefelem_scalar(widefelem out, const widelimb scalar)
480
270
{
481
270
    out[0] *= scalar;
482
270
    out[1] *= scalar;
483
270
    out[2] *= scalar;
484
270
    out[3] *= scalar;
485
270
    out[4] *= scalar;
486
270
    out[5] *= scalar;
487
270
    out[6] *= scalar;
488
270
}
489
490
/* Square a field element: out = in^2 */
491
static void felem_square(widefelem out, const felem in)
492
5.01k
{
493
5.01k
    limb tmp0, tmp1, tmp2;
494
5.01k
    tmp0 = 2 * in[0];
495
5.01k
    tmp1 = 2 * in[1];
496
5.01k
    tmp2 = 2 * in[2];
497
5.01k
    out[0] = ((widelimb) in[0]) * in[0];
498
5.01k
    out[1] = ((widelimb) in[0]) * tmp1;
499
5.01k
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500
5.01k
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
501
5.01k
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
502
5.01k
    out[5] = ((widelimb) in[3]) * tmp2;
503
5.01k
    out[6] = ((widelimb) in[3]) * in[3];
504
5.01k
}
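For reference (a derivation added here, not in the original), these seven coefficients are the schoolbook square of a(t) = a_0 + a_1 t + a_2 t^2 + a_3 t^3 evaluated at t = 2^56, with tmp_i = 2*a_i precomputed:

\[ a(t)^2 = a_0^2 + 2a_0a_1\,t + (2a_0a_2 + a_1^2)\,t^2 + (2a_0a_3 + 2a_1a_2)\,t^3 + (2a_1a_3 + a_2^2)\,t^4 + 2a_2a_3\,t^5 + a_3^2\,t^6 . \]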
505
506
/* Multiply two field elements: out = in1 * in2 */
507
static void felem_mul(widefelem out, const felem in1, const felem in2)
508
5.33k
{
509
5.33k
    out[0] = ((widelimb) in1[0]) * in2[0];
510
5.33k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
511
5.33k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
512
5.33k
             ((widelimb) in1[2]) * in2[0];
513
5.33k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
514
5.33k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
515
5.33k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
516
5.33k
             ((widelimb) in1[3]) * in2[1];
517
5.33k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
518
5.33k
    out[6] = ((widelimb) in1[3]) * in2[3];
519
5.33k
}
520
521
/*-
522
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
523
 * Requires in[i] < 2^126,
524
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525
static void felem_reduce(felem out, const widefelem in)
526
9.53k
{
527
9.53k
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
528
9.53k
        (((widelimb) 1) << 15);
529
9.53k
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
530
9.53k
        (((widelimb) 1) << 71);
531
9.53k
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532
9.53k
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
533
9.53k
    widelimb output[5];
534
535
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536
9.53k
    output[0] = in[0] + two127p15;
537
9.53k
    output[1] = in[1] + two127m71m55;
538
9.53k
    output[2] = in[2] + two127m71;
539
9.53k
    output[3] = in[3];
540
9.53k
    output[4] = in[4];
541
542
    /* Eliminate in[4], in[5], in[6] */
543
9.53k
    output[4] += in[6] >> 16;
544
9.53k
    output[3] += (in[6] & 0xffff) << 40;
545
9.53k
    output[2] -= in[6];
546
547
9.53k
    output[3] += in[5] >> 16;
548
9.53k
    output[2] += (in[5] & 0xffff) << 40;
549
9.53k
    output[1] -= in[5];
550
551
9.53k
    output[2] += output[4] >> 16;
552
9.53k
    output[1] += (output[4] & 0xffff) << 40;
553
9.53k
    output[0] -= output[4];
554
555
    /* Carry 2 -> 3 -> 4 */
556
9.53k
    output[3] += output[2] >> 56;
557
9.53k
    output[2] &= 0x00ffffffffffffff;
558
559
9.53k
    output[4] = output[3] >> 56;
560
9.53k
    output[3] &= 0x00ffffffffffffff;
561
562
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
563
564
    /* Eliminate output[4] */
565
9.53k
    output[2] += output[4] >> 16;
566
    /* output[2] < 2^56 + 2^56 = 2^57 */
567
9.53k
    output[1] += (output[4] & 0xffff) << 40;
568
9.53k
    output[0] -= output[4];
569
570
    /* Carry 0 -> 1 -> 2 -> 3 */
571
9.53k
    output[1] += output[0] >> 56;
572
9.53k
    out[0] = output[0] & 0x00ffffffffffffff;
573
574
9.53k
    output[2] += output[1] >> 56;
575
    /* output[2] < 2^57 + 2^72 */
576
9.53k
    out[1] = output[1] & 0x00ffffffffffffff;
577
9.53k
    output[3] += output[2] >> 56;
578
    /* output[3] <= 2^56 + 2^16 */
579
9.53k
    out[2] = output[2] & 0x00ffffffffffffff;
580
581
    /*-
582
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
583
     * out[3] <= 2^56 + 2^16 (due to final carry),
584
     * so out < 2*p
585
     */
586
9.53k
    out[3] = output[3];
587
9.53k
}
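The folding above rests on one congruence, stated here as an aside: since p = 2^224 - 2^96 + 1,

\[ 2^{224} \equiv 2^{96} - 1 \pmod{p}, \qquad\text{hence}\qquad in[6]\cdot 2^{336} \equiv in[6]\cdot 2^{208} - in[6]\cdot 2^{112} \pmod{p}. \]

The code realizes in[6]*2^208 as (in[6] >> 16)*2^224 + (in[6] & 0xffff)*2^40*2^168, which is exactly the three statements that eliminate in[6]; the same identity, shifted down one limb at a time, eliminates in[5] and the overflow collected in output[4].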
588
589
static void felem_square_reduce(felem out, const felem in)
590
0
{
591
0
    widefelem tmp;
592
0
    felem_square(tmp, in);
593
0
    felem_reduce(out, tmp);
594
0
}
595
596
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
597
0
{
598
0
    widefelem tmp;
599
0
    felem_mul(tmp, in1, in2);
600
0
    felem_reduce(out, tmp);
601
0
}
602
603
/*
604
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
605
 * call felem_reduce first)
606
 */
607
static void felem_contract(felem out, const felem in)
608
48
{
609
48
    static const int64_t two56 = ((limb) 1) << 56;
610
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
611
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
612
48
    int64_t tmp[4], a;
613
48
    tmp[0] = in[0];
614
48
    tmp[1] = in[1];
615
48
    tmp[2] = in[2];
616
48
    tmp[3] = in[3];
617
    /* Case 1: a = 1 iff in >= 2^224 */
618
48
    a = (in[3] >> 56);
619
48
    tmp[0] -= a;
620
48
    tmp[1] += a << 40;
621
48
    tmp[3] &= 0x00ffffffffffffff;
622
    /*
623
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
624
     * and the lower part is non-zero
625
     */
626
48
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
627
48
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
628
48
    a &= 0x00ffffffffffffff;
629
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
630
48
    a = (a - 1) >> 63;
631
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
632
48
    tmp[3] &= a ^ 0xffffffffffffffff;
633
48
    tmp[2] &= a ^ 0xffffffffffffffff;
634
48
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
635
48
    tmp[0] -= 1 & a;
636
637
    /*
638
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
639
     * non-zero, so we only need one step
640
     */
641
48
    a = tmp[0] >> 63;
642
48
    tmp[0] += two56 & a;
643
48
    tmp[1] -= 1 & a;
644
645
    /* carry 1 -> 2 -> 3 */
646
48
    tmp[2] += tmp[1] >> 56;
647
48
    tmp[1] &= 0x00ffffffffffffff;
648
649
48
    tmp[3] += tmp[2] >> 56;
650
48
    tmp[2] &= 0x00ffffffffffffff;
651
652
    /* Now 0 <= out < p */
653
48
    out[0] = tmp[0];
654
48
    out[1] = tmp[1];
655
48
    out[2] = tmp[2];
656
48
    out[3] = tmp[3];
657
48
}
658
659
/*
660
 * Get negative value: out = -in
661
 * Requires in[i] < 2^63,
662
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
663
 */
664
static void felem_neg(felem out, const felem in)
665
0
{
666
0
    widefelem tmp = {0};
667
0
    felem_diff_128_64(tmp, in);
668
0
    felem_reduce(out, tmp);
669
0
}
670
671
/*
672
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
673
 * elements are reduced to in < 2^225, so we only need to check three cases:
674
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
675
 */
676
static limb felem_is_zero(const felem in)
677
2.20k
{
678
2.20k
    limb zero, two224m96p1, two225m97p2;
679
680
2.20k
    zero = in[0] | in[1] | in[2] | in[3];
681
2.20k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
682
2.20k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
683
2.20k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
684
2.20k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
685
2.20k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
686
2.20k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
687
2.20k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
688
2.20k
    return (zero | two224m96p1 | two225m97p2);
689
2.20k
}
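The two magic limb patterns compared against above are simply p and 2p written in this radix-2^56 form (a check added for the reader, not in the original):

\[ p = 2^{224} - 2^{96} + 1 = \{1,\ 2^{56}-2^{40},\ 2^{56}-1,\ 2^{56}-1\}, \qquad 2p = 2^{225} - 2^{97} + 2 = \{2,\ 2^{56}-2^{41},\ 2^{56}-1,\ 2^{57}-1\}, \]

and 2^56 - 2^40 = 0x00ffff0000000000, 2^56 - 2^41 = 0x00fffe0000000000, 2^57 - 1 = 0x01ffffffffffffff, matching the XOR constants used in the comparisons.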
690
691
static int felem_is_zero_int(const void *in)
692
0
{
693
0
    return (int)(felem_is_zero(in) & ((limb) 1));
694
0
}
695
696
/* Invert a field element */
697
/* Computation chain copied from djb's code */
698
static void felem_inv(felem out, const felem in)
699
9
{
700
9
    felem ftmp, ftmp2, ftmp3, ftmp4;
701
9
    widefelem tmp;
702
9
    unsigned i;
703
704
9
    felem_square(tmp, in);
705
9
    felem_reduce(ftmp, tmp);    /* 2 */
706
9
    felem_mul(tmp, in, ftmp);
707
9
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
708
9
    felem_square(tmp, ftmp);
709
9
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
710
9
    felem_mul(tmp, in, ftmp);
711
9
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
712
9
    felem_square(tmp, ftmp);
713
9
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
714
9
    felem_square(tmp, ftmp2);
715
9
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
716
9
    felem_square(tmp, ftmp2);
717
9
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
718
9
    felem_mul(tmp, ftmp2, ftmp);
719
9
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
720
9
    felem_square(tmp, ftmp);
721
9
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
722
54
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
723
45
        felem_square(tmp, ftmp2);
724
45
        felem_reduce(ftmp2, tmp);
725
45
    }
726
9
    felem_mul(tmp, ftmp2, ftmp);
727
9
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
728
9
    felem_square(tmp, ftmp2);
729
9
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
730
108
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
731
99
        felem_square(tmp, ftmp3);
732
99
        felem_reduce(ftmp3, tmp);
733
99
    }
734
9
    felem_mul(tmp, ftmp3, ftmp2);
735
9
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
736
9
    felem_square(tmp, ftmp2);
737
9
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
738
216
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
739
207
        felem_square(tmp, ftmp3);
740
207
        felem_reduce(ftmp3, tmp);
741
207
    }
742
9
    felem_mul(tmp, ftmp3, ftmp2);
743
9
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
744
9
    felem_square(tmp, ftmp3);
745
9
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
746
432
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
747
423
        felem_square(tmp, ftmp4);
748
423
        felem_reduce(ftmp4, tmp);
749
423
    }
750
9
    felem_mul(tmp, ftmp3, ftmp4);
751
9
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
752
9
    felem_square(tmp, ftmp3);
753
9
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
754
216
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
755
207
        felem_square(tmp, ftmp4);
756
207
        felem_reduce(ftmp4, tmp);
757
207
    }
758
9
    felem_mul(tmp, ftmp2, ftmp4);
759
9
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
760
63
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
761
54
        felem_square(tmp, ftmp2);
762
54
        felem_reduce(ftmp2, tmp);
763
54
    }
764
9
    felem_mul(tmp, ftmp2, ftmp);
765
9
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
766
9
    felem_square(tmp, ftmp);
767
9
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
768
9
    felem_mul(tmp, ftmp, in);
769
9
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
770
882
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
771
873
        felem_square(tmp, ftmp);
772
873
        felem_reduce(ftmp, tmp);
773
873
    }
774
9
    felem_mul(tmp, ftmp, ftmp3);
775
9
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
776
9
}
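This chain is a Fermat inversion; as a sanity check on the exponent (an aside, not in the file), the final multiplication combines the exponents 2^224 - 2^97 (ftmp) and 2^96 - 1 (ftmp3):

\[ (2^{224} - 2^{97}) + (2^{96} - 1) = 2^{224} - 2^{96} - 1 = p - 2, \qquad\text{so}\qquad out = in^{\,p-2} \equiv in^{-1} \pmod{p}. \]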
777
778
/*
779
 * Copy in constant time: if icopy == 1, copy in to out; if icopy == 0, copy
780
 * out to itself.
781
 */
782
static void copy_conditional(felem out, const felem in, limb icopy)
783
3.30k
{
784
3.30k
    unsigned i;
785
    /*
786
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
787
     */
788
3.30k
    const limb copy = -icopy;
789
16.5k
    for (i = 0; i < 4; ++i) {
790
13.2k
        const limb tmp = copy & (in[i] ^ out[i]);
791
13.2k
        out[i] ^= tmp;
792
13.2k
    }
793
3.30k
}
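A standalone restatement of the same trick (illustrative only; the _sketch name is made up here): -icopy is all-ones when icopy == 1 and all-zero when icopy == 0, so the masked XOR either copies in[] or rewrites out[] with its own value, with no data-dependent branch.

#include <assert.h>
#include <stdint.h>

typedef uint64_t limb;
typedef limb felem[4];

/*
 * copy_conditional_sketch() restates copy_conditional(): the mask -icopy
 * selects between copying in[] and leaving out[] with its current value,
 * while always performing the same sequence of operations.
 */
static void copy_conditional_sketch(felem out, const felem in, limb icopy)
{
    const limb copy = -icopy;
    unsigned i;

    for (i = 0; i < 4; ++i)
        out[i] ^= copy & (in[i] ^ out[i]);
}

int main(void)
{
    felem a = {1, 2, 3, 4};
    const felem b = {5, 6, 7, 8};

    copy_conditional_sketch(a, b, 0);
    assert(a[0] == 1 && a[3] == 4);     /* left unchanged */
    copy_conditional_sketch(a, b, 1);
    assert(a[0] == 5 && a[3] == 8);     /* copied from b */
    return 0;
}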
794
795
/******************************************************************************/
796
/*-
797
 *                       ELLIPTIC CURVE POINT OPERATIONS
798
 *
799
 * Points are represented in Jacobian projective coordinates:
800
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
801
 * or to the point at infinity if Z == 0.
802
 *
803
 */
804
805
/*-
806
 * Double an elliptic curve point:
807
 * (X', Y', Z') = 2 * (X, Y, Z), where
808
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
809
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
810
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
811
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
812
 * while x_out == y_in is not (maybe this works, but it's not tested).
813
 */
814
static void
815
point_double(felem x_out, felem y_out, felem z_out,
816
             const felem x_in, const felem y_in, const felem z_in)
817
270
{
818
270
    widefelem tmp, tmp2;
819
270
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
820
821
270
    felem_assign(ftmp, x_in);
822
270
    felem_assign(ftmp2, x_in);
823
824
    /* delta = z^2 */
825
270
    felem_square(tmp, z_in);
826
270
    felem_reduce(delta, tmp);
827
828
    /* gamma = y^2 */
829
270
    felem_square(tmp, y_in);
830
270
    felem_reduce(gamma, tmp);
831
832
    /* beta = x*gamma */
833
270
    felem_mul(tmp, x_in, gamma);
834
270
    felem_reduce(beta, tmp);
835
836
    /* alpha = 3*(x-delta)*(x+delta) */
837
270
    felem_diff(ftmp, delta);
838
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
839
270
    felem_sum(ftmp2, delta);
840
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
841
270
    felem_scalar(ftmp2, 3);
842
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
843
270
    felem_mul(tmp, ftmp, ftmp2);
844
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
845
270
    felem_reduce(alpha, tmp);
846
847
    /* x' = alpha^2 - 8*beta */
848
270
    felem_square(tmp, alpha);
849
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
850
270
    felem_assign(ftmp, beta);
851
270
    felem_scalar(ftmp, 8);
852
    /* ftmp[i] < 8 * 2^57 = 2^60 */
853
270
    felem_diff_128_64(tmp, ftmp);
854
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
855
270
    felem_reduce(x_out, tmp);
856
857
    /* z' = (y + z)^2 - gamma - delta */
858
270
    felem_sum(delta, gamma);
859
    /* delta[i] < 2^57 + 2^57 = 2^58 */
860
270
    felem_assign(ftmp, y_in);
861
270
    felem_sum(ftmp, z_in);
862
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
863
270
    felem_square(tmp, ftmp);
864
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
865
270
    felem_diff_128_64(tmp, delta);
866
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
867
270
    felem_reduce(z_out, tmp);
868
869
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
870
270
    felem_scalar(beta, 4);
871
    /* beta[i] < 4 * 2^57 = 2^59 */
872
270
    felem_diff(beta, x_out);
873
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
874
270
    felem_mul(tmp, alpha, beta);
875
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
876
270
    felem_square(tmp2, gamma);
877
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
878
270
    widefelem_scalar(tmp2, 8);
879
    /* tmp2[i] < 8 * 2^116 = 2^119 */
880
270
    widefelem_diff(tmp, tmp2);
881
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
882
270
    felem_reduce(y_out, tmp);
883
270
}
884
885
/*-
886
 * Add two elliptic curve points:
887
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
888
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
889
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
890
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
891
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
892
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
893
 *
894
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
895
 */
896
897
/*
898
 * This function is not entirely constant-time: it includes a branch for
899
 * checking whether the two input points are equal, (while not equal to the
900
 * point at infinity). This case never happens during single point
901
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
902
 */
903
static void point_add(felem x3, felem y3, felem z3,
904
                      const felem x1, const felem y1, const felem z1,
905
                      const int mixed, const felem x2, const felem y2,
906
                      const felem z2)
907
550
{
908
550
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
909
550
    widefelem tmp, tmp2;
910
550
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
911
550
    limb points_equal;
912
913
550
    if (!mixed) {
914
        /* ftmp2 = z2^2 */
915
0
        felem_square(tmp, z2);
916
0
        felem_reduce(ftmp2, tmp);
917
918
        /* ftmp4 = z2^3 */
919
0
        felem_mul(tmp, ftmp2, z2);
920
0
        felem_reduce(ftmp4, tmp);
921
922
        /* ftmp4 = z2^3*y1 */
923
0
        felem_mul(tmp2, ftmp4, y1);
924
0
        felem_reduce(ftmp4, tmp2);
925
926
        /* ftmp2 = z2^2*x1 */
927
0
        felem_mul(tmp2, ftmp2, x1);
928
0
        felem_reduce(ftmp2, tmp2);
929
550
    } else {
930
        /*
931
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
932
         */
933
934
        /* ftmp4 = z2^3*y1 */
935
550
        felem_assign(ftmp4, y1);
936
937
        /* ftmp2 = z2^2*x1 */
938
550
        felem_assign(ftmp2, x1);
939
550
    }
940
941
    /* ftmp = z1^2 */
942
550
    felem_square(tmp, z1);
943
550
    felem_reduce(ftmp, tmp);
944
945
    /* ftmp3 = z1^3 */
946
550
    felem_mul(tmp, ftmp, z1);
947
550
    felem_reduce(ftmp3, tmp);
948
949
    /* tmp = z1^3*y2 */
950
550
    felem_mul(tmp, ftmp3, y2);
951
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
952
953
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
954
550
    felem_diff_128_64(tmp, ftmp4);
955
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
956
550
    felem_reduce(ftmp3, tmp);
957
958
    /* tmp = z1^2*x2 */
959
550
    felem_mul(tmp, ftmp, x2);
960
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
961
962
    /* ftmp = z1^2*x2 - z2^2*x1 */
963
550
    felem_diff_128_64(tmp, ftmp2);
964
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
965
550
    felem_reduce(ftmp, tmp);
966
967
    /*
968
     * The formulae are incorrect if the points are equal, in affine coordinates
969
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
970
     * happens.
971
     *
972
     * We use bitwise operations to avoid potential side-channels introduced by
973
     * the short-circuiting behaviour of boolean operators.
974
     */
975
550
    x_equal = felem_is_zero(ftmp);
976
550
    y_equal = felem_is_zero(ftmp3);
977
    /*
978
     * The special case of either point being the point at infinity (z1 and/or
979
     * z2 are zero), is handled separately later on in this function, so we
980
     * avoid jumping to point_double here in those special cases.
981
     */
982
550
    z1_is_zero = felem_is_zero(z1);
983
550
    z2_is_zero = felem_is_zero(z2);
984
985
    /*
986
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
987
     * specific implementation `felem_is_zero()` returns truth as `0x1`
988
     * (rather than `0xff..ff`).
989
     *
990
     * This implies that `~true` in this implementation becomes
991
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
992
     * the if expression, we mask out only the last bit in the next
993
     * line.
994
     */
995
550
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
996
997
550
    if (points_equal) {
998
        /*
999
         * This is obviously not constant-time but, as mentioned before, this
1000
         * case never happens during single point multiplication, so there is no
1001
         * timing leak for ECDH or ECDSA signing.
1002
         */
1003
0
        point_double(x3, y3, z3, x1, y1, z1);
1004
0
        return;
1005
0
    }
1006
1007
    /* ftmp5 = z1*z2 */
1008
550
    if (!mixed) {
1009
0
        felem_mul(tmp, z1, z2);
1010
0
        felem_reduce(ftmp5, tmp);
1011
550
    } else {
1012
        /* special case z2 = 0 is handled later */
1013
550
        felem_assign(ftmp5, z1);
1014
550
    }
1015
1016
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017
550
    felem_mul(tmp, ftmp, ftmp5);
1018
550
    felem_reduce(z_out, tmp);
1019
1020
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021
550
    felem_assign(ftmp5, ftmp);
1022
550
    felem_square(tmp, ftmp);
1023
550
    felem_reduce(ftmp, tmp);
1024
1025
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026
550
    felem_mul(tmp, ftmp, ftmp5);
1027
550
    felem_reduce(ftmp5, tmp);
1028
1029
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030
550
    felem_mul(tmp, ftmp2, ftmp);
1031
550
    felem_reduce(ftmp2, tmp);
1032
1033
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034
550
    felem_mul(tmp, ftmp4, ftmp5);
1035
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036
1037
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038
550
    felem_square(tmp2, ftmp3);
1039
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042
550
    felem_diff_128_64(tmp2, ftmp5);
1043
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044
1045
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046
550
    felem_assign(ftmp5, ftmp2);
1047
550
    felem_scalar(ftmp5, 2);
1048
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049
1050
    /*-
1051
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053
     */
1054
550
    felem_diff_128_64(tmp2, ftmp5);
1055
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056
550
    felem_reduce(x_out, tmp2);
1057
1058
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059
550
    felem_diff(ftmp2, x_out);
1060
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061
1062
    /*
1063
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064
     */
1065
550
    felem_mul(tmp2, ftmp3, ftmp2);
1066
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067
1068
    /*-
1069
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071
     */
1072
550
    widefelem_diff(tmp2, tmp);
1073
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074
550
    felem_reduce(y_out, tmp2);
1075
1076
    /*
1077
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078
     * the point at infinity, so we need to check for this separately
1079
     */
1080
1081
    /*
1082
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1083
     */
1084
550
    copy_conditional(x_out, x2, z1_is_zero);
1085
550
    copy_conditional(x_out, x1, z2_is_zero);
1086
550
    copy_conditional(y_out, y2, z1_is_zero);
1087
550
    copy_conditional(y_out, y1, z2_is_zero);
1088
550
    copy_conditional(z_out, z2, z1_is_zero);
1089
550
    copy_conditional(z_out, z1, z2_is_zero);
1090
550
    felem_assign(x3, x_out);
1091
550
    felem_assign(y3, y_out);
1092
550
    felem_assign(z3, z_out);
1093
550
}
1094
1095
/*
1096
 * select_point selects the |idx|th point from a precomputation table and
1097
 * copies it to out.
1098
 * The pre_comp array argument should have |size| elements
1099
 */
1100
static void select_point(const u64 idx, unsigned int size,
1101
                         const felem pre_comp[][3], felem out[3])
1102
560
{
1103
560
    unsigned i, j;
1104
560
    limb *outlimbs = &out[0][0];
1105
1106
560
    memset(out, 0, sizeof(*out) * 3);
1107
9.52k
    for (i = 0; i < size; i++) {
1108
8.96k
        const limb *inlimbs = &pre_comp[i][0][0];
1109
8.96k
        u64 mask = i ^ idx;
1110
8.96k
        mask |= mask >> 4;
1111
8.96k
        mask |= mask >> 2;
1112
8.96k
        mask |= mask >> 1;
1113
8.96k
        mask &= 1;
1114
8.96k
        mask--;
1115
116k
        for (j = 0; j < 4 * 3; j++)
1116
107k
            outlimbs[j] |= inlimbs[j] & mask;
1117
8.96k
    }
1118
560
}
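A standalone check of the masking idea (illustrative only; select_mask is a hypothetical helper, not an API of this file): folding (i ^ idx) down to its lowest bit and decrementing gives an all-ones mask exactly when i == idx, so only that table entry is OR-ed into the output.

#include <assert.h>
#include <stdint.h>

/*
 * select_mask() isolates the mask computation from select_point(): fold
 * (i ^ idx) down to its lowest bit, then decrement, giving an all-ones
 * mask exactly when i == idx.
 */
static uint64_t select_mask(uint64_t i, uint64_t idx)
{
    uint64_t mask = i ^ idx;

    mask |= mask >> 4;
    mask |= mask >> 2;
    mask |= mask >> 1;
    mask &= 1;
    return mask - 1;            /* 0xff..ff iff i == idx, else 0 */
}

int main(void)
{
    uint64_t i, idx;

    /* 17 matches the largest table passed to select_point() (pre_comp). */
    for (idx = 0; idx < 17; idx++)
        for (i = 0; i < 17; i++)
            assert(select_mask(i, idx) == (i == idx ? UINT64_MAX : 0));
    return 0;
}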
1119
1120
/* get_bit returns the |i|th bit in |in| */
1121
static char get_bit(const felem_bytearray in, unsigned i)
1122
2.24k
{
1123
2.24k
    if (i >= 224)
1124
0
        return 0;
1125
2.24k
    return (in[i >> 3] >> (i & 7)) & 1;
1126
2.24k
}
1127
1128
/*
1129
 * Interleaved point multiplication using precomputed point multiples: The
1130
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134
 */
1135
static void batch_mul(felem x_out, felem y_out, felem z_out,
1136
                      const felem_bytearray scalars[],
1137
                      const unsigned num_points, const u8 *g_scalar,
1138
                      const int mixed, const felem pre_comp[][17][3],
1139
                      const felem g_pre_comp[2][16][3])
1140
10
{
1141
10
    int i, skip;
1142
10
    unsigned num;
1143
10
    unsigned gen_mul = (g_scalar != NULL);
1144
10
    felem nq[3], tmp[4];
1145
10
    u64 bits;
1146
10
    u8 sign, digit;
1147
1148
    /* set nq to the point at infinity */
1149
10
    memset(nq, 0, sizeof(nq));
1150
1151
    /*
1152
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153
     * of the generator (two in each of the last 28 rounds) and additions of
1154
     * other points multiples (every 5th round).
1155
     */
1156
10
    skip = 1;                   /* save two point operations in the first
1157
                                 * round */
1158
290
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159
        /* double */
1160
280
        if (!skip)
1161
270
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162
1163
        /* add multiples of the generator */
1164
280
        if (gen_mul && (i <= 27)) {
1165
            /* first, look 28 bits upwards */
1166
280
            bits = get_bit(g_scalar, i + 196) << 3;
1167
280
            bits |= get_bit(g_scalar, i + 140) << 2;
1168
280
            bits |= get_bit(g_scalar, i + 84) << 1;
1169
280
            bits |= get_bit(g_scalar, i + 28);
1170
            /* select the point to add, in constant time */
1171
280
            select_point(bits, 16, g_pre_comp[1], tmp);
1172
1173
280
            if (!skip) {
1174
                /* value 1 below is argument for "mixed" */
1175
270
                point_add(nq[0], nq[1], nq[2],
1176
270
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177
270
            } else {
1178
10
                memcpy(nq, tmp, 3 * sizeof(felem));
1179
10
                skip = 0;
1180
10
            }
1181
1182
            /* second, look at the current position */
1183
280
            bits = get_bit(g_scalar, i + 168) << 3;
1184
280
            bits |= get_bit(g_scalar, i + 112) << 2;
1185
280
            bits |= get_bit(g_scalar, i + 56) << 1;
1186
280
            bits |= get_bit(g_scalar, i);
1187
            /* select the point to add, in constant time */
1188
280
            select_point(bits, 16, g_pre_comp[0], tmp);
1189
280
            point_add(nq[0], nq[1], nq[2],
1190
280
                      nq[0], nq[1], nq[2],
1191
280
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192
280
        }
1193
1194
        /* do other additions every 5 doublings */
1195
280
        if (num_points && (i % 5 == 0)) {
1196
            /* loop over all scalars */
1197
0
            for (num = 0; num < num_points; ++num) {
1198
0
                bits = get_bit(scalars[num], i + 4) << 5;
1199
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1200
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1201
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1202
0
                bits |= get_bit(scalars[num], i) << 1;
1203
0
                bits |= get_bit(scalars[num], i - 1);
1204
0
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205
1206
                /* select the point to add or subtract */
1207
0
                select_point(digit, 17, pre_comp[num], tmp);
1208
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209
                                            * point */
1210
0
                copy_conditional(tmp[1], tmp[3], sign);
1211
1212
0
                if (!skip) {
1213
0
                    point_add(nq[0], nq[1], nq[2],
1214
0
                              nq[0], nq[1], nq[2],
1215
0
                              mixed, tmp[0], tmp[1], tmp[2]);
1216
0
                } else {
1217
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1218
0
                    skip = 0;
1219
0
                }
1220
0
            }
1221
0
        }
1222
280
    }
1223
10
    felem_assign(x_out, nq[0]);
1224
10
    felem_assign(y_out, nq[1]);
1225
10
    felem_assign(z_out, nq[2]);
1226
10
}
1227
1228
/******************************************************************************/
1229
/*
1230
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231
 */
1232
1233
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1234
0
{
1235
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1236
1237
0
    if (!ret) {
1238
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239
0
        return ret;
1240
0
    }
1241
1242
0
    ret->references = 1;
1243
1244
0
    ret->lock = CRYPTO_THREAD_lock_new();
1245
0
    if (ret->lock == NULL) {
1246
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1247
0
        OPENSSL_free(ret);
1248
0
        return NULL;
1249
0
    }
1250
0
    return ret;
1251
0
}
1252
1253
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1254
0
{
1255
0
    int i;
1256
0
    if (p != NULL)
1257
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1258
0
    return p;
1259
0
}
1260
1261
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1262
0
{
1263
0
    int i;
1264
1265
0
    if (p == NULL)
1266
0
        return;
1267
1268
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1269
0
    REF_PRINT_COUNT("EC_nistp224", x);
1270
0
    if (i > 0)
1271
0
        return;
1272
0
    REF_ASSERT_ISNT(i < 0);
1273
1274
0
    CRYPTO_THREAD_lock_free(p->lock);
1275
0
    OPENSSL_free(p);
1276
0
}
1277
1278
/******************************************************************************/
1279
/*
1280
 * OPENSSL EC_METHOD FUNCTIONS
1281
 */
1282
1283
int ec_GFp_nistp224_group_init(EC_GROUP *group)
1284
4.57k
{
1285
4.57k
    int ret;
1286
4.57k
    ret = ec_GFp_simple_group_init(group);
1287
4.57k
    group->a_is_minus3 = 1;
1288
4.57k
    return ret;
1289
4.57k
}
1290
1291
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1292
                                    const BIGNUM *a, const BIGNUM *b,
1293
                                    BN_CTX *ctx)
1294
2.29k
{
1295
2.29k
    int ret = 0;
1296
2.29k
    BN_CTX *new_ctx = NULL;
1297
2.29k
    BIGNUM *curve_p, *curve_a, *curve_b;
1298
1299
2.29k
    if (ctx == NULL)
1300
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1301
0
            return 0;
1302
2.29k
    BN_CTX_start(ctx);
1303
2.29k
    curve_p = BN_CTX_get(ctx);
1304
2.29k
    curve_a = BN_CTX_get(ctx);
1305
2.29k
    curve_b = BN_CTX_get(ctx);
1306
2.29k
    if (curve_b == NULL)
1307
0
        goto err;
1308
2.29k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1309
2.29k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1310
2.29k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1311
2.29k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1312
0
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1313
0
              EC_R_WRONG_CURVE_PARAMETERS);
1314
0
        goto err;
1315
0
    }
1316
2.29k
    group->field_mod_func = BN_nist_mod_224;
1317
2.29k
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1318
2.29k
 err:
1319
2.29k
    BN_CTX_end(ctx);
1320
2.29k
    BN_CTX_free(new_ctx);
1321
2.29k
    return ret;
1322
2.29k
}
1323
1324
/*
1325
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1326
 * (X/Z^2, Y/Z^3)
1327
 */
1328
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1329
                                                 const EC_POINT *point,
1330
                                                 BIGNUM *x, BIGNUM *y,
1331
                                                 BN_CTX *ctx)
1332
9
{
1333
9
    felem z1, z2, x_in, y_in, x_out, y_out;
1334
9
    widefelem tmp;
1335
1336
9
    if (EC_POINT_is_at_infinity(group, point)) {
1337
0
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1338
0
              EC_R_POINT_AT_INFINITY);
1339
0
        return 0;
1340
0
    }
1341
9
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1342
9
        (!BN_to_felem(z1, point->Z)))
1343
0
        return 0;
1344
9
    felem_inv(z2, z1);
1345
9
    felem_square(tmp, z2);
1346
9
    felem_reduce(z1, tmp);
1347
9
    felem_mul(tmp, x_in, z1);
1348
9
    felem_reduce(x_in, tmp);
1349
9
    felem_contract(x_out, x_in);
1350
9
    if (x != NULL) {
1351
9
        if (!felem_to_BN(x, x_out)) {
1352
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1353
0
                  ERR_R_BN_LIB);
1354
0
            return 0;
1355
0
        }
1356
9
    }
1357
9
    felem_mul(tmp, z1, z2);
1358
9
    felem_reduce(z1, tmp);
1359
9
    felem_mul(tmp, y_in, z1);
1360
9
    felem_reduce(y_in, tmp);
1361
9
    felem_contract(y_out, y_in);
1362
9
    if (y != NULL) {
1363
9
        if (!felem_to_BN(y, y_out)) {
1364
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1365
0
                  ERR_R_BN_LIB);
1366
0
            return 0;
1367
0
        }
1368
9
    }
1369
9
    return 1;
1370
9
}
1371
1372
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373
                               felem tmp_felems[ /* num+1 */ ])
1374
0
{
1375
    /*
1376
     * Runs in constant time, unless an input is the point at infinity (which
1377
     * normally shouldn't happen).
1378
     */
1379
0
    ec_GFp_nistp_points_make_affine_internal(num,
1380
0
                                             points,
1381
0
                                             sizeof(felem),
1382
0
                                             tmp_felems,
1383
0
                                             (void (*)(void *))felem_one,
1384
0
                                             felem_is_zero_int,
1385
0
                                             (void (*)(void *, const void *))
1386
0
                                             felem_assign,
1387
0
                                             (void (*)(void *, const void *))
1388
0
                                             felem_square_reduce, (void (*)
1389
0
                                                                   (void *,
1390
0
                                                                    const void
1391
0
                                                                    *,
1392
0
                                                                    const void
1393
0
                                                                    *))
1394
0
                                             felem_mul_reduce,
1395
0
                                             (void (*)(void *, const void *))
1396
0
                                             felem_inv,
1397
0
                                             (void (*)(void *, const void *))
1398
0
                                             felem_contract);
1399
0
}
1400
1401
/*
1402
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403
 * values. Result is stored in r (r can equal one of the inputs).
1404
 */
1405
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406
                               const BIGNUM *scalar, size_t num,
1407
                               const EC_POINT *points[],
1408
                               const BIGNUM *scalars[], BN_CTX *ctx)
1409
10
{
1410
10
    int ret = 0;
1411
10
    int j;
1412
10
    unsigned i;
1413
10
    int mixed = 0;
1414
10
    BIGNUM *x, *y, *z, *tmp_scalar;
1415
10
    felem_bytearray g_secret;
1416
10
    felem_bytearray *secrets = NULL;
1417
10
    felem (*pre_comp)[17][3] = NULL;
1418
10
    felem *tmp_felems = NULL;
1419
10
    int num_bytes;
1420
10
    int have_pre_comp = 0;
1421
10
    size_t num_points = num;
1422
10
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1423
10
    NISTP224_PRE_COMP *pre = NULL;
1424
10
    const felem(*g_pre_comp)[16][3] = NULL;
1425
10
    EC_POINT *generator = NULL;
1426
10
    const EC_POINT *p = NULL;
1427
10
    const BIGNUM *p_scalar = NULL;
1428
1429
10
    BN_CTX_start(ctx);
1430
10
    x = BN_CTX_get(ctx);
1431
10
    y = BN_CTX_get(ctx);
1432
10
    z = BN_CTX_get(ctx);
1433
10
    tmp_scalar = BN_CTX_get(ctx);
1434
10
    if (tmp_scalar == NULL)
1435
0
        goto err;
1436
1437
10
    if (scalar != NULL) {
1438
10
        pre = group->pre_comp.nistp224;
1439
10
        if (pre)
1440
            /* we have precomputation, try to use it */
1441
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442
10
        else
1443
            /* try to use the standard precomputation */
1444
10
            g_pre_comp = &gmul[0];
1445
10
        generator = EC_POINT_new(group);
1446
10
        if (generator == NULL)
1447
0
            goto err;
1448
        /* get the generator from precomputation */
1449
10
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450
10
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451
10
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1453
0
            goto err;
1454
0
        }
1455
10
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1456
10
                                                      generator, x, y, z,
1457
10
                                                      ctx))
1458
0
            goto err;
1459
10
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460
            /* precomputation matches generator */
1461
10
            have_pre_comp = 1;
1462
0
        else
1463
            /*
1464
             * we don't have valid precomputation: treat the generator as a
1465
             * random point
1466
             */
1467
0
            num_points = num_points + 1;
1468
10
    }
1469
1470
10
    if (num_points > 0) {
1471
0
        if (num_points >= 3) {
1472
            /*
1473
             * unless we precompute multiples for just one or two points,
1474
             * converting those into affine form is time well spent
1475
             */
1476
0
            mixed = 1;
1477
0
        }
1478
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480
0
        if (mixed)
1481
0
            tmp_felems =
1482
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483
0
        if ((secrets == NULL) || (pre_comp == NULL)
1484
0
            || (mixed && (tmp_felems == NULL))) {
1485
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1486
0
            goto err;
1487
0
        }
1488
1489
        /*
1490
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1491
         * i.e., they contribute nothing to the linear combination
1492
         */
1493
0
        for (i = 0; i < num_points; ++i) {
1494
0
            if (i == num) {
1495
                /* the generator */
1496
0
                p = EC_GROUP_get0_generator(group);
1497
0
                p_scalar = scalar;
1498
0
            } else {
1499
                /* the i^th point */
1500
0
                p = points[i];
1501
0
                p_scalar = scalars[i];
1502
0
            }
1503
0
            if ((p_scalar != NULL) && (p != NULL)) {
1504
                /* reduce scalar to 0 <= scalar < 2^224 */
1505
0
                if ((BN_num_bits(p_scalar) > 224)
1506
0
                    || (BN_is_negative(p_scalar))) {
1507
                    /*
1508
                     * this is an unusual input, and we don't guarantee
1509
                     * constant-timeness
1510
                     */
1511
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1512
0
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1513
0
                        goto err;
1514
0
                    }
1515
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1516
0
                                               secrets[i], sizeof(secrets[i]));
1517
0
                } else {
1518
0
                    num_bytes = BN_bn2lebinpad(p_scalar,
1519
0
                                               secrets[i], sizeof(secrets[i]));
1520
0
                }
1521
0
                if (num_bytes < 0) {
1522
0
                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523
0
                    goto err;
1524
0
                }
1525
                /* precompute multiples */
1526
0
                if ((!BN_to_felem(x_out, p->X)) ||
1527
0
                    (!BN_to_felem(y_out, p->Y)) ||
1528
0
                    (!BN_to_felem(z_out, p->Z)))
1529
0
                    goto err;
1530
0
                felem_assign(pre_comp[i][1][0], x_out);
1531
0
                felem_assign(pre_comp[i][1][1], y_out);
1532
0
                felem_assign(pre_comp[i][1][2], z_out);
1533
0
                for (j = 2; j <= 16; ++j) {
1534
0
                    if (j & 1) {
1535
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1536
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1537
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1538
0
                                  pre_comp[i][j - 1][0],
1539
0
                                  pre_comp[i][j - 1][1],
1540
0
                                  pre_comp[i][j - 1][2]);
1541
0
                    } else {
1542
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1543
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1544
0
                                     pre_comp[i][j / 2][1],
1545
0
                                     pre_comp[i][j / 2][2]);
1546
0
                    }
1547
0
                }
1548
0
            }
1549
0
        }
1550
0
        if (mixed)
1551
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1552
0
    }
1553
1554
    /* the scalar for the generator */
1555
10
    if ((scalar != NULL) && (have_pre_comp)) {
1556
10
        memset(g_secret, 0, sizeof(g_secret));
1557
        /* reduce scalar to 0 <= scalar < 2^224 */
1558
10
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1559
            /*
1560
             * this is an unusual input, and we don't guarantee
1561
             * constant-timeness
1562
             */
1563
3
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1564
0
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1565
0
                goto err;
1566
0
            }
1567
3
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1568
7
        } else {
1569
7
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1570
7
        }
1571
        /* do the multiplication with generator precomputation */
1572
10
        batch_mul(x_out, y_out, z_out,
1573
10
                  (const felem_bytearray(*))secrets, num_points,
1574
10
                  g_secret,
1575
10
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1576
10
    } else {
1577
        /* do the multiplication without generator precomputation */
1578
0
        batch_mul(x_out, y_out, z_out,
1579
0
                  (const felem_bytearray(*))secrets, num_points,
1580
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1581
0
    }
1582
    /* reduce the output to its unique minimal representation */
1583
10
    felem_contract(x_in, x_out);
1584
10
    felem_contract(y_in, y_out);
1585
10
    felem_contract(z_in, z_out);
1586
10
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1587
10
        (!felem_to_BN(z, z_in))) {
1588
0
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1589
0
        goto err;
1590
0
    }
1591
10
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1592
1593
10
 err:
1594
10
    BN_CTX_end(ctx);
1595
10
    EC_POINT_free(generator);
1596
10
    OPENSSL_free(secrets);
1597
10
    OPENSSL_free(pre_comp);
1598
10
    OPENSSL_free(tmp_felems);
1599
10
    return ret;
1600
10
}
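ec_GFp_nistp224_points_mul is not called directly by applications: it is installed as the group method's point-multiplication routine and reached through the public EC API. A minimal usage sketch follows (hedged: p224_mul_demo is an illustrative name, error handling is abbreviated, and a NID_secp224r1 group only dispatches here on builds compiled without OPENSSL_NO_EC_NISTP_64_GCC_128, per the guard at the top of the file):

    #include <openssl/bn.h>
    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    /* Sketch: compute r = n*G on P-224 through the public API. */
    int p224_mul_demo(void)
    {
        int ok = 0;
        EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp224r1);
        EC_POINT *r = NULL;
        BIGNUM *n = NULL;
        BN_CTX *ctx = BN_CTX_new();

        if (group == NULL || ctx == NULL)
            goto end;
        r = EC_POINT_new(group);
        n = BN_new();
        if (r == NULL || n == NULL || !BN_set_word(n, 0x1234567))
            goto end;
        /* r = n*generator; a non-NULL point/scalar pair computes n*G + m*Q */
        if (!EC_POINT_mul(group, r, n, NULL, NULL, ctx))
            goto end;
        ok = 1;
     end:
        BN_free(n);
        EC_POINT_free(r);
        EC_GROUP_free(group);
        BN_CTX_free(ctx);
        return ok;
    }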
1601
1602
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1603
0
{
1604
0
    int ret = 0;
1605
0
    NISTP224_PRE_COMP *pre = NULL;
1606
0
    int i, j;
1607
0
    BN_CTX *new_ctx = NULL;
1608
0
    BIGNUM *x, *y;
1609
0
    EC_POINT *generator = NULL;
1610
0
    felem tmp_felems[32];
1611
1612
    /* throw away old precomputation */
1613
0
    EC_pre_comp_free(group);
1614
0
    if (ctx == NULL)
1615
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1616
0
            return 0;
1617
0
    BN_CTX_start(ctx);
1618
0
    x = BN_CTX_get(ctx);
1619
0
    y = BN_CTX_get(ctx);
1620
0
    if (y == NULL)
1621
0
        goto err;
1622
    /* get the generator */
1623
0
    if (group->generator == NULL)
1624
0
        goto err;
1625
0
    generator = EC_POINT_new(group);
1626
0
    if (generator == NULL)
1627
0
        goto err;
1628
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1629
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1630
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1631
0
        goto err;
1632
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1633
0
        goto err;
1634
    /*
1635
     * if the generator is the standard one, use built-in precomputation
1636
     */
1637
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1638
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1639
0
        goto done;
1640
0
    }
1641
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1642
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1643
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1644
0
        goto err;
1645
    /*
1646
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1647
     * 2^140*G, 2^196*G for the second one
1648
     */
1649
0
    for (i = 1; i <= 8; i <<= 1) {
1650
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1651
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1652
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1653
0
        for (j = 0; j < 27; ++j) {
1654
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1655
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1656
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1657
0
        }
1658
0
        if (i == 8)
1659
0
            break;
1660
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1661
0
                     pre->g_pre_comp[0][2 * i][1],
1662
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1663
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1664
0
        for (j = 0; j < 27; ++j) {
1665
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1666
0
                         pre->g_pre_comp[0][2 * i][1],
1667
0
                         pre->g_pre_comp[0][2 * i][2],
1668
0
                         pre->g_pre_comp[0][2 * i][0],
1669
0
                         pre->g_pre_comp[0][2 * i][1],
1670
0
                         pre->g_pre_comp[0][2 * i][2]);
1671
0
        }
1672
0
    }
1673
0
    for (i = 0; i < 2; i++) {
1674
        /* g_pre_comp[i][0] is the point at infinity */
1675
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1676
        /* the remaining multiples */
1677
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1678
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1679
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1680
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1681
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1682
0
                  pre->g_pre_comp[i][2][2]);
1683
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1684
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1685
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1686
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1687
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688
0
                  pre->g_pre_comp[i][2][2]);
1689
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1690
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1691
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1692
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1694
0
                  pre->g_pre_comp[i][4][2]);
1695
        /*
1696
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1697
         */
1698
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1699
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1700
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
0
        for (j = 1; j < 8; ++j) {
1704
            /* odd multiples: add G resp. 2^28*G */
1705
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1706
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1707
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1708
0
                      pre->g_pre_comp[i][2 * j][0],
1709
0
                      pre->g_pre_comp[i][2 * j][1],
1710
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1711
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1712
0
                      pre->g_pre_comp[i][1][2]);
1713
0
        }
1714
0
    }
1715
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1716
1717
0
 done:
1718
0
    SETPRECOMP(group, nistp224, pre);
1719
0
    pre = NULL;
1720
0
    ret = 1;
1721
0
 err:
1722
0
    BN_CTX_end(ctx);
1723
0
    EC_POINT_free(generator);
1724
0
    BN_CTX_free(new_ctx);
1725
0
    EC_nistp224_pre_comp_free(pre);
1726
0
    return ret;
1727
0
}
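The doubling and addition loops above fill two 16-entry comb tables: writing an index j by its bits as j = b0 + 2*b1 + 4*b2 + 8*b3, g_pre_comp[0][j] holds b0*G + b1*2^56*G + b2*2^112*G + b3*2^168*G, and g_pre_comp[1][j] holds 2^28 times that combination, matching the "resp. 2^28*G" comments; these are the g_pre_comp tables consumed by batch_mul in ec_GFp_nistp224_points_mul. Applications trigger the precomputation through the generic EC_GROUP interface, sketched below (p224_precompute_demo is an illustrative name; error handling is abbreviated):

    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    /* Sketch: build and check generator precomputation for P-224. */
    int p224_precompute_demo(void)
    {
        int ok = 0;
        EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp224r1);

        if (group == NULL)
            return 0;
        /* A NULL BN_CTX is allowed; the implementation allocates its own. */
        if (EC_GROUP_precompute_mult(group, NULL))
            ok = EC_GROUP_have_precompute_mult(group);
        EC_GROUP_free(group);
        return ok;
    }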
1728
1729
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1730
0
{
1731
0
    return HAVEPRECOMP(group, nistp224);
1732
0
}
1733
1734
#endif