Coverage Report

Created: 2023-09-25 06:45

/src/openssl111/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the OpenSSL license (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28
 *
29
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30
 * and Adam Langley's public domain 64-bit C implementation of curve25519
31
 */
32
33
#include <openssl/opensslconf.h>
34
#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35
NON_EMPTY_TRANSLATION_UNIT
36
#else
37
38
# include <stdint.h>
39
# include <string.h>
40
# include <openssl/err.h>
41
# include "ec_local.h"
42
43
# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44
  /* even with gcc, the typedef won't work for 32-bit platforms */
45
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
46
                                 * platforms */
47
# else
48
#  error "Your compiler doesn't appear to support 128-bit integer types"
49
# endif
50
51
typedef uint8_t u8;
52
typedef uint64_t u64;
53
54
/******************************************************************************/
55
/*-
56
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57
 *
58
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59
 * using 64-bit coefficients called 'limbs',
60
 * and sometimes (for multiplication results) as
61
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62
 * using 128-bit coefficients called 'widelimbs'.
63
 * A 4-limb representation is an 'felem';
64
 * a 7-widelimb representation is a 'widefelem'.
65
 * Even within felems, bits of adjacent limbs overlap, and we don't always
66
 * reduce the representations: we ensure that inputs to each felem
67
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68
 * and fit into a 128-bit word without overflow. The coefficients are then
69
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70
 * We only reduce to the unique minimal representation at the end of the
71
 * computation.
72
 */
73
74
typedef uint64_t limb;
75
typedef uint64_t limb_aX __attribute((__aligned__(1)));
76
typedef uint128_t widelimb;
77
78
typedef limb felem[4];
79
typedef widelimb widefelem[7];
80
81
/*
82
 * Field element represented as a byte array. 28*8 = 224 bits is also the
83
 * group order size for the elliptic curve, and we also use this type for
84
 * scalars for point multiplication.
85
 */
86
typedef u8 felem_bytearray[28];
87
88
static const felem_bytearray nistp224_curve_params[5] = {
89
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
90
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
91
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
92
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
93
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
95
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
96
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
97
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
98
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
99
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
100
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
101
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
102
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
103
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104
};
105
106
/*-
107
 * Precomputed multiples of the standard generator
108
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
109
 * (0 for the point at infinity).
110
 * For each field element, slice a_0 is word 0, etc.
111
 *
112
 * The table has 2 * 16 elements, starting with the following:
113
 * index | bits    | point
114
 * ------+---------+------------------------------
115
 *     0 | 0 0 0 0 | 0G
116
 *     1 | 0 0 0 1 | 1G
117
 *     2 | 0 0 1 0 | 2^56G
118
 *     3 | 0 0 1 1 | (2^56 + 1)G
119
 *     4 | 0 1 0 0 | 2^112G
120
 *     5 | 0 1 0 1 | (2^112 + 1)G
121
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
122
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
123
 *     8 | 1 0 0 0 | 2^168G
124
 *     9 | 1 0 0 1 | (2^168 + 1)G
125
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
126
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
127
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
128
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
129
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
130
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
131
 * followed by a copy of this with each element multiplied by 2^28.
132
 *
133
 * The reason for this is so that we can clock bits into four different
134
 * locations when doing simple scalar multiplies against the base point,
135
 * and then another four locations using the second 16 elements.
136
 */
137
static const felem gmul[2][16][3] = {
138
{{{0, 0, 0, 0},
139
  {0, 0, 0, 0},
140
  {0, 0, 0, 0}},
141
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
142
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143
  {1, 0, 0, 0}},
144
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146
  {1, 0, 0, 0}},
147
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
148
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149
  {1, 0, 0, 0}},
150
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
151
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152
  {1, 0, 0, 0}},
153
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
154
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155
  {1, 0, 0, 0}},
156
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
157
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158
  {1, 0, 0, 0}},
159
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
160
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161
  {1, 0, 0, 0}},
162
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
163
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164
  {1, 0, 0, 0}},
165
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
166
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167
  {1, 0, 0, 0}},
168
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
169
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170
  {1, 0, 0, 0}},
171
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
172
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173
  {1, 0, 0, 0}},
174
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
175
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176
  {1, 0, 0, 0}},
177
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
178
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179
  {1, 0, 0, 0}},
180
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
181
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182
  {1, 0, 0, 0}},
183
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
184
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
185
  {1, 0, 0, 0}}},
186
{{{0, 0, 0, 0},
187
  {0, 0, 0, 0},
188
  {0, 0, 0, 0}},
189
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
190
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191
  {1, 0, 0, 0}},
192
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
193
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194
  {1, 0, 0, 0}},
195
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
196
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197
  {1, 0, 0, 0}},
198
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
199
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200
  {1, 0, 0, 0}},
201
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
202
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203
  {1, 0, 0, 0}},
204
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
205
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206
  {1, 0, 0, 0}},
207
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
208
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209
  {1, 0, 0, 0}},
210
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
211
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212
  {1, 0, 0, 0}},
213
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
214
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215
  {1, 0, 0, 0}},
216
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
217
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218
  {1, 0, 0, 0}},
219
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
220
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221
  {1, 0, 0, 0}},
222
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
223
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224
  {1, 0, 0, 0}},
225
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
226
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227
  {1, 0, 0, 0}},
228
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
229
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230
  {1, 0, 0, 0}},
231
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
232
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
233
  {1, 0, 0, 0}}}
234
};
235
236
/* Precomputation for the group generator. */
237
struct nistp224_pre_comp_st {
238
    felem g_pre_comp[2][16][3];
239
    CRYPTO_REF_COUNT references;
240
    CRYPTO_RWLOCK *lock;
241
};
242
243
const EC_METHOD *EC_GFp_nistp224_method(void)
244
23.8k
{
245
23.8k
    static const EC_METHOD ret = {
246
23.8k
        EC_FLAGS_DEFAULT_OCT,
247
23.8k
        NID_X9_62_prime_field,
248
23.8k
        ec_GFp_nistp224_group_init,
249
23.8k
        ec_GFp_simple_group_finish,
250
23.8k
        ec_GFp_simple_group_clear_finish,
251
23.8k
        ec_GFp_nist_group_copy,
252
23.8k
        ec_GFp_nistp224_group_set_curve,
253
23.8k
        ec_GFp_simple_group_get_curve,
254
23.8k
        ec_GFp_simple_group_get_degree,
255
23.8k
        ec_group_simple_order_bits,
256
23.8k
        ec_GFp_simple_group_check_discriminant,
257
23.8k
        ec_GFp_simple_point_init,
258
23.8k
        ec_GFp_simple_point_finish,
259
23.8k
        ec_GFp_simple_point_clear_finish,
260
23.8k
        ec_GFp_simple_point_copy,
261
23.8k
        ec_GFp_simple_point_set_to_infinity,
262
23.8k
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
263
23.8k
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
264
23.8k
        ec_GFp_simple_point_set_affine_coordinates,
265
23.8k
        ec_GFp_nistp224_point_get_affine_coordinates,
266
23.8k
        0 /* point_set_compressed_coordinates */ ,
267
23.8k
        0 /* point2oct */ ,
268
23.8k
        0 /* oct2point */ ,
269
23.8k
        ec_GFp_simple_add,
270
23.8k
        ec_GFp_simple_dbl,
271
23.8k
        ec_GFp_simple_invert,
272
23.8k
        ec_GFp_simple_is_at_infinity,
273
23.8k
        ec_GFp_simple_is_on_curve,
274
23.8k
        ec_GFp_simple_cmp,
275
23.8k
        ec_GFp_simple_make_affine,
276
23.8k
        ec_GFp_simple_points_make_affine,
277
23.8k
        ec_GFp_nistp224_points_mul,
278
23.8k
        ec_GFp_nistp224_precompute_mult,
279
23.8k
        ec_GFp_nistp224_have_precompute_mult,
280
23.8k
        ec_GFp_nist_field_mul,
281
23.8k
        ec_GFp_nist_field_sqr,
282
23.8k
        0 /* field_div */ ,
283
23.8k
        ec_GFp_simple_field_inv,
284
23.8k
        0 /* field_encode */ ,
285
23.8k
        0 /* field_decode */ ,
286
23.8k
        0,                      /* field_set_to_one */
287
23.8k
        ec_key_simple_priv2oct,
288
23.8k
        ec_key_simple_oct2priv,
289
23.8k
        0, /* set private */
290
23.8k
        ec_key_simple_generate_key,
291
23.8k
        ec_key_simple_check_key,
292
23.8k
        ec_key_simple_generate_public_key,
293
23.8k
        0, /* keycopy */
294
23.8k
        0, /* keyfinish */
295
23.8k
        ecdh_simple_compute_key,
296
23.8k
        0, /* field_inverse_mod_ord */
297
23.8k
        0, /* blind_coordinates */
298
23.8k
        0, /* ladder_pre */
299
23.8k
        0, /* ladder_step */
300
23.8k
        0  /* ladder_post */
301
23.8k
    };
302
303
23.8k
    return &ret;
304
23.8k
}
305
306
/*
307
 * Helper functions to convert field elements to/from internal representation
308
 */
309
static void bin28_to_felem(felem out, const u8 in[28])
310
2.85k
{
311
2.85k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
312
2.85k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
313
2.85k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
314
2.85k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
315
2.85k
}
316
317
static void felem_to_bin28(u8 out[28], const felem in)
318
3.37k
{
319
3.37k
    unsigned i;
320
26.9k
    for (i = 0; i < 7; ++i) {
321
23.5k
        out[i] = in[0] >> (8 * i);
322
23.5k
        out[i + 7] = in[1] >> (8 * i);
323
23.5k
        out[i + 14] = in[2] >> (8 * i);
324
23.5k
        out[i + 21] = in[3] >> (8 * i);
325
23.5k
    }
326
3.37k
}
327
328
/* From OpenSSL BIGNUM to internal representation */
329
static int BN_to_felem(felem out, const BIGNUM *bn)
330
2.85k
{
331
2.85k
    felem_bytearray b_out;
332
2.85k
    int num_bytes;
333
334
2.85k
    if (BN_is_negative(bn)) {
335
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
336
0
        return 0;
337
0
    }
338
2.85k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
339
2.85k
    if (num_bytes < 0) {
340
0
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
341
0
        return 0;
342
0
    }
343
2.85k
    bin28_to_felem(out, b_out);
344
2.85k
    return 1;
345
2.85k
}
346
347
/* From internal representation to OpenSSL BIGNUM */
348
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
349
3.37k
{
350
3.37k
    felem_bytearray b_out;
351
3.37k
    felem_to_bin28(b_out, in);
352
3.37k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
353
3.37k
}
354
355
/******************************************************************************/
356
/*-
357
 *                              FIELD OPERATIONS
358
 *
359
 * Field operations, using the internal representation of field elements.
360
 * NB! These operations are specific to our point multiplication and cannot be
361
 * expected to be correct in general - e.g., multiplication with a large scalar
362
 * will cause an overflow.
363
 *
364
 */
365
366
static void felem_one(felem out)
367
0
{
368
0
    out[0] = 1;
369
0
    out[1] = 0;
370
0
    out[2] = 0;
371
0
    out[3] = 0;
372
0
}
373
374
static void felem_assign(felem out, const felem in)
375
279k
{
376
279k
    out[0] = in[0];
377
279k
    out[1] = in[1];
378
279k
    out[2] = in[2];
379
279k
    out[3] = in[3];
380
279k
}
381
382
/* Sum two field elements: out += in */
383
static void felem_sum(felem out, const felem in)
384
109k
{
385
109k
    out[0] += in[0];
386
109k
    out[1] += in[1];
387
109k
    out[2] += in[2];
388
109k
    out[3] += in[3];
389
109k
}
390
391
/* Subtract field elements: out -= in */
392
/* Assumes in[i] < 2^57 */
393
static void felem_diff(felem out, const felem in)
394
92.1k
{
395
92.1k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
396
92.1k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
397
92.1k
    static const limb two58m42m2 = (((limb) 1) << 58) -
398
92.1k
        (((limb) 1) << 42) - (((limb) 1) << 2);
399
400
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
401
92.1k
    out[0] += two58p2;
402
92.1k
    out[1] += two58m42m2;
403
92.1k
    out[2] += two58m2;
404
92.1k
    out[3] += two58m2;
405
406
92.1k
    out[0] -= in[0];
407
92.1k
    out[1] -= in[1];
408
92.1k
    out[2] -= in[2];
409
92.1k
    out[3] -= in[3];
410
92.1k
}
411
412
/* Subtract in unreduced 128-bit mode: out -= in */
413
/* Assumes in[i] < 2^119 */
414
static void widefelem_diff(widefelem out, const widefelem in)
415
55.5k
{
416
55.5k
    static const widelimb two120 = ((widelimb) 1) << 120;
417
55.5k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
418
55.5k
        (((widelimb) 1) << 64);
419
55.5k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
420
55.5k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
421
422
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
423
55.5k
    out[0] += two120;
424
55.5k
    out[1] += two120m64;
425
55.5k
    out[2] += two120m64;
426
55.5k
    out[3] += two120;
427
55.5k
    out[4] += two120m104m64;
428
55.5k
    out[5] += two120m64;
429
55.5k
    out[6] += two120m64;
430
431
55.5k
    out[0] -= in[0];
432
55.5k
    out[1] -= in[1];
433
55.5k
    out[2] -= in[2];
434
55.5k
    out[3] -= in[3];
435
55.5k
    out[4] -= in[4];
436
55.5k
    out[5] -= in[5];
437
55.5k
    out[6] -= in[6];
438
55.5k
}
439
440
/* Subtract in mixed mode: out128 -= in64 */
441
/* in[i] < 2^63 */
442
static void felem_diff_128_64(widefelem out, const felem in)
443
155k
{
444
155k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
445
155k
        (((widelimb) 1) << 8);
446
155k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
447
155k
        (((widelimb) 1) << 8);
448
155k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
449
155k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
450
451
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
452
155k
    out[0] += two64p8;
453
155k
    out[1] += two64m48m8;
454
155k
    out[2] += two64m8;
455
155k
    out[3] += two64m8;
456
457
155k
    out[0] -= in[0];
458
155k
    out[1] -= in[1];
459
155k
    out[2] -= in[2];
460
155k
    out[3] -= in[3];
461
155k
}
462
463
/*
464
 * Multiply a field element by a scalar: out = out * scalar The scalars we
465
 * actually use are small, so results fit without overflow
466
 */
467
static void felem_scalar(felem out, const limb scalar)
468
128k
{
469
128k
    out[0] *= scalar;
470
128k
    out[1] *= scalar;
471
128k
    out[2] *= scalar;
472
128k
    out[3] *= scalar;
473
128k
}
474
475
/*
476
 * Multiply an unreduced field element by a scalar: out = out * scalar The
477
 * scalars we actually use are small, so results fit without overflow
478
 */
479
static void widefelem_scalar(widefelem out, const widelimb scalar)
480
36.5k
{
481
36.5k
    out[0] *= scalar;
482
36.5k
    out[1] *= scalar;
483
36.5k
    out[2] *= scalar;
484
36.5k
    out[3] *= scalar;
485
36.5k
    out[4] *= scalar;
486
36.5k
    out[5] *= scalar;
487
36.5k
    out[6] *= scalar;
488
36.5k
}
489
490
/* Square a field element: out = in^2 */
491
static void felem_square(widefelem out, const felem in)
492
429k
{
493
429k
    limb tmp0, tmp1, tmp2;
494
429k
    tmp0 = 2 * in[0];
495
429k
    tmp1 = 2 * in[1];
496
429k
    tmp2 = 2 * in[2];
497
429k
    out[0] = ((widelimb) in[0]) * in[0];
498
429k
    out[1] = ((widelimb) in[0]) * tmp1;
499
429k
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500
429k
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
501
429k
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
502
429k
    out[5] = ((widelimb) in[3]) * tmp2;
503
429k
    out[6] = ((widelimb) in[3]) * in[3];
504
429k
}
505
506
/* Multiply two field elements: out = in1 * in2 */
507
static void felem_mul(widefelem out, const felem in1, const felem in2)
508
300k
{
509
300k
    out[0] = ((widelimb) in1[0]) * in2[0];
510
300k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
511
300k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
512
300k
             ((widelimb) in1[2]) * in2[0];
513
300k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
514
300k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
515
300k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
516
300k
             ((widelimb) in1[3]) * in2[1];
517
300k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
518
300k
    out[6] = ((widelimb) in1[3]) * in2[3];
519
300k
}
520
521
/*-
522
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
523
 * Requires in[i] < 2^126,
524
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525
static void felem_reduce(felem out, const widefelem in)
526
681k
{
527
681k
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
528
681k
        (((widelimb) 1) << 15);
529
681k
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
530
681k
        (((widelimb) 1) << 71);
531
681k
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532
681k
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
533
681k
    widelimb output[5];
534
535
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536
681k
    output[0] = in[0] + two127p15;
537
681k
    output[1] = in[1] + two127m71m55;
538
681k
    output[2] = in[2] + two127m71;
539
681k
    output[3] = in[3];
540
681k
    output[4] = in[4];
541
542
    /* Eliminate in[4], in[5], in[6] */
543
681k
    output[4] += in[6] >> 16;
544
681k
    output[3] += (in[6] & 0xffff) << 40;
545
681k
    output[2] -= in[6];
546
547
681k
    output[3] += in[5] >> 16;
548
681k
    output[2] += (in[5] & 0xffff) << 40;
549
681k
    output[1] -= in[5];
550
551
681k
    output[2] += output[4] >> 16;
552
681k
    output[1] += (output[4] & 0xffff) << 40;
553
681k
    output[0] -= output[4];
554
555
    /* Carry 2 -> 3 -> 4 */
556
681k
    output[3] += output[2] >> 56;
557
681k
    output[2] &= 0x00ffffffffffffff;
558
559
681k
    output[4] = output[3] >> 56;
560
681k
    output[3] &= 0x00ffffffffffffff;
561
562
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
563
564
    /* Eliminate output[4] */
565
681k
    output[2] += output[4] >> 16;
566
    /* output[2] < 2^56 + 2^56 = 2^57 */
567
681k
    output[1] += (output[4] & 0xffff) << 40;
568
681k
    output[0] -= output[4];
569
570
    /* Carry 0 -> 1 -> 2 -> 3 */
571
681k
    output[1] += output[0] >> 56;
572
681k
    out[0] = output[0] & 0x00ffffffffffffff;
573
574
681k
    output[2] += output[1] >> 56;
575
    /* output[2] < 2^57 + 2^72 */
576
681k
    out[1] = output[1] & 0x00ffffffffffffff;
577
681k
    output[3] += output[2] >> 56;
578
    /* output[3] <= 2^56 + 2^16 */
579
681k
    out[2] = output[2] & 0x00ffffffffffffff;
580
581
    /*-
582
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
583
     * out[3] <= 2^56 + 2^16 (due to final carry),
584
     * so out < 2*p
585
     */
586
681k
    out[3] = output[3];
587
681k
}
588
589
static void felem_square_reduce(felem out, const felem in)
590
0
{
591
0
    widefelem tmp;
592
0
    felem_square(tmp, in);
593
0
    felem_reduce(out, tmp);
594
0
}
595
596
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
597
0
{
598
0
    widefelem tmp;
599
0
    felem_mul(tmp, in1, in2);
600
0
    felem_reduce(out, tmp);
601
0
}
602
603
/*
604
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
605
 * call felem_reduce first)
606
 */
607
static void felem_contract(felem out, const felem in)
608
2.70k
{
609
2.70k
    static const int64_t two56 = ((limb) 1) << 56;
610
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
611
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
612
2.70k
    int64_t tmp[4], a;
613
2.70k
    tmp[0] = in[0];
614
2.70k
    tmp[1] = in[1];
615
2.70k
    tmp[2] = in[2];
616
2.70k
    tmp[3] = in[3];
617
    /* Case 1: a = 1 iff in >= 2^224 */
618
2.70k
    a = (in[3] >> 56);
619
2.70k
    tmp[0] -= a;
620
2.70k
    tmp[1] += a << 40;
621
2.70k
    tmp[3] &= 0x00ffffffffffffff;
622
    /*
623
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
624
     * and the lower part is non-zero
625
     */
626
2.70k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
627
2.70k
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
628
2.70k
    a &= 0x00ffffffffffffff;
629
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
630
2.70k
    a = (a - 1) >> 63;
631
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
632
2.70k
    tmp[3] &= a ^ 0xffffffffffffffff;
633
2.70k
    tmp[2] &= a ^ 0xffffffffffffffff;
634
2.70k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
635
2.70k
    tmp[0] -= 1 & a;
636
637
    /*
638
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
639
     * non-zero, so we only need one step
640
     */
641
2.70k
    a = tmp[0] >> 63;
642
2.70k
    tmp[0] += two56 & a;
643
2.70k
    tmp[1] -= 1 & a;
644
645
    /* carry 1 -> 2 -> 3 */
646
2.70k
    tmp[2] += tmp[1] >> 56;
647
2.70k
    tmp[1] &= 0x00ffffffffffffff;
648
649
2.70k
    tmp[3] += tmp[2] >> 56;
650
2.70k
    tmp[2] &= 0x00ffffffffffffff;
651
652
    /* Now 0 <= out < p */
653
2.70k
    out[0] = tmp[0];
654
2.70k
    out[1] = tmp[1];
655
2.70k
    out[2] = tmp[2];
656
2.70k
    out[3] = tmp[3];
657
2.70k
}
658
659
/*
660
 * Get negative value: out = -in
661
 * Requires in[i] < 2^63,
662
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
663
 */
664
static void felem_neg(felem out, const felem in)
665
6.03k
{
666
6.03k
    widefelem tmp = {0};
667
6.03k
    felem_diff_128_64(tmp, in);
668
6.03k
    felem_reduce(out, tmp);
669
6.03k
}
670
671
/*
672
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
673
 * elements are reduced to in < 2^225, so we only need to check three cases:
674
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
675
 */
676
static limb felem_is_zero(const felem in)
677
76.1k
{
678
76.1k
    limb zero, two224m96p1, two225m97p2;
679
680
76.1k
    zero = in[0] | in[1] | in[2] | in[3];
681
76.1k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
682
76.1k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
683
76.1k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
684
76.1k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
685
76.1k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
686
76.1k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
687
76.1k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
688
76.1k
    return (zero | two224m96p1 | two225m97p2);
689
76.1k
}
690
691
static int felem_is_zero_int(const void *in)
692
0
{
693
0
    return (int)(felem_is_zero(in) & ((limb) 1));
694
0
}
695
696
/* Invert a field element */
697
/* Computation chain copied from djb's code */
698
static void felem_inv(felem out, const felem in)
699
818
{
700
818
    felem ftmp, ftmp2, ftmp3, ftmp4;
701
818
    widefelem tmp;
702
818
    unsigned i;
703
704
818
    felem_square(tmp, in);
705
818
    felem_reduce(ftmp, tmp);    /* 2 */
706
818
    felem_mul(tmp, in, ftmp);
707
818
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
708
818
    felem_square(tmp, ftmp);
709
818
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
710
818
    felem_mul(tmp, in, ftmp);
711
818
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
712
818
    felem_square(tmp, ftmp);
713
818
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
714
818
    felem_square(tmp, ftmp2);
715
818
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
716
818
    felem_square(tmp, ftmp2);
717
818
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
718
818
    felem_mul(tmp, ftmp2, ftmp);
719
818
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
720
818
    felem_square(tmp, ftmp);
721
818
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
722
4.90k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
723
4.09k
        felem_square(tmp, ftmp2);
724
4.09k
        felem_reduce(ftmp2, tmp);
725
4.09k
    }
726
818
    felem_mul(tmp, ftmp2, ftmp);
727
818
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
728
818
    felem_square(tmp, ftmp2);
729
818
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
730
9.81k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
731
8.99k
        felem_square(tmp, ftmp3);
732
8.99k
        felem_reduce(ftmp3, tmp);
733
8.99k
    }
734
818
    felem_mul(tmp, ftmp3, ftmp2);
735
818
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
736
818
    felem_square(tmp, ftmp2);
737
818
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
738
19.6k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
739
18.8k
        felem_square(tmp, ftmp3);
740
18.8k
        felem_reduce(ftmp3, tmp);
741
18.8k
    }
742
818
    felem_mul(tmp, ftmp3, ftmp2);
743
818
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
744
818
    felem_square(tmp, ftmp3);
745
818
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
746
39.2k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
747
38.4k
        felem_square(tmp, ftmp4);
748
38.4k
        felem_reduce(ftmp4, tmp);
749
38.4k
    }
750
818
    felem_mul(tmp, ftmp3, ftmp4);
751
818
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
752
818
    felem_square(tmp, ftmp3);
753
818
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
754
19.6k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
755
18.8k
        felem_square(tmp, ftmp4);
756
18.8k
        felem_reduce(ftmp4, tmp);
757
18.8k
    }
758
818
    felem_mul(tmp, ftmp2, ftmp4);
759
818
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
760
5.72k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
761
4.90k
        felem_square(tmp, ftmp2);
762
4.90k
        felem_reduce(ftmp2, tmp);
763
4.90k
    }
764
818
    felem_mul(tmp, ftmp2, ftmp);
765
818
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
766
818
    felem_square(tmp, ftmp);
767
818
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
768
818
    felem_mul(tmp, ftmp, in);
769
818
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
770
80.1k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
771
79.3k
        felem_square(tmp, ftmp);
772
79.3k
        felem_reduce(ftmp, tmp);
773
79.3k
    }
774
818
    felem_mul(tmp, ftmp, ftmp3);
775
818
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
776
818
}
777
778
/*
779
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
780
 * out to itself.
781
 */
782
static void copy_conditional(felem out, const felem in, limb icopy)
783
120k
{
784
120k
    unsigned i;
785
    /*
786
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
787
     */
788
120k
    const limb copy = -icopy;
789
601k
    for (i = 0; i < 4; ++i) {
790
481k
        const limb tmp = copy & (in[i] ^ out[i]);
791
481k
        out[i] ^= tmp;
792
481k
    }
793
120k
}
794
795
/******************************************************************************/
796
/*-
797
 *                       ELLIPTIC CURVE POINT OPERATIONS
798
 *
799
 * Points are represented in Jacobian projective coordinates:
800
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
801
 * or to the point at infinity if Z == 0.
802
 *
803
 */
804
805
/*-
806
 * Double an elliptic curve point:
807
 * (X', Y', Z') = 2 * (X, Y, Z), where
808
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
809
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
810
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
811
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
812
 * while x_out == y_in is not (maybe this works, but it's not tested).
813
 */
814
static void
815
point_double(felem x_out, felem y_out, felem z_out,
816
             const felem x_in, const felem y_in, const felem z_in)
817
36.5k
{
818
36.5k
    widefelem tmp, tmp2;
819
36.5k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
820
821
36.5k
    felem_assign(ftmp, x_in);
822
36.5k
    felem_assign(ftmp2, x_in);
823
824
    /* delta = z^2 */
825
36.5k
    felem_square(tmp, z_in);
826
36.5k
    felem_reduce(delta, tmp);
827
828
    /* gamma = y^2 */
829
36.5k
    felem_square(tmp, y_in);
830
36.5k
    felem_reduce(gamma, tmp);
831
832
    /* beta = x*gamma */
833
36.5k
    felem_mul(tmp, x_in, gamma);
834
36.5k
    felem_reduce(beta, tmp);
835
836
    /* alpha = 3*(x-delta)*(x+delta) */
837
36.5k
    felem_diff(ftmp, delta);
838
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
839
36.5k
    felem_sum(ftmp2, delta);
840
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
841
36.5k
    felem_scalar(ftmp2, 3);
842
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
843
36.5k
    felem_mul(tmp, ftmp, ftmp2);
844
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
845
36.5k
    felem_reduce(alpha, tmp);
846
847
    /* x' = alpha^2 - 8*beta */
848
36.5k
    felem_square(tmp, alpha);
849
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
850
36.5k
    felem_assign(ftmp, beta);
851
36.5k
    felem_scalar(ftmp, 8);
852
    /* ftmp[i] < 8 * 2^57 = 2^60 */
853
36.5k
    felem_diff_128_64(tmp, ftmp);
854
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
855
36.5k
    felem_reduce(x_out, tmp);
856
857
    /* z' = (y + z)^2 - gamma - delta */
858
36.5k
    felem_sum(delta, gamma);
859
    /* delta[i] < 2^57 + 2^57 = 2^58 */
860
36.5k
    felem_assign(ftmp, y_in);
861
36.5k
    felem_sum(ftmp, z_in);
862
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
863
36.5k
    felem_square(tmp, ftmp);
864
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
865
36.5k
    felem_diff_128_64(tmp, delta);
866
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
867
36.5k
    felem_reduce(z_out, tmp);
868
869
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
870
36.5k
    felem_scalar(beta, 4);
871
    /* beta[i] < 4 * 2^57 = 2^59 */
872
36.5k
    felem_diff(beta, x_out);
873
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
874
36.5k
    felem_mul(tmp, alpha, beta);
875
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
876
36.5k
    felem_square(tmp2, gamma);
877
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
878
36.5k
    widefelem_scalar(tmp2, 8);
879
    /* tmp2[i] < 8 * 2^116 = 2^119 */
880
36.5k
    widefelem_diff(tmp, tmp2);
881
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
882
36.5k
    felem_reduce(y_out, tmp);
883
36.5k
}
884
885
/*-
886
 * Add two elliptic curve points:
887
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
888
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
889
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
890
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
891
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
892
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
893
 *
894
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
895
 */
896
897
/*
898
 * This function is not entirely constant-time: it includes a branch for
899
 * checking whether the two input points are equal, (while not equal to the
900
 * point at infinity). This case never happens during single point
901
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
902
 */
903
static void point_add(felem x3, felem y3, felem z3,
904
                      const felem x1, const felem y1, const felem z1,
905
                      const int mixed, const felem x2, const felem y2,
906
                      const felem z2)
907
19.0k
{
908
19.0k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
909
19.0k
    widefelem tmp, tmp2;
910
19.0k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
911
19.0k
    limb points_equal;
912
913
19.0k
    if (!mixed) {
914
        /* ftmp2 = z2^2 */
915
6.83k
        felem_square(tmp, z2);
916
6.83k
        felem_reduce(ftmp2, tmp);
917
918
        /* ftmp4 = z2^3 */
919
6.83k
        felem_mul(tmp, ftmp2, z2);
920
6.83k
        felem_reduce(ftmp4, tmp);
921
922
        /* ftmp4 = z2^3*y1 */
923
6.83k
        felem_mul(tmp2, ftmp4, y1);
924
6.83k
        felem_reduce(ftmp4, tmp2);
925
926
        /* ftmp2 = z2^2*x1 */
927
6.83k
        felem_mul(tmp2, ftmp2, x1);
928
6.83k
        felem_reduce(ftmp2, tmp2);
929
12.2k
    } else {
930
        /*
931
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
932
         */
933
934
        /* ftmp4 = z2^3*y1 */
935
12.2k
        felem_assign(ftmp4, y1);
936
937
        /* ftmp2 = z2^2*x1 */
938
12.2k
        felem_assign(ftmp2, x1);
939
12.2k
    }
940
941
    /* ftmp = z1^2 */
942
19.0k
    felem_square(tmp, z1);
943
19.0k
    felem_reduce(ftmp, tmp);
944
945
    /* ftmp3 = z1^3 */
946
19.0k
    felem_mul(tmp, ftmp, z1);
947
19.0k
    felem_reduce(ftmp3, tmp);
948
949
    /* tmp = z1^3*y2 */
950
19.0k
    felem_mul(tmp, ftmp3, y2);
951
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
952
953
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
954
19.0k
    felem_diff_128_64(tmp, ftmp4);
955
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
956
19.0k
    felem_reduce(ftmp3, tmp);
957
958
    /* tmp = z1^2*x2 */
959
19.0k
    felem_mul(tmp, ftmp, x2);
960
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
961
962
    /* ftmp = z1^2*x2 - z2^2*x1 */
963
19.0k
    felem_diff_128_64(tmp, ftmp2);
964
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
965
19.0k
    felem_reduce(ftmp, tmp);
966
967
    /*
968
     * The formulae are incorrect if the points are equal, in affine coordinates
969
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
970
     * happens.
971
     *
972
     * We use bitwise operations to avoid potential side-channels introduced by
973
     * the short-circuiting behaviour of boolean operators.
974
     */
975
19.0k
    x_equal = felem_is_zero(ftmp);
976
19.0k
    y_equal = felem_is_zero(ftmp3);
977
    /*
978
     * The special case of either point being the point at infinity (z1 and/or
979
     * z2 are zero), is handled separately later on in this function, so we
980
     * avoid jumping to point_double here in those special cases.
981
     */
982
19.0k
    z1_is_zero = felem_is_zero(z1);
983
19.0k
    z2_is_zero = felem_is_zero(z2);
984
985
    /*
986
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
987
     * specific implementation `felem_is_zero()` returns truth as `0x1`
988
     * (rather than `0xff..ff`).
989
     *
990
     * This implies that `~true` in this implementation becomes
991
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
992
     * the if expression, we mask out only the last bit in the next
993
     * line.
994
     */
995
19.0k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
996
997
19.0k
    if (points_equal) {
998
        /*
999
         * This is obviously not constant-time but, as mentioned before, this
1000
         * case never happens during single point multiplication, so there is no
1001
         * timing leak for ECDH or ECDSA signing.
1002
         */
1003
0
        point_double(x3, y3, z3, x1, y1, z1);
1004
0
        return;
1005
0
    }
1006
1007
    /* ftmp5 = z1*z2 */
1008
19.0k
    if (!mixed) {
1009
6.83k
        felem_mul(tmp, z1, z2);
1010
6.83k
        felem_reduce(ftmp5, tmp);
1011
12.2k
    } else {
1012
        /* special case z2 = 0 is handled later */
1013
12.2k
        felem_assign(ftmp5, z1);
1014
12.2k
    }
1015
1016
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017
19.0k
    felem_mul(tmp, ftmp, ftmp5);
1018
19.0k
    felem_reduce(z_out, tmp);
1019
1020
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021
19.0k
    felem_assign(ftmp5, ftmp);
1022
19.0k
    felem_square(tmp, ftmp);
1023
19.0k
    felem_reduce(ftmp, tmp);
1024
1025
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026
19.0k
    felem_mul(tmp, ftmp, ftmp5);
1027
19.0k
    felem_reduce(ftmp5, tmp);
1028
1029
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030
19.0k
    felem_mul(tmp, ftmp2, ftmp);
1031
19.0k
    felem_reduce(ftmp2, tmp);
1032
1033
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034
19.0k
    felem_mul(tmp, ftmp4, ftmp5);
1035
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036
1037
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038
19.0k
    felem_square(tmp2, ftmp3);
1039
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042
19.0k
    felem_diff_128_64(tmp2, ftmp5);
1043
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044
1045
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046
19.0k
    felem_assign(ftmp5, ftmp2);
1047
19.0k
    felem_scalar(ftmp5, 2);
1048
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049
1050
    /*-
1051
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053
     */
1054
19.0k
    felem_diff_128_64(tmp2, ftmp5);
1055
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056
19.0k
    felem_reduce(x_out, tmp2);
1057
1058
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059
19.0k
    felem_diff(ftmp2, x_out);
1060
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061
1062
    /*
1063
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064
     */
1065
19.0k
    felem_mul(tmp2, ftmp3, ftmp2);
1066
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067
1068
    /*-
1069
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071
     */
1072
19.0k
    widefelem_diff(tmp2, tmp);
1073
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074
19.0k
    felem_reduce(y_out, tmp2);
1075
1076
    /*
1077
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078
     * the point at infinity, so we need to check for this separately
1079
     */
1080
1081
    /*
1082
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1083
     */
1084
19.0k
    copy_conditional(x_out, x2, z1_is_zero);
1085
19.0k
    copy_conditional(x_out, x1, z2_is_zero);
1086
19.0k
    copy_conditional(y_out, y2, z1_is_zero);
1087
19.0k
    copy_conditional(y_out, y1, z2_is_zero);
1088
19.0k
    copy_conditional(z_out, z2, z1_is_zero);
1089
19.0k
    copy_conditional(z_out, z1, z2_is_zero);
1090
19.0k
    felem_assign(x3, x_out);
1091
19.0k
    felem_assign(y3, y_out);
1092
19.0k
    felem_assign(z3, z_out);
1093
19.0k
}
1094
1095
/*
1096
 * select_point selects the |idx|th point from a precomputation table and
1097
 * copies it to out.
1098
 * The pre_comp array argument should be size of |size| argument
1099
 */
1100
static void select_point(const u64 idx, unsigned int size,
1101
                         const felem pre_comp[][3], felem out[3])
1102
18.4k
{
1103
18.4k
    unsigned i, j;
1104
18.4k
    limb *outlimbs = &out[0][0];
1105
1106
18.4k
    memset(out, 0, sizeof(*out) * 3);
1107
319k
    for (i = 0; i < size; i++) {
1108
301k
        const limb *inlimbs = &pre_comp[i][0][0];
1109
301k
        u64 mask = i ^ idx;
1110
301k
        mask |= mask >> 4;
1111
301k
        mask |= mask >> 2;
1112
301k
        mask |= mask >> 1;
1113
301k
        mask &= 1;
1114
301k
        mask--;
1115
3.91M
        for (j = 0; j < 4 * 3; j++)
1116
3.61M
            outlimbs[j] |= inlimbs[j] & mask;
1117
301k
    }
1118
18.4k
}
1119
1120
/* get_bit returns the |i|th bit in |in| */
1121
static char get_bit(const felem_bytearray in, unsigned i)
1122
85.9k
{
1123
85.9k
    if (i >= 224)
1124
268
        return 0;
1125
85.6k
    return (in[i >> 3] >> (i & 7)) & 1;
1126
85.9k
}
1127
1128
/*
1129
 * Interleaved point multiplication using precomputed point multiples: The
1130
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134
 */
1135
static void batch_mul(felem x_out, felem y_out, felem z_out,
1136
                      const felem_bytearray scalars[],
1137
                      const unsigned num_points, const u8 *g_scalar,
1138
                      const int mixed, const felem pre_comp[][17][3],
1139
                      const felem g_pre_comp[2][16][3])
1140
356
{
1141
356
    int i, skip;
1142
356
    unsigned num;
1143
356
    unsigned gen_mul = (g_scalar != NULL);
1144
356
    felem nq[3], tmp[4];
1145
356
    u64 bits;
1146
356
    u8 sign, digit;
1147
1148
    /* set nq to the point at infinity */
1149
356
    memset(nq, 0, sizeof(nq));
1150
1151
    /*
1152
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153
     * of the generator (two in each of the last 28 rounds) and additions of
1154
     * other points multiples (every 5th round).
1155
     */
1156
356
    skip = 1;                   /* save two point operations in the first
1157
                                 * round */
1158
36.1k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159
        /* double */
1160
35.8k
        if (!skip)
1161
35.4k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162
1163
        /* add multiples of the generator */
1164
35.8k
        if (gen_mul && (i <= 27)) {
1165
            /* first, look 28 bits upwards */
1166
6.21k
            bits = get_bit(g_scalar, i + 196) << 3;
1167
6.21k
            bits |= get_bit(g_scalar, i + 140) << 2;
1168
6.21k
            bits |= get_bit(g_scalar, i + 84) << 1;
1169
6.21k
            bits |= get_bit(g_scalar, i + 28);
1170
            /* select the point to add, in constant time */
1171
6.21k
            select_point(bits, 16, g_pre_comp[1], tmp);
1172
1173
6.21k
            if (!skip) {
1174
                /* value 1 below is argument for "mixed" */
1175
5.99k
                point_add(nq[0], nq[1], nq[2],
1176
5.99k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177
5.99k
            } else {
1178
222
                memcpy(nq, tmp, 3 * sizeof(felem));
1179
222
                skip = 0;
1180
222
            }
1181
1182
            /* second, look at the current position */
1183
6.21k
            bits = get_bit(g_scalar, i + 168) << 3;
1184
6.21k
            bits |= get_bit(g_scalar, i + 112) << 2;
1185
6.21k
            bits |= get_bit(g_scalar, i + 56) << 1;
1186
6.21k
            bits |= get_bit(g_scalar, i);
1187
            /* select the point to add, in constant time */
1188
6.21k
            select_point(bits, 16, g_pre_comp[0], tmp);
1189
6.21k
            point_add(nq[0], nq[1], nq[2],
1190
6.21k
                      nq[0], nq[1], nq[2],
1191
6.21k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192
6.21k
        }
1193
1194
        /* do other additions every 5 doublings */
1195
35.8k
        if (num_points && (i % 5 == 0)) {
1196
            /* loop over all scalars */
1197
12.0k
            for (num = 0; num < num_points; ++num) {
1198
6.03k
                bits = get_bit(scalars[num], i + 4) << 5;
1199
6.03k
                bits |= get_bit(scalars[num], i + 3) << 4;
1200
6.03k
                bits |= get_bit(scalars[num], i + 2) << 3;
1201
6.03k
                bits |= get_bit(scalars[num], i + 1) << 2;
1202
6.03k
                bits |= get_bit(scalars[num], i) << 1;
1203
6.03k
                bits |= get_bit(scalars[num], i - 1);
1204
6.03k
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205
1206
                /* select the point to add or subtract */
1207
6.03k
                select_point(digit, 17, pre_comp[num], tmp);
1208
6.03k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209
                                            * point */
1210
6.03k
                copy_conditional(tmp[1], tmp[3], sign);
1211
1212
6.03k
                if (!skip) {
1213
5.89k
                    point_add(nq[0], nq[1], nq[2],
1214
5.89k
                              nq[0], nq[1], nq[2],
1215
5.89k
                              mixed, tmp[0], tmp[1], tmp[2]);
1216
5.89k
                } else {
1217
134
                    memcpy(nq, tmp, 3 * sizeof(felem));
1218
134
                    skip = 0;
1219
134
                }
1220
6.03k
            }
1221
6.03k
        }
1222
35.8k
    }
1223
356
    felem_assign(x_out, nq[0]);
1224
356
    felem_assign(y_out, nq[1]);
1225
356
    felem_assign(z_out, nq[2]);
1226
356
}
1227
1228
/******************************************************************************/
1229
/*
1230
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231
 */
1232
1233
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1234
0
{
1235
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1236
1237
0
    if (!ret) {
1238
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239
0
        return ret;
1240
0
    }
1241
1242
0
    ret->references = 1;
1243
1244
0
    ret->lock = CRYPTO_THREAD_lock_new();
1245
0
    if (ret->lock == NULL) {
1246
0
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1247
0
        OPENSSL_free(ret);
1248
0
        return NULL;
1249
0
    }
1250
0
    return ret;
1251
0
}
1252
1253
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1254
0
{
1255
0
    int i;
1256
0
    if (p != NULL)
1257
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1258
0
    return p;
1259
0
}
1260
1261
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1262
0
{
1263
0
    int i;
1264
1265
0
    if (p == NULL)
1266
0
        return;
1267
1268
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1269
0
    REF_PRINT_COUNT("EC_nistp224", x);
1270
0
    if (i > 0)
1271
0
        return;
1272
0
    REF_ASSERT_ISNT(i < 0);
1273
1274
0
    CRYPTO_THREAD_lock_free(p->lock);
1275
0
    OPENSSL_free(p);
1276
0
}
1277
1278
/******************************************************************************/
1279
/*
1280
 * OPENSSL EC_METHOD FUNCTIONS
1281
 */
1282
1283
int ec_GFp_nistp224_group_init(EC_GROUP *group)
1284
16.2k
{
1285
16.2k
    int ret;
1286
16.2k
    ret = ec_GFp_simple_group_init(group);
1287
16.2k
    group->a_is_minus3 = 1;
1288
16.2k
    return ret;
1289
16.2k
}
1290
1291
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1292
                                    const BIGNUM *a, const BIGNUM *b,
1293
                                    BN_CTX *ctx)
1294
8.15k
{
1295
8.15k
    int ret = 0;
1296
8.15k
    BN_CTX *new_ctx = NULL;
1297
8.15k
    BIGNUM *curve_p, *curve_a, *curve_b;
1298
1299
8.15k
    if (ctx == NULL)
1300
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1301
0
            return 0;
1302
8.15k
    BN_CTX_start(ctx);
1303
8.15k
    curve_p = BN_CTX_get(ctx);
1304
8.15k
    curve_a = BN_CTX_get(ctx);
1305
8.15k
    curve_b = BN_CTX_get(ctx);
1306
8.15k
    if (curve_b == NULL)
1307
0
        goto err;
1308
8.15k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1309
8.15k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1310
8.15k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1311
8.15k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1312
0
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1313
0
              EC_R_WRONG_CURVE_PARAMETERS);
1314
0
        goto err;
1315
0
    }
1316
8.15k
    group->field_mod_func = BN_nist_mod_224;
1317
8.15k
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1318
8.15k
 err:
1319
8.15k
    BN_CTX_end(ctx);
1320
8.15k
    BN_CTX_free(new_ctx);
1321
8.15k
    return ret;
1322
8.15k
}
1323
1324
/*
1325
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1326
 * (X/Z^2, Y/Z^3)
1327
 */
1328
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1329
                                                 const EC_POINT *point,
1330
                                                 BIGNUM *x, BIGNUM *y,
1331
                                                 BN_CTX *ctx)
1332
168
{
1333
168
    felem z1, z2, x_in, y_in, x_out, y_out;
1334
168
    widefelem tmp;
1335
1336
168
    if (EC_POINT_is_at_infinity(group, point)) {
1337
0
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1338
0
              EC_R_POINT_AT_INFINITY);
1339
0
        return 0;
1340
0
    }
1341
168
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1342
168
        (!BN_to_felem(z1, point->Z)))
1343
0
        return 0;
1344
168
    felem_inv(z2, z1);
1345
168
    felem_square(tmp, z2);
1346
168
    felem_reduce(z1, tmp);
1347
168
    felem_mul(tmp, x_in, z1);
1348
168
    felem_reduce(x_in, tmp);
1349
168
    felem_contract(x_out, x_in);
1350
168
    if (x != NULL) {
1351
168
        if (!felem_to_BN(x, x_out)) {
1352
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1353
0
                  ERR_R_BN_LIB);
1354
0
            return 0;
1355
0
        }
1356
168
    }
1357
168
    felem_mul(tmp, z1, z2);
1358
168
    felem_reduce(z1, tmp);
1359
168
    felem_mul(tmp, y_in, z1);
1360
168
    felem_reduce(y_in, tmp);
1361
168
    felem_contract(y_out, y_in);
1362
168
    if (y != NULL) {
1363
168
        if (!felem_to_BN(y, y_out)) {
1364
0
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1365
0
                  ERR_R_BN_LIB);
1366
0
            return 0;
1367
0
        }
1368
168
    }
1369
168
    return 1;
1370
168
}
1371
1372
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373
                               felem tmp_felems[ /* num+1 */ ])
1374
0
{
1375
    /*
1376
     * Runs in constant time, unless an input is the point at infinity (which
1377
     * normally shouldn't happen).
1378
     */
1379
0
    ec_GFp_nistp_points_make_affine_internal(num,
1380
0
                                             points,
1381
0
                                             sizeof(felem),
1382
0
                                             tmp_felems,
1383
0
                                             (void (*)(void *))felem_one,
1384
0
                                             felem_is_zero_int,
1385
0
                                             (void (*)(void *, const void *))
1386
0
                                             felem_assign,
1387
0
                                             (void (*)(void *, const void *))
1388
0
                                             felem_square_reduce, (void (*)
1389
0
                                                                   (void *,
1390
0
                                                                    const void
1391
0
                                                                    *,
1392
0
                                                                    const void
1393
0
                                                                    *))
1394
0
                                             felem_mul_reduce,
1395
0
                                             (void (*)(void *, const void *))
1396
0
                                             felem_inv,
1397
0
                                             (void (*)(void *, const void *))
1398
0
                                             felem_contract);
1399
0
}
1400
1401
/*
1402
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403
 * values Result is stored in r (r can equal one of the inputs).
1404
 */
1405
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406
                               const BIGNUM *scalar, size_t num,
1407
                               const EC_POINT *points[],
1408
                               const BIGNUM *scalars[], BN_CTX *ctx)
1409
6
{
1410
6
    int ret = 0;
1411
6
    int j;
1412
6
    unsigned i;
1413
6
    int mixed = 0;
1414
6
    BIGNUM *x, *y, *z, *tmp_scalar;
1415
6
    felem_bytearray g_secret;
1416
6
    felem_bytearray *secrets = NULL;
1417
6
    felem (*pre_comp)[17][3] = NULL;
1418
6
    felem *tmp_felems = NULL;
1419
6
    int num_bytes;
1420
6
    int have_pre_comp = 0;
1421
6
    size_t num_points = num;
1422
6
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1423
6
    NISTP224_PRE_COMP *pre = NULL;
1424
6
    const felem(*g_pre_comp)[16][3] = NULL;
1425
6
    EC_POINT *generator = NULL;
1426
6
    const EC_POINT *p = NULL;
1427
6
    const BIGNUM *p_scalar = NULL;
1428
1429
6
    BN_CTX_start(ctx);
1430
6
    x = BN_CTX_get(ctx);
1431
6
    y = BN_CTX_get(ctx);
1432
6
    z = BN_CTX_get(ctx);
1433
6
    tmp_scalar = BN_CTX_get(ctx);
1434
6
    if (tmp_scalar == NULL)
1435
0
        goto err;
1436
1437
6
    if (scalar != NULL) {
1438
6
        pre = group->pre_comp.nistp224;
1439
6
        if (pre)
1440
            /* we have precomputation, try to use it */
1441
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442
6
        else
1443
            /* try to use the standard precomputation */
1444
6
            g_pre_comp = &gmul[0];
1445
6
        generator = EC_POINT_new(group);
1446
6
        if (generator == NULL)
1447
0
            goto err;
1448
        /* get the generator from precomputation */
1449
6
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450
6
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451
6
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1453
0
            goto err;
1454
0
        }
1455
6
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1456
6
                                                      generator, x, y, z,
1457
6
                                                      ctx))
1458
0
            goto err;
1459
6
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460
            /* precomputation matches generator */
1461
6
            have_pre_comp = 1;
1462
0
        else
1463
            /*
1464
             * we don't have valid precomputation: treat the generator as a
1465
             * random point
1466
             */
1467
0
            num_points = num_points + 1;
1468
6
    }
1469
1470
6
    if (num_points > 0) {
1471
0
        if (num_points >= 3) {
1472
            /*
1473
             * unless we precompute multiples for just one or two points,
1474
             * converting those into affine form is time well spent
1475
             */
1476
0
            mixed = 1;
1477
0
        }
1478
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480
0
        if (mixed)
1481
0
            tmp_felems =
1482
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483
0
        if ((secrets == NULL) || (pre_comp == NULL)
1484
0
            || (mixed && (tmp_felems == NULL))) {
1485
0
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1486
0
            goto err;
1487
0
        }
1488
1489
        /*
1490
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1491
         * i.e., they contribute nothing to the linear combination
1492
         */
1493
0
        for (i = 0; i < num_points; ++i) {
1494
0
            if (i == num) {
1495
                /* the generator */
1496
0
                p = EC_GROUP_get0_generator(group);
1497
0
                p_scalar = scalar;
1498
0
            } else {
1499
                /* the i^th point */
1500
0
                p = points[i];
1501
0
                p_scalar = scalars[i];
1502
0
            }
1503
0
            if ((p_scalar != NULL) && (p != NULL)) {
1504
                /* reduce scalar to 0 <= scalar < 2^224 */
1505
0
                if ((BN_num_bits(p_scalar) > 224)
1506
0
                    || (BN_is_negative(p_scalar))) {
1507
                    /*
1508
                     * this is an unusual input, and we don't guarantee
1509
                     * constant-timeness
1510
                     */
1511
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1512
0
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1513
0
                        goto err;
1514
0
                    }
1515
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1516
0
                                               secrets[i], sizeof(secrets[i]));
1517
0
                } else {
1518
0
                    num_bytes = BN_bn2lebinpad(p_scalar,
1519
0
                                               secrets[i], sizeof(secrets[i]));
1520
0
                }
1521
0
                if (num_bytes < 0) {
1522
0
                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523
0
                    goto err;
1524
0
                }
1525
                /* precompute multiples */
1526
0
                if ((!BN_to_felem(x_out, p->X)) ||
1527
0
                    (!BN_to_felem(y_out, p->Y)) ||
1528
0
                    (!BN_to_felem(z_out, p->Z)))
1529
0
                    goto err;
1530
0
                felem_assign(pre_comp[i][1][0], x_out);
1531
0
                felem_assign(pre_comp[i][1][1], y_out);
1532
0
                felem_assign(pre_comp[i][1][2], z_out);
1533
0
                for (j = 2; j <= 16; ++j) {
1534
0
                    if (j & 1) {
1535
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1536
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1537
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1538
0
                                  pre_comp[i][j - 1][0],
1539
0
                                  pre_comp[i][j - 1][1],
1540
0
                                  pre_comp[i][j - 1][2]);
1541
0
                    } else {
1542
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1543
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1544
0
                                     pre_comp[i][j / 2][1],
1545
0
                                     pre_comp[i][j / 2][2]);
1546
0
                    }
1547
0
                }
1548
0
            }
1549
0
        }
1550
0
        if (mixed)
1551
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1552
0
    }
1553
1554
    /* the scalar for the generator */
1555
6
    if ((scalar != NULL) && (have_pre_comp)) {
1556
6
        memset(g_secret, 0, sizeof(g_secret));
1557
        /* reduce scalar to 0 <= scalar < 2^224 */
1558
6
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1559
            /*
1560
             * this is an unusual input, and we don't guarantee
1561
             * constant-timeness
1562
             */
1563
2
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1564
0
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1565
0
                goto err;
1566
0
            }
1567
2
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1568
4
        } else {
1569
4
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1570
4
        }
1571
        /* do the multiplication with generator precomputation */
1572
6
        batch_mul(x_out, y_out, z_out,
1573
6
                  (const felem_bytearray(*))secrets, num_points,
1574
6
                  g_secret,
1575
6
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1576
6
    } else {
1577
        /* do the multiplication without generator precomputation */
1578
0
        batch_mul(x_out, y_out, z_out,
1579
0
                  (const felem_bytearray(*))secrets, num_points,
1580
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1581
0
    }
1582
    /* reduce the output to its unique minimal representation */
1583
6
    felem_contract(x_in, x_out);
1584
6
    felem_contract(y_in, y_out);
1585
6
    felem_contract(z_in, z_out);
1586
6
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1587
6
        (!felem_to_BN(z, z_in))) {
1588
0
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1589
0
        goto err;
1590
0
    }
1591
6
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1592
1593
6
 err:
1594
6
    BN_CTX_end(ctx);
1595
6
    EC_POINT_free(generator);
1596
6
    OPENSSL_free(secrets);
1597
6
    OPENSSL_free(pre_comp);
1598
6
    OPENSSL_free(tmp_felems);
1599
6
    return ret;
1600
6
}
1601
1602
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1603
0
{
1604
0
    int ret = 0;
1605
0
    NISTP224_PRE_COMP *pre = NULL;
1606
0
    int i, j;
1607
0
    BN_CTX *new_ctx = NULL;
1608
0
    BIGNUM *x, *y;
1609
0
    EC_POINT *generator = NULL;
1610
0
    felem tmp_felems[32];
1611
1612
    /* throw away old precomputation */
1613
0
    EC_pre_comp_free(group);
1614
0
    if (ctx == NULL)
1615
0
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1616
0
            return 0;
1617
0
    BN_CTX_start(ctx);
1618
0
    x = BN_CTX_get(ctx);
1619
0
    y = BN_CTX_get(ctx);
1620
0
    if (y == NULL)
1621
0
        goto err;
1622
    /* get the generator */
1623
0
    if (group->generator == NULL)
1624
0
        goto err;
1625
0
    generator = EC_POINT_new(group);
1626
0
    if (generator == NULL)
1627
0
        goto err;
1628
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1629
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1630
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1631
0
        goto err;
1632
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1633
0
        goto err;
1634
    /*
1635
     * if the generator is the standard one, use built-in precomputation
1636
     */
1637
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1638
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1639
0
        goto done;
1640
0
    }
1641
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1642
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1643
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1644
0
        goto err;
1645
    /*
1646
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1647
     * 2^140*G, 2^196*G for the second one
1648
     */
1649
0
    for (i = 1; i <= 8; i <<= 1) {
1650
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1651
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1652
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1653
0
        for (j = 0; j < 27; ++j) {
1654
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1655
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1656
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1657
0
        }
1658
0
        if (i == 8)
1659
0
            break;
1660
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1661
0
                     pre->g_pre_comp[0][2 * i][1],
1662
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1663
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1664
0
        for (j = 0; j < 27; ++j) {
1665
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1666
0
                         pre->g_pre_comp[0][2 * i][1],
1667
0
                         pre->g_pre_comp[0][2 * i][2],
1668
0
                         pre->g_pre_comp[0][2 * i][0],
1669
0
                         pre->g_pre_comp[0][2 * i][1],
1670
0
                         pre->g_pre_comp[0][2 * i][2]);
1671
0
        }
1672
0
    }
1673
0
    for (i = 0; i < 2; i++) {
1674
        /* g_pre_comp[i][0] is the point at infinity */
1675
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1676
        /* the remaining multiples */
1677
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1678
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1679
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1680
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1681
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1682
0
                  pre->g_pre_comp[i][2][2]);
1683
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1684
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1685
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1686
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1687
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688
0
                  pre->g_pre_comp[i][2][2]);
1689
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1690
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1691
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1692
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1694
0
                  pre->g_pre_comp[i][4][2]);
1695
        /*
1696
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1697
         */
1698
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1699
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1700
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
0
        for (j = 1; j < 8; ++j) {
1704
            /* odd multiples: add G resp. 2^28*G */
1705
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1706
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1707
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1708
0
                      pre->g_pre_comp[i][2 * j][0],
1709
0
                      pre->g_pre_comp[i][2 * j][1],
1710
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1711
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1712
0
                      pre->g_pre_comp[i][1][2]);
1713
0
        }
1714
0
    }
1715
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1716
1717
0
 done:
1718
0
    SETPRECOMP(group, nistp224, pre);
1719
0
    pre = NULL;
1720
0
    ret = 1;
1721
0
 err:
1722
0
    BN_CTX_end(ctx);
1723
0
    EC_POINT_free(generator);
1724
0
    BN_CTX_free(new_ctx);
1725
0
    EC_nistp224_pre_comp_free(pre);
1726
0
    return ret;
1727
0
}
1728
1729
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1730
0
{
1731
0
    return HAVEPRECOMP(group, nistp224);
1732
0
}
1733
1734
#endif