Coverage Report

Created: 2023-09-25 06:45

/src/openssl30/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
    CRYPTO_RWLOCK *lock;
242
};
243
244
const EC_METHOD *EC_GFp_nistp224_method(void)
245
23.8k
{
246
23.8k
    static const EC_METHOD ret = {
247
23.8k
        EC_FLAGS_DEFAULT_OCT,
248
23.8k
        NID_X9_62_prime_field,
249
23.8k
        ossl_ec_GFp_nistp224_group_init,
250
23.8k
        ossl_ec_GFp_simple_group_finish,
251
23.8k
        ossl_ec_GFp_simple_group_clear_finish,
252
23.8k
        ossl_ec_GFp_nist_group_copy,
253
23.8k
        ossl_ec_GFp_nistp224_group_set_curve,
254
23.8k
        ossl_ec_GFp_simple_group_get_curve,
255
23.8k
        ossl_ec_GFp_simple_group_get_degree,
256
23.8k
        ossl_ec_group_simple_order_bits,
257
23.8k
        ossl_ec_GFp_simple_group_check_discriminant,
258
23.8k
        ossl_ec_GFp_simple_point_init,
259
23.8k
        ossl_ec_GFp_simple_point_finish,
260
23.8k
        ossl_ec_GFp_simple_point_clear_finish,
261
23.8k
        ossl_ec_GFp_simple_point_copy,
262
23.8k
        ossl_ec_GFp_simple_point_set_to_infinity,
263
23.8k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
264
23.8k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265
23.8k
        0 /* point_set_compressed_coordinates */ ,
266
23.8k
        0 /* point2oct */ ,
267
23.8k
        0 /* oct2point */ ,
268
23.8k
        ossl_ec_GFp_simple_add,
269
23.8k
        ossl_ec_GFp_simple_dbl,
270
23.8k
        ossl_ec_GFp_simple_invert,
271
23.8k
        ossl_ec_GFp_simple_is_at_infinity,
272
23.8k
        ossl_ec_GFp_simple_is_on_curve,
273
23.8k
        ossl_ec_GFp_simple_cmp,
274
23.8k
        ossl_ec_GFp_simple_make_affine,
275
23.8k
        ossl_ec_GFp_simple_points_make_affine,
276
23.8k
        ossl_ec_GFp_nistp224_points_mul,
277
23.8k
        ossl_ec_GFp_nistp224_precompute_mult,
278
23.8k
        ossl_ec_GFp_nistp224_have_precompute_mult,
279
23.8k
        ossl_ec_GFp_nist_field_mul,
280
23.8k
        ossl_ec_GFp_nist_field_sqr,
281
23.8k
        0 /* field_div */ ,
282
23.8k
        ossl_ec_GFp_simple_field_inv,
283
23.8k
        0 /* field_encode */ ,
284
23.8k
        0 /* field_decode */ ,
285
23.8k
        0,                      /* field_set_to_one */
286
23.8k
        ossl_ec_key_simple_priv2oct,
287
23.8k
        ossl_ec_key_simple_oct2priv,
288
23.8k
        0, /* set private */
289
23.8k
        ossl_ec_key_simple_generate_key,
290
23.8k
        ossl_ec_key_simple_check_key,
291
23.8k
        ossl_ec_key_simple_generate_public_key,
292
23.8k
        0, /* keycopy */
293
23.8k
        0, /* keyfinish */
294
23.8k
        ossl_ecdh_simple_compute_key,
295
23.8k
        ossl_ecdsa_simple_sign_setup,
296
23.8k
        ossl_ecdsa_simple_sign_sig,
297
23.8k
        ossl_ecdsa_simple_verify_sig,
298
23.8k
        0, /* field_inverse_mod_ord */
299
23.8k
        0, /* blind_coordinates */
300
23.8k
        0, /* ladder_pre */
301
23.8k
        0, /* ladder_step */
302
23.8k
        0  /* ladder_post */
303
23.8k
    };
304
305
23.8k
    return &ret;
306
23.8k
}
307
308
/*
309
 * Helper functions to convert field elements to/from internal representation
310
 */
311
static void bin28_to_felem(felem out, const u8 in[28])
312
2.85k
{
313
2.85k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314
2.85k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315
2.85k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316
2.85k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317
2.85k
}
318
319
static void felem_to_bin28(u8 out[28], const felem in)
320
3.37k
{
321
3.37k
    unsigned i;
322
26.9k
    for (i = 0; i < 7; ++i) {
323
23.5k
        out[i] = in[0] >> (8 * i);
324
23.5k
        out[i + 7] = in[1] >> (8 * i);
325
23.5k
        out[i + 14] = in[2] >> (8 * i);
326
23.5k
        out[i + 21] = in[3] >> (8 * i);
327
23.5k
    }
328
3.37k
}
329
330
/* From OpenSSL BIGNUM to internal representation */
331
static int BN_to_felem(felem out, const BIGNUM *bn)
332
2.85k
{
333
2.85k
    felem_bytearray b_out;
334
2.85k
    int num_bytes;
335
336
2.85k
    if (BN_is_negative(bn)) {
337
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338
0
        return 0;
339
0
    }
340
2.85k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341
2.85k
    if (num_bytes < 0) {
342
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343
0
        return 0;
344
0
    }
345
2.85k
    bin28_to_felem(out, b_out);
346
2.85k
    return 1;
347
2.85k
}
348
349
/* From internal representation to OpenSSL BIGNUM */
350
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351
3.37k
{
352
3.37k
    felem_bytearray b_out;
353
3.37k
    felem_to_bin28(b_out, in);
354
3.37k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
355
3.37k
}
356
357
/******************************************************************************/
358
/*-
359
 *                              FIELD OPERATIONS
360
 *
361
 * Field operations, using the internal representation of field elements.
362
 * NB! These operations are specific to our point multiplication and cannot be
363
 * expected to be correct in general - e.g., multiplication with a large scalar
364
 * will cause an overflow.
365
 *
366
 */
367
368
static void felem_one(felem out)
369
0
{
370
0
    out[0] = 1;
371
0
    out[1] = 0;
372
0
    out[2] = 0;
373
0
    out[3] = 0;
374
0
}
375
376
static void felem_assign(felem out, const felem in)
377
279k
{
378
279k
    out[0] = in[0];
379
279k
    out[1] = in[1];
380
279k
    out[2] = in[2];
381
279k
    out[3] = in[3];
382
279k
}
383
384
/* Sum two field elements: out += in */
385
static void felem_sum(felem out, const felem in)
386
109k
{
387
109k
    out[0] += in[0];
388
109k
    out[1] += in[1];
389
109k
    out[2] += in[2];
390
109k
    out[3] += in[3];
391
109k
}
392
393
/* Subtract field elements: out -= in */
394
/* Assumes in[i] < 2^57 */
395
static void felem_diff(felem out, const felem in)
396
92.1k
{
397
92.1k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
398
92.1k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
399
92.1k
    static const limb two58m42m2 = (((limb) 1) << 58) -
400
92.1k
        (((limb) 1) << 42) - (((limb) 1) << 2);
401
402
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403
92.1k
    out[0] += two58p2;
404
92.1k
    out[1] += two58m42m2;
405
92.1k
    out[2] += two58m2;
406
92.1k
    out[3] += two58m2;
407
408
92.1k
    out[0] -= in[0];
409
92.1k
    out[1] -= in[1];
410
92.1k
    out[2] -= in[2];
411
92.1k
    out[3] -= in[3];
412
92.1k
}
413
414
/* Subtract in unreduced 128-bit mode: out -= in */
415
/* Assumes in[i] < 2^119 */
416
static void widefelem_diff(widefelem out, const widefelem in)
417
55.5k
{
418
55.5k
    static const widelimb two120 = ((widelimb) 1) << 120;
419
55.5k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
420
55.5k
        (((widelimb) 1) << 64);
421
55.5k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
422
55.5k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423
424
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425
55.5k
    out[0] += two120;
426
55.5k
    out[1] += two120m64;
427
55.5k
    out[2] += two120m64;
428
55.5k
    out[3] += two120;
429
55.5k
    out[4] += two120m104m64;
430
55.5k
    out[5] += two120m64;
431
55.5k
    out[6] += two120m64;
432
433
55.5k
    out[0] -= in[0];
434
55.5k
    out[1] -= in[1];
435
55.5k
    out[2] -= in[2];
436
55.5k
    out[3] -= in[3];
437
55.5k
    out[4] -= in[4];
438
55.5k
    out[5] -= in[5];
439
55.5k
    out[6] -= in[6];
440
55.5k
}
441
442
/* Subtract in mixed mode: out128 -= in64 */
443
/* in[i] < 2^63 */
444
static void felem_diff_128_64(widefelem out, const felem in)
445
155k
{
446
155k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
447
155k
        (((widelimb) 1) << 8);
448
155k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
449
155k
        (((widelimb) 1) << 8);
450
155k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
451
155k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452
453
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454
155k
    out[0] += two64p8;
455
155k
    out[1] += two64m48m8;
456
155k
    out[2] += two64m8;
457
155k
    out[3] += two64m8;
458
459
155k
    out[0] -= in[0];
460
155k
    out[1] -= in[1];
461
155k
    out[2] -= in[2];
462
155k
    out[3] -= in[3];
463
155k
}
464
465
/*
466
 * Multiply a field element by a scalar: out = out * scalar The scalars we
467
 * actually use are small, so results fit without overflow
468
 */
469
static void felem_scalar(felem out, const limb scalar)
470
128k
{
471
128k
    out[0] *= scalar;
472
128k
    out[1] *= scalar;
473
128k
    out[2] *= scalar;
474
128k
    out[3] *= scalar;
475
128k
}
476
477
/*
478
 * Multiply an unreduced field element by a scalar: out = out * scalar The
479
 * scalars we actually use are small, so results fit without overflow
480
 */
481
static void widefelem_scalar(widefelem out, const widelimb scalar)
482
36.5k
{
483
36.5k
    out[0] *= scalar;
484
36.5k
    out[1] *= scalar;
485
36.5k
    out[2] *= scalar;
486
36.5k
    out[3] *= scalar;
487
36.5k
    out[4] *= scalar;
488
36.5k
    out[5] *= scalar;
489
36.5k
    out[6] *= scalar;
490
36.5k
}
491
492
/* Square a field element: out = in^2 */
493
static void felem_square(widefelem out, const felem in)
494
429k
{
495
429k
    limb tmp0, tmp1, tmp2;
496
429k
    tmp0 = 2 * in[0];
497
429k
    tmp1 = 2 * in[1];
498
429k
    tmp2 = 2 * in[2];
499
429k
    out[0] = ((widelimb) in[0]) * in[0];
500
429k
    out[1] = ((widelimb) in[0]) * tmp1;
501
429k
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502
429k
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
503
429k
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
504
429k
    out[5] = ((widelimb) in[3]) * tmp2;
505
429k
    out[6] = ((widelimb) in[3]) * in[3];
506
429k
}
507
508
/* Multiply two field elements: out = in1 * in2 */
509
static void felem_mul(widefelem out, const felem in1, const felem in2)
510
300k
{
511
300k
    out[0] = ((widelimb) in1[0]) * in2[0];
512
300k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
513
300k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
514
300k
             ((widelimb) in1[2]) * in2[0];
515
300k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
516
300k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
517
300k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
518
300k
             ((widelimb) in1[3]) * in2[1];
519
300k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
520
300k
    out[6] = ((widelimb) in1[3]) * in2[3];
521
300k
}
522
523
/*-
524
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
525
 * Requires in[i] < 2^126,
526
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527
static void felem_reduce(felem out, const widefelem in)
528
681k
{
529
681k
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
530
681k
        (((widelimb) 1) << 15);
531
681k
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
532
681k
        (((widelimb) 1) << 71);
533
681k
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
534
681k
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535
681k
    widelimb output[5];
536
537
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
538
681k
    output[0] = in[0] + two127p15;
539
681k
    output[1] = in[1] + two127m71m55;
540
681k
    output[2] = in[2] + two127m71;
541
681k
    output[3] = in[3];
542
681k
    output[4] = in[4];
543
544
    /* Eliminate in[4], in[5], in[6] */
545
681k
    output[4] += in[6] >> 16;
546
681k
    output[3] += (in[6] & 0xffff) << 40;
547
681k
    output[2] -= in[6];
548
549
681k
    output[3] += in[5] >> 16;
550
681k
    output[2] += (in[5] & 0xffff) << 40;
551
681k
    output[1] -= in[5];
552
553
681k
    output[2] += output[4] >> 16;
554
681k
    output[1] += (output[4] & 0xffff) << 40;
555
681k
    output[0] -= output[4];
556
557
    /* Carry 2 -> 3 -> 4 */
558
681k
    output[3] += output[2] >> 56;
559
681k
    output[2] &= 0x00ffffffffffffff;
560
561
681k
    output[4] = output[3] >> 56;
562
681k
    output[3] &= 0x00ffffffffffffff;
563
564
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565
566
    /* Eliminate output[4] */
567
681k
    output[2] += output[4] >> 16;
568
    /* output[2] < 2^56 + 2^56 = 2^57 */
569
681k
    output[1] += (output[4] & 0xffff) << 40;
570
681k
    output[0] -= output[4];
571
572
    /* Carry 0 -> 1 -> 2 -> 3 */
573
681k
    output[1] += output[0] >> 56;
574
681k
    out[0] = output[0] & 0x00ffffffffffffff;
575
576
681k
    output[2] += output[1] >> 56;
577
    /* output[2] < 2^57 + 2^72 */
578
681k
    out[1] = output[1] & 0x00ffffffffffffff;
579
681k
    output[3] += output[2] >> 56;
580
    /* output[3] <= 2^56 + 2^16 */
581
681k
    out[2] = output[2] & 0x00ffffffffffffff;
582
583
    /*-
584
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585
     * out[3] <= 2^56 + 2^16 (due to final carry),
586
     * so out < 2*p
587
     */
588
681k
    out[3] = output[3];
589
681k
}
590
591
static void felem_square_reduce(felem out, const felem in)
592
0
{
593
0
    widefelem tmp;
594
0
    felem_square(tmp, in);
595
0
    felem_reduce(out, tmp);
596
0
}
597
598
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599
0
{
600
0
    widefelem tmp;
601
0
    felem_mul(tmp, in1, in2);
602
0
    felem_reduce(out, tmp);
603
0
}
604
605
/*
606
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607
 * call felem_reduce first)
608
 */
609
static void felem_contract(felem out, const felem in)
610
2.70k
{
611
2.70k
    static const int64_t two56 = ((limb) 1) << 56;
612
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
614
2.70k
    int64_t tmp[4], a;
615
2.70k
    tmp[0] = in[0];
616
2.70k
    tmp[1] = in[1];
617
2.70k
    tmp[2] = in[2];
618
2.70k
    tmp[3] = in[3];
619
    /* Case 1: a = 1 iff in >= 2^224 */
620
2.70k
    a = (in[3] >> 56);
621
2.70k
    tmp[0] -= a;
622
2.70k
    tmp[1] += a << 40;
623
2.70k
    tmp[3] &= 0x00ffffffffffffff;
624
    /*
625
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626
     * and the lower part is non-zero
627
     */
628
2.70k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
629
2.70k
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
630
2.70k
    a &= 0x00ffffffffffffff;
631
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632
2.70k
    a = (a - 1) >> 63;
633
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
634
2.70k
    tmp[3] &= a ^ 0xffffffffffffffff;
635
2.70k
    tmp[2] &= a ^ 0xffffffffffffffff;
636
2.70k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637
2.70k
    tmp[0] -= 1 & a;
638
639
    /*
640
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641
     * non-zero, so we only need one step
642
     */
643
2.70k
    a = tmp[0] >> 63;
644
2.70k
    tmp[0] += two56 & a;
645
2.70k
    tmp[1] -= 1 & a;
646
647
    /* carry 1 -> 2 -> 3 */
648
2.70k
    tmp[2] += tmp[1] >> 56;
649
2.70k
    tmp[1] &= 0x00ffffffffffffff;
650
651
2.70k
    tmp[3] += tmp[2] >> 56;
652
2.70k
    tmp[2] &= 0x00ffffffffffffff;
653
654
    /* Now 0 <= out < p */
655
2.70k
    out[0] = tmp[0];
656
2.70k
    out[1] = tmp[1];
657
2.70k
    out[2] = tmp[2];
658
2.70k
    out[3] = tmp[3];
659
2.70k
}
660
661
/*
662
 * Get negative value: out = -in
663
 * Requires in[i] < 2^63,
664
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665
 */
666
static void felem_neg(felem out, const felem in)
667
6.03k
{
668
6.03k
    widefelem tmp;
669
670
6.03k
    memset(tmp, 0, sizeof(tmp));
671
6.03k
    felem_diff_128_64(tmp, in);
672
6.03k
    felem_reduce(out, tmp);
673
6.03k
}
674
675
/*
676
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677
 * elements are reduced to in < 2^225, so we only need to check three cases:
678
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679
 */
680
static limb felem_is_zero(const felem in)
681
76.1k
{
682
76.1k
    limb zero, two224m96p1, two225m97p2;
683
684
76.1k
    zero = in[0] | in[1] | in[2] | in[3];
685
76.1k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
686
76.1k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
687
76.1k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
688
76.1k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
689
76.1k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
690
76.1k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
691
76.1k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
692
76.1k
    return (zero | two224m96p1 | two225m97p2);
693
76.1k
}
694
695
static int felem_is_zero_int(const void *in)
696
0
{
697
0
    return (int)(felem_is_zero(in) & ((limb) 1));
698
0
}
699
700
/* Invert a field element */
701
/* Computation chain copied from djb's code */
702
static void felem_inv(felem out, const felem in)
703
818
{
704
818
    felem ftmp, ftmp2, ftmp3, ftmp4;
705
818
    widefelem tmp;
706
818
    unsigned i;
707
708
818
    felem_square(tmp, in);
709
818
    felem_reduce(ftmp, tmp);    /* 2 */
710
818
    felem_mul(tmp, in, ftmp);
711
818
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
712
818
    felem_square(tmp, ftmp);
713
818
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
714
818
    felem_mul(tmp, in, ftmp);
715
818
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
716
818
    felem_square(tmp, ftmp);
717
818
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
718
818
    felem_square(tmp, ftmp2);
719
818
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
720
818
    felem_square(tmp, ftmp2);
721
818
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
722
818
    felem_mul(tmp, ftmp2, ftmp);
723
818
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
724
818
    felem_square(tmp, ftmp);
725
818
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
726
4.90k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
727
4.09k
        felem_square(tmp, ftmp2);
728
4.09k
        felem_reduce(ftmp2, tmp);
729
4.09k
    }
730
818
    felem_mul(tmp, ftmp2, ftmp);
731
818
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
732
818
    felem_square(tmp, ftmp2);
733
818
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
734
9.81k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
735
8.99k
        felem_square(tmp, ftmp3);
736
8.99k
        felem_reduce(ftmp3, tmp);
737
8.99k
    }
738
818
    felem_mul(tmp, ftmp3, ftmp2);
739
818
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
740
818
    felem_square(tmp, ftmp2);
741
818
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
742
19.6k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
743
18.8k
        felem_square(tmp, ftmp3);
744
18.8k
        felem_reduce(ftmp3, tmp);
745
18.8k
    }
746
818
    felem_mul(tmp, ftmp3, ftmp2);
747
818
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
748
818
    felem_square(tmp, ftmp3);
749
818
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
750
39.2k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
751
38.4k
        felem_square(tmp, ftmp4);
752
38.4k
        felem_reduce(ftmp4, tmp);
753
38.4k
    }
754
818
    felem_mul(tmp, ftmp3, ftmp4);
755
818
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
756
818
    felem_square(tmp, ftmp3);
757
818
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
758
19.6k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
759
18.8k
        felem_square(tmp, ftmp4);
760
18.8k
        felem_reduce(ftmp4, tmp);
761
18.8k
    }
762
818
    felem_mul(tmp, ftmp2, ftmp4);
763
818
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
764
5.72k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
765
4.90k
        felem_square(tmp, ftmp2);
766
4.90k
        felem_reduce(ftmp2, tmp);
767
4.90k
    }
768
818
    felem_mul(tmp, ftmp2, ftmp);
769
818
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
770
818
    felem_square(tmp, ftmp);
771
818
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
772
818
    felem_mul(tmp, ftmp, in);
773
818
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
774
80.1k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
775
79.3k
        felem_square(tmp, ftmp);
776
79.3k
        felem_reduce(ftmp, tmp);
777
79.3k
    }
778
818
    felem_mul(tmp, ftmp, ftmp3);
779
818
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
780
818
}
781
782
/*
783
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784
 * out to itself.
785
 */
786
static void copy_conditional(felem out, const felem in, limb icopy)
787
120k
{
788
120k
    unsigned i;
789
    /*
790
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791
     */
792
120k
    const limb copy = -icopy;
793
601k
    for (i = 0; i < 4; ++i) {
794
481k
        const limb tmp = copy & (in[i] ^ out[i]);
795
481k
        out[i] ^= tmp;
796
481k
    }
797
120k
}
798
799
/******************************************************************************/
800
/*-
801
 *                       ELLIPTIC CURVE POINT OPERATIONS
802
 *
803
 * Points are represented in Jacobian projective coordinates:
804
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805
 * or to the point at infinity if Z == 0.
806
 *
807
 */
808
809
/*-
810
 * Double an elliptic curve point:
811
 * (X', Y', Z') = 2 * (X, Y, Z), where
812
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816
 * while x_out == y_in is not (maybe this works, but it's not tested).
817
 */
818
static void
819
point_double(felem x_out, felem y_out, felem z_out,
820
             const felem x_in, const felem y_in, const felem z_in)
821
36.5k
{
822
36.5k
    widefelem tmp, tmp2;
823
36.5k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825
36.5k
    felem_assign(ftmp, x_in);
826
36.5k
    felem_assign(ftmp2, x_in);
827
828
    /* delta = z^2 */
829
36.5k
    felem_square(tmp, z_in);
830
36.5k
    felem_reduce(delta, tmp);
831
832
    /* gamma = y^2 */
833
36.5k
    felem_square(tmp, y_in);
834
36.5k
    felem_reduce(gamma, tmp);
835
836
    /* beta = x*gamma */
837
36.5k
    felem_mul(tmp, x_in, gamma);
838
36.5k
    felem_reduce(beta, tmp);
839
840
    /* alpha = 3*(x-delta)*(x+delta) */
841
36.5k
    felem_diff(ftmp, delta);
842
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
843
36.5k
    felem_sum(ftmp2, delta);
844
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
845
36.5k
    felem_scalar(ftmp2, 3);
846
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
847
36.5k
    felem_mul(tmp, ftmp, ftmp2);
848
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
849
36.5k
    felem_reduce(alpha, tmp);
850
851
    /* x' = alpha^2 - 8*beta */
852
36.5k
    felem_square(tmp, alpha);
853
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
854
36.5k
    felem_assign(ftmp, beta);
855
36.5k
    felem_scalar(ftmp, 8);
856
    /* ftmp[i] < 8 * 2^57 = 2^60 */
857
36.5k
    felem_diff_128_64(tmp, ftmp);
858
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
859
36.5k
    felem_reduce(x_out, tmp);
860
861
    /* z' = (y + z)^2 - gamma - delta */
862
36.5k
    felem_sum(delta, gamma);
863
    /* delta[i] < 2^57 + 2^57 = 2^58 */
864
36.5k
    felem_assign(ftmp, y_in);
865
36.5k
    felem_sum(ftmp, z_in);
866
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
867
36.5k
    felem_square(tmp, ftmp);
868
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
869
36.5k
    felem_diff_128_64(tmp, delta);
870
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
871
36.5k
    felem_reduce(z_out, tmp);
872
873
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
874
36.5k
    felem_scalar(beta, 4);
875
    /* beta[i] < 4 * 2^57 = 2^59 */
876
36.5k
    felem_diff(beta, x_out);
877
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
878
36.5k
    felem_mul(tmp, alpha, beta);
879
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
880
36.5k
    felem_square(tmp2, gamma);
881
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
882
36.5k
    widefelem_scalar(tmp2, 8);
883
    /* tmp2[i] < 8 * 2^116 = 2^119 */
884
36.5k
    widefelem_diff(tmp, tmp2);
885
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
886
36.5k
    felem_reduce(y_out, tmp);
887
36.5k
}
888
889
/*-
890
 * Add two elliptic curve points:
891
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897
 *
898
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899
 */
900
901
/*
902
 * This function is not entirely constant-time: it includes a branch for
903
 * checking whether the two input points are equal, (while not equal to the
904
 * point at infinity). This case never happens during single point
905
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906
 */
907
static void point_add(felem x3, felem y3, felem z3,
908
                      const felem x1, const felem y1, const felem z1,
909
                      const int mixed, const felem x2, const felem y2,
910
                      const felem z2)
911
19.0k
{
912
19.0k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913
19.0k
    widefelem tmp, tmp2;
914
19.0k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915
19.0k
    limb points_equal;
916
917
19.0k
    if (!mixed) {
918
        /* ftmp2 = z2^2 */
919
6.83k
        felem_square(tmp, z2);
920
6.83k
        felem_reduce(ftmp2, tmp);
921
922
        /* ftmp4 = z2^3 */
923
6.83k
        felem_mul(tmp, ftmp2, z2);
924
6.83k
        felem_reduce(ftmp4, tmp);
925
926
        /* ftmp4 = z2^3*y1 */
927
6.83k
        felem_mul(tmp2, ftmp4, y1);
928
6.83k
        felem_reduce(ftmp4, tmp2);
929
930
        /* ftmp2 = z2^2*x1 */
931
6.83k
        felem_mul(tmp2, ftmp2, x1);
932
6.83k
        felem_reduce(ftmp2, tmp2);
933
12.2k
    } else {
934
        /*
935
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
936
         */
937
938
        /* ftmp4 = z2^3*y1 */
939
12.2k
        felem_assign(ftmp4, y1);
940
941
        /* ftmp2 = z2^2*x1 */
942
12.2k
        felem_assign(ftmp2, x1);
943
12.2k
    }
944
945
    /* ftmp = z1^2 */
946
19.0k
    felem_square(tmp, z1);
947
19.0k
    felem_reduce(ftmp, tmp);
948
949
    /* ftmp3 = z1^3 */
950
19.0k
    felem_mul(tmp, ftmp, z1);
951
19.0k
    felem_reduce(ftmp3, tmp);
952
953
    /* tmp = z1^3*y2 */
954
19.0k
    felem_mul(tmp, ftmp3, y2);
955
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
958
19.0k
    felem_diff_128_64(tmp, ftmp4);
959
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960
19.0k
    felem_reduce(ftmp3, tmp);
961
962
    /* tmp = z1^2*x2 */
963
19.0k
    felem_mul(tmp, ftmp, x2);
964
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966
    /* ftmp = z1^2*x2 - z2^2*x1 */
967
19.0k
    felem_diff_128_64(tmp, ftmp2);
968
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969
19.0k
    felem_reduce(ftmp, tmp);
970
971
    /*
972
     * The formulae are incorrect if the points are equal, in affine coordinates
973
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
974
     * happens.
975
     *
976
     * We use bitwise operations to avoid potential side-channels introduced by
977
     * the short-circuiting behaviour of boolean operators.
978
     */
979
19.0k
    x_equal = felem_is_zero(ftmp);
980
19.0k
    y_equal = felem_is_zero(ftmp3);
981
    /*
982
     * The special case of either point being the point at infinity (z1 and/or
983
     * z2 are zero), is handled separately later on in this function, so we
984
     * avoid jumping to point_double here in those special cases.
985
     */
986
19.0k
    z1_is_zero = felem_is_zero(z1);
987
19.0k
    z2_is_zero = felem_is_zero(z2);
988
989
    /*
990
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
991
     * specific implementation `felem_is_zero()` returns truth as `0x1`
992
     * (rather than `0xff..ff`).
993
     *
994
     * This implies that `~true` in this implementation becomes
995
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
996
     * the if expression, we mask out only the last bit in the next
997
     * line.
998
     */
999
19.0k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001
19.0k
    if (points_equal) {
1002
        /*
1003
         * This is obviously not constant-time but, as mentioned before, this
1004
         * case never happens during single point multiplication, so there is no
1005
         * timing leak for ECDH or ECDSA signing.
1006
         */
1007
0
        point_double(x3, y3, z3, x1, y1, z1);
1008
0
        return;
1009
0
    }
1010
1011
    /* ftmp5 = z1*z2 */
1012
19.0k
    if (!mixed) {
1013
6.83k
        felem_mul(tmp, z1, z2);
1014
6.83k
        felem_reduce(ftmp5, tmp);
1015
12.2k
    } else {
1016
        /* special case z2 = 0 is handled later */
1017
12.2k
        felem_assign(ftmp5, z1);
1018
12.2k
    }
1019
1020
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021
19.0k
    felem_mul(tmp, ftmp, ftmp5);
1022
19.0k
    felem_reduce(z_out, tmp);
1023
1024
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025
19.0k
    felem_assign(ftmp5, ftmp);
1026
19.0k
    felem_square(tmp, ftmp);
1027
19.0k
    felem_reduce(ftmp, tmp);
1028
1029
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030
19.0k
    felem_mul(tmp, ftmp, ftmp5);
1031
19.0k
    felem_reduce(ftmp5, tmp);
1032
1033
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034
19.0k
    felem_mul(tmp, ftmp2, ftmp);
1035
19.0k
    felem_reduce(ftmp2, tmp);
1036
1037
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038
19.0k
    felem_mul(tmp, ftmp4, ftmp5);
1039
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042
19.0k
    felem_square(tmp2, ftmp3);
1043
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046
19.0k
    felem_diff_128_64(tmp2, ftmp5);
1047
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050
19.0k
    felem_assign(ftmp5, ftmp2);
1051
19.0k
    felem_scalar(ftmp5, 2);
1052
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054
    /*-
1055
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057
     */
1058
19.0k
    felem_diff_128_64(tmp2, ftmp5);
1059
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060
19.0k
    felem_reduce(x_out, tmp2);
1061
1062
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063
19.0k
    felem_diff(ftmp2, x_out);
1064
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066
    /*
1067
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068
     */
1069
19.0k
    felem_mul(tmp2, ftmp3, ftmp2);
1070
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072
    /*-
1073
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075
     */
1076
19.0k
    widefelem_diff(tmp2, tmp);
1077
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078
19.0k
    felem_reduce(y_out, tmp2);
1079
1080
    /*
1081
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082
     * the point at infinity, so we need to check for this separately
1083
     */
1084
1085
    /*
1086
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1087
     */
1088
19.0k
    copy_conditional(x_out, x2, z1_is_zero);
1089
19.0k
    copy_conditional(x_out, x1, z2_is_zero);
1090
19.0k
    copy_conditional(y_out, y2, z1_is_zero);
1091
19.0k
    copy_conditional(y_out, y1, z2_is_zero);
1092
19.0k
    copy_conditional(z_out, z2, z1_is_zero);
1093
19.0k
    copy_conditional(z_out, z1, z2_is_zero);
1094
19.0k
    felem_assign(x3, x_out);
1095
19.0k
    felem_assign(y3, y_out);
1096
19.0k
    felem_assign(z3, z_out);
1097
19.0k
}
1098
1099
/*
1100
 * select_point selects the |idx|th point from a precomputation table and
1101
 * copies it to out.
1102
 * The pre_comp array argument should be size of |size| argument
1103
 */
1104
static void select_point(const u64 idx, unsigned int size,
1105
                         const felem pre_comp[][3], felem out[3])
1106
18.4k
{
1107
18.4k
    unsigned i, j;
1108
18.4k
    limb *outlimbs = &out[0][0];
1109
1110
18.4k
    memset(out, 0, sizeof(*out) * 3);
1111
319k
    for (i = 0; i < size; i++) {
1112
301k
        const limb *inlimbs = &pre_comp[i][0][0];
1113
301k
        u64 mask = i ^ idx;
1114
301k
        mask |= mask >> 4;
1115
301k
        mask |= mask >> 2;
1116
301k
        mask |= mask >> 1;
1117
301k
        mask &= 1;
1118
301k
        mask--;
1119
3.91M
        for (j = 0; j < 4 * 3; j++)
1120
3.61M
            outlimbs[j] |= inlimbs[j] & mask;
1121
301k
    }
1122
18.4k
}
1123
1124
/* get_bit returns the |i|th bit in |in| */
1125
static char get_bit(const felem_bytearray in, unsigned i)
1126
85.9k
{
1127
85.9k
    if (i >= 224)
1128
268
        return 0;
1129
85.6k
    return (in[i >> 3] >> (i & 7)) & 1;
1130
85.9k
}
1131
1132
/*
1133
 * Interleaved point multiplication using precomputed point multiples: The
1134
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1137
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138
 */
1139
static void batch_mul(felem x_out, felem y_out, felem z_out,
1140
                      const felem_bytearray scalars[],
1141
                      const unsigned num_points, const u8 *g_scalar,
1142
                      const int mixed, const felem pre_comp[][17][3],
1143
                      const felem g_pre_comp[2][16][3])
1144
356
{
1145
356
    int i, skip;
1146
356
    unsigned num;
1147
356
    unsigned gen_mul = (g_scalar != NULL);
1148
356
    felem nq[3], tmp[4];
1149
356
    u64 bits;
1150
356
    u8 sign, digit;
1151
1152
    /* set nq to the point at infinity */
1153
356
    memset(nq, 0, sizeof(nq));
1154
1155
    /*
1156
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157
     * of the generator (two in each of the last 28 rounds) and additions of
1158
     * other points multiples (every 5th round).
1159
     */
1160
356
    skip = 1;                   /* save two point operations in the first
1161
                                 * round */
1162
36.1k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163
        /* double */
1164
35.8k
        if (!skip)
1165
35.4k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167
        /* add multiples of the generator */
1168
35.8k
        if (gen_mul && (i <= 27)) {
1169
            /* first, look 28 bits upwards */
1170
6.21k
            bits = get_bit(g_scalar, i + 196) << 3;
1171
6.21k
            bits |= get_bit(g_scalar, i + 140) << 2;
1172
6.21k
            bits |= get_bit(g_scalar, i + 84) << 1;
1173
6.21k
            bits |= get_bit(g_scalar, i + 28);
1174
            /* select the point to add, in constant time */
1175
6.21k
            select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177
6.21k
            if (!skip) {
1178
                /* value 1 below is argument for "mixed" */
1179
5.99k
                point_add(nq[0], nq[1], nq[2],
1180
5.99k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181
5.99k
            } else {
1182
222
                memcpy(nq, tmp, 3 * sizeof(felem));
1183
222
                skip = 0;
1184
222
            }
1185
1186
            /* second, look at the current position */
1187
6.21k
            bits = get_bit(g_scalar, i + 168) << 3;
1188
6.21k
            bits |= get_bit(g_scalar, i + 112) << 2;
1189
6.21k
            bits |= get_bit(g_scalar, i + 56) << 1;
1190
6.21k
            bits |= get_bit(g_scalar, i);
1191
            /* select the point to add, in constant time */
1192
6.21k
            select_point(bits, 16, g_pre_comp[0], tmp);
1193
6.21k
            point_add(nq[0], nq[1], nq[2],
1194
6.21k
                      nq[0], nq[1], nq[2],
1195
6.21k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196
6.21k
        }
1197
1198
        /* do other additions every 5 doublings */
1199
35.8k
        if (num_points && (i % 5 == 0)) {
1200
            /* loop over all scalars */
1201
12.0k
            for (num = 0; num < num_points; ++num) {
1202
6.03k
                bits = get_bit(scalars[num], i + 4) << 5;
1203
6.03k
                bits |= get_bit(scalars[num], i + 3) << 4;
1204
6.03k
                bits |= get_bit(scalars[num], i + 2) << 3;
1205
6.03k
                bits |= get_bit(scalars[num], i + 1) << 2;
1206
6.03k
                bits |= get_bit(scalars[num], i) << 1;
1207
6.03k
                bits |= get_bit(scalars[num], i - 1);
1208
6.03k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210
                /* select the point to add or subtract */
1211
6.03k
                select_point(digit, 17, pre_comp[num], tmp);
1212
6.03k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213
                                            * point */
1214
6.03k
                copy_conditional(tmp[1], tmp[3], sign);
1215
1216
6.03k
                if (!skip) {
1217
5.89k
                    point_add(nq[0], nq[1], nq[2],
1218
5.89k
                              nq[0], nq[1], nq[2],
1219
5.89k
                              mixed, tmp[0], tmp[1], tmp[2]);
1220
5.89k
                } else {
1221
134
                    memcpy(nq, tmp, 3 * sizeof(felem));
1222
134
                    skip = 0;
1223
134
                }
1224
6.03k
            }
1225
6.03k
        }
1226
35.8k
    }
1227
356
    felem_assign(x_out, nq[0]);
1228
356
    felem_assign(y_out, nq[1]);
1229
356
    felem_assign(z_out, nq[2]);
1230
356
}
1231
1232
/******************************************************************************/
1233
/*
1234
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1235
 */
1236
1237
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238
0
{
1239
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241
0
    if (!ret) {
1242
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243
0
        return ret;
1244
0
    }
1245
1246
0
    ret->references = 1;
1247
1248
0
    ret->lock = CRYPTO_THREAD_lock_new();
1249
0
    if (ret->lock == NULL) {
1250
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251
0
        OPENSSL_free(ret);
1252
0
        return NULL;
1253
0
    }
1254
0
    return ret;
1255
0
}
1256
1257
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258
0
{
1259
0
    int i;
1260
0
    if (p != NULL)
1261
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1262
0
    return p;
1263
0
}
1264
1265
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266
0
{
1267
0
    int i;
1268
1269
0
    if (p == NULL)
1270
0
        return;
1271
1272
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273
0
    REF_PRINT_COUNT("EC_nistp224", p);
1274
0
    if (i > 0)
1275
0
        return;
1276
0
    REF_ASSERT_ISNT(i < 0);
1277
1278
0
    CRYPTO_THREAD_lock_free(p->lock);
1279
0
    OPENSSL_free(p);
1280
0
}
1281
1282
/******************************************************************************/
1283
/*
1284
 * OPENSSL EC_METHOD FUNCTIONS
1285
 */
1286
1287
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288
30.9k
{
1289
30.9k
    int ret;
1290
30.9k
    ret = ossl_ec_GFp_simple_group_init(group);
1291
30.9k
    group->a_is_minus3 = 1;
1292
30.9k
    return ret;
1293
30.9k
}
1294
1295
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296
                                         const BIGNUM *a, const BIGNUM *b,
1297
                                         BN_CTX *ctx)
1298
15.6k
{
1299
15.6k
    int ret = 0;
1300
15.6k
    BIGNUM *curve_p, *curve_a, *curve_b;
1301
15.6k
#ifndef FIPS_MODULE
1302
15.6k
    BN_CTX *new_ctx = NULL;
1303
1304
15.6k
    if (ctx == NULL)
1305
0
        ctx = new_ctx = BN_CTX_new();
1306
15.6k
#endif
1307
15.6k
    if (ctx == NULL)
1308
0
        return 0;
1309
1310
15.6k
    BN_CTX_start(ctx);
1311
15.6k
    curve_p = BN_CTX_get(ctx);
1312
15.6k
    curve_a = BN_CTX_get(ctx);
1313
15.6k
    curve_b = BN_CTX_get(ctx);
1314
15.6k
    if (curve_b == NULL)
1315
0
        goto err;
1316
15.6k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317
15.6k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318
15.6k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319
15.6k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321
0
        goto err;
1322
0
    }
1323
15.6k
    group->field_mod_func = BN_nist_mod_224;
1324
15.6k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325
15.6k
 err:
1326
15.6k
    BN_CTX_end(ctx);
1327
15.6k
#ifndef FIPS_MODULE
1328
15.6k
    BN_CTX_free(new_ctx);
1329
15.6k
#endif
1330
15.6k
    return ret;
1331
15.6k
}
1332
1333
/*
1334
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335
 * (X/Z^2, Y/Z^3)
1336
 */
1337
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338
                                                      const EC_POINT *point,
1339
                                                      BIGNUM *x, BIGNUM *y,
1340
                                                      BN_CTX *ctx)
1341
650
{
1342
650
    felem z1, z2, x_in, y_in, x_out, y_out;
1343
650
    widefelem tmp;
1344
1345
650
    if (EC_POINT_is_at_infinity(group, point)) {
1346
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347
0
        return 0;
1348
0
    }
1349
650
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350
650
        (!BN_to_felem(z1, point->Z)))
1351
0
        return 0;
1352
650
    felem_inv(z2, z1);
1353
650
    felem_square(tmp, z2);
1354
650
    felem_reduce(z1, tmp);
1355
650
    felem_mul(tmp, x_in, z1);
1356
650
    felem_reduce(x_in, tmp);
1357
650
    felem_contract(x_out, x_in);
1358
650
    if (x != NULL) {
1359
650
        if (!felem_to_BN(x, x_out)) {
1360
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361
0
            return 0;
1362
0
        }
1363
650
    }
1364
650
    felem_mul(tmp, z1, z2);
1365
650
    felem_reduce(z1, tmp);
1366
650
    felem_mul(tmp, y_in, z1);
1367
650
    felem_reduce(y_in, tmp);
1368
650
    felem_contract(y_out, y_in);
1369
650
    if (y != NULL) {
1370
650
        if (!felem_to_BN(y, y_out)) {
1371
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372
0
            return 0;
1373
0
        }
1374
650
    }
1375
650
    return 1;
1376
650
}
1377
1378
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379
                               felem tmp_felems[ /* num+1 */ ])
1380
0
{
1381
    /*
1382
     * Runs in constant time, unless an input is the point at infinity (which
1383
     * normally shouldn't happen).
1384
     */
1385
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386
0
                                                  points,
1387
0
                                                  sizeof(felem),
1388
0
                                                  tmp_felems,
1389
0
                                                  (void (*)(void *))felem_one,
1390
0
                                                  felem_is_zero_int,
1391
0
                                                  (void (*)(void *, const void *))
1392
0
                                                  felem_assign,
1393
0
                                                  (void (*)(void *, const void *))
1394
0
                                                  felem_square_reduce, (void (*)
1395
0
                                                                        (void *,
1396
0
                                                                         const void
1397
0
                                                                         *,
1398
0
                                                                         const void
1399
0
                                                                         *))
1400
0
                                                  felem_mul_reduce,
1401
0
                                                  (void (*)(void *, const void *))
1402
0
                                                  felem_inv,
1403
0
                                                  (void (*)(void *, const void *))
1404
0
                                                  felem_contract);
1405
0
}
1406
1407
/*
1408
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409
 * values Result is stored in r (r can equal one of the inputs).
1410
 */
1411
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412
                                    const BIGNUM *scalar, size_t num,
1413
                                    const EC_POINT *points[],
1414
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1415
350
{
1416
350
    int ret = 0;
1417
350
    int j;
1418
350
    unsigned i;
1419
350
    int mixed = 0;
1420
350
    BIGNUM *x, *y, *z, *tmp_scalar;
1421
350
    felem_bytearray g_secret;
1422
350
    felem_bytearray *secrets = NULL;
1423
350
    felem (*pre_comp)[17][3] = NULL;
1424
350
    felem *tmp_felems = NULL;
1425
350
    int num_bytes;
1426
350
    int have_pre_comp = 0;
1427
350
    size_t num_points = num;
1428
350
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1429
350
    NISTP224_PRE_COMP *pre = NULL;
1430
350
    const felem(*g_pre_comp)[16][3] = NULL;
1431
350
    EC_POINT *generator = NULL;
1432
350
    const EC_POINT *p = NULL;
1433
350
    const BIGNUM *p_scalar = NULL;
1434
1435
350
    BN_CTX_start(ctx);
1436
350
    x = BN_CTX_get(ctx);
1437
350
    y = BN_CTX_get(ctx);
1438
350
    z = BN_CTX_get(ctx);
1439
350
    tmp_scalar = BN_CTX_get(ctx);
1440
350
    if (tmp_scalar == NULL)
1441
0
        goto err;
1442
1443
350
    if (scalar != NULL) {
1444
216
        pre = group->pre_comp.nistp224;
1445
216
        if (pre)
1446
            /* we have precomputation, try to use it */
1447
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448
216
        else
1449
            /* try to use the standard precomputation */
1450
216
            g_pre_comp = &gmul[0];
1451
216
        generator = EC_POINT_new(group);
1452
216
        if (generator == NULL)
1453
0
            goto err;
1454
        /* get the generator from precomputation */
1455
216
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456
216
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457
216
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459
0
            goto err;
1460
0
        }
1461
216
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462
216
                                                                generator,
1463
216
                                                                x, y, z, ctx))
1464
0
            goto err;
1465
216
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466
            /* precomputation matches generator */
1467
216
            have_pre_comp = 1;
1468
0
        else
1469
            /*
1470
             * we don't have valid precomputation: treat the generator as a
1471
             * random point
1472
             */
1473
0
            num_points = num_points + 1;
1474
216
    }
1475
1476
350
    if (num_points > 0) {
1477
134
        if (num_points >= 3) {
1478
            /*
1479
             * unless we precompute multiples for just one or two points,
1480
             * converting those into affine form is time well spent
1481
             */
1482
0
            mixed = 1;
1483
0
        }
1484
134
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485
134
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486
134
        if (mixed)
1487
0
            tmp_felems =
1488
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489
134
        if ((secrets == NULL) || (pre_comp == NULL)
1490
134
            || (mixed && (tmp_felems == NULL))) {
1491
0
            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492
0
            goto err;
1493
0
        }
1494
1495
        /*
1496
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1497
         * i.e., they contribute nothing to the linear combination
1498
         */
1499
268
        for (i = 0; i < num_points; ++i) {
1500
134
            if (i == num) {
1501
                /* the generator */
1502
0
                p = EC_GROUP_get0_generator(group);
1503
0
                p_scalar = scalar;
1504
134
            } else {
1505
                /* the i^th point */
1506
134
                p = points[i];
1507
134
                p_scalar = scalars[i];
1508
134
            }
1509
134
            if ((p_scalar != NULL) && (p != NULL)) {
1510
                /* reduce scalar to 0 <= scalar < 2^224 */
1511
134
                if ((BN_num_bits(p_scalar) > 224)
1512
134
                    || (BN_is_negative(p_scalar))) {
1513
                    /*
1514
                     * this is an unusual input, and we don't guarantee
1515
                     * constant-timeness
1516
                     */
1517
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519
0
                        goto err;
1520
0
                    }
1521
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1522
0
                                               secrets[i], sizeof(secrets[i]));
1523
134
                } else {
1524
134
                    num_bytes = BN_bn2lebinpad(p_scalar,
1525
134
                                               secrets[i], sizeof(secrets[i]));
1526
134
                }
1527
134
                if (num_bytes < 0) {
1528
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529
0
                    goto err;
1530
0
                }
1531
                /* precompute multiples */
1532
134
                if ((!BN_to_felem(x_out, p->X)) ||
1533
134
                    (!BN_to_felem(y_out, p->Y)) ||
1534
134
                    (!BN_to_felem(z_out, p->Z)))
1535
0
                    goto err;
1536
134
                felem_assign(pre_comp[i][1][0], x_out);
1537
134
                felem_assign(pre_comp[i][1][1], y_out);
1538
134
                felem_assign(pre_comp[i][1][2], z_out);
1539
2.14k
                for (j = 2; j <= 16; ++j) {
1540
2.01k
                    if (j & 1) {
1541
938
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542
938
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1543
938
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544
938
                                  pre_comp[i][j - 1][0],
1545
938
                                  pre_comp[i][j - 1][1],
1546
938
                                  pre_comp[i][j - 1][2]);
1547
1.07k
                    } else {
1548
1.07k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549
1.07k
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550
1.07k
                                     pre_comp[i][j / 2][1],
1551
1.07k
                                     pre_comp[i][j / 2][2]);
1552
1.07k
                    }
1553
2.01k
                }
1554
134
            }
1555
134
        }
1556
134
        if (mixed)
1557
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558
134
    }
1559
1560
    /* the scalar for the generator */
1561
350
    if ((scalar != NULL) && (have_pre_comp)) {
1562
216
        memset(g_secret, 0, sizeof(g_secret));
1563
        /* reduce scalar to 0 <= scalar < 2^224 */
1564
216
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565
            /*
1566
             * this is an unusual input, and we don't guarantee
1567
             * constant-timeness
1568
             */
1569
11
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571
0
                goto err;
1572
0
            }
1573
11
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574
205
        } else {
1575
205
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576
205
        }
1577
        /* do the multiplication with generator precomputation */
1578
216
        batch_mul(x_out, y_out, z_out,
1579
216
                  (const felem_bytearray(*))secrets, num_points,
1580
216
                  g_secret,
1581
216
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582
216
    } else {
1583
        /* do the multiplication without generator precomputation */
1584
134
        batch_mul(x_out, y_out, z_out,
1585
134
                  (const felem_bytearray(*))secrets, num_points,
1586
134
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587
134
    }
1588
    /* reduce the output to its unique minimal representation */
1589
350
    felem_contract(x_in, x_out);
1590
350
    felem_contract(y_in, y_out);
1591
350
    felem_contract(z_in, z_out);
1592
350
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593
350
        (!felem_to_BN(z, z_in))) {
1594
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595
0
        goto err;
1596
0
    }
1597
350
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598
350
                                                             ctx);
1599
1600
350
 err:
1601
350
    BN_CTX_end(ctx);
1602
350
    EC_POINT_free(generator);
1603
350
    OPENSSL_free(secrets);
1604
350
    OPENSSL_free(pre_comp);
1605
350
    OPENSSL_free(tmp_felems);
1606
350
    return ret;
1607
350
}
1608
1609
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610
0
{
1611
0
    int ret = 0;
1612
0
    NISTP224_PRE_COMP *pre = NULL;
1613
0
    int i, j;
1614
0
    BIGNUM *x, *y;
1615
0
    EC_POINT *generator = NULL;
1616
0
    felem tmp_felems[32];
1617
0
#ifndef FIPS_MODULE
1618
0
    BN_CTX *new_ctx = NULL;
1619
0
#endif
1620
1621
    /* throw away old precomputation */
1622
0
    EC_pre_comp_free(group);
1623
1624
0
#ifndef FIPS_MODULE
1625
0
    if (ctx == NULL)
1626
0
        ctx = new_ctx = BN_CTX_new();
1627
0
#endif
1628
0
    if (ctx == NULL)
1629
0
        return 0;
1630
1631
0
    BN_CTX_start(ctx);
1632
0
    x = BN_CTX_get(ctx);
1633
0
    y = BN_CTX_get(ctx);
1634
0
    if (y == NULL)
1635
0
        goto err;
1636
    /* get the generator */
1637
0
    if (group->generator == NULL)
1638
0
        goto err;
1639
0
    generator = EC_POINT_new(group);
1640
0
    if (generator == NULL)
1641
0
        goto err;
1642
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645
0
        goto err;
1646
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1647
0
        goto err;
1648
    /*
1649
     * if the generator is the standard one, use built-in precomputation
1650
     */
1651
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653
0
        goto done;
1654
0
    }
1655
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658
0
        goto err;
1659
    /*
1660
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661
     * 2^140*G, 2^196*G for the second one
1662
     */
1663
0
    for (i = 1; i <= 8; i <<= 1) {
1664
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667
0
        for (j = 0; j < 27; ++j) {
1668
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671
0
        }
1672
0
        if (i == 8)
1673
0
            break;
1674
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1675
0
                     pre->g_pre_comp[0][2 * i][1],
1676
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678
0
        for (j = 0; j < 27; ++j) {
1679
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1680
0
                         pre->g_pre_comp[0][2 * i][1],
1681
0
                         pre->g_pre_comp[0][2 * i][2],
1682
0
                         pre->g_pre_comp[0][2 * i][0],
1683
0
                         pre->g_pre_comp[0][2 * i][1],
1684
0
                         pre->g_pre_comp[0][2 * i][2]);
1685
0
        }
1686
0
    }
1687
0
    for (i = 0; i < 2; i++) {
1688
        /* g_pre_comp[i][0] is the point at infinity */
1689
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690
        /* the remaining multiples */
1691
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696
0
                  pre->g_pre_comp[i][2][2]);
1697
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708
0
                  pre->g_pre_comp[i][4][2]);
1709
        /*
1710
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711
         */
1712
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716
0
                  pre->g_pre_comp[i][2][2]);
1717
0
        for (j = 1; j < 8; ++j) {
1718
            /* odd multiples: add G resp. 2^28*G */
1719
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1721
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1722
0
                      pre->g_pre_comp[i][2 * j][0],
1723
0
                      pre->g_pre_comp[i][2 * j][1],
1724
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1725
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726
0
                      pre->g_pre_comp[i][1][2]);
1727
0
        }
1728
0
    }
1729
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731
0
 done:
1732
0
    SETPRECOMP(group, nistp224, pre);
1733
0
    pre = NULL;
1734
0
    ret = 1;
1735
0
 err:
1736
0
    BN_CTX_end(ctx);
1737
0
    EC_POINT_free(generator);
1738
0
#ifndef FIPS_MODULE
1739
0
    BN_CTX_free(new_ctx);
1740
0
#endif
1741
0
    EC_nistp224_pre_comp_free(pre);
1742
0
    return ret;
1743
0
}
1744
1745
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746
0
{
1747
0
    return HAVEPRECOMP(group, nistp224);
1748
0
}