Coverage Report

Created: 2024-07-27 06:39

/src/openssl31/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
    CRYPTO_RWLOCK *lock;
242
};
243
244
const EC_METHOD *EC_GFp_nistp224_method(void)
245
28.2k
{
246
28.2k
    static const EC_METHOD ret = {
247
28.2k
        EC_FLAGS_DEFAULT_OCT,
248
28.2k
        NID_X9_62_prime_field,
249
28.2k
        ossl_ec_GFp_nistp224_group_init,
250
28.2k
        ossl_ec_GFp_simple_group_finish,
251
28.2k
        ossl_ec_GFp_simple_group_clear_finish,
252
28.2k
        ossl_ec_GFp_nist_group_copy,
253
28.2k
        ossl_ec_GFp_nistp224_group_set_curve,
254
28.2k
        ossl_ec_GFp_simple_group_get_curve,
255
28.2k
        ossl_ec_GFp_simple_group_get_degree,
256
28.2k
        ossl_ec_group_simple_order_bits,
257
28.2k
        ossl_ec_GFp_simple_group_check_discriminant,
258
28.2k
        ossl_ec_GFp_simple_point_init,
259
28.2k
        ossl_ec_GFp_simple_point_finish,
260
28.2k
        ossl_ec_GFp_simple_point_clear_finish,
261
28.2k
        ossl_ec_GFp_simple_point_copy,
262
28.2k
        ossl_ec_GFp_simple_point_set_to_infinity,
263
28.2k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
264
28.2k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265
28.2k
        0 /* point_set_compressed_coordinates */ ,
266
28.2k
        0 /* point2oct */ ,
267
28.2k
        0 /* oct2point */ ,
268
28.2k
        ossl_ec_GFp_simple_add,
269
28.2k
        ossl_ec_GFp_simple_dbl,
270
28.2k
        ossl_ec_GFp_simple_invert,
271
28.2k
        ossl_ec_GFp_simple_is_at_infinity,
272
28.2k
        ossl_ec_GFp_simple_is_on_curve,
273
28.2k
        ossl_ec_GFp_simple_cmp,
274
28.2k
        ossl_ec_GFp_simple_make_affine,
275
28.2k
        ossl_ec_GFp_simple_points_make_affine,
276
28.2k
        ossl_ec_GFp_nistp224_points_mul,
277
28.2k
        ossl_ec_GFp_nistp224_precompute_mult,
278
28.2k
        ossl_ec_GFp_nistp224_have_precompute_mult,
279
28.2k
        ossl_ec_GFp_nist_field_mul,
280
28.2k
        ossl_ec_GFp_nist_field_sqr,
281
28.2k
        0 /* field_div */ ,
282
28.2k
        ossl_ec_GFp_simple_field_inv,
283
28.2k
        0 /* field_encode */ ,
284
28.2k
        0 /* field_decode */ ,
285
28.2k
        0,                      /* field_set_to_one */
286
28.2k
        ossl_ec_key_simple_priv2oct,
287
28.2k
        ossl_ec_key_simple_oct2priv,
288
28.2k
        0, /* set private */
289
28.2k
        ossl_ec_key_simple_generate_key,
290
28.2k
        ossl_ec_key_simple_check_key,
291
28.2k
        ossl_ec_key_simple_generate_public_key,
292
28.2k
        0, /* keycopy */
293
28.2k
        0, /* keyfinish */
294
28.2k
        ossl_ecdh_simple_compute_key,
295
28.2k
        ossl_ecdsa_simple_sign_setup,
296
28.2k
        ossl_ecdsa_simple_sign_sig,
297
28.2k
        ossl_ecdsa_simple_verify_sig,
298
28.2k
        0, /* field_inverse_mod_ord */
299
28.2k
        0, /* blind_coordinates */
300
28.2k
        0, /* ladder_pre */
301
28.2k
        0, /* ladder_step */
302
28.2k
        0  /* ladder_post */
303
28.2k
    };
304
305
28.2k
    return &ret;
306
28.2k
}
307
308
/*
309
 * Helper functions to convert field elements to/from internal representation
310
 */
311
static void bin28_to_felem(felem out, const u8 in[28])
312
9.87k
{
313
9.87k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314
9.87k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315
9.87k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316
9.87k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317
9.87k
}
318
319
static void felem_to_bin28(u8 out[28], const felem in)
320
12.5k
{
321
12.5k
    unsigned i;
322
100k
    for (i = 0; i < 7; ++i) {
323
87.9k
        out[i] = in[0] >> (8 * i);
324
87.9k
        out[i + 7] = in[1] >> (8 * i);
325
87.9k
        out[i + 14] = in[2] >> (8 * i);
326
87.9k
        out[i + 21] = in[3] >> (8 * i);
327
87.9k
    }
328
12.5k
}
329
330
/* From OpenSSL BIGNUM to internal representation */
331
static int BN_to_felem(felem out, const BIGNUM *bn)
332
9.87k
{
333
9.87k
    felem_bytearray b_out;
334
9.87k
    int num_bytes;
335
336
9.87k
    if (BN_is_negative(bn)) {
337
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338
0
        return 0;
339
0
    }
340
9.87k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341
9.87k
    if (num_bytes < 0) {
342
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343
0
        return 0;
344
0
    }
345
9.87k
    bin28_to_felem(out, b_out);
346
9.87k
    return 1;
347
9.87k
}
348
349
/* From internal representation to OpenSSL BIGNUM */
350
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351
12.5k
{
352
12.5k
    felem_bytearray b_out;
353
12.5k
    felem_to_bin28(b_out, in);
354
12.5k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
355
12.5k
}
356
357
/******************************************************************************/
358
/*-
359
 *                              FIELD OPERATIONS
360
 *
361
 * Field operations, using the internal representation of field elements.
362
 * NB! These operations are specific to our point multiplication and cannot be
363
 * expected to be correct in general - e.g., multiplication with a large scalar
364
 * will cause an overflow.
365
 *
366
 */
367
368
static void felem_one(felem out)
369
0
{
370
0
    out[0] = 1;
371
0
    out[1] = 0;
372
0
    out[2] = 0;
373
0
    out[3] = 0;
374
0
}
375
376
static void felem_assign(felem out, const felem in)
377
800k
{
378
800k
    out[0] = in[0];
379
800k
    out[1] = in[1];
380
800k
    out[2] = in[2];
381
800k
    out[3] = in[3];
382
800k
}
383
384
/* Sum two field elements: out += in */
385
static void felem_sum(felem out, const felem in)
386
236k
{
387
236k
    out[0] += in[0];
388
236k
    out[1] += in[1];
389
236k
    out[2] += in[2];
390
236k
    out[3] += in[3];
391
236k
}
392
393
/* Subtract field elements: out -= in */
394
/* Assumes in[i] < 2^57 */
395
static void felem_diff(felem out, const felem in)
396
222k
{
397
222k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
398
222k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
399
222k
    static const limb two58m42m2 = (((limb) 1) << 58) -
400
222k
        (((limb) 1) << 42) - (((limb) 1) << 2);
401
402
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403
222k
    out[0] += two58p2;
404
222k
    out[1] += two58m42m2;
405
222k
    out[2] += two58m2;
406
222k
    out[3] += two58m2;
407
408
222k
    out[0] -= in[0];
409
222k
    out[1] -= in[1];
410
222k
    out[2] -= in[2];
411
222k
    out[3] -= in[3];
412
222k
}
413
414
/* Subtract in unreduced 128-bit mode: out -= in */
415
/* Assumes in[i] < 2^119 */
416
static void widefelem_diff(widefelem out, const widefelem in)
417
143k
{
418
143k
    static const widelimb two120 = ((widelimb) 1) << 120;
419
143k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
420
143k
        (((widelimb) 1) << 64);
421
143k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
422
143k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423
424
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425
143k
    out[0] += two120;
426
143k
    out[1] += two120m64;
427
143k
    out[2] += two120m64;
428
143k
    out[3] += two120;
429
143k
    out[4] += two120m104m64;
430
143k
    out[5] += two120m64;
431
143k
    out[6] += two120m64;
432
433
143k
    out[0] -= in[0];
434
143k
    out[1] -= in[1];
435
143k
    out[2] -= in[2];
436
143k
    out[3] -= in[3];
437
143k
    out[4] -= in[4];
438
143k
    out[5] -= in[5];
439
143k
    out[6] -= in[6];
440
143k
}
441
442
/* Subtract in mixed mode: out128 -= in64 */
443
/* in[i] < 2^63 */
444
static void felem_diff_128_64(widefelem out, const felem in)
445
426k
{
446
426k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
447
426k
        (((widelimb) 1) << 8);
448
426k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
449
426k
        (((widelimb) 1) << 8);
450
426k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
451
426k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452
453
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454
426k
    out[0] += two64p8;
455
426k
    out[1] += two64m48m8;
456
426k
    out[2] += two64m8;
457
426k
    out[3] += two64m8;
458
459
426k
    out[0] -= in[0];
460
426k
    out[1] -= in[1];
461
426k
    out[2] -= in[2];
462
426k
    out[3] -= in[3];
463
426k
}
464
465
/*
466
 * Multiply a field element by a scalar: out = out * scalar The scalars we
467
 * actually use are small, so results fit without overflow
468
 */
469
static void felem_scalar(felem out, const limb scalar)
470
300k
{
471
300k
    out[0] *= scalar;
472
300k
    out[1] *= scalar;
473
300k
    out[2] *= scalar;
474
300k
    out[3] *= scalar;
475
300k
}
476
477
/*
478
 * Multiply an unreduced field element by a scalar: out = out * scalar The
479
 * scalars we actually use are small, so results fit without overflow
480
 */
481
static void widefelem_scalar(widefelem out, const widelimb scalar)
482
78.7k
{
483
78.7k
    out[0] *= scalar;
484
78.7k
    out[1] *= scalar;
485
78.7k
    out[2] *= scalar;
486
78.7k
    out[3] *= scalar;
487
78.7k
    out[4] *= scalar;
488
78.7k
    out[5] *= scalar;
489
78.7k
    out[6] *= scalar;
490
78.7k
}
491
492
/* Square a field element: out = in^2 */
493
static void felem_square(widefelem out, const felem in)
494
1.28M
{
495
1.28M
    limb tmp0, tmp1, tmp2;
496
1.28M
    tmp0 = 2 * in[0];
497
1.28M
    tmp1 = 2 * in[1];
498
1.28M
    tmp2 = 2 * in[2];
499
1.28M
    out[0] = ((widelimb) in[0]) * in[0];
500
1.28M
    out[1] = ((widelimb) in[0]) * tmp1;
501
1.28M
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502
1.28M
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
503
1.28M
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
504
1.28M
    out[5] = ((widelimb) in[3]) * tmp2;
505
1.28M
    out[6] = ((widelimb) in[3]) * in[3];
506
1.28M
}
507
508
/* Multiply two field elements: out = in1 * in2 */
509
static void felem_mul(widefelem out, const felem in1, const felem in2)
510
843k
{
511
843k
    out[0] = ((widelimb) in1[0]) * in2[0];
512
843k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
513
843k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
514
843k
             ((widelimb) in1[2]) * in2[0];
515
843k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
516
843k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
517
843k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
518
843k
             ((widelimb) in1[3]) * in2[1];
519
843k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
520
843k
    out[6] = ((widelimb) in1[3]) * in2[3];
521
843k
}
522
523
/*-
524
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
525
 * Requires in[i] < 2^126,
526
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527
static void felem_reduce(felem out, const widefelem in)
528
1.99M
{
529
1.99M
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
530
1.99M
        (((widelimb) 1) << 15);
531
1.99M
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
532
1.99M
        (((widelimb) 1) << 71);
533
1.99M
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
534
1.99M
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535
1.99M
    widelimb output[5];
536
537
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
538
1.99M
    output[0] = in[0] + two127p15;
539
1.99M
    output[1] = in[1] + two127m71m55;
540
1.99M
    output[2] = in[2] + two127m71;
541
1.99M
    output[3] = in[3];
542
1.99M
    output[4] = in[4];
543
544
    /* Eliminate in[4], in[5], in[6] */
545
1.99M
    output[4] += in[6] >> 16;
546
1.99M
    output[3] += (in[6] & 0xffff) << 40;
547
1.99M
    output[2] -= in[6];
548
549
1.99M
    output[3] += in[5] >> 16;
550
1.99M
    output[2] += (in[5] & 0xffff) << 40;
551
1.99M
    output[1] -= in[5];
552
553
1.99M
    output[2] += output[4] >> 16;
554
1.99M
    output[1] += (output[4] & 0xffff) << 40;
555
1.99M
    output[0] -= output[4];
556
557
    /* Carry 2 -> 3 -> 4 */
558
1.99M
    output[3] += output[2] >> 56;
559
1.99M
    output[2] &= 0x00ffffffffffffff;
560
561
1.99M
    output[4] = output[3] >> 56;
562
1.99M
    output[3] &= 0x00ffffffffffffff;
563
564
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565
566
    /* Eliminate output[4] */
567
1.99M
    output[2] += output[4] >> 16;
568
    /* output[2] < 2^56 + 2^56 = 2^57 */
569
1.99M
    output[1] += (output[4] & 0xffff) << 40;
570
1.99M
    output[0] -= output[4];
571
572
    /* Carry 0 -> 1 -> 2 -> 3 */
573
1.99M
    output[1] += output[0] >> 56;
574
1.99M
    out[0] = output[0] & 0x00ffffffffffffff;
575
576
1.99M
    output[2] += output[1] >> 56;
577
    /* output[2] < 2^57 + 2^72 */
578
1.99M
    out[1] = output[1] & 0x00ffffffffffffff;
579
1.99M
    output[3] += output[2] >> 56;
580
    /* output[3] <= 2^56 + 2^16 */
581
1.99M
    out[2] = output[2] & 0x00ffffffffffffff;
582
583
    /*-
584
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585
     * out[3] <= 2^56 + 2^16 (due to final carry),
586
     * so out < 2*p
587
     */
588
1.99M
    out[3] = output[3];
589
1.99M
}
590
591
static void felem_square_reduce(felem out, const felem in)
592
0
{
593
0
    widefelem tmp;
594
0
    felem_square(tmp, in);
595
0
    felem_reduce(out, tmp);
596
0
}
597
598
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599
0
{
600
0
    widefelem tmp;
601
0
    felem_mul(tmp, in1, in2);
602
0
    felem_reduce(out, tmp);
603
0
}
604
605
/*
606
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607
 * call felem_reduce first)
608
 */
609
static void felem_contract(felem out, const felem in)
610
9.68k
{
611
9.68k
    static const int64_t two56 = ((limb) 1) << 56;
612
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
614
9.68k
    int64_t tmp[4], a;
615
9.68k
    tmp[0] = in[0];
616
9.68k
    tmp[1] = in[1];
617
9.68k
    tmp[2] = in[2];
618
9.68k
    tmp[3] = in[3];
619
    /* Case 1: a = 1 iff in >= 2^224 */
620
9.68k
    a = (in[3] >> 56);
621
9.68k
    tmp[0] -= a;
622
9.68k
    tmp[1] += a << 40;
623
9.68k
    tmp[3] &= 0x00ffffffffffffff;
624
    /*
625
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626
     * and the lower part is non-zero
627
     */
628
9.68k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
629
9.68k
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
630
9.68k
    a &= 0x00ffffffffffffff;
631
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632
9.68k
    a = (a - 1) >> 63;
633
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
634
9.68k
    tmp[3] &= a ^ 0xffffffffffffffff;
635
9.68k
    tmp[2] &= a ^ 0xffffffffffffffff;
636
9.68k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637
9.68k
    tmp[0] -= 1 & a;
638
639
    /*
640
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641
     * non-zero, so we only need one step
642
     */
643
9.68k
    a = tmp[0] >> 63;
644
9.68k
    tmp[0] += two56 & a;
645
9.68k
    tmp[1] -= 1 & a;
646
647
    /* carry 1 -> 2 -> 3 */
648
9.68k
    tmp[2] += tmp[1] >> 56;
649
9.68k
    tmp[1] &= 0x00ffffffffffffff;
650
651
9.68k
    tmp[3] += tmp[2] >> 56;
652
9.68k
    tmp[2] &= 0x00ffffffffffffff;
653
654
    /* Now 0 <= out < p */
655
9.68k
    out[0] = tmp[0];
656
9.68k
    out[1] = tmp[1];
657
9.68k
    out[2] = tmp[2];
658
9.68k
    out[3] = tmp[3];
659
9.68k
}
660
661
/*
662
 * Get negative value: out = -in
663
 * Requires in[i] < 2^63,
664
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665
 */
666
static void felem_neg(felem out, const felem in)
667
10.4k
{
668
10.4k
    widefelem tmp;
669
670
10.4k
    memset(tmp, 0, sizeof(tmp));
671
10.4k
    felem_diff_128_64(tmp, in);
672
10.4k
    felem_reduce(out, tmp);
673
10.4k
}
674
675
/*
676
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677
 * elements are reduced to in < 2^225, so we only need to check three cases:
678
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679
 */
680
static limb felem_is_zero(const felem in)
681
258k
{
682
258k
    limb zero, two224m96p1, two225m97p2;
683
684
258k
    zero = in[0] | in[1] | in[2] | in[3];
685
258k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
686
258k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
687
258k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
688
258k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
689
258k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
690
258k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
691
258k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
692
258k
    return (zero | two224m96p1 | two225m97p2);
693
258k
}
694
695
static int felem_is_zero_int(const void *in)
696
0
{
697
0
    return (int)(felem_is_zero(in) & ((limb) 1));
698
0
}
699
700
/* Invert a field element */
701
/* Computation chain copied from djb's code */
702
static void felem_inv(felem out, const felem in)
703
3.05k
{
704
3.05k
    felem ftmp, ftmp2, ftmp3, ftmp4;
705
3.05k
    widefelem tmp;
706
3.05k
    unsigned i;
707
708
3.05k
    felem_square(tmp, in);
709
3.05k
    felem_reduce(ftmp, tmp);    /* 2 */
710
3.05k
    felem_mul(tmp, in, ftmp);
711
3.05k
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
712
3.05k
    felem_square(tmp, ftmp);
713
3.05k
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
714
3.05k
    felem_mul(tmp, in, ftmp);
715
3.05k
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
716
3.05k
    felem_square(tmp, ftmp);
717
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
718
3.05k
    felem_square(tmp, ftmp2);
719
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
720
3.05k
    felem_square(tmp, ftmp2);
721
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
722
3.05k
    felem_mul(tmp, ftmp2, ftmp);
723
3.05k
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
724
3.05k
    felem_square(tmp, ftmp);
725
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
726
18.3k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
727
15.2k
        felem_square(tmp, ftmp2);
728
15.2k
        felem_reduce(ftmp2, tmp);
729
15.2k
    }
730
3.05k
    felem_mul(tmp, ftmp2, ftmp);
731
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
732
3.05k
    felem_square(tmp, ftmp2);
733
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
734
36.6k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
735
33.6k
        felem_square(tmp, ftmp3);
736
33.6k
        felem_reduce(ftmp3, tmp);
737
33.6k
    }
738
3.05k
    felem_mul(tmp, ftmp3, ftmp2);
739
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
740
3.05k
    felem_square(tmp, ftmp2);
741
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
742
73.3k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
743
70.3k
        felem_square(tmp, ftmp3);
744
70.3k
        felem_reduce(ftmp3, tmp);
745
70.3k
    }
746
3.05k
    felem_mul(tmp, ftmp3, ftmp2);
747
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
748
3.05k
    felem_square(tmp, ftmp3);
749
3.05k
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
750
146k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
751
143k
        felem_square(tmp, ftmp4);
752
143k
        felem_reduce(ftmp4, tmp);
753
143k
    }
754
3.05k
    felem_mul(tmp, ftmp3, ftmp4);
755
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
756
3.05k
    felem_square(tmp, ftmp3);
757
3.05k
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
758
73.3k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
759
70.3k
        felem_square(tmp, ftmp4);
760
70.3k
        felem_reduce(ftmp4, tmp);
761
70.3k
    }
762
3.05k
    felem_mul(tmp, ftmp2, ftmp4);
763
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
764
21.4k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
765
18.3k
        felem_square(tmp, ftmp2);
766
18.3k
        felem_reduce(ftmp2, tmp);
767
18.3k
    }
768
3.05k
    felem_mul(tmp, ftmp2, ftmp);
769
3.05k
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
770
3.05k
    felem_square(tmp, ftmp);
771
3.05k
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
772
3.05k
    felem_mul(tmp, ftmp, in);
773
3.05k
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
774
299k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
775
296k
        felem_square(tmp, ftmp);
776
296k
        felem_reduce(ftmp, tmp);
777
296k
    }
778
3.05k
    felem_mul(tmp, ftmp, ftmp3);
779
3.05k
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
780
3.05k
}
781
782
/*
783
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784
 * out to itself.
785
 */
786
static void copy_conditional(felem out, const felem in, limb icopy)
787
397k
{
788
397k
    unsigned i;
789
    /*
790
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791
     */
792
397k
    const limb copy = -icopy;
793
1.98M
    for (i = 0; i < 4; ++i) {
794
1.59M
        const limb tmp = copy & (in[i] ^ out[i]);
795
1.59M
        out[i] ^= tmp;
796
1.59M
    }
797
397k
}
798
799
/******************************************************************************/
800
/*-
801
 *                       ELLIPTIC CURVE POINT OPERATIONS
802
 *
803
 * Points are represented in Jacobian projective coordinates:
804
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805
 * or to the point at infinity if Z == 0.
806
 *
807
 */
808
809
/*-
810
 * Double an elliptic curve point:
811
 * (X', Y', Z') = 2 * (X, Y, Z), where
812
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816
 * while x_out == y_in is not (maybe this works, but it's not tested).
817
 */
818
static void
819
point_double(felem x_out, felem y_out, felem z_out,
820
             const felem x_in, const felem y_in, const felem z_in)
821
78.7k
{
822
78.7k
    widefelem tmp, tmp2;
823
78.7k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825
78.7k
    felem_assign(ftmp, x_in);
826
78.7k
    felem_assign(ftmp2, x_in);
827
828
    /* delta = z^2 */
829
78.7k
    felem_square(tmp, z_in);
830
78.7k
    felem_reduce(delta, tmp);
831
832
    /* gamma = y^2 */
833
78.7k
    felem_square(tmp, y_in);
834
78.7k
    felem_reduce(gamma, tmp);
835
836
    /* beta = x*gamma */
837
78.7k
    felem_mul(tmp, x_in, gamma);
838
78.7k
    felem_reduce(beta, tmp);
839
840
    /* alpha = 3*(x-delta)*(x+delta) */
841
78.7k
    felem_diff(ftmp, delta);
842
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
843
78.7k
    felem_sum(ftmp2, delta);
844
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
845
78.7k
    felem_scalar(ftmp2, 3);
846
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
847
78.7k
    felem_mul(tmp, ftmp, ftmp2);
848
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
849
78.7k
    felem_reduce(alpha, tmp);
850
851
    /* x' = alpha^2 - 8*beta */
852
78.7k
    felem_square(tmp, alpha);
853
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
854
78.7k
    felem_assign(ftmp, beta);
855
78.7k
    felem_scalar(ftmp, 8);
856
    /* ftmp[i] < 8 * 2^57 = 2^60 */
857
78.7k
    felem_diff_128_64(tmp, ftmp);
858
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
859
78.7k
    felem_reduce(x_out, tmp);
860
861
    /* z' = (y + z)^2 - gamma - delta */
862
78.7k
    felem_sum(delta, gamma);
863
    /* delta[i] < 2^57 + 2^57 = 2^58 */
864
78.7k
    felem_assign(ftmp, y_in);
865
78.7k
    felem_sum(ftmp, z_in);
866
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
867
78.7k
    felem_square(tmp, ftmp);
868
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
869
78.7k
    felem_diff_128_64(tmp, delta);
870
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
871
78.7k
    felem_reduce(z_out, tmp);
872
873
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
874
78.7k
    felem_scalar(beta, 4);
875
    /* beta[i] < 4 * 2^57 = 2^59 */
876
78.7k
    felem_diff(beta, x_out);
877
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
878
78.7k
    felem_mul(tmp, alpha, beta);
879
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
880
78.7k
    felem_square(tmp2, gamma);
881
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
882
78.7k
    widefelem_scalar(tmp2, 8);
883
    /* tmp2[i] < 8 * 2^116 = 2^119 */
884
78.7k
    widefelem_diff(tmp, tmp2);
885
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
886
78.7k
    felem_reduce(y_out, tmp);
887
78.7k
}
888
889
/*-
890
 * Add two elliptic curve points:
891
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897
 *
898
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899
 */
900
901
/*
902
 * This function is not entirely constant-time: it includes a branch for
903
 * checking whether the two input points are equal, (while not equal to the
904
 * point at infinity). This case never happens during single point
905
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906
 */
907
static void point_add(felem x3, felem y3, felem z3,
908
                      const felem x1, const felem y1, const felem z1,
909
                      const int mixed, const felem x2, const felem y2,
910
                      const felem z2)
911
64.5k
{
912
64.5k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913
64.5k
    widefelem tmp, tmp2;
914
64.5k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915
64.5k
    limb points_equal;
916
917
64.5k
    if (!mixed) {
918
        /* ftmp2 = z2^2 */
919
11.8k
        felem_square(tmp, z2);
920
11.8k
        felem_reduce(ftmp2, tmp);
921
922
        /* ftmp4 = z2^3 */
923
11.8k
        felem_mul(tmp, ftmp2, z2);
924
11.8k
        felem_reduce(ftmp4, tmp);
925
926
        /* ftmp4 = z2^3*y1 */
927
11.8k
        felem_mul(tmp2, ftmp4, y1);
928
11.8k
        felem_reduce(ftmp4, tmp2);
929
930
        /* ftmp2 = z2^2*x1 */
931
11.8k
        felem_mul(tmp2, ftmp2, x1);
932
11.8k
        felem_reduce(ftmp2, tmp2);
933
52.7k
    } else {
934
        /*
935
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
936
         */
937
938
        /* ftmp4 = z2^3*y1 */
939
52.7k
        felem_assign(ftmp4, y1);
940
941
        /* ftmp2 = z2^2*x1 */
942
52.7k
        felem_assign(ftmp2, x1);
943
52.7k
    }
944
945
    /* ftmp = z1^2 */
946
64.5k
    felem_square(tmp, z1);
947
64.5k
    felem_reduce(ftmp, tmp);
948
949
    /* ftmp3 = z1^3 */
950
64.5k
    felem_mul(tmp, ftmp, z1);
951
64.5k
    felem_reduce(ftmp3, tmp);
952
953
    /* tmp = z1^3*y2 */
954
64.5k
    felem_mul(tmp, ftmp3, y2);
955
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
958
64.5k
    felem_diff_128_64(tmp, ftmp4);
959
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960
64.5k
    felem_reduce(ftmp3, tmp);
961
962
    /* tmp = z1^2*x2 */
963
64.5k
    felem_mul(tmp, ftmp, x2);
964
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966
    /* ftmp = z1^2*x2 - z2^2*x1 */
967
64.5k
    felem_diff_128_64(tmp, ftmp2);
968
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969
64.5k
    felem_reduce(ftmp, tmp);
970
971
    /*
972
     * The formulae are incorrect if the points are equal, in affine coordinates
973
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
974
     * happens.
975
     *
976
     * We use bitwise operations to avoid potential side-channels introduced by
977
     * the short-circuiting behaviour of boolean operators.
978
     */
979
64.5k
    x_equal = felem_is_zero(ftmp);
980
64.5k
    y_equal = felem_is_zero(ftmp3);
981
    /*
982
     * The special case of either point being the point at infinity (z1 and/or
983
     * z2 are zero), is handled separately later on in this function, so we
984
     * avoid jumping to point_double here in those special cases.
985
     */
986
64.5k
    z1_is_zero = felem_is_zero(z1);
987
64.5k
    z2_is_zero = felem_is_zero(z2);
988
989
    /*
990
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
991
     * specific implementation `felem_is_zero()` returns truth as `0x1`
992
     * (rather than `0xff..ff`).
993
     *
994
     * This implies that `~true` in this implementation becomes
995
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
996
     * the if expression, we mask out only the last bit in the next
997
     * line.
998
     */
999
64.5k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001
64.5k
    if (points_equal) {
1002
        /*
1003
         * This is obviously not constant-time but, as mentioned before, this
1004
         * case never happens during single point multiplication, so there is no
1005
         * timing leak for ECDH or ECDSA signing.
1006
         */
1007
0
        point_double(x3, y3, z3, x1, y1, z1);
1008
0
        return;
1009
0
    }
1010
1011
    /* ftmp5 = z1*z2 */
1012
64.5k
    if (!mixed) {
1013
11.8k
        felem_mul(tmp, z1, z2);
1014
11.8k
        felem_reduce(ftmp5, tmp);
1015
52.7k
    } else {
1016
        /* special case z2 = 0 is handled later */
1017
52.7k
        felem_assign(ftmp5, z1);
1018
52.7k
    }
1019
1020
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021
64.5k
    felem_mul(tmp, ftmp, ftmp5);
1022
64.5k
    felem_reduce(z_out, tmp);
1023
1024
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025
64.5k
    felem_assign(ftmp5, ftmp);
1026
64.5k
    felem_square(tmp, ftmp);
1027
64.5k
    felem_reduce(ftmp, tmp);
1028
1029
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030
64.5k
    felem_mul(tmp, ftmp, ftmp5);
1031
64.5k
    felem_reduce(ftmp5, tmp);
1032
1033
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034
64.5k
    felem_mul(tmp, ftmp2, ftmp);
1035
64.5k
    felem_reduce(ftmp2, tmp);
1036
1037
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038
64.5k
    felem_mul(tmp, ftmp4, ftmp5);
1039
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042
64.5k
    felem_square(tmp2, ftmp3);
1043
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046
64.5k
    felem_diff_128_64(tmp2, ftmp5);
1047
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050
64.5k
    felem_assign(ftmp5, ftmp2);
1051
64.5k
    felem_scalar(ftmp5, 2);
1052
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054
    /*-
1055
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057
     */
1058
64.5k
    felem_diff_128_64(tmp2, ftmp5);
1059
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060
64.5k
    felem_reduce(x_out, tmp2);
1061
1062
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063
64.5k
    felem_diff(ftmp2, x_out);
1064
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066
    /*
1067
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068
     */
1069
64.5k
    felem_mul(tmp2, ftmp3, ftmp2);
1070
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072
    /*-
1073
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075
     */
1076
64.5k
    widefelem_diff(tmp2, tmp);
1077
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078
64.5k
    felem_reduce(y_out, tmp2);
1079
1080
    /*
1081
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082
     * the point at infinity, so we need to check for this separately
1083
     */
1084
1085
    /*
1086
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1087
     */
1088
64.5k
    copy_conditional(x_out, x2, z1_is_zero);
1089
64.5k
    copy_conditional(x_out, x1, z2_is_zero);
1090
64.5k
    copy_conditional(y_out, y2, z1_is_zero);
1091
64.5k
    copy_conditional(y_out, y1, z2_is_zero);
1092
64.5k
    copy_conditional(z_out, z2, z1_is_zero);
1093
64.5k
    copy_conditional(z_out, z1, z2_is_zero);
1094
64.5k
    felem_assign(x3, x_out);
1095
64.5k
    felem_assign(y3, y_out);
1096
64.5k
    felem_assign(z3, z_out);
1097
64.5k
}
1098
1099
/*
1100
 * select_point selects the |idx|th point from a precomputation table and
1101
 * copies it to out.
1102
 * The pre_comp array argument should be size of |size| argument
1103
 */
1104
static void select_point(const u64 idx, unsigned int size,
1105
                         const felem pre_comp[][3], felem out[3])
1106
64.1k
{
1107
64.1k
    unsigned i, j;
1108
64.1k
    limb *outlimbs = &out[0][0];
1109
1110
64.1k
    memset(out, 0, sizeof(*out) * 3);
1111
1.10M
    for (i = 0; i < size; i++) {
1112
1.03M
        const limb *inlimbs = &pre_comp[i][0][0];
1113
1.03M
        u64 mask = i ^ idx;
1114
1.03M
        mask |= mask >> 4;
1115
1.03M
        mask |= mask >> 2;
1116
1.03M
        mask |= mask >> 1;
1117
1.03M
        mask &= 1;
1118
1.03M
        mask--;
1119
13.4M
        for (j = 0; j < 4 * 3; j++)
1120
12.4M
            outlimbs[j] |= inlimbs[j] & mask;
1121
1.03M
    }
1122
64.1k
}
1123
1124
/* get_bit returns the |i|th bit in |in| */
1125
static char get_bit(const felem_bytearray in, unsigned i)
1126
277k
{
1127
277k
    if (i >= 224)
1128
464
        return 0;
1129
276k
    return (in[i >> 3] >> (i & 7)) & 1;
1130
277k
}
1131
1132
/*
1133
 * Interleaved point multiplication using precomputed point multiples: The
1134
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1137
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138
 */
1139
static void batch_mul(felem x_out, felem y_out, felem z_out,
1140
                      const felem_bytearray scalars[],
1141
                      const unsigned num_points, const u8 *g_scalar,
1142
                      const int mixed, const felem pre_comp[][17][3],
1143
                      const felem g_pre_comp[2][16][3])
1144
1.19k
{
1145
1.19k
    int i, skip;
1146
1.19k
    unsigned num;
1147
1.19k
    unsigned gen_mul = (g_scalar != NULL);
1148
1.19k
    felem nq[3], tmp[4];
1149
1.19k
    u64 bits;
1150
1.19k
    u8 sign, digit;
1151
1152
    /* set nq to the point at infinity */
1153
1.19k
    memset(nq, 0, sizeof(nq));
1154
1155
    /*
1156
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157
     * of the generator (two in each of the last 28 rounds) and additions of
1158
     * other points multiples (every 5th round).
1159
     */
1160
1.19k
    skip = 1;                   /* save two point operations in the first
1161
                                 * round */
1162
79.3k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163
        /* double */
1164
78.1k
        if (!skip)
1165
76.9k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167
        /* add multiples of the generator */
1168
78.1k
        if (gen_mul && (i <= 27)) {
1169
            /* first, look 28 bits upwards */
1170
26.8k
            bits = get_bit(g_scalar, i + 196) << 3;
1171
26.8k
            bits |= get_bit(g_scalar, i + 140) << 2;
1172
26.8k
            bits |= get_bit(g_scalar, i + 84) << 1;
1173
26.8k
            bits |= get_bit(g_scalar, i + 28);
1174
            /* select the point to add, in constant time */
1175
26.8k
            select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177
26.8k
            if (!skip) {
1178
                /* value 1 below is argument for "mixed" */
1179
25.8k
                point_add(nq[0], nq[1], nq[2],
1180
25.8k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181
25.8k
            } else {
1182
959
                memcpy(nq, tmp, 3 * sizeof(felem));
1183
959
                skip = 0;
1184
959
            }
1185
1186
            /* second, look at the current position */
1187
26.8k
            bits = get_bit(g_scalar, i + 168) << 3;
1188
26.8k
            bits |= get_bit(g_scalar, i + 112) << 2;
1189
26.8k
            bits |= get_bit(g_scalar, i + 56) << 1;
1190
26.8k
            bits |= get_bit(g_scalar, i);
1191
            /* select the point to add, in constant time */
1192
26.8k
            select_point(bits, 16, g_pre_comp[0], tmp);
1193
26.8k
            point_add(nq[0], nq[1], nq[2],
1194
26.8k
                      nq[0], nq[1], nq[2],
1195
26.8k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196
26.8k
        }
1197
1198
        /* do other additions every 5 doublings */
1199
78.1k
        if (num_points && (i % 5 == 0)) {
1200
            /* loop over all scalars */
1201
20.8k
            for (num = 0; num < num_points; ++num) {
1202
10.4k
                bits = get_bit(scalars[num], i + 4) << 5;
1203
10.4k
                bits |= get_bit(scalars[num], i + 3) << 4;
1204
10.4k
                bits |= get_bit(scalars[num], i + 2) << 3;
1205
10.4k
                bits |= get_bit(scalars[num], i + 1) << 2;
1206
10.4k
                bits |= get_bit(scalars[num], i) << 1;
1207
10.4k
                bits |= get_bit(scalars[num], i - 1);
1208
10.4k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210
                /* select the point to add or subtract */
1211
10.4k
                select_point(digit, 17, pre_comp[num], tmp);
1212
10.4k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213
                                            * point */
1214
10.4k
                copy_conditional(tmp[1], tmp[3], sign);
1215
1216
10.4k
                if (!skip) {
1217
10.2k
                    point_add(nq[0], nq[1], nq[2],
1218
10.2k
                              nq[0], nq[1], nq[2],
1219
10.2k
                              mixed, tmp[0], tmp[1], tmp[2]);
1220
10.2k
                } else {
1221
232
                    memcpy(nq, tmp, 3 * sizeof(felem));
1222
232
                    skip = 0;
1223
232
                }
1224
10.4k
            }
1225
10.4k
        }
1226
78.1k
    }
1227
1.19k
    felem_assign(x_out, nq[0]);
1228
1.19k
    felem_assign(y_out, nq[1]);
1229
1.19k
    felem_assign(z_out, nq[2]);
1230
1.19k
}
1231
1232
/******************************************************************************/
1233
/*
1234
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1235
 */
1236
1237
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238
0
{
1239
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241
0
    if (!ret) {
1242
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243
0
        return ret;
1244
0
    }
1245
1246
0
    ret->references = 1;
1247
1248
0
    ret->lock = CRYPTO_THREAD_lock_new();
1249
0
    if (ret->lock == NULL) {
1250
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251
0
        OPENSSL_free(ret);
1252
0
        return NULL;
1253
0
    }
1254
0
    return ret;
1255
0
}
1256
1257
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258
0
{
1259
0
    int i;
1260
0
    if (p != NULL)
1261
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1262
0
    return p;
1263
0
}
1264
1265
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266
0
{
1267
0
    int i;
1268
1269
0
    if (p == NULL)
1270
0
        return;
1271
1272
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273
0
    REF_PRINT_COUNT("EC_nistp224", p);
1274
0
    if (i > 0)
1275
0
        return;
1276
0
    REF_ASSERT_ISNT(i < 0);
1277
1278
0
    CRYPTO_THREAD_lock_free(p->lock);
1279
0
    OPENSSL_free(p);
1280
0
}
1281
1282
/******************************************************************************/
1283
/*
1284
 * OPENSSL EC_METHOD FUNCTIONS
1285
 */
1286
1287
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288
54.4k
{
1289
54.4k
    int ret;
1290
54.4k
    ret = ossl_ec_GFp_simple_group_init(group);
1291
54.4k
    group->a_is_minus3 = 1;
1292
54.4k
    return ret;
1293
54.4k
}
1294
1295
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296
                                         const BIGNUM *a, const BIGNUM *b,
1297
                                         BN_CTX *ctx)
1298
28.2k
{
1299
28.2k
    int ret = 0;
1300
28.2k
    BIGNUM *curve_p, *curve_a, *curve_b;
1301
28.2k
#ifndef FIPS_MODULE
1302
28.2k
    BN_CTX *new_ctx = NULL;
1303
1304
28.2k
    if (ctx == NULL)
1305
0
        ctx = new_ctx = BN_CTX_new();
1306
28.2k
#endif
1307
28.2k
    if (ctx == NULL)
1308
0
        return 0;
1309
1310
28.2k
    BN_CTX_start(ctx);
1311
28.2k
    curve_p = BN_CTX_get(ctx);
1312
28.2k
    curve_a = BN_CTX_get(ctx);
1313
28.2k
    curve_b = BN_CTX_get(ctx);
1314
28.2k
    if (curve_b == NULL)
1315
0
        goto err;
1316
28.2k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317
28.2k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318
28.2k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319
28.2k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321
0
        goto err;
1322
0
    }
1323
28.2k
    group->field_mod_func = BN_nist_mod_224;
1324
28.2k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325
28.2k
 err:
1326
28.2k
    BN_CTX_end(ctx);
1327
28.2k
#ifndef FIPS_MODULE
1328
28.2k
    BN_CTX_free(new_ctx);
1329
28.2k
#endif
1330
28.2k
    return ret;
1331
28.2k
}
1332
1333
/*
1334
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335
 * (X/Z^2, Y/Z^3)
1336
 */
1337
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338
                                                      const EC_POINT *point,
1339
                                                      BIGNUM *x, BIGNUM *y,
1340
                                                      BN_CTX *ctx)
1341
3.05k
{
1342
3.05k
    felem z1, z2, x_in, y_in, x_out, y_out;
1343
3.05k
    widefelem tmp;
1344
1345
3.05k
    if (EC_POINT_is_at_infinity(group, point)) {
1346
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347
0
        return 0;
1348
0
    }
1349
3.05k
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350
3.05k
        (!BN_to_felem(z1, point->Z)))
1351
0
        return 0;
1352
3.05k
    felem_inv(z2, z1);
1353
3.05k
    felem_square(tmp, z2);
1354
3.05k
    felem_reduce(z1, tmp);
1355
3.05k
    felem_mul(tmp, x_in, z1);
1356
3.05k
    felem_reduce(x_in, tmp);
1357
3.05k
    felem_contract(x_out, x_in);
1358
3.05k
    if (x != NULL) {
1359
3.05k
        if (!felem_to_BN(x, x_out)) {
1360
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361
0
            return 0;
1362
0
        }
1363
3.05k
    }
1364
3.05k
    felem_mul(tmp, z1, z2);
1365
3.05k
    felem_reduce(z1, tmp);
1366
3.05k
    felem_mul(tmp, y_in, z1);
1367
3.05k
    felem_reduce(y_in, tmp);
1368
3.05k
    felem_contract(y_out, y_in);
1369
3.05k
    if (y != NULL) {
1370
3.05k
        if (!felem_to_BN(y, y_out)) {
1371
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372
0
            return 0;
1373
0
        }
1374
3.05k
    }
1375
3.05k
    return 1;
1376
3.05k
}
1377
1378
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379
                               felem tmp_felems[ /* num+1 */ ])
1380
0
{
1381
    /*
1382
     * Runs in constant time, unless an input is the point at infinity (which
1383
     * normally shouldn't happen).
1384
     */
1385
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386
0
                                                  points,
1387
0
                                                  sizeof(felem),
1388
0
                                                  tmp_felems,
1389
0
                                                  (void (*)(void *))felem_one,
1390
0
                                                  felem_is_zero_int,
1391
0
                                                  (void (*)(void *, const void *))
1392
0
                                                  felem_assign,
1393
0
                                                  (void (*)(void *, const void *))
1394
0
                                                  felem_square_reduce, (void (*)
1395
0
                                                                        (void *,
1396
0
                                                                         const void
1397
0
                                                                         *,
1398
0
                                                                         const void
1399
0
                                                                         *))
1400
0
                                                  felem_mul_reduce,
1401
0
                                                  (void (*)(void *, const void *))
1402
0
                                                  felem_inv,
1403
0
                                                  (void (*)(void *, const void *))
1404
0
                                                  felem_contract);
1405
0
}
1406
1407
/*
1408
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409
 * values Result is stored in r (r can equal one of the inputs).
1410
 */
1411
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412
                                    const BIGNUM *scalar, size_t num,
1413
                                    const EC_POINT *points[],
1414
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1415
1.19k
{
1416
1.19k
    int ret = 0;
1417
1.19k
    int j;
1418
1.19k
    unsigned i;
1419
1.19k
    int mixed = 0;
1420
1.19k
    BIGNUM *x, *y, *z, *tmp_scalar;
1421
1.19k
    felem_bytearray g_secret;
1422
1.19k
    felem_bytearray *secrets = NULL;
1423
1.19k
    felem (*pre_comp)[17][3] = NULL;
1424
1.19k
    felem *tmp_felems = NULL;
1425
1.19k
    int num_bytes;
1426
1.19k
    int have_pre_comp = 0;
1427
1.19k
    size_t num_points = num;
1428
1.19k
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1429
1.19k
    NISTP224_PRE_COMP *pre = NULL;
1430
1.19k
    const felem(*g_pre_comp)[16][3] = NULL;
1431
1.19k
    EC_POINT *generator = NULL;
1432
1.19k
    const EC_POINT *p = NULL;
1433
1.19k
    const BIGNUM *p_scalar = NULL;
1434
1435
1.19k
    BN_CTX_start(ctx);
1436
1.19k
    x = BN_CTX_get(ctx);
1437
1.19k
    y = BN_CTX_get(ctx);
1438
1.19k
    z = BN_CTX_get(ctx);
1439
1.19k
    tmp_scalar = BN_CTX_get(ctx);
1440
1.19k
    if (tmp_scalar == NULL)
1441
0
        goto err;
1442
1443
1.19k
    if (scalar != NULL) {
1444
959
        pre = group->pre_comp.nistp224;
1445
959
        if (pre)
1446
            /* we have precomputation, try to use it */
1447
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448
959
        else
1449
            /* try to use the standard precomputation */
1450
959
            g_pre_comp = &gmul[0];
1451
959
        generator = EC_POINT_new(group);
1452
959
        if (generator == NULL)
1453
0
            goto err;
1454
        /* get the generator from precomputation */
1455
959
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456
959
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457
959
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459
0
            goto err;
1460
0
        }
1461
959
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462
959
                                                                generator,
1463
959
                                                                x, y, z, ctx))
1464
0
            goto err;
1465
959
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466
            /* precomputation matches generator */
1467
959
            have_pre_comp = 1;
1468
0
        else
1469
            /*
1470
             * we don't have valid precomputation: treat the generator as a
1471
             * random point
1472
             */
1473
0
            num_points = num_points + 1;
1474
959
    }
1475
1476
1.19k
    if (num_points > 0) {
1477
232
        if (num_points >= 3) {
1478
            /*
1479
             * unless we precompute multiples for just one or two points,
1480
             * converting those into affine form is time well spent
1481
             */
1482
0
            mixed = 1;
1483
0
        }
1484
232
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485
232
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486
232
        if (mixed)
1487
0
            tmp_felems =
1488
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489
232
        if ((secrets == NULL) || (pre_comp == NULL)
1490
232
            || (mixed && (tmp_felems == NULL))) {
1491
0
            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492
0
            goto err;
1493
0
        }
1494
1495
        /*
1496
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1497
         * i.e., they contribute nothing to the linear combination
1498
         */
1499
464
        for (i = 0; i < num_points; ++i) {
1500
232
            if (i == num) {
1501
                /* the generator */
1502
0
                p = EC_GROUP_get0_generator(group);
1503
0
                p_scalar = scalar;
1504
232
            } else {
1505
                /* the i^th point */
1506
232
                p = points[i];
1507
232
                p_scalar = scalars[i];
1508
232
            }
1509
232
            if ((p_scalar != NULL) && (p != NULL)) {
1510
                /* reduce scalar to 0 <= scalar < 2^224 */
1511
232
                if ((BN_num_bits(p_scalar) > 224)
1512
232
                    || (BN_is_negative(p_scalar))) {
1513
                    /*
1514
                     * this is an unusual input, and we don't guarantee
1515
                     * constant-timeness
1516
                     */
1517
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519
0
                        goto err;
1520
0
                    }
1521
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1522
0
                                               secrets[i], sizeof(secrets[i]));
1523
232
                } else {
1524
232
                    num_bytes = BN_bn2lebinpad(p_scalar,
1525
232
                                               secrets[i], sizeof(secrets[i]));
1526
232
                }
1527
232
                if (num_bytes < 0) {
1528
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529
0
                    goto err;
1530
0
                }
1531
                /* precompute multiples */
1532
232
                if ((!BN_to_felem(x_out, p->X)) ||
1533
232
                    (!BN_to_felem(y_out, p->Y)) ||
1534
232
                    (!BN_to_felem(z_out, p->Z)))
1535
0
                    goto err;
1536
232
                felem_assign(pre_comp[i][1][0], x_out);
1537
232
                felem_assign(pre_comp[i][1][1], y_out);
1538
232
                felem_assign(pre_comp[i][1][2], z_out);
1539
3.71k
                for (j = 2; j <= 16; ++j) {
1540
3.48k
                    if (j & 1) {
1541
1.62k
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542
1.62k
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1543
1.62k
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544
1.62k
                                  pre_comp[i][j - 1][0],
1545
1.62k
                                  pre_comp[i][j - 1][1],
1546
1.62k
                                  pre_comp[i][j - 1][2]);
1547
1.85k
                    } else {
1548
1.85k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549
1.85k
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550
1.85k
                                     pre_comp[i][j / 2][1],
1551
1.85k
                                     pre_comp[i][j / 2][2]);
1552
1.85k
                    }
1553
3.48k
                }
1554
232
            }
1555
232
        }
1556
232
        if (mixed)
1557
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558
232
    }
1559
1560
    /* the scalar for the generator */
1561
1.19k
    if ((scalar != NULL) && (have_pre_comp)) {
1562
959
        memset(g_secret, 0, sizeof(g_secret));
1563
        /* reduce scalar to 0 <= scalar < 2^224 */
1564
959
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565
            /*
1566
             * this is an unusual input, and we don't guarantee
1567
             * constant-timeness
1568
             */
1569
73
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571
0
                goto err;
1572
0
            }
1573
73
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574
886
        } else {
1575
886
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576
886
        }
1577
        /* do the multiplication with generator precomputation */
1578
959
        batch_mul(x_out, y_out, z_out,
1579
959
                  (const felem_bytearray(*))secrets, num_points,
1580
959
                  g_secret,
1581
959
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582
959
    } else {
1583
        /* do the multiplication without generator precomputation */
1584
232
        batch_mul(x_out, y_out, z_out,
1585
232
                  (const felem_bytearray(*))secrets, num_points,
1586
232
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587
232
    }
1588
    /* reduce the output to its unique minimal representation */
1589
1.19k
    felem_contract(x_in, x_out);
1590
1.19k
    felem_contract(y_in, y_out);
1591
1.19k
    felem_contract(z_in, z_out);
1592
1.19k
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593
1.19k
        (!felem_to_BN(z, z_in))) {
1594
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595
0
        goto err;
1596
0
    }
1597
1.19k
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598
1.19k
                                                             ctx);
1599
1600
1.19k
 err:
1601
1.19k
    BN_CTX_end(ctx);
1602
1.19k
    EC_POINT_free(generator);
1603
1.19k
    OPENSSL_free(secrets);
1604
1.19k
    OPENSSL_free(pre_comp);
1605
1.19k
    OPENSSL_free(tmp_felems);
1606
1.19k
    return ret;
1607
1.19k
}
1608
1609
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610
0
{
1611
0
    int ret = 0;
1612
0
    NISTP224_PRE_COMP *pre = NULL;
1613
0
    int i, j;
1614
0
    BIGNUM *x, *y;
1615
0
    EC_POINT *generator = NULL;
1616
0
    felem tmp_felems[32];
1617
0
#ifndef FIPS_MODULE
1618
0
    BN_CTX *new_ctx = NULL;
1619
0
#endif
1620
1621
    /* throw away old precomputation */
1622
0
    EC_pre_comp_free(group);
1623
1624
0
#ifndef FIPS_MODULE
1625
0
    if (ctx == NULL)
1626
0
        ctx = new_ctx = BN_CTX_new();
1627
0
#endif
1628
0
    if (ctx == NULL)
1629
0
        return 0;
1630
1631
0
    BN_CTX_start(ctx);
1632
0
    x = BN_CTX_get(ctx);
1633
0
    y = BN_CTX_get(ctx);
1634
0
    if (y == NULL)
1635
0
        goto err;
1636
    /* get the generator */
1637
0
    if (group->generator == NULL)
1638
0
        goto err;
1639
0
    generator = EC_POINT_new(group);
1640
0
    if (generator == NULL)
1641
0
        goto err;
1642
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645
0
        goto err;
1646
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1647
0
        goto err;
1648
    /*
1649
     * if the generator is the standard one, use built-in precomputation
1650
     */
1651
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653
0
        goto done;
1654
0
    }
1655
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658
0
        goto err;
1659
    /*
1660
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661
     * 2^140*G, 2^196*G for the second one
1662
     */
1663
0
    for (i = 1; i <= 8; i <<= 1) {
1664
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667
0
        for (j = 0; j < 27; ++j) {
1668
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671
0
        }
1672
0
        if (i == 8)
1673
0
            break;
1674
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1675
0
                     pre->g_pre_comp[0][2 * i][1],
1676
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678
0
        for (j = 0; j < 27; ++j) {
1679
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1680
0
                         pre->g_pre_comp[0][2 * i][1],
1681
0
                         pre->g_pre_comp[0][2 * i][2],
1682
0
                         pre->g_pre_comp[0][2 * i][0],
1683
0
                         pre->g_pre_comp[0][2 * i][1],
1684
0
                         pre->g_pre_comp[0][2 * i][2]);
1685
0
        }
1686
0
    }
1687
0
    for (i = 0; i < 2; i++) {
1688
        /* g_pre_comp[i][0] is the point at infinity */
1689
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690
        /* the remaining multiples */
1691
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696
0
                  pre->g_pre_comp[i][2][2]);
1697
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708
0
                  pre->g_pre_comp[i][4][2]);
1709
        /*
1710
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711
         */
1712
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716
0
                  pre->g_pre_comp[i][2][2]);
1717
0
        for (j = 1; j < 8; ++j) {
1718
            /* odd multiples: add G resp. 2^28*G */
1719
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1721
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1722
0
                      pre->g_pre_comp[i][2 * j][0],
1723
0
                      pre->g_pre_comp[i][2 * j][1],
1724
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1725
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726
0
                      pre->g_pre_comp[i][1][2]);
1727
0
        }
1728
0
    }
1729
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731
0
 done:
1732
0
    SETPRECOMP(group, nistp224, pre);
1733
0
    pre = NULL;
1734
0
    ret = 1;
1735
0
 err:
1736
0
    BN_CTX_end(ctx);
1737
0
    EC_POINT_free(generator);
1738
0
#ifndef FIPS_MODULE
1739
0
    BN_CTX_free(new_ctx);
1740
0
#endif
1741
0
    EC_nistp224_pre_comp_free(pre);
1742
0
    return ret;
1743
0
}
1744
1745
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746
0
{
1747
0
    return HAVEPRECOMP(group, nistp224);
1748
0
}