Coverage Report

Created: 2023-09-25 06:43

/src/openssl30/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
    CRYPTO_RWLOCK *lock;
242
};
243
244
const EC_METHOD *EC_GFp_nistp224_method(void)
245
2.09k
{
246
2.09k
    static const EC_METHOD ret = {
247
2.09k
        EC_FLAGS_DEFAULT_OCT,
248
2.09k
        NID_X9_62_prime_field,
249
2.09k
        ossl_ec_GFp_nistp224_group_init,
250
2.09k
        ossl_ec_GFp_simple_group_finish,
251
2.09k
        ossl_ec_GFp_simple_group_clear_finish,
252
2.09k
        ossl_ec_GFp_nist_group_copy,
253
2.09k
        ossl_ec_GFp_nistp224_group_set_curve,
254
2.09k
        ossl_ec_GFp_simple_group_get_curve,
255
2.09k
        ossl_ec_GFp_simple_group_get_degree,
256
2.09k
        ossl_ec_group_simple_order_bits,
257
2.09k
        ossl_ec_GFp_simple_group_check_discriminant,
258
2.09k
        ossl_ec_GFp_simple_point_init,
259
2.09k
        ossl_ec_GFp_simple_point_finish,
260
2.09k
        ossl_ec_GFp_simple_point_clear_finish,
261
2.09k
        ossl_ec_GFp_simple_point_copy,
262
2.09k
        ossl_ec_GFp_simple_point_set_to_infinity,
263
2.09k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
264
2.09k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265
2.09k
        0 /* point_set_compressed_coordinates */ ,
266
2.09k
        0 /* point2oct */ ,
267
2.09k
        0 /* oct2point */ ,
268
2.09k
        ossl_ec_GFp_simple_add,
269
2.09k
        ossl_ec_GFp_simple_dbl,
270
2.09k
        ossl_ec_GFp_simple_invert,
271
2.09k
        ossl_ec_GFp_simple_is_at_infinity,
272
2.09k
        ossl_ec_GFp_simple_is_on_curve,
273
2.09k
        ossl_ec_GFp_simple_cmp,
274
2.09k
        ossl_ec_GFp_simple_make_affine,
275
2.09k
        ossl_ec_GFp_simple_points_make_affine,
276
2.09k
        ossl_ec_GFp_nistp224_points_mul,
277
2.09k
        ossl_ec_GFp_nistp224_precompute_mult,
278
2.09k
        ossl_ec_GFp_nistp224_have_precompute_mult,
279
2.09k
        ossl_ec_GFp_nist_field_mul,
280
2.09k
        ossl_ec_GFp_nist_field_sqr,
281
2.09k
        0 /* field_div */ ,
282
2.09k
        ossl_ec_GFp_simple_field_inv,
283
2.09k
        0 /* field_encode */ ,
284
2.09k
        0 /* field_decode */ ,
285
2.09k
        0,                      /* field_set_to_one */
286
2.09k
        ossl_ec_key_simple_priv2oct,
287
2.09k
        ossl_ec_key_simple_oct2priv,
288
2.09k
        0, /* set private */
289
2.09k
        ossl_ec_key_simple_generate_key,
290
2.09k
        ossl_ec_key_simple_check_key,
291
2.09k
        ossl_ec_key_simple_generate_public_key,
292
2.09k
        0, /* keycopy */
293
2.09k
        0, /* keyfinish */
294
2.09k
        ossl_ecdh_simple_compute_key,
295
2.09k
        ossl_ecdsa_simple_sign_setup,
296
2.09k
        ossl_ecdsa_simple_sign_sig,
297
2.09k
        ossl_ecdsa_simple_verify_sig,
298
2.09k
        0, /* field_inverse_mod_ord */
299
2.09k
        0, /* blind_coordinates */
300
2.09k
        0, /* ladder_pre */
301
2.09k
        0, /* ladder_step */
302
2.09k
        0  /* ladder_post */
303
2.09k
    };
304
305
2.09k
    return &ret;
306
2.09k
}
307
308
/*
309
 * Helper functions to convert field elements to/from internal representation
310
 */
311
static void bin28_to_felem(felem out, const u8 in[28])
312
54
{
313
54
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314
54
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315
54
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316
54
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317
54
}
318
319
static void felem_to_bin28(u8 out[28], const felem in)
320
168
{
321
168
    unsigned i;
322
1.34k
    for (i = 0; i < 7; ++i) {
323
1.17k
        out[i] = in[0] >> (8 * i);
324
1.17k
        out[i + 7] = in[1] >> (8 * i);
325
1.17k
        out[i + 14] = in[2] >> (8 * i);
326
1.17k
        out[i + 21] = in[3] >> (8 * i);
327
1.17k
    }
328
168
}
329
330
/* From OpenSSL BIGNUM to internal representation */
331
static int BN_to_felem(felem out, const BIGNUM *bn)
332
54
{
333
54
    felem_bytearray b_out;
334
54
    int num_bytes;
335
336
54
    if (BN_is_negative(bn)) {
337
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338
0
        return 0;
339
0
    }
340
54
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341
54
    if (num_bytes < 0) {
342
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343
0
        return 0;
344
0
    }
345
54
    bin28_to_felem(out, b_out);
346
54
    return 1;
347
54
}
348
349
/* From internal representation to OpenSSL BIGNUM */
350
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351
168
{
352
168
    felem_bytearray b_out;
353
168
    felem_to_bin28(b_out, in);
354
168
    return BN_lebin2bn(b_out, sizeof(b_out), out);
355
168
}
356
357
/******************************************************************************/
358
/*-
359
 *                              FIELD OPERATIONS
360
 *
361
 * Field operations, using the internal representation of field elements.
362
 * NB! These operations are specific to our point multiplication and cannot be
363
 * expected to be correct in general - e.g., multiplication with a large scalar
364
 * will cause an overflow.
365
 *
366
 */
367
368
static void felem_one(felem out)
369
0
{
370
0
    out[0] = 1;
371
0
    out[1] = 0;
372
0
    out[2] = 0;
373
0
    out[3] = 0;
374
0
}
375
376
static void felem_assign(felem out, const felem in)
377
12.1k
{
378
12.1k
    out[0] = in[0];
379
12.1k
    out[1] = in[1];
380
12.1k
    out[2] = in[2];
381
12.1k
    out[3] = in[3];
382
12.1k
}
383
384
/* Sum two field elements: out += in */
385
static void felem_sum(felem out, const felem in)
386
1.78k
{
387
1.78k
    out[0] += in[0];
388
1.78k
    out[1] += in[1];
389
1.78k
    out[2] += in[2];
390
1.78k
    out[3] += in[3];
391
1.78k
}
392
393
/* Subtract field elements: out -= in */
394
/* Assumes in[i] < 2^57 */
395
static void felem_diff(felem out, const felem in)
396
2.39k
{
397
2.39k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
398
2.39k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
399
2.39k
    static const limb two58m42m2 = (((limb) 1) << 58) -
400
2.39k
        (((limb) 1) << 42) - (((limb) 1) << 2);
401
402
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403
2.39k
    out[0] += two58p2;
404
2.39k
    out[1] += two58m42m2;
405
2.39k
    out[2] += two58m2;
406
2.39k
    out[3] += two58m2;
407
408
2.39k
    out[0] -= in[0];
409
2.39k
    out[1] -= in[1];
410
2.39k
    out[2] -= in[2];
411
2.39k
    out[3] -= in[3];
412
2.39k
}
413
414
/* Subtract in unreduced 128-bit mode: out -= in */
415
/* Assumes in[i] < 2^119 */
416
static void widefelem_diff(widefelem out, const widefelem in)
417
1.80k
{
418
1.80k
    static const widelimb two120 = ((widelimb) 1) << 120;
419
1.80k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
420
1.80k
        (((widelimb) 1) << 64);
421
1.80k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
422
1.80k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423
424
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425
1.80k
    out[0] += two120;
426
1.80k
    out[1] += two120m64;
427
1.80k
    out[2] += two120m64;
428
1.80k
    out[3] += two120;
429
1.80k
    out[4] += two120m104m64;
430
1.80k
    out[5] += two120m64;
431
1.80k
    out[6] += two120m64;
432
433
1.80k
    out[0] -= in[0];
434
1.80k
    out[1] -= in[1];
435
1.80k
    out[2] -= in[2];
436
1.80k
    out[3] -= in[3];
437
1.80k
    out[4] -= in[4];
438
1.80k
    out[5] -= in[5];
439
1.80k
    out[6] -= in[6];
440
1.80k
}
441
442
/* Subtract in mixed mode: out128 -= in64 */
443
/* in[i] < 2^63 */
444
static void felem_diff_128_64(widefelem out, const felem in)
445
6.02k
{
446
6.02k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
447
6.02k
        (((widelimb) 1) << 8);
448
6.02k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
449
6.02k
        (((widelimb) 1) << 8);
450
6.02k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
451
6.02k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452
453
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454
6.02k
    out[0] += two64p8;
455
6.02k
    out[1] += two64m48m8;
456
6.02k
    out[2] += two64m8;
457
6.02k
    out[3] += two64m8;
458
459
6.02k
    out[0] -= in[0];
460
6.02k
    out[1] -= in[1];
461
6.02k
    out[2] -= in[2];
462
6.02k
    out[3] -= in[3];
463
6.02k
}
464
465
/*
466
 * Multiply a field element by a scalar: out = out * scalar The scalars we
467
 * actually use are small, so results fit without overflow
468
 */
469
static void felem_scalar(felem out, const limb scalar)
470
2.99k
{
471
2.99k
    out[0] *= scalar;
472
2.99k
    out[1] *= scalar;
473
2.99k
    out[2] *= scalar;
474
2.99k
    out[3] *= scalar;
475
2.99k
}
476
477
/*
478
 * Multiply an unreduced field element by a scalar: out = out * scalar The
479
 * scalars we actually use are small, so results fit without overflow
480
 */
481
static void widefelem_scalar(widefelem out, const widelimb scalar)
482
594
{
483
594
    out[0] *= scalar;
484
594
    out[1] *= scalar;
485
594
    out[2] *= scalar;
486
594
    out[3] *= scalar;
487
594
    out[4] *= scalar;
488
594
    out[5] *= scalar;
489
594
    out[6] *= scalar;
490
594
}
491
492
/* Square a field element: out = in^2 */
493
static void felem_square(widefelem out, const felem in)
494
10.6k
{
495
10.6k
    limb tmp0, tmp1, tmp2;
496
10.6k
    tmp0 = 2 * in[0];
497
10.6k
    tmp1 = 2 * in[1];
498
10.6k
    tmp2 = 2 * in[2];
499
10.6k
    out[0] = ((widelimb) in[0]) * in[0];
500
10.6k
    out[1] = ((widelimb) in[0]) * tmp1;
501
10.6k
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502
10.6k
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
503
10.6k
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
504
10.6k
    out[5] = ((widelimb) in[3]) * tmp2;
505
10.6k
    out[6] = ((widelimb) in[3]) * in[3];
506
10.6k
}
507
508
/* Multiply two field elements: out = in1 * in2 */
509
static void felem_mul(widefelem out, const felem in1, const felem in2)
510
11.7k
{
511
11.7k
    out[0] = ((widelimb) in1[0]) * in2[0];
512
11.7k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
513
11.7k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
514
11.7k
             ((widelimb) in1[2]) * in2[0];
515
11.7k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
516
11.7k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
517
11.7k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
518
11.7k
             ((widelimb) in1[3]) * in2[1];
519
11.7k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
520
11.7k
    out[6] = ((widelimb) in1[3]) * in2[3];
521
11.7k
}
522
523
/*-
524
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
525
 * Requires in[i] < 2^126,
526
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527
static void felem_reduce(felem out, const widefelem in)
528
20.5k
{
529
20.5k
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
530
20.5k
        (((widelimb) 1) << 15);
531
20.5k
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
532
20.5k
        (((widelimb) 1) << 71);
533
20.5k
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
534
20.5k
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535
20.5k
    widelimb output[5];
536
537
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
538
20.5k
    output[0] = in[0] + two127p15;
539
20.5k
    output[1] = in[1] + two127m71m55;
540
20.5k
    output[2] = in[2] + two127m71;
541
20.5k
    output[3] = in[3];
542
20.5k
    output[4] = in[4];
543
544
    /* Eliminate in[4], in[5], in[6] */
545
20.5k
    output[4] += in[6] >> 16;
546
20.5k
    output[3] += (in[6] & 0xffff) << 40;
547
20.5k
    output[2] -= in[6];
548
549
20.5k
    output[3] += in[5] >> 16;
550
20.5k
    output[2] += (in[5] & 0xffff) << 40;
551
20.5k
    output[1] -= in[5];
552
553
20.5k
    output[2] += output[4] >> 16;
554
20.5k
    output[1] += (output[4] & 0xffff) << 40;
555
20.5k
    output[0] -= output[4];
556
557
    /* Carry 2 -> 3 -> 4 */
558
20.5k
    output[3] += output[2] >> 56;
559
20.5k
    output[2] &= 0x00ffffffffffffff;
560
561
20.5k
    output[4] = output[3] >> 56;
562
20.5k
    output[3] &= 0x00ffffffffffffff;
563
564
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565
566
    /* Eliminate output[4] */
567
20.5k
    output[2] += output[4] >> 16;
568
    /* output[2] < 2^56 + 2^56 = 2^57 */
569
20.5k
    output[1] += (output[4] & 0xffff) << 40;
570
20.5k
    output[0] -= output[4];
571
572
    /* Carry 0 -> 1 -> 2 -> 3 */
573
20.5k
    output[1] += output[0] >> 56;
574
20.5k
    out[0] = output[0] & 0x00ffffffffffffff;
575
576
20.5k
    output[2] += output[1] >> 56;
577
    /* output[2] < 2^57 + 2^72 */
578
20.5k
    out[1] = output[1] & 0x00ffffffffffffff;
579
20.5k
    output[3] += output[2] >> 56;
580
    /* output[3] <= 2^56 + 2^16 */
581
20.5k
    out[2] = output[2] & 0x00ffffffffffffff;
582
583
    /*-
584
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585
     * out[3] <= 2^56 + 2^16 (due to final carry),
586
     * so out < 2*p
587
     */
588
20.5k
    out[3] = output[3];
589
20.5k
}
590
591
static void felem_square_reduce(felem out, const felem in)
592
0
{
593
0
    widefelem tmp;
594
0
    felem_square(tmp, in);
595
0
    felem_reduce(out, tmp);
596
0
}
597
598
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599
0
{
600
0
    widefelem tmp;
601
0
    felem_mul(tmp, in1, in2);
602
0
    felem_reduce(out, tmp);
603
0
}
604
605
/*
606
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607
 * call felem_reduce first)
608
 */
609
static void felem_contract(felem out, const felem in)
610
102
{
611
102
    static const int64_t two56 = ((limb) 1) << 56;
612
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
614
102
    int64_t tmp[4], a;
615
102
    tmp[0] = in[0];
616
102
    tmp[1] = in[1];
617
102
    tmp[2] = in[2];
618
102
    tmp[3] = in[3];
619
    /* Case 1: a = 1 iff in >= 2^224 */
620
102
    a = (in[3] >> 56);
621
102
    tmp[0] -= a;
622
102
    tmp[1] += a << 40;
623
102
    tmp[3] &= 0x00ffffffffffffff;
624
    /*
625
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626
     * and the lower part is non-zero
627
     */
628
102
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
629
102
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
630
102
    a &= 0x00ffffffffffffff;
631
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632
102
    a = (a - 1) >> 63;
633
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
634
102
    tmp[3] &= a ^ 0xffffffffffffffff;
635
102
    tmp[2] &= a ^ 0xffffffffffffffff;
636
102
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637
102
    tmp[0] -= 1 & a;
638
639
    /*
640
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641
     * non-zero, so we only need one step
642
     */
643
102
    a = tmp[0] >> 63;
644
102
    tmp[0] += two56 & a;
645
102
    tmp[1] -= 1 & a;
646
647
    /* carry 1 -> 2 -> 3 */
648
102
    tmp[2] += tmp[1] >> 56;
649
102
    tmp[1] &= 0x00ffffffffffffff;
650
651
102
    tmp[3] += tmp[2] >> 56;
652
102
    tmp[2] &= 0x00ffffffffffffff;
653
654
    /* Now 0 <= out < p */
655
102
    out[0] = tmp[0];
656
102
    out[1] = tmp[1];
657
102
    out[2] = tmp[2];
658
102
    out[3] = tmp[3];
659
102
}
660
661
/*
662
 * Get negative value: out = -in
663
 * Requires in[i] < 2^63,
664
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665
 */
666
static void felem_neg(felem out, const felem in)
667
0
{
668
0
    widefelem tmp;
669
670
0
    memset(tmp, 0, sizeof(tmp));
671
0
    felem_diff_128_64(tmp, in);
672
0
    felem_reduce(out, tmp);
673
0
}
674
675
/*
676
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677
 * elements are reduced to in < 2^225, so we only need to check three cases:
678
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679
 */
680
static limb felem_is_zero(const felem in)
681
4.84k
{
682
4.84k
    limb zero, two224m96p1, two225m97p2;
683
684
4.84k
    zero = in[0] | in[1] | in[2] | in[3];
685
4.84k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
686
4.84k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
687
4.84k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
688
4.84k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
689
4.84k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
690
4.84k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
691
4.84k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
692
4.84k
    return (zero | two224m96p1 | two225m97p2);
693
4.84k
}
694
695
static int felem_is_zero_int(const void *in)
696
0
{
697
0
    return (int)(felem_is_zero(in) & ((limb) 1));
698
0
}
699
700
/* Invert a field element */
701
/* Computation chain copied from djb's code */
702
static void felem_inv(felem out, const felem in)
703
18
{
704
18
    felem ftmp, ftmp2, ftmp3, ftmp4;
705
18
    widefelem tmp;
706
18
    unsigned i;
707
708
18
    felem_square(tmp, in);
709
18
    felem_reduce(ftmp, tmp);    /* 2 */
710
18
    felem_mul(tmp, in, ftmp);
711
18
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
712
18
    felem_square(tmp, ftmp);
713
18
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
714
18
    felem_mul(tmp, in, ftmp);
715
18
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
716
18
    felem_square(tmp, ftmp);
717
18
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
718
18
    felem_square(tmp, ftmp2);
719
18
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
720
18
    felem_square(tmp, ftmp2);
721
18
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
722
18
    felem_mul(tmp, ftmp2, ftmp);
723
18
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
724
18
    felem_square(tmp, ftmp);
725
18
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
726
108
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
727
90
        felem_square(tmp, ftmp2);
728
90
        felem_reduce(ftmp2, tmp);
729
90
    }
730
18
    felem_mul(tmp, ftmp2, ftmp);
731
18
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
732
18
    felem_square(tmp, ftmp2);
733
18
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
734
216
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
735
198
        felem_square(tmp, ftmp3);
736
198
        felem_reduce(ftmp3, tmp);
737
198
    }
738
18
    felem_mul(tmp, ftmp3, ftmp2);
739
18
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
740
18
    felem_square(tmp, ftmp2);
741
18
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
742
432
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
743
414
        felem_square(tmp, ftmp3);
744
414
        felem_reduce(ftmp3, tmp);
745
414
    }
746
18
    felem_mul(tmp, ftmp3, ftmp2);
747
18
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
748
18
    felem_square(tmp, ftmp3);
749
18
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
750
864
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
751
846
        felem_square(tmp, ftmp4);
752
846
        felem_reduce(ftmp4, tmp);
753
846
    }
754
18
    felem_mul(tmp, ftmp3, ftmp4);
755
18
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
756
18
    felem_square(tmp, ftmp3);
757
18
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
758
432
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
759
414
        felem_square(tmp, ftmp4);
760
414
        felem_reduce(ftmp4, tmp);
761
414
    }
762
18
    felem_mul(tmp, ftmp2, ftmp4);
763
18
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
764
126
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
765
108
        felem_square(tmp, ftmp2);
766
108
        felem_reduce(ftmp2, tmp);
767
108
    }
768
18
    felem_mul(tmp, ftmp2, ftmp);
769
18
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
770
18
    felem_square(tmp, ftmp);
771
18
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
772
18
    felem_mul(tmp, ftmp, in);
773
18
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
774
1.76k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
775
1.74k
        felem_square(tmp, ftmp);
776
1.74k
        felem_reduce(ftmp, tmp);
777
1.74k
    }
778
18
    felem_mul(tmp, ftmp, ftmp3);
779
18
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
780
18
}
781
782
/*
783
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784
 * out to itself.
785
 */
786
static void copy_conditional(felem out, const felem in, limb icopy)
787
7.26k
{
788
7.26k
    unsigned i;
789
    /*
790
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791
     */
792
7.26k
    const limb copy = -icopy;
793
36.3k
    for (i = 0; i < 4; ++i) {
794
29.0k
        const limb tmp = copy & (in[i] ^ out[i]);
795
29.0k
        out[i] ^= tmp;
796
29.0k
    }
797
7.26k
}
798
799
/******************************************************************************/
800
/*-
801
 *                       ELLIPTIC CURVE POINT OPERATIONS
802
 *
803
 * Points are represented in Jacobian projective coordinates:
804
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805
 * or to the point at infinity if Z == 0.
806
 *
807
 */
808
809
/*-
810
 * Double an elliptic curve point:
811
 * (X', Y', Z') = 2 * (X, Y, Z), where
812
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816
 * while x_out == y_in is not (maybe this works, but it's not tested).
817
 */
818
static void
819
point_double(felem x_out, felem y_out, felem z_out,
820
             const felem x_in, const felem y_in, const felem z_in)
821
594
{
822
594
    widefelem tmp, tmp2;
823
594
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825
594
    felem_assign(ftmp, x_in);
826
594
    felem_assign(ftmp2, x_in);
827
828
    /* delta = z^2 */
829
594
    felem_square(tmp, z_in);
830
594
    felem_reduce(delta, tmp);
831
832
    /* gamma = y^2 */
833
594
    felem_square(tmp, y_in);
834
594
    felem_reduce(gamma, tmp);
835
836
    /* beta = x*gamma */
837
594
    felem_mul(tmp, x_in, gamma);
838
594
    felem_reduce(beta, tmp);
839
840
    /* alpha = 3*(x-delta)*(x+delta) */
841
594
    felem_diff(ftmp, delta);
842
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
843
594
    felem_sum(ftmp2, delta);
844
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
845
594
    felem_scalar(ftmp2, 3);
846
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
847
594
    felem_mul(tmp, ftmp, ftmp2);
848
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
849
594
    felem_reduce(alpha, tmp);
850
851
    /* x' = alpha^2 - 8*beta */
852
594
    felem_square(tmp, alpha);
853
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
854
594
    felem_assign(ftmp, beta);
855
594
    felem_scalar(ftmp, 8);
856
    /* ftmp[i] < 8 * 2^57 = 2^60 */
857
594
    felem_diff_128_64(tmp, ftmp);
858
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
859
594
    felem_reduce(x_out, tmp);
860
861
    /* z' = (y + z)^2 - gamma - delta */
862
594
    felem_sum(delta, gamma);
863
    /* delta[i] < 2^57 + 2^57 = 2^58 */
864
594
    felem_assign(ftmp, y_in);
865
594
    felem_sum(ftmp, z_in);
866
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
867
594
    felem_square(tmp, ftmp);
868
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
869
594
    felem_diff_128_64(tmp, delta);
870
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
871
594
    felem_reduce(z_out, tmp);
872
873
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
874
594
    felem_scalar(beta, 4);
875
    /* beta[i] < 4 * 2^57 = 2^59 */
876
594
    felem_diff(beta, x_out);
877
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
878
594
    felem_mul(tmp, alpha, beta);
879
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
880
594
    felem_square(tmp2, gamma);
881
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
882
594
    widefelem_scalar(tmp2, 8);
883
    /* tmp2[i] < 8 * 2^116 = 2^119 */
884
594
    widefelem_diff(tmp, tmp2);
885
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
886
594
    felem_reduce(y_out, tmp);
887
594
}
888
889
/*-
890
 * Add two elliptic curve points:
891
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897
 *
898
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899
 */
900
901
/*
902
 * This function is not entirely constant-time: it includes a branch for
903
 * checking whether the two input points are equal, (while not equal to the
904
 * point at infinity). This case never happens during single point
905
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906
 */
907
static void point_add(felem x3, felem y3, felem z3,
908
                      const felem x1, const felem y1, const felem z1,
909
                      const int mixed, const felem x2, const felem y2,
910
                      const felem z2)
911
1.21k
{
912
1.21k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913
1.21k
    widefelem tmp, tmp2;
914
1.21k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915
1.21k
    limb points_equal;
916
917
1.21k
    if (!mixed) {
918
        /* ftmp2 = z2^2 */
919
0
        felem_square(tmp, z2);
920
0
        felem_reduce(ftmp2, tmp);
921
922
        /* ftmp4 = z2^3 */
923
0
        felem_mul(tmp, ftmp2, z2);
924
0
        felem_reduce(ftmp4, tmp);
925
926
        /* ftmp4 = z2^3*y1 */
927
0
        felem_mul(tmp2, ftmp4, y1);
928
0
        felem_reduce(ftmp4, tmp2);
929
930
        /* ftmp2 = z2^2*x1 */
931
0
        felem_mul(tmp2, ftmp2, x1);
932
0
        felem_reduce(ftmp2, tmp2);
933
1.21k
    } else {
934
        /*
935
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
936
         */
937
938
        /* ftmp4 = z2^3*y1 */
939
1.21k
        felem_assign(ftmp4, y1);
940
941
        /* ftmp2 = z2^2*x1 */
942
1.21k
        felem_assign(ftmp2, x1);
943
1.21k
    }
944
945
    /* ftmp = z1^2 */
946
1.21k
    felem_square(tmp, z1);
947
1.21k
    felem_reduce(ftmp, tmp);
948
949
    /* ftmp3 = z1^3 */
950
1.21k
    felem_mul(tmp, ftmp, z1);
951
1.21k
    felem_reduce(ftmp3, tmp);
952
953
    /* tmp = z1^3*y2 */
954
1.21k
    felem_mul(tmp, ftmp3, y2);
955
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
958
1.21k
    felem_diff_128_64(tmp, ftmp4);
959
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960
1.21k
    felem_reduce(ftmp3, tmp);
961
962
    /* tmp = z1^2*x2 */
963
1.21k
    felem_mul(tmp, ftmp, x2);
964
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966
    /* ftmp = z1^2*x2 - z2^2*x1 */
967
1.21k
    felem_diff_128_64(tmp, ftmp2);
968
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969
1.21k
    felem_reduce(ftmp, tmp);
970
971
    /*
972
     * The formulae are incorrect if the points are equal, in affine coordinates
973
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
974
     * happens.
975
     *
976
     * We use bitwise operations to avoid potential side-channels introduced by
977
     * the short-circuiting behaviour of boolean operators.
978
     */
979
1.21k
    x_equal = felem_is_zero(ftmp);
980
1.21k
    y_equal = felem_is_zero(ftmp3);
981
    /*
982
     * The special case of either point being the point at infinity (z1 and/or
983
     * z2 are zero), is handled separately later on in this function, so we
984
     * avoid jumping to point_double here in those special cases.
985
     */
986
1.21k
    z1_is_zero = felem_is_zero(z1);
987
1.21k
    z2_is_zero = felem_is_zero(z2);
988
989
    /*
990
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
991
     * specific implementation `felem_is_zero()` returns truth as `0x1`
992
     * (rather than `0xff..ff`).
993
     *
994
     * This implies that `~true` in this implementation becomes
995
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
996
     * the if expression, we mask out only the last bit in the next
997
     * line.
998
     */
999
1.21k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001
1.21k
    if (points_equal) {
1002
        /*
1003
         * This is obviously not constant-time but, as mentioned before, this
1004
         * case never happens during single point multiplication, so there is no
1005
         * timing leak for ECDH or ECDSA signing.
1006
         */
1007
0
        point_double(x3, y3, z3, x1, y1, z1);
1008
0
        return;
1009
0
    }
1010
1011
    /* ftmp5 = z1*z2 */
1012
1.21k
    if (!mixed) {
1013
0
        felem_mul(tmp, z1, z2);
1014
0
        felem_reduce(ftmp5, tmp);
1015
1.21k
    } else {
1016
        /* special case z2 = 0 is handled later */
1017
1.21k
        felem_assign(ftmp5, z1);
1018
1.21k
    }
1019
1020
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021
1.21k
    felem_mul(tmp, ftmp, ftmp5);
1022
1.21k
    felem_reduce(z_out, tmp);
1023
1024
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025
1.21k
    felem_assign(ftmp5, ftmp);
1026
1.21k
    felem_square(tmp, ftmp);
1027
1.21k
    felem_reduce(ftmp, tmp);
1028
1029
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030
1.21k
    felem_mul(tmp, ftmp, ftmp5);
1031
1.21k
    felem_reduce(ftmp5, tmp);
1032
1033
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034
1.21k
    felem_mul(tmp, ftmp2, ftmp);
1035
1.21k
    felem_reduce(ftmp2, tmp);
1036
1037
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038
1.21k
    felem_mul(tmp, ftmp4, ftmp5);
1039
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042
1.21k
    felem_square(tmp2, ftmp3);
1043
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046
1.21k
    felem_diff_128_64(tmp2, ftmp5);
1047
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050
1.21k
    felem_assign(ftmp5, ftmp2);
1051
1.21k
    felem_scalar(ftmp5, 2);
1052
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054
    /*-
1055
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057
     */
1058
1.21k
    felem_diff_128_64(tmp2, ftmp5);
1059
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060
1.21k
    felem_reduce(x_out, tmp2);
1061
1062
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063
1.21k
    felem_diff(ftmp2, x_out);
1064
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066
    /*
1067
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068
     */
1069
1.21k
    felem_mul(tmp2, ftmp3, ftmp2);
1070
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072
    /*-
1073
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075
     */
1076
1.21k
    widefelem_diff(tmp2, tmp);
1077
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078
1.21k
    felem_reduce(y_out, tmp2);
1079
1080
    /*
1081
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082
     * the point at infinity, so we need to check for this separately
1083
     */
1084
1085
    /*
1086
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1087
     */
1088
1.21k
    copy_conditional(x_out, x2, z1_is_zero);
1089
1.21k
    copy_conditional(x_out, x1, z2_is_zero);
1090
1.21k
    copy_conditional(y_out, y2, z1_is_zero);
1091
1.21k
    copy_conditional(y_out, y1, z2_is_zero);
1092
1.21k
    copy_conditional(z_out, z2, z1_is_zero);
1093
1.21k
    copy_conditional(z_out, z1, z2_is_zero);
1094
1.21k
    felem_assign(x3, x_out);
1095
1.21k
    felem_assign(y3, y_out);
1096
1.21k
    felem_assign(z3, z_out);
1097
1.21k
}
1098
1099
/*
1100
 * select_point selects the |idx|th point from a precomputation table and
1101
 * copies it to out.
1102
 * The pre_comp array argument should be size of |size| argument
1103
 */
1104
static void select_point(const u64 idx, unsigned int size,
1105
                         const felem pre_comp[][3], felem out[3])
1106
1.23k
{
1107
1.23k
    unsigned i, j;
1108
1.23k
    limb *outlimbs = &out[0][0];
1109
1110
1.23k
    memset(out, 0, sizeof(*out) * 3);
1111
20.9k
    for (i = 0; i < size; i++) {
1112
19.7k
        const limb *inlimbs = &pre_comp[i][0][0];
1113
19.7k
        u64 mask = i ^ idx;
1114
19.7k
        mask |= mask >> 4;
1115
19.7k
        mask |= mask >> 2;
1116
19.7k
        mask |= mask >> 1;
1117
19.7k
        mask &= 1;
1118
19.7k
        mask--;
1119
256k
        for (j = 0; j < 4 * 3; j++)
1120
236k
            outlimbs[j] |= inlimbs[j] & mask;
1121
19.7k
    }
1122
1.23k
}
1123
1124
/* get_bit returns the |i|th bit in |in| */
1125
static char get_bit(const felem_bytearray in, unsigned i)
1126
4.92k
{
1127
4.92k
    if (i >= 224)
1128
0
        return 0;
1129
4.92k
    return (in[i >> 3] >> (i & 7)) & 1;
1130
4.92k
}
1131
1132
/*
1133
 * Interleaved point multiplication using precomputed point multiples: The
1134
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1137
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138
 */
1139
static void batch_mul(felem x_out, felem y_out, felem z_out,
1140
                      const felem_bytearray scalars[],
1141
                      const unsigned num_points, const u8 *g_scalar,
1142
                      const int mixed, const felem pre_comp[][17][3],
1143
                      const felem g_pre_comp[2][16][3])
1144
22
{
1145
22
    int i, skip;
1146
22
    unsigned num;
1147
22
    unsigned gen_mul = (g_scalar != NULL);
1148
22
    felem nq[3], tmp[4];
1149
22
    u64 bits;
1150
22
    u8 sign, digit;
1151
1152
    /* set nq to the point at infinity */
1153
22
    memset(nq, 0, sizeof(nq));
1154
1155
    /*
1156
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157
     * of the generator (two in each of the last 28 rounds) and additions of
1158
     * other points multiples (every 5th round).
1159
     */
1160
22
    skip = 1;                   /* save two point operations in the first
1161
                                 * round */
1162
638
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163
        /* double */
1164
616
        if (!skip)
1165
594
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167
        /* add multiples of the generator */
1168
616
        if (gen_mul && (i <= 27)) {
1169
            /* first, look 28 bits upwards */
1170
616
            bits = get_bit(g_scalar, i + 196) << 3;
1171
616
            bits |= get_bit(g_scalar, i + 140) << 2;
1172
616
            bits |= get_bit(g_scalar, i + 84) << 1;
1173
616
            bits |= get_bit(g_scalar, i + 28);
1174
            /* select the point to add, in constant time */
1175
616
            select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177
616
            if (!skip) {
1178
                /* value 1 below is argument for "mixed" */
1179
594
                point_add(nq[0], nq[1], nq[2],
1180
594
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181
594
            } else {
1182
22
                memcpy(nq, tmp, 3 * sizeof(felem));
1183
22
                skip = 0;
1184
22
            }
1185
1186
            /* second, look at the current position */
1187
616
            bits = get_bit(g_scalar, i + 168) << 3;
1188
616
            bits |= get_bit(g_scalar, i + 112) << 2;
1189
616
            bits |= get_bit(g_scalar, i + 56) << 1;
1190
616
            bits |= get_bit(g_scalar, i);
1191
            /* select the point to add, in constant time */
1192
616
            select_point(bits, 16, g_pre_comp[0], tmp);
1193
616
            point_add(nq[0], nq[1], nq[2],
1194
616
                      nq[0], nq[1], nq[2],
1195
616
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196
616
        }
1197
1198
        /* do other additions every 5 doublings */
1199
616
        if (num_points && (i % 5 == 0)) {
1200
            /* loop over all scalars */
1201
0
            for (num = 0; num < num_points; ++num) {
1202
0
                bits = get_bit(scalars[num], i + 4) << 5;
1203
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1204
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1205
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1206
0
                bits |= get_bit(scalars[num], i) << 1;
1207
0
                bits |= get_bit(scalars[num], i - 1);
1208
0
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210
                /* select the point to add or subtract */
1211
0
                select_point(digit, 17, pre_comp[num], tmp);
1212
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213
                                            * point */
1214
0
                copy_conditional(tmp[1], tmp[3], sign);
1215
1216
0
                if (!skip) {
1217
0
                    point_add(nq[0], nq[1], nq[2],
1218
0
                              nq[0], nq[1], nq[2],
1219
0
                              mixed, tmp[0], tmp[1], tmp[2]);
1220
0
                } else {
1221
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1222
0
                    skip = 0;
1223
0
                }
1224
0
            }
1225
0
        }
1226
616
    }
1227
22
    felem_assign(x_out, nq[0]);
1228
22
    felem_assign(y_out, nq[1]);
1229
22
    felem_assign(z_out, nq[2]);
1230
22
}
1231
1232
/******************************************************************************/
1233
/*
1234
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1235
 */
1236
1237
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238
0
{
1239
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241
0
    if (!ret) {
1242
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243
0
        return ret;
1244
0
    }
1245
1246
0
    ret->references = 1;
1247
1248
0
    ret->lock = CRYPTO_THREAD_lock_new();
1249
0
    if (ret->lock == NULL) {
1250
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251
0
        OPENSSL_free(ret);
1252
0
        return NULL;
1253
0
    }
1254
0
    return ret;
1255
0
}
1256
1257
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258
0
{
1259
0
    int i;
1260
0
    if (p != NULL)
1261
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1262
0
    return p;
1263
0
}
1264
1265
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266
0
{
1267
0
    int i;
1268
1269
0
    if (p == NULL)
1270
0
        return;
1271
1272
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273
0
    REF_PRINT_COUNT("EC_nistp224", p);
1274
0
    if (i > 0)
1275
0
        return;
1276
0
    REF_ASSERT_ISNT(i < 0);
1277
1278
0
    CRYPTO_THREAD_lock_free(p->lock);
1279
0
    OPENSSL_free(p);
1280
0
}
1281
1282
/******************************************************************************/
1283
/*
1284
 * OPENSSL EC_METHOD FUNCTIONS
1285
 */
1286
1287
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288
4.14k
{
1289
4.14k
    int ret;
1290
4.14k
    ret = ossl_ec_GFp_simple_group_init(group);
1291
4.14k
    group->a_is_minus3 = 1;
1292
4.14k
    return ret;
1293
4.14k
}
1294
1295
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296
                                         const BIGNUM *a, const BIGNUM *b,
1297
                                         BN_CTX *ctx)
1298
2.09k
{
1299
2.09k
    int ret = 0;
1300
2.09k
    BIGNUM *curve_p, *curve_a, *curve_b;
1301
2.09k
#ifndef FIPS_MODULE
1302
2.09k
    BN_CTX *new_ctx = NULL;
1303
1304
2.09k
    if (ctx == NULL)
1305
0
        ctx = new_ctx = BN_CTX_new();
1306
2.09k
#endif
1307
2.09k
    if (ctx == NULL)
1308
0
        return 0;
1309
1310
2.09k
    BN_CTX_start(ctx);
1311
2.09k
    curve_p = BN_CTX_get(ctx);
1312
2.09k
    curve_a = BN_CTX_get(ctx);
1313
2.09k
    curve_b = BN_CTX_get(ctx);
1314
2.09k
    if (curve_b == NULL)
1315
0
        goto err;
1316
2.09k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317
2.09k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318
2.09k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319
2.09k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321
0
        goto err;
1322
0
    }
1323
2.09k
    group->field_mod_func = BN_nist_mod_224;
1324
2.09k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325
2.09k
 err:
1326
2.09k
    BN_CTX_end(ctx);
1327
2.09k
#ifndef FIPS_MODULE
1328
2.09k
    BN_CTX_free(new_ctx);
1329
2.09k
#endif
1330
2.09k
    return ret;
1331
2.09k
}
1332
1333
/*
1334
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335
 * (X/Z^2, Y/Z^3)
1336
 */
1337
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338
                                                      const EC_POINT *point,
1339
                                                      BIGNUM *x, BIGNUM *y,
1340
                                                      BN_CTX *ctx)
1341
18
{
1342
18
    felem z1, z2, x_in, y_in, x_out, y_out;
1343
18
    widefelem tmp;
1344
1345
18
    if (EC_POINT_is_at_infinity(group, point)) {
1346
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347
0
        return 0;
1348
0
    }
1349
18
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350
18
        (!BN_to_felem(z1, point->Z)))
1351
0
        return 0;
1352
18
    felem_inv(z2, z1);
1353
18
    felem_square(tmp, z2);
1354
18
    felem_reduce(z1, tmp);
1355
18
    felem_mul(tmp, x_in, z1);
1356
18
    felem_reduce(x_in, tmp);
1357
18
    felem_contract(x_out, x_in);
1358
18
    if (x != NULL) {
1359
18
        if (!felem_to_BN(x, x_out)) {
1360
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361
0
            return 0;
1362
0
        }
1363
18
    }
1364
18
    felem_mul(tmp, z1, z2);
1365
18
    felem_reduce(z1, tmp);
1366
18
    felem_mul(tmp, y_in, z1);
1367
18
    felem_reduce(y_in, tmp);
1368
18
    felem_contract(y_out, y_in);
1369
18
    if (y != NULL) {
1370
18
        if (!felem_to_BN(y, y_out)) {
1371
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372
0
            return 0;
1373
0
        }
1374
18
    }
1375
18
    return 1;
1376
18
}
1377
1378
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379
                               felem tmp_felems[ /* num+1 */ ])
1380
0
{
1381
    /*
1382
     * Runs in constant time, unless an input is the point at infinity (which
1383
     * normally shouldn't happen).
1384
     */
1385
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386
0
                                                  points,
1387
0
                                                  sizeof(felem),
1388
0
                                                  tmp_felems,
1389
0
                                                  (void (*)(void *))felem_one,
1390
0
                                                  felem_is_zero_int,
1391
0
                                                  (void (*)(void *, const void *))
1392
0
                                                  felem_assign,
1393
0
                                                  (void (*)(void *, const void *))
1394
0
                                                  felem_square_reduce, (void (*)
1395
0
                                                                        (void *,
1396
0
                                                                         const void
1397
0
                                                                         *,
1398
0
                                                                         const void
1399
0
                                                                         *))
1400
0
                                                  felem_mul_reduce,
1401
0
                                                  (void (*)(void *, const void *))
1402
0
                                                  felem_inv,
1403
0
                                                  (void (*)(void *, const void *))
1404
0
                                                  felem_contract);
1405
0
}
1406
1407
/*
1408
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409
 * values Result is stored in r (r can equal one of the inputs).
1410
 */
1411
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412
                                    const BIGNUM *scalar, size_t num,
1413
                                    const EC_POINT *points[],
1414
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1415
22
{
1416
22
    int ret = 0;
1417
22
    int j;
1418
22
    unsigned i;
1419
22
    int mixed = 0;
1420
22
    BIGNUM *x, *y, *z, *tmp_scalar;
1421
22
    felem_bytearray g_secret;
1422
22
    felem_bytearray *secrets = NULL;
1423
22
    felem (*pre_comp)[17][3] = NULL;
1424
22
    felem *tmp_felems = NULL;
1425
22
    int num_bytes;
1426
22
    int have_pre_comp = 0;
1427
22
    size_t num_points = num;
1428
22
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1429
22
    NISTP224_PRE_COMP *pre = NULL;
1430
22
    const felem(*g_pre_comp)[16][3] = NULL;
1431
22
    EC_POINT *generator = NULL;
1432
22
    const EC_POINT *p = NULL;
1433
22
    const BIGNUM *p_scalar = NULL;
1434
1435
22
    BN_CTX_start(ctx);
1436
22
    x = BN_CTX_get(ctx);
1437
22
    y = BN_CTX_get(ctx);
1438
22
    z = BN_CTX_get(ctx);
1439
22
    tmp_scalar = BN_CTX_get(ctx);
1440
22
    if (tmp_scalar == NULL)
1441
0
        goto err;
1442
1443
22
    if (scalar != NULL) {
1444
22
        pre = group->pre_comp.nistp224;
1445
22
        if (pre)
1446
            /* we have precomputation, try to use it */
1447
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448
22
        else
1449
            /* try to use the standard precomputation */
1450
22
            g_pre_comp = &gmul[0];
1451
22
        generator = EC_POINT_new(group);
1452
22
        if (generator == NULL)
1453
0
            goto err;
1454
        /* get the generator from precomputation */
1455
22
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456
22
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457
22
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459
0
            goto err;
1460
0
        }
1461
22
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462
22
                                                                generator,
1463
22
                                                                x, y, z, ctx))
1464
0
            goto err;
1465
22
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466
            /* precomputation matches generator */
1467
22
            have_pre_comp = 1;
1468
0
        else
1469
            /*
1470
             * we don't have valid precomputation: treat the generator as a
1471
             * random point
1472
             */
1473
0
            num_points = num_points + 1;
1474
22
    }
1475
1476
22
    if (num_points > 0) {
1477
0
        if (num_points >= 3) {
1478
            /*
1479
             * unless we precompute multiples for just one or two points,
1480
             * converting those into affine form is time well spent
1481
             */
1482
0
            mixed = 1;
1483
0
        }
1484
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486
0
        if (mixed)
1487
0
            tmp_felems =
1488
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489
0
        if ((secrets == NULL) || (pre_comp == NULL)
1490
0
            || (mixed && (tmp_felems == NULL))) {
1491
0
            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492
0
            goto err;
1493
0
        }
1494
1495
        /*
1496
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1497
         * i.e., they contribute nothing to the linear combination
1498
         */
1499
0
        for (i = 0; i < num_points; ++i) {
1500
0
            if (i == num) {
1501
                /* the generator */
1502
0
                p = EC_GROUP_get0_generator(group);
1503
0
                p_scalar = scalar;
1504
0
            } else {
1505
                /* the i^th point */
1506
0
                p = points[i];
1507
0
                p_scalar = scalars[i];
1508
0
            }
1509
0
            if ((p_scalar != NULL) && (p != NULL)) {
1510
                /* reduce scalar to 0 <= scalar < 2^224 */
1511
0
                if ((BN_num_bits(p_scalar) > 224)
1512
0
                    || (BN_is_negative(p_scalar))) {
1513
                    /*
1514
                     * this is an unusual input, and we don't guarantee
1515
                     * constant-timeness
1516
                     */
1517
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519
0
                        goto err;
1520
0
                    }
1521
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1522
0
                                               secrets[i], sizeof(secrets[i]));
1523
0
                } else {
1524
0
                    num_bytes = BN_bn2lebinpad(p_scalar,
1525
0
                                               secrets[i], sizeof(secrets[i]));
1526
0
                }
1527
0
                if (num_bytes < 0) {
1528
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529
0
                    goto err;
1530
0
                }
1531
                /* precompute multiples */
1532
0
                if ((!BN_to_felem(x_out, p->X)) ||
1533
0
                    (!BN_to_felem(y_out, p->Y)) ||
1534
0
                    (!BN_to_felem(z_out, p->Z)))
1535
0
                    goto err;
1536
0
                felem_assign(pre_comp[i][1][0], x_out);
1537
0
                felem_assign(pre_comp[i][1][1], y_out);
1538
0
                felem_assign(pre_comp[i][1][2], z_out);
1539
0
                for (j = 2; j <= 16; ++j) {
1540
0
                    if (j & 1) {
1541
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1543
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544
0
                                  pre_comp[i][j - 1][0],
1545
0
                                  pre_comp[i][j - 1][1],
1546
0
                                  pre_comp[i][j - 1][2]);
1547
0
                    } else {
1548
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550
0
                                     pre_comp[i][j / 2][1],
1551
0
                                     pre_comp[i][j / 2][2]);
1552
0
                    }
1553
0
                }
1554
0
            }
1555
0
        }
1556
0
        if (mixed)
1557
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558
0
    }
1559
1560
    /* the scalar for the generator */
1561
22
    if ((scalar != NULL) && (have_pre_comp)) {
1562
22
        memset(g_secret, 0, sizeof(g_secret));
1563
        /* reduce scalar to 0 <= scalar < 2^224 */
1564
22
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565
            /*
1566
             * this is an unusual input, and we don't guarantee
1567
             * constant-timeness
1568
             */
1569
2
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571
0
                goto err;
1572
0
            }
1573
2
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574
20
        } else {
1575
20
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576
20
        }
1577
        /* do the multiplication with generator precomputation */
1578
22
        batch_mul(x_out, y_out, z_out,
1579
22
                  (const felem_bytearray(*))secrets, num_points,
1580
22
                  g_secret,
1581
22
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582
22
    } else {
1583
        /* do the multiplication without generator precomputation */
1584
0
        batch_mul(x_out, y_out, z_out,
1585
0
                  (const felem_bytearray(*))secrets, num_points,
1586
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587
0
    }
1588
    /* reduce the output to its unique minimal representation */
1589
22
    felem_contract(x_in, x_out);
1590
22
    felem_contract(y_in, y_out);
1591
22
    felem_contract(z_in, z_out);
1592
22
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593
22
        (!felem_to_BN(z, z_in))) {
1594
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595
0
        goto err;
1596
0
    }
1597
22
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598
22
                                                             ctx);
1599
1600
22
 err:
1601
22
    BN_CTX_end(ctx);
1602
22
    EC_POINT_free(generator);
1603
22
    OPENSSL_free(secrets);
1604
22
    OPENSSL_free(pre_comp);
1605
22
    OPENSSL_free(tmp_felems);
1606
22
    return ret;
1607
22
}
1608
1609
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610
0
{
1611
0
    int ret = 0;
1612
0
    NISTP224_PRE_COMP *pre = NULL;
1613
0
    int i, j;
1614
0
    BIGNUM *x, *y;
1615
0
    EC_POINT *generator = NULL;
1616
0
    felem tmp_felems[32];
1617
0
#ifndef FIPS_MODULE
1618
0
    BN_CTX *new_ctx = NULL;
1619
0
#endif
1620
1621
    /* throw away old precomputation */
1622
0
    EC_pre_comp_free(group);
1623
1624
0
#ifndef FIPS_MODULE
1625
0
    if (ctx == NULL)
1626
0
        ctx = new_ctx = BN_CTX_new();
1627
0
#endif
1628
0
    if (ctx == NULL)
1629
0
        return 0;
1630
1631
0
    BN_CTX_start(ctx);
1632
0
    x = BN_CTX_get(ctx);
1633
0
    y = BN_CTX_get(ctx);
1634
0
    if (y == NULL)
1635
0
        goto err;
1636
    /* get the generator */
1637
0
    if (group->generator == NULL)
1638
0
        goto err;
1639
0
    generator = EC_POINT_new(group);
1640
0
    if (generator == NULL)
1641
0
        goto err;
1642
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645
0
        goto err;
1646
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1647
0
        goto err;
1648
    /*
1649
     * if the generator is the standard one, use built-in precomputation
1650
     */
1651
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653
0
        goto done;
1654
0
    }
1655
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658
0
        goto err;
1659
    /*
1660
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661
     * 2^140*G, 2^196*G for the second one
1662
     */
1663
0
    for (i = 1; i <= 8; i <<= 1) {
1664
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667
0
        for (j = 0; j < 27; ++j) {
1668
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671
0
        }
1672
0
        if (i == 8)
1673
0
            break;
1674
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1675
0
                     pre->g_pre_comp[0][2 * i][1],
1676
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678
0
        for (j = 0; j < 27; ++j) {
1679
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1680
0
                         pre->g_pre_comp[0][2 * i][1],
1681
0
                         pre->g_pre_comp[0][2 * i][2],
1682
0
                         pre->g_pre_comp[0][2 * i][0],
1683
0
                         pre->g_pre_comp[0][2 * i][1],
1684
0
                         pre->g_pre_comp[0][2 * i][2]);
1685
0
        }
1686
0
    }
1687
0
    for (i = 0; i < 2; i++) {
1688
        /* g_pre_comp[i][0] is the point at infinity */
1689
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690
        /* the remaining multiples */
1691
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696
0
                  pre->g_pre_comp[i][2][2]);
1697
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708
0
                  pre->g_pre_comp[i][4][2]);
1709
        /*
1710
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711
         */
1712
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716
0
                  pre->g_pre_comp[i][2][2]);
1717
0
        for (j = 1; j < 8; ++j) {
1718
            /* odd multiples: add G resp. 2^28*G */
1719
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1721
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1722
0
                      pre->g_pre_comp[i][2 * j][0],
1723
0
                      pre->g_pre_comp[i][2 * j][1],
1724
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1725
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726
0
                      pre->g_pre_comp[i][1][2]);
1727
0
        }
1728
0
    }
1729
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731
0
 done:
1732
0
    SETPRECOMP(group, nistp224, pre);
1733
0
    pre = NULL;
1734
0
    ret = 1;
1735
0
 err:
1736
0
    BN_CTX_end(ctx);
1737
0
    EC_POINT_free(generator);
1738
0
#ifndef FIPS_MODULE
1739
0
    BN_CTX_free(new_ctx);
1740
0
#endif
1741
0
    EC_nistp224_pre_comp_free(pre);
1742
0
    return ret;
1743
0
}
1744
1745
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746
0
{
1747
0
    return HAVEPRECOMP(group, nistp224);
1748
0
}