Coverage Report

Created: 2023-06-08 06:43

/src/openssl30/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
    CRYPTO_RWLOCK *lock;
242
};
243
244
const EC_METHOD *EC_GFp_nistp224_method(void)
245
21.4k
{
246
21.4k
    static const EC_METHOD ret = {
247
21.4k
        EC_FLAGS_DEFAULT_OCT,
248
21.4k
        NID_X9_62_prime_field,
249
21.4k
        ossl_ec_GFp_nistp224_group_init,
250
21.4k
        ossl_ec_GFp_simple_group_finish,
251
21.4k
        ossl_ec_GFp_simple_group_clear_finish,
252
21.4k
        ossl_ec_GFp_nist_group_copy,
253
21.4k
        ossl_ec_GFp_nistp224_group_set_curve,
254
21.4k
        ossl_ec_GFp_simple_group_get_curve,
255
21.4k
        ossl_ec_GFp_simple_group_get_degree,
256
21.4k
        ossl_ec_group_simple_order_bits,
257
21.4k
        ossl_ec_GFp_simple_group_check_discriminant,
258
21.4k
        ossl_ec_GFp_simple_point_init,
259
21.4k
        ossl_ec_GFp_simple_point_finish,
260
21.4k
        ossl_ec_GFp_simple_point_clear_finish,
261
21.4k
        ossl_ec_GFp_simple_point_copy,
262
21.4k
        ossl_ec_GFp_simple_point_set_to_infinity,
263
21.4k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
264
21.4k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265
21.4k
        0 /* point_set_compressed_coordinates */ ,
266
21.4k
        0 /* point2oct */ ,
267
21.4k
        0 /* oct2point */ ,
268
21.4k
        ossl_ec_GFp_simple_add,
269
21.4k
        ossl_ec_GFp_simple_dbl,
270
21.4k
        ossl_ec_GFp_simple_invert,
271
21.4k
        ossl_ec_GFp_simple_is_at_infinity,
272
21.4k
        ossl_ec_GFp_simple_is_on_curve,
273
21.4k
        ossl_ec_GFp_simple_cmp,
274
21.4k
        ossl_ec_GFp_simple_make_affine,
275
21.4k
        ossl_ec_GFp_simple_points_make_affine,
276
21.4k
        ossl_ec_GFp_nistp224_points_mul,
277
21.4k
        ossl_ec_GFp_nistp224_precompute_mult,
278
21.4k
        ossl_ec_GFp_nistp224_have_precompute_mult,
279
21.4k
        ossl_ec_GFp_nist_field_mul,
280
21.4k
        ossl_ec_GFp_nist_field_sqr,
281
21.4k
        0 /* field_div */ ,
282
21.4k
        ossl_ec_GFp_simple_field_inv,
283
21.4k
        0 /* field_encode */ ,
284
21.4k
        0 /* field_decode */ ,
285
21.4k
        0,                      /* field_set_to_one */
286
21.4k
        ossl_ec_key_simple_priv2oct,
287
21.4k
        ossl_ec_key_simple_oct2priv,
288
21.4k
        0, /* set private */
289
21.4k
        ossl_ec_key_simple_generate_key,
290
21.4k
        ossl_ec_key_simple_check_key,
291
21.4k
        ossl_ec_key_simple_generate_public_key,
292
21.4k
        0, /* keycopy */
293
21.4k
        0, /* keyfinish */
294
21.4k
        ossl_ecdh_simple_compute_key,
295
21.4k
        ossl_ecdsa_simple_sign_setup,
296
21.4k
        ossl_ecdsa_simple_sign_sig,
297
21.4k
        ossl_ecdsa_simple_verify_sig,
298
21.4k
        0, /* field_inverse_mod_ord */
299
21.4k
        0, /* blind_coordinates */
300
21.4k
        0, /* ladder_pre */
301
21.4k
        0, /* ladder_step */
302
21.4k
        0  /* ladder_post */
303
21.4k
    };
304
305
21.4k
    return &ret;
306
21.4k
}
307
308
/*
309
 * Helper functions to convert field elements to/from internal representation
310
 */
311
static void bin28_to_felem(felem out, const u8 in[28])
312
1.28k
{
313
1.28k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314
1.28k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315
1.28k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316
1.28k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317
1.28k
}
318
319
static void felem_to_bin28(u8 out[28], const felem in)
320
1.12k
{
321
1.12k
    unsigned i;
322
8.96k
    for (i = 0; i < 7; ++i) {
323
7.84k
        out[i] = in[0] >> (8 * i);
324
7.84k
        out[i + 7] = in[1] >> (8 * i);
325
7.84k
        out[i + 14] = in[2] >> (8 * i);
326
7.84k
        out[i + 21] = in[3] >> (8 * i);
327
7.84k
    }
328
1.12k
}
329
330
/* From OpenSSL BIGNUM to internal representation */
331
static int BN_to_felem(felem out, const BIGNUM *bn)
332
1.28k
{
333
1.28k
    felem_bytearray b_out;
334
1.28k
    int num_bytes;
335
336
1.28k
    if (BN_is_negative(bn)) {
337
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338
0
        return 0;
339
0
    }
340
1.28k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341
1.28k
    if (num_bytes < 0) {
342
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343
0
        return 0;
344
0
    }
345
1.28k
    bin28_to_felem(out, b_out);
346
1.28k
    return 1;
347
1.28k
}
348
349
/* From internal representation to OpenSSL BIGNUM */
350
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351
1.12k
{
352
1.12k
    felem_bytearray b_out;
353
1.12k
    felem_to_bin28(b_out, in);
354
1.12k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
355
1.12k
}
356
357
/******************************************************************************/
358
/*-
359
 *                              FIELD OPERATIONS
360
 *
361
 * Field operations, using the internal representation of field elements.
362
 * NB! These operations are specific to our point multiplication and cannot be
363
 * expected to be correct in general - e.g., multiplication with a large scalar
364
 * will cause an overflow.
365
 *
366
 */
367
368
static void felem_one(felem out)
369
0
{
370
0
    out[0] = 1;
371
0
    out[1] = 0;
372
0
    out[2] = 0;
373
0
    out[3] = 0;
374
0
}
375
376
static void felem_assign(felem out, const felem in)
377
24.2k
{
378
24.2k
    out[0] = in[0];
379
24.2k
    out[1] = in[1];
380
24.2k
    out[2] = in[2];
381
24.2k
    out[3] = in[3];
382
24.2k
}
383
384
/* Sum two field elements: out += in */
385
static void felem_sum(felem out, const felem in)
386
3.56k
{
387
3.56k
    out[0] += in[0];
388
3.56k
    out[1] += in[1];
389
3.56k
    out[2] += in[2];
390
3.56k
    out[3] += in[3];
391
3.56k
}
392
393
/* Subtract field elements: out -= in */
394
/* Assumes in[i] < 2^57 */
395
static void felem_diff(felem out, const felem in)
396
4.79k
{
397
4.79k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
398
4.79k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
399
4.79k
    static const limb two58m42m2 = (((limb) 1) << 58) -
400
4.79k
        (((limb) 1) << 42) - (((limb) 1) << 2);
401
402
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
403
4.79k
    out[0] += two58p2;
404
4.79k
    out[1] += two58m42m2;
405
4.79k
    out[2] += two58m2;
406
4.79k
    out[3] += two58m2;
407
408
4.79k
    out[0] -= in[0];
409
4.79k
    out[1] -= in[1];
410
4.79k
    out[2] -= in[2];
411
4.79k
    out[3] -= in[3];
412
4.79k
}
413
414
/* Subtract in unreduced 128-bit mode: out -= in */
415
/* Assumes in[i] < 2^119 */
416
static void widefelem_diff(widefelem out, const widefelem in)
417
3.60k
{
418
3.60k
    static const widelimb two120 = ((widelimb) 1) << 120;
419
3.60k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
420
3.60k
        (((widelimb) 1) << 64);
421
3.60k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
422
3.60k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
423
424
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425
3.60k
    out[0] += two120;
426
3.60k
    out[1] += two120m64;
427
3.60k
    out[2] += two120m64;
428
3.60k
    out[3] += two120;
429
3.60k
    out[4] += two120m104m64;
430
3.60k
    out[5] += two120m64;
431
3.60k
    out[6] += two120m64;
432
433
3.60k
    out[0] -= in[0];
434
3.60k
    out[1] -= in[1];
435
3.60k
    out[2] -= in[2];
436
3.60k
    out[3] -= in[3];
437
3.60k
    out[4] -= in[4];
438
3.60k
    out[5] -= in[5];
439
3.60k
    out[6] -= in[6];
440
3.60k
}
441
442
/* Subtract in mixed mode: out128 -= in64 */
443
/* in[i] < 2^63 */
444
static void felem_diff_128_64(widefelem out, const felem in)
445
12.0k
{
446
12.0k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
447
12.0k
        (((widelimb) 1) << 8);
448
12.0k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
449
12.0k
        (((widelimb) 1) << 8);
450
12.0k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
451
12.0k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
452
453
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
454
12.0k
    out[0] += two64p8;
455
12.0k
    out[1] += two64m48m8;
456
12.0k
    out[2] += two64m8;
457
12.0k
    out[3] += two64m8;
458
459
12.0k
    out[0] -= in[0];
460
12.0k
    out[1] -= in[1];
461
12.0k
    out[2] -= in[2];
462
12.0k
    out[3] -= in[3];
463
12.0k
}
464
465
/*
466
 * Multiply a field element by a scalar: out = out * scalar The scalars we
467
 * actually use are small, so results fit without overflow
468
 */
469
static void felem_scalar(felem out, const limb scalar)
470
5.98k
{
471
5.98k
    out[0] *= scalar;
472
5.98k
    out[1] *= scalar;
473
5.98k
    out[2] *= scalar;
474
5.98k
    out[3] *= scalar;
475
5.98k
}
476
477
/*
478
 * Multiply an unreduced field element by a scalar: out = out * scalar The
479
 * scalars we actually use are small, so results fit without overflow
480
 */
481
static void widefelem_scalar(widefelem out, const widelimb scalar)
482
1.18k
{
483
1.18k
    out[0] *= scalar;
484
1.18k
    out[1] *= scalar;
485
1.18k
    out[2] *= scalar;
486
1.18k
    out[3] *= scalar;
487
1.18k
    out[4] *= scalar;
488
1.18k
    out[5] *= scalar;
489
1.18k
    out[6] *= scalar;
490
1.18k
}
491
492
/* Square a field element: out = in^2 */
493
static void felem_square(widefelem out, const felem in)
494
109k
{
495
109k
    limb tmp0, tmp1, tmp2;
496
109k
    tmp0 = 2 * in[0];
497
109k
    tmp1 = 2 * in[1];
498
109k
    tmp2 = 2 * in[2];
499
109k
    out[0] = ((widelimb) in[0]) * in[0];
500
109k
    out[1] = ((widelimb) in[0]) * tmp1;
501
109k
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
502
109k
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
503
109k
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
504
109k
    out[5] = ((widelimb) in[3]) * tmp2;
505
109k
    out[6] = ((widelimb) in[3]) * in[3];
506
109k
}
507
508
/* Multiply two field elements: out = in1 * in2 */
509
static void felem_mul(widefelem out, const felem in1, const felem in2)
510
28.9k
{
511
28.9k
    out[0] = ((widelimb) in1[0]) * in2[0];
512
28.9k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
513
28.9k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
514
28.9k
             ((widelimb) in1[2]) * in2[0];
515
28.9k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
516
28.9k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
517
28.9k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
518
28.9k
             ((widelimb) in1[3]) * in2[1];
519
28.9k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
520
28.9k
    out[6] = ((widelimb) in1[3]) * in2[3];
521
28.9k
}
522
523
/*-
524
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
525
 * Requires in[i] < 2^126,
526
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
527
static void felem_reduce(felem out, const widefelem in)
528
134k
{
529
134k
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
530
134k
        (((widelimb) 1) << 15);
531
134k
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
532
134k
        (((widelimb) 1) << 71);
533
134k
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
534
134k
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
535
134k
    widelimb output[5];
536
537
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
538
134k
    output[0] = in[0] + two127p15;
539
134k
    output[1] = in[1] + two127m71m55;
540
134k
    output[2] = in[2] + two127m71;
541
134k
    output[3] = in[3];
542
134k
    output[4] = in[4];
543
544
    /* Eliminate in[4], in[5], in[6] */
545
134k
    output[4] += in[6] >> 16;
546
134k
    output[3] += (in[6] & 0xffff) << 40;
547
134k
    output[2] -= in[6];
548
549
134k
    output[3] += in[5] >> 16;
550
134k
    output[2] += (in[5] & 0xffff) << 40;
551
134k
    output[1] -= in[5];
552
553
134k
    output[2] += output[4] >> 16;
554
134k
    output[1] += (output[4] & 0xffff) << 40;
555
134k
    output[0] -= output[4];
556
557
    /* Carry 2 -> 3 -> 4 */
558
134k
    output[3] += output[2] >> 56;
559
134k
    output[2] &= 0x00ffffffffffffff;
560
561
134k
    output[4] = output[3] >> 56;
562
134k
    output[3] &= 0x00ffffffffffffff;
563
564
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
565
566
    /* Eliminate output[4] */
567
134k
    output[2] += output[4] >> 16;
568
    /* output[2] < 2^56 + 2^56 = 2^57 */
569
134k
    output[1] += (output[4] & 0xffff) << 40;
570
134k
    output[0] -= output[4];
571
572
    /* Carry 0 -> 1 -> 2 -> 3 */
573
134k
    output[1] += output[0] >> 56;
574
134k
    out[0] = output[0] & 0x00ffffffffffffff;
575
576
134k
    output[2] += output[1] >> 56;
577
    /* output[2] < 2^57 + 2^72 */
578
134k
    out[1] = output[1] & 0x00ffffffffffffff;
579
134k
    output[3] += output[2] >> 56;
580
    /* output[3] <= 2^56 + 2^16 */
581
134k
    out[2] = output[2] & 0x00ffffffffffffff;
582
583
    /*-
584
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
585
     * out[3] <= 2^56 + 2^16 (due to final carry),
586
     * so out < 2*p
587
     */
588
134k
    out[3] = output[3];
589
134k
}
590
591
static void felem_square_reduce(felem out, const felem in)
592
0
{
593
0
    widefelem tmp;
594
0
    felem_square(tmp, in);
595
0
    felem_reduce(out, tmp);
596
0
}
597
598
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
599
0
{
600
0
    widefelem tmp;
601
0
    felem_mul(tmp, in1, in2);
602
0
    felem_reduce(out, tmp);
603
0
}
604
605
/*
606
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
607
 * call felem_reduce first)
608
 */
609
static void felem_contract(felem out, const felem in)
610
988
{
611
988
    static const int64_t two56 = ((limb) 1) << 56;
612
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
613
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
614
988
    int64_t tmp[4], a;
615
988
    tmp[0] = in[0];
616
988
    tmp[1] = in[1];
617
988
    tmp[2] = in[2];
618
988
    tmp[3] = in[3];
619
    /* Case 1: a = 1 iff in >= 2^224 */
620
988
    a = (in[3] >> 56);
621
988
    tmp[0] -= a;
622
988
    tmp[1] += a << 40;
623
988
    tmp[3] &= 0x00ffffffffffffff;
624
    /*
625
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
626
     * and the lower part is non-zero
627
     */
628
988
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
629
988
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
630
988
    a &= 0x00ffffffffffffff;
631
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
632
988
    a = (a - 1) >> 63;
633
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
634
988
    tmp[3] &= a ^ 0xffffffffffffffff;
635
988
    tmp[2] &= a ^ 0xffffffffffffffff;
636
988
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
637
988
    tmp[0] -= 1 & a;
638
639
    /*
640
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
641
     * non-zero, so we only need one step
642
     */
643
988
    a = tmp[0] >> 63;
644
988
    tmp[0] += two56 & a;
645
988
    tmp[1] -= 1 & a;
646
647
    /* carry 1 -> 2 -> 3 */
648
988
    tmp[2] += tmp[1] >> 56;
649
988
    tmp[1] &= 0x00ffffffffffffff;
650
651
988
    tmp[3] += tmp[2] >> 56;
652
988
    tmp[2] &= 0x00ffffffffffffff;
653
654
    /* Now 0 <= out < p */
655
988
    out[0] = tmp[0];
656
988
    out[1] = tmp[1];
657
988
    out[2] = tmp[2];
658
988
    out[3] = tmp[3];
659
988
}
660
661
/*
662
 * Get negative value: out = -in
663
 * Requires in[i] < 2^63,
664
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
665
 */
666
static void felem_neg(felem out, const felem in)
667
0
{
668
0
    widefelem tmp;
669
670
0
    memset(tmp, 0, sizeof(tmp));
671
0
    felem_diff_128_64(tmp, in);
672
0
    felem_reduce(out, tmp);
673
0
}
674
675
/*
676
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
677
 * elements are reduced to in < 2^225, so we only need to check three cases:
678
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
679
 */
680
static limb felem_is_zero(const felem in)
681
9.68k
{
682
9.68k
    limb zero, two224m96p1, two225m97p2;
683
684
9.68k
    zero = in[0] | in[1] | in[2] | in[3];
685
9.68k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
686
9.68k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
687
9.68k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
688
9.68k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
689
9.68k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
690
9.68k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
691
9.68k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
692
9.68k
    return (zero | two224m96p1 | two225m97p2);
693
9.68k
}
694
695
static int felem_is_zero_int(const void *in)
696
0
{
697
0
    return (int)(felem_is_zero(in) & ((limb) 1));
698
0
}
699
700
/* Invert a field element */
701
/* Computation chain copied from djb's code */
702
static void felem_inv(felem out, const felem in)
703
428
{
704
428
    felem ftmp, ftmp2, ftmp3, ftmp4;
705
428
    widefelem tmp;
706
428
    unsigned i;
707
708
428
    felem_square(tmp, in);
709
428
    felem_reduce(ftmp, tmp);    /* 2 */
710
428
    felem_mul(tmp, in, ftmp);
711
428
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
712
428
    felem_square(tmp, ftmp);
713
428
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
714
428
    felem_mul(tmp, in, ftmp);
715
428
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
716
428
    felem_square(tmp, ftmp);
717
428
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
718
428
    felem_square(tmp, ftmp2);
719
428
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
720
428
    felem_square(tmp, ftmp2);
721
428
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
722
428
    felem_mul(tmp, ftmp2, ftmp);
723
428
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
724
428
    felem_square(tmp, ftmp);
725
428
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
726
2.56k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
727
2.14k
        felem_square(tmp, ftmp2);
728
2.14k
        felem_reduce(ftmp2, tmp);
729
2.14k
    }
730
428
    felem_mul(tmp, ftmp2, ftmp);
731
428
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
732
428
    felem_square(tmp, ftmp2);
733
428
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
734
5.13k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
735
4.70k
        felem_square(tmp, ftmp3);
736
4.70k
        felem_reduce(ftmp3, tmp);
737
4.70k
    }
738
428
    felem_mul(tmp, ftmp3, ftmp2);
739
428
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
740
428
    felem_square(tmp, ftmp2);
741
428
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
742
10.2k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
743
9.84k
        felem_square(tmp, ftmp3);
744
9.84k
        felem_reduce(ftmp3, tmp);
745
9.84k
    }
746
428
    felem_mul(tmp, ftmp3, ftmp2);
747
428
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
748
428
    felem_square(tmp, ftmp3);
749
428
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
750
20.5k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
751
20.1k
        felem_square(tmp, ftmp4);
752
20.1k
        felem_reduce(ftmp4, tmp);
753
20.1k
    }
754
428
    felem_mul(tmp, ftmp3, ftmp4);
755
428
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
756
428
    felem_square(tmp, ftmp3);
757
428
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
758
10.2k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
759
9.84k
        felem_square(tmp, ftmp4);
760
9.84k
        felem_reduce(ftmp4, tmp);
761
9.84k
    }
762
428
    felem_mul(tmp, ftmp2, ftmp4);
763
428
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
764
2.99k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
765
2.56k
        felem_square(tmp, ftmp2);
766
2.56k
        felem_reduce(ftmp2, tmp);
767
2.56k
    }
768
428
    felem_mul(tmp, ftmp2, ftmp);
769
428
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
770
428
    felem_square(tmp, ftmp);
771
428
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
772
428
    felem_mul(tmp, ftmp, in);
773
428
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
774
41.9k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
775
41.5k
        felem_square(tmp, ftmp);
776
41.5k
        felem_reduce(ftmp, tmp);
777
41.5k
    }
778
428
    felem_mul(tmp, ftmp, ftmp3);
779
428
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
780
428
}
781
782
/*
783
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
784
 * out to itself.
785
 */
786
static void copy_conditional(felem out, const felem in, limb icopy)
787
14.5k
{
788
14.5k
    unsigned i;
789
    /*
790
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
791
     */
792
14.5k
    const limb copy = -icopy;
793
72.6k
    for (i = 0; i < 4; ++i) {
794
58.0k
        const limb tmp = copy & (in[i] ^ out[i]);
795
58.0k
        out[i] ^= tmp;
796
58.0k
    }
797
14.5k
}
798
799
/******************************************************************************/
800
/*-
801
 *                       ELLIPTIC CURVE POINT OPERATIONS
802
 *
803
 * Points are represented in Jacobian projective coordinates:
804
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
805
 * or to the point at infinity if Z == 0.
806
 *
807
 */
808
809
/*-
810
 * Double an elliptic curve point:
811
 * (X', Y', Z') = 2 * (X, Y, Z), where
812
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
813
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
814
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
815
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
816
 * while x_out == y_in is not (maybe this works, but it's not tested).
817
 */
818
static void
819
point_double(felem x_out, felem y_out, felem z_out,
820
             const felem x_in, const felem y_in, const felem z_in)
821
1.18k
{
822
1.18k
    widefelem tmp, tmp2;
823
1.18k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
824
825
1.18k
    felem_assign(ftmp, x_in);
826
1.18k
    felem_assign(ftmp2, x_in);
827
828
    /* delta = z^2 */
829
1.18k
    felem_square(tmp, z_in);
830
1.18k
    felem_reduce(delta, tmp);
831
832
    /* gamma = y^2 */
833
1.18k
    felem_square(tmp, y_in);
834
1.18k
    felem_reduce(gamma, tmp);
835
836
    /* beta = x*gamma */
837
1.18k
    felem_mul(tmp, x_in, gamma);
838
1.18k
    felem_reduce(beta, tmp);
839
840
    /* alpha = 3*(x-delta)*(x+delta) */
841
1.18k
    felem_diff(ftmp, delta);
842
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
843
1.18k
    felem_sum(ftmp2, delta);
844
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
845
1.18k
    felem_scalar(ftmp2, 3);
846
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
847
1.18k
    felem_mul(tmp, ftmp, ftmp2);
848
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
849
1.18k
    felem_reduce(alpha, tmp);
850
851
    /* x' = alpha^2 - 8*beta */
852
1.18k
    felem_square(tmp, alpha);
853
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
854
1.18k
    felem_assign(ftmp, beta);
855
1.18k
    felem_scalar(ftmp, 8);
856
    /* ftmp[i] < 8 * 2^57 = 2^60 */
857
1.18k
    felem_diff_128_64(tmp, ftmp);
858
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
859
1.18k
    felem_reduce(x_out, tmp);
860
861
    /* z' = (y + z)^2 - gamma - delta */
862
1.18k
    felem_sum(delta, gamma);
863
    /* delta[i] < 2^57 + 2^57 = 2^58 */
864
1.18k
    felem_assign(ftmp, y_in);
865
1.18k
    felem_sum(ftmp, z_in);
866
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
867
1.18k
    felem_square(tmp, ftmp);
868
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
869
1.18k
    felem_diff_128_64(tmp, delta);
870
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
871
1.18k
    felem_reduce(z_out, tmp);
872
873
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
874
1.18k
    felem_scalar(beta, 4);
875
    /* beta[i] < 4 * 2^57 = 2^59 */
876
1.18k
    felem_diff(beta, x_out);
877
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
878
1.18k
    felem_mul(tmp, alpha, beta);
879
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
880
1.18k
    felem_square(tmp2, gamma);
881
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
882
1.18k
    widefelem_scalar(tmp2, 8);
883
    /* tmp2[i] < 8 * 2^116 = 2^119 */
884
1.18k
    widefelem_diff(tmp, tmp2);
885
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
886
1.18k
    felem_reduce(y_out, tmp);
887
1.18k
}
888
889
/*-
890
 * Add two elliptic curve points:
891
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
892
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
893
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
894
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
895
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
896
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
897
 *
898
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
899
 */
900
901
/*
902
 * This function is not entirely constant-time: it includes a branch for
903
 * checking whether the two input points are equal, (while not equal to the
904
 * point at infinity). This case never happens during single point
905
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
906
 */
907
static void point_add(felem x3, felem y3, felem z3,
908
                      const felem x1, const felem y1, const felem z1,
909
                      const int mixed, const felem x2, const felem y2,
910
                      const felem z2)
911
2.42k
{
912
2.42k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
913
2.42k
    widefelem tmp, tmp2;
914
2.42k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
915
2.42k
    limb points_equal;
916
917
2.42k
    if (!mixed) {
918
        /* ftmp2 = z2^2 */
919
0
        felem_square(tmp, z2);
920
0
        felem_reduce(ftmp2, tmp);
921
922
        /* ftmp4 = z2^3 */
923
0
        felem_mul(tmp, ftmp2, z2);
924
0
        felem_reduce(ftmp4, tmp);
925
926
        /* ftmp4 = z2^3*y1 */
927
0
        felem_mul(tmp2, ftmp4, y1);
928
0
        felem_reduce(ftmp4, tmp2);
929
930
        /* ftmp2 = z2^2*x1 */
931
0
        felem_mul(tmp2, ftmp2, x1);
932
0
        felem_reduce(ftmp2, tmp2);
933
2.42k
    } else {
934
        /*
935
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
936
         */
937
938
        /* ftmp4 = z2^3*y1 */
939
2.42k
        felem_assign(ftmp4, y1);
940
941
        /* ftmp2 = z2^2*x1 */
942
2.42k
        felem_assign(ftmp2, x1);
943
2.42k
    }
944
945
    /* ftmp = z1^2 */
946
2.42k
    felem_square(tmp, z1);
947
2.42k
    felem_reduce(ftmp, tmp);
948
949
    /* ftmp3 = z1^3 */
950
2.42k
    felem_mul(tmp, ftmp, z1);
951
2.42k
    felem_reduce(ftmp3, tmp);
952
953
    /* tmp = z1^3*y2 */
954
2.42k
    felem_mul(tmp, ftmp3, y2);
955
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
956
957
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
958
2.42k
    felem_diff_128_64(tmp, ftmp4);
959
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
960
2.42k
    felem_reduce(ftmp3, tmp);
961
962
    /* tmp = z1^2*x2 */
963
2.42k
    felem_mul(tmp, ftmp, x2);
964
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
965
966
    /* ftmp = z1^2*x2 - z2^2*x1 */
967
2.42k
    felem_diff_128_64(tmp, ftmp2);
968
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
969
2.42k
    felem_reduce(ftmp, tmp);
970
971
    /*
972
     * The formulae are incorrect if the points are equal, in affine coordinates
973
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
974
     * happens.
975
     *
976
     * We use bitwise operations to avoid potential side-channels introduced by
977
     * the short-circuiting behaviour of boolean operators.
978
     */
979
2.42k
    x_equal = felem_is_zero(ftmp);
980
2.42k
    y_equal = felem_is_zero(ftmp3);
981
    /*
982
     * The special case of either point being the point at infinity (z1 and/or
983
     * z2 are zero), is handled separately later on in this function, so we
984
     * avoid jumping to point_double here in those special cases.
985
     */
986
2.42k
    z1_is_zero = felem_is_zero(z1);
987
2.42k
    z2_is_zero = felem_is_zero(z2);
988
989
    /*
990
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
991
     * specific implementation `felem_is_zero()` returns truth as `0x1`
992
     * (rather than `0xff..ff`).
993
     *
994
     * This implies that `~true` in this implementation becomes
995
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
996
     * the if expression, we mask out only the last bit in the next
997
     * line.
998
     */
999
2.42k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
1000
1001
2.42k
    if (points_equal) {
1002
        /*
1003
         * This is obviously not constant-time but, as mentioned before, this
1004
         * case never happens during single point multiplication, so there is no
1005
         * timing leak for ECDH or ECDSA signing.
1006
         */
1007
0
        point_double(x3, y3, z3, x1, y1, z1);
1008
0
        return;
1009
0
    }
1010
1011
    /* ftmp5 = z1*z2 */
1012
2.42k
    if (!mixed) {
1013
0
        felem_mul(tmp, z1, z2);
1014
0
        felem_reduce(ftmp5, tmp);
1015
2.42k
    } else {
1016
        /* special case z2 = 0 is handled later */
1017
2.42k
        felem_assign(ftmp5, z1);
1018
2.42k
    }
1019
1020
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1021
2.42k
    felem_mul(tmp, ftmp, ftmp5);
1022
2.42k
    felem_reduce(z_out, tmp);
1023
1024
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1025
2.42k
    felem_assign(ftmp5, ftmp);
1026
2.42k
    felem_square(tmp, ftmp);
1027
2.42k
    felem_reduce(ftmp, tmp);
1028
1029
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1030
2.42k
    felem_mul(tmp, ftmp, ftmp5);
1031
2.42k
    felem_reduce(ftmp5, tmp);
1032
1033
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1034
2.42k
    felem_mul(tmp, ftmp2, ftmp);
1035
2.42k
    felem_reduce(ftmp2, tmp);
1036
1037
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1038
2.42k
    felem_mul(tmp, ftmp4, ftmp5);
1039
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1040
1041
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1042
2.42k
    felem_square(tmp2, ftmp3);
1043
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1044
1045
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1046
2.42k
    felem_diff_128_64(tmp2, ftmp5);
1047
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1048
1049
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1050
2.42k
    felem_assign(ftmp5, ftmp2);
1051
2.42k
    felem_scalar(ftmp5, 2);
1052
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1053
1054
    /*-
1055
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1056
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1057
     */
1058
2.42k
    felem_diff_128_64(tmp2, ftmp5);
1059
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1060
2.42k
    felem_reduce(x_out, tmp2);
1061
1062
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1063
2.42k
    felem_diff(ftmp2, x_out);
1064
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1065
1066
    /*
1067
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1068
     */
1069
2.42k
    felem_mul(tmp2, ftmp3, ftmp2);
1070
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1071
1072
    /*-
1073
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1074
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1075
     */
1076
2.42k
    widefelem_diff(tmp2, tmp);
1077
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1078
2.42k
    felem_reduce(y_out, tmp2);
1079
1080
    /*
1081
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1082
     * the point at infinity, so we need to check for this separately
1083
     */
1084
1085
    /*
1086
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1087
     */
1088
2.42k
    copy_conditional(x_out, x2, z1_is_zero);
1089
2.42k
    copy_conditional(x_out, x1, z2_is_zero);
1090
2.42k
    copy_conditional(y_out, y2, z1_is_zero);
1091
2.42k
    copy_conditional(y_out, y1, z2_is_zero);
1092
2.42k
    copy_conditional(z_out, z2, z1_is_zero);
1093
2.42k
    copy_conditional(z_out, z1, z2_is_zero);
1094
2.42k
    felem_assign(x3, x_out);
1095
2.42k
    felem_assign(y3, y_out);
1096
2.42k
    felem_assign(z3, z_out);
1097
2.42k
}
1098
1099
/*
1100
 * select_point selects the |idx|th point from a precomputation table and
1101
 * copies it to out.
1102
 * The pre_comp array argument should be size of |size| argument
1103
 */
1104
static void select_point(const u64 idx, unsigned int size,
1105
                         const felem pre_comp[][3], felem out[3])
1106
2.46k
{
1107
2.46k
    unsigned i, j;
1108
2.46k
    limb *outlimbs = &out[0][0];
1109
1110
2.46k
    memset(out, 0, sizeof(*out) * 3);
1111
41.8k
    for (i = 0; i < size; i++) {
1112
39.4k
        const limb *inlimbs = &pre_comp[i][0][0];
1113
39.4k
        u64 mask = i ^ idx;
1114
39.4k
        mask |= mask >> 4;
1115
39.4k
        mask |= mask >> 2;
1116
39.4k
        mask |= mask >> 1;
1117
39.4k
        mask &= 1;
1118
39.4k
        mask--;
1119
512k
        for (j = 0; j < 4 * 3; j++)
1120
473k
            outlimbs[j] |= inlimbs[j] & mask;
1121
39.4k
    }
1122
2.46k
}
1123
1124
/* get_bit returns the |i|th bit in |in| */
1125
static char get_bit(const felem_bytearray in, unsigned i)
1126
9.85k
{
1127
9.85k
    if (i >= 224)
1128
0
        return 0;
1129
9.85k
    return (in[i >> 3] >> (i & 7)) & 1;
1130
9.85k
}
1131
1132
/*
1133
 * Interleaved point multiplication using precomputed point multiples: The
1134
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1135
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1136
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1137
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1138
 */
1139
static void batch_mul(felem x_out, felem y_out, felem z_out,
1140
                      const felem_bytearray scalars[],
1141
                      const unsigned num_points, const u8 *g_scalar,
1142
                      const int mixed, const felem pre_comp[][17][3],
1143
                      const felem g_pre_comp[2][16][3])
1144
44
{
1145
44
    int i, skip;
1146
44
    unsigned num;
1147
44
    unsigned gen_mul = (g_scalar != NULL);
1148
44
    felem nq[3], tmp[4];
1149
44
    u64 bits;
1150
44
    u8 sign, digit;
1151
1152
    /* set nq to the point at infinity */
1153
44
    memset(nq, 0, sizeof(nq));
1154
1155
    /*
1156
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1157
     * of the generator (two in each of the last 28 rounds) and additions of
1158
     * other points multiples (every 5th round).
1159
     */
1160
44
    skip = 1;                   /* save two point operations in the first
1161
                                 * round */
1162
1.27k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1163
        /* double */
1164
1.23k
        if (!skip)
1165
1.18k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1166
1167
        /* add multiples of the generator */
1168
1.23k
        if (gen_mul && (i <= 27)) {
1169
            /* first, look 28 bits upwards */
1170
1.23k
            bits = get_bit(g_scalar, i + 196) << 3;
1171
1.23k
            bits |= get_bit(g_scalar, i + 140) << 2;
1172
1.23k
            bits |= get_bit(g_scalar, i + 84) << 1;
1173
1.23k
            bits |= get_bit(g_scalar, i + 28);
1174
            /* select the point to add, in constant time */
1175
1.23k
            select_point(bits, 16, g_pre_comp[1], tmp);
1176
1177
1.23k
            if (!skip) {
1178
                /* value 1 below is argument for "mixed" */
1179
1.18k
                point_add(nq[0], nq[1], nq[2],
1180
1.18k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1181
1.18k
            } else {
1182
44
                memcpy(nq, tmp, 3 * sizeof(felem));
1183
44
                skip = 0;
1184
44
            }
1185
1186
            /* second, look at the current position */
1187
1.23k
            bits = get_bit(g_scalar, i + 168) << 3;
1188
1.23k
            bits |= get_bit(g_scalar, i + 112) << 2;
1189
1.23k
            bits |= get_bit(g_scalar, i + 56) << 1;
1190
1.23k
            bits |= get_bit(g_scalar, i);
1191
            /* select the point to add, in constant time */
1192
1.23k
            select_point(bits, 16, g_pre_comp[0], tmp);
1193
1.23k
            point_add(nq[0], nq[1], nq[2],
1194
1.23k
                      nq[0], nq[1], nq[2],
1195
1.23k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1196
1.23k
        }
1197
1198
        /* do other additions every 5 doublings */
1199
1.23k
        if (num_points && (i % 5 == 0)) {
1200
            /* loop over all scalars */
1201
0
            for (num = 0; num < num_points; ++num) {
1202
0
                bits = get_bit(scalars[num], i + 4) << 5;
1203
0
                bits |= get_bit(scalars[num], i + 3) << 4;
1204
0
                bits |= get_bit(scalars[num], i + 2) << 3;
1205
0
                bits |= get_bit(scalars[num], i + 1) << 2;
1206
0
                bits |= get_bit(scalars[num], i) << 1;
1207
0
                bits |= get_bit(scalars[num], i - 1);
1208
0
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1209
1210
                /* select the point to add or subtract */
1211
0
                select_point(digit, 17, pre_comp[num], tmp);
1212
0
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1213
                                            * point */
1214
0
                copy_conditional(tmp[1], tmp[3], sign);
1215
1216
0
                if (!skip) {
1217
0
                    point_add(nq[0], nq[1], nq[2],
1218
0
                              nq[0], nq[1], nq[2],
1219
0
                              mixed, tmp[0], tmp[1], tmp[2]);
1220
0
                } else {
1221
0
                    memcpy(nq, tmp, 3 * sizeof(felem));
1222
0
                    skip = 0;
1223
0
                }
1224
0
            }
1225
0
        }
1226
1.23k
    }
1227
44
    felem_assign(x_out, nq[0]);
1228
44
    felem_assign(y_out, nq[1]);
1229
44
    felem_assign(z_out, nq[2]);
1230
44
}
1231
1232
/******************************************************************************/
1233
/*
1234
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1235
 */
1236
1237
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1238
0
{
1239
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1240
1241
0
    if (!ret) {
1242
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1243
0
        return ret;
1244
0
    }
1245
1246
0
    ret->references = 1;
1247
1248
0
    ret->lock = CRYPTO_THREAD_lock_new();
1249
0
    if (ret->lock == NULL) {
1250
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1251
0
        OPENSSL_free(ret);
1252
0
        return NULL;
1253
0
    }
1254
0
    return ret;
1255
0
}
1256
1257
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1258
0
{
1259
0
    int i;
1260
0
    if (p != NULL)
1261
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1262
0
    return p;
1263
0
}
1264
1265
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1266
0
{
1267
0
    int i;
1268
1269
0
    if (p == NULL)
1270
0
        return;
1271
1272
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1273
0
    REF_PRINT_COUNT("EC_nistp224", p);
1274
0
    if (i > 0)
1275
0
        return;
1276
0
    REF_ASSERT_ISNT(i < 0);
1277
1278
0
    CRYPTO_THREAD_lock_free(p->lock);
1279
0
    OPENSSL_free(p);
1280
0
}
1281
1282
/******************************************************************************/
1283
/*
1284
 * OPENSSL EC_METHOD FUNCTIONS
1285
 */
1286
1287
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1288
27.8k
{
1289
27.8k
    int ret;
1290
27.8k
    ret = ossl_ec_GFp_simple_group_init(group);
1291
27.8k
    group->a_is_minus3 = 1;
1292
27.8k
    return ret;
1293
27.8k
}
1294
1295
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1296
                                         const BIGNUM *a, const BIGNUM *b,
1297
                                         BN_CTX *ctx)
1298
14.1k
{
1299
14.1k
    int ret = 0;
1300
14.1k
    BIGNUM *curve_p, *curve_a, *curve_b;
1301
14.1k
#ifndef FIPS_MODULE
1302
14.1k
    BN_CTX *new_ctx = NULL;
1303
1304
14.1k
    if (ctx == NULL)
1305
0
        ctx = new_ctx = BN_CTX_new();
1306
14.1k
#endif
1307
14.1k
    if (ctx == NULL)
1308
0
        return 0;
1309
1310
14.1k
    BN_CTX_start(ctx);
1311
14.1k
    curve_p = BN_CTX_get(ctx);
1312
14.1k
    curve_a = BN_CTX_get(ctx);
1313
14.1k
    curve_b = BN_CTX_get(ctx);
1314
14.1k
    if (curve_b == NULL)
1315
0
        goto err;
1316
14.1k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317
14.1k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318
14.1k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319
14.1k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1321
0
        goto err;
1322
0
    }
1323
14.1k
    group->field_mod_func = BN_nist_mod_224;
1324
14.1k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1325
14.1k
 err:
1326
14.1k
    BN_CTX_end(ctx);
1327
14.1k
#ifndef FIPS_MODULE
1328
14.1k
    BN_CTX_free(new_ctx);
1329
14.1k
#endif
1330
14.1k
    return ret;
1331
14.1k
}
1332
1333
/*
1334
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335
 * (X/Z^2, Y/Z^3)
1336
 */
1337
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338
                                                      const EC_POINT *point,
1339
                                                      BIGNUM *x, BIGNUM *y,
1340
                                                      BN_CTX *ctx)
1341
405
{
1342
405
    felem z1, z2, x_in, y_in, x_out, y_out;
1343
405
    widefelem tmp;
1344
1345
405
    if (EC_POINT_is_at_infinity(group, point)) {
1346
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1347
0
        return 0;
1348
0
    }
1349
405
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1350
405
        (!BN_to_felem(z1, point->Z)))
1351
0
        return 0;
1352
405
    felem_inv(z2, z1);
1353
405
    felem_square(tmp, z2);
1354
405
    felem_reduce(z1, tmp);
1355
405
    felem_mul(tmp, x_in, z1);
1356
405
    felem_reduce(x_in, tmp);
1357
405
    felem_contract(x_out, x_in);
1358
405
    if (x != NULL) {
1359
405
        if (!felem_to_BN(x, x_out)) {
1360
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1361
0
            return 0;
1362
0
        }
1363
405
    }
1364
405
    felem_mul(tmp, z1, z2);
1365
405
    felem_reduce(z1, tmp);
1366
405
    felem_mul(tmp, y_in, z1);
1367
405
    felem_reduce(y_in, tmp);
1368
405
    felem_contract(y_out, y_in);
1369
405
    if (y != NULL) {
1370
405
        if (!felem_to_BN(y, y_out)) {
1371
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1372
0
            return 0;
1373
0
        }
1374
405
    }
1375
405
    return 1;
1376
405
}
1377
1378
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1379
                               felem tmp_felems[ /* num+1 */ ])
1380
0
{
1381
    /*
1382
     * Runs in constant time, unless an input is the point at infinity (which
1383
     * normally shouldn't happen).
1384
     */
1385
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1386
0
                                                  points,
1387
0
                                                  sizeof(felem),
1388
0
                                                  tmp_felems,
1389
0
                                                  (void (*)(void *))felem_one,
1390
0
                                                  felem_is_zero_int,
1391
0
                                                  (void (*)(void *, const void *))
1392
0
                                                  felem_assign,
1393
0
                                                  (void (*)(void *, const void *))
1394
0
                                                  felem_square_reduce, (void (*)
1395
0
                                                                        (void *,
1396
0
                                                                         const void
1397
0
                                                                         *,
1398
0
                                                                         const void
1399
0
                                                                         *))
1400
0
                                                  felem_mul_reduce,
1401
0
                                                  (void (*)(void *, const void *))
1402
0
                                                  felem_inv,
1403
0
                                                  (void (*)(void *, const void *))
1404
0
                                                  felem_contract);
1405
0
}
1406
1407
/*
1408
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1409
 * values Result is stored in r (r can equal one of the inputs).
1410
 */
1411
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1412
                                    const BIGNUM *scalar, size_t num,
1413
                                    const EC_POINT *points[],
1414
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1415
34
{
1416
34
    int ret = 0;
1417
34
    int j;
1418
34
    unsigned i;
1419
34
    int mixed = 0;
1420
34
    BIGNUM *x, *y, *z, *tmp_scalar;
1421
34
    felem_bytearray g_secret;
1422
34
    felem_bytearray *secrets = NULL;
1423
34
    felem (*pre_comp)[17][3] = NULL;
1424
34
    felem *tmp_felems = NULL;
1425
34
    int num_bytes;
1426
34
    int have_pre_comp = 0;
1427
34
    size_t num_points = num;
1428
34
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1429
34
    NISTP224_PRE_COMP *pre = NULL;
1430
34
    const felem(*g_pre_comp)[16][3] = NULL;
1431
34
    EC_POINT *generator = NULL;
1432
34
    const EC_POINT *p = NULL;
1433
34
    const BIGNUM *p_scalar = NULL;
1434
1435
34
    BN_CTX_start(ctx);
1436
34
    x = BN_CTX_get(ctx);
1437
34
    y = BN_CTX_get(ctx);
1438
34
    z = BN_CTX_get(ctx);
1439
34
    tmp_scalar = BN_CTX_get(ctx);
1440
34
    if (tmp_scalar == NULL)
1441
0
        goto err;
1442
1443
34
    if (scalar != NULL) {
1444
34
        pre = group->pre_comp.nistp224;
1445
34
        if (pre)
1446
            /* we have precomputation, try to use it */
1447
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1448
34
        else
1449
            /* try to use the standard precomputation */
1450
34
            g_pre_comp = &gmul[0];
1451
34
        generator = EC_POINT_new(group);
1452
34
        if (generator == NULL)
1453
0
            goto err;
1454
        /* get the generator from precomputation */
1455
34
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1456
34
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1457
34
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1458
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1459
0
            goto err;
1460
0
        }
1461
34
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1462
34
                                                                generator,
1463
34
                                                                x, y, z, ctx))
1464
0
            goto err;
1465
34
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1466
            /* precomputation matches generator */
1467
34
            have_pre_comp = 1;
1468
0
        else
1469
            /*
1470
             * we don't have valid precomputation: treat the generator as a
1471
             * random point
1472
             */
1473
0
            num_points = num_points + 1;
1474
34
    }
1475
1476
34
    if (num_points > 0) {
1477
0
        if (num_points >= 3) {
1478
            /*
1479
             * unless we precompute multiples for just one or two points,
1480
             * converting those into affine form is time well spent
1481
             */
1482
0
            mixed = 1;
1483
0
        }
1484
0
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1485
0
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1486
0
        if (mixed)
1487
0
            tmp_felems =
1488
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1489
0
        if ((secrets == NULL) || (pre_comp == NULL)
1490
0
            || (mixed && (tmp_felems == NULL))) {
1491
0
            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1492
0
            goto err;
1493
0
        }
1494
1495
        /*
1496
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1497
         * i.e., they contribute nothing to the linear combination
1498
         */
1499
0
        for (i = 0; i < num_points; ++i) {
1500
0
            if (i == num) {
1501
                /* the generator */
1502
0
                p = EC_GROUP_get0_generator(group);
1503
0
                p_scalar = scalar;
1504
0
            } else {
1505
                /* the i^th point */
1506
0
                p = points[i];
1507
0
                p_scalar = scalars[i];
1508
0
            }
1509
0
            if ((p_scalar != NULL) && (p != NULL)) {
1510
                /* reduce scalar to 0 <= scalar < 2^224 */
1511
0
                if ((BN_num_bits(p_scalar) > 224)
1512
0
                    || (BN_is_negative(p_scalar))) {
1513
                    /*
1514
                     * this is an unusual input, and we don't guarantee
1515
                     * constant-timeness
1516
                     */
1517
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1518
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1519
0
                        goto err;
1520
0
                    }
1521
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1522
0
                                               secrets[i], sizeof(secrets[i]));
1523
0
                } else {
1524
0
                    num_bytes = BN_bn2lebinpad(p_scalar,
1525
0
                                               secrets[i], sizeof(secrets[i]));
1526
0
                }
1527
0
                if (num_bytes < 0) {
1528
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1529
0
                    goto err;
1530
0
                }
1531
                /* precompute multiples */
1532
0
                if ((!BN_to_felem(x_out, p->X)) ||
1533
0
                    (!BN_to_felem(y_out, p->Y)) ||
1534
0
                    (!BN_to_felem(z_out, p->Z)))
1535
0
                    goto err;
1536
0
                felem_assign(pre_comp[i][1][0], x_out);
1537
0
                felem_assign(pre_comp[i][1][1], y_out);
1538
0
                felem_assign(pre_comp[i][1][2], z_out);
1539
0
                for (j = 2; j <= 16; ++j) {
1540
0
                    if (j & 1) {
1541
0
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1542
0
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1543
0
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1544
0
                                  pre_comp[i][j - 1][0],
1545
0
                                  pre_comp[i][j - 1][1],
1546
0
                                  pre_comp[i][j - 1][2]);
1547
0
                    } else {
1548
0
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1549
0
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1550
0
                                     pre_comp[i][j / 2][1],
1551
0
                                     pre_comp[i][j / 2][2]);
1552
0
                    }
1553
0
                }
1554
0
            }
1555
0
        }
1556
0
        if (mixed)
1557
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1558
0
    }
1559
1560
    /* the scalar for the generator */
1561
34
    if ((scalar != NULL) && (have_pre_comp)) {
1562
34
        memset(g_secret, 0, sizeof(g_secret));
1563
        /* reduce scalar to 0 <= scalar < 2^224 */
1564
34
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1565
            /*
1566
             * this is an unusual input, and we don't guarantee
1567
             * constant-timeness
1568
             */
1569
10
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1570
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1571
0
                goto err;
1572
0
            }
1573
10
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1574
24
        } else {
1575
24
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1576
24
        }
1577
        /* do the multiplication with generator precomputation */
1578
34
        batch_mul(x_out, y_out, z_out,
1579
34
                  (const felem_bytearray(*))secrets, num_points,
1580
34
                  g_secret,
1581
34
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1582
34
    } else {
1583
        /* do the multiplication without generator precomputation */
1584
0
        batch_mul(x_out, y_out, z_out,
1585
0
                  (const felem_bytearray(*))secrets, num_points,
1586
0
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1587
0
    }
1588
    /* reduce the output to its unique minimal representation */
1589
34
    felem_contract(x_in, x_out);
1590
34
    felem_contract(y_in, y_out);
1591
34
    felem_contract(z_in, z_out);
1592
34
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1593
34
        (!felem_to_BN(z, z_in))) {
1594
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1595
0
        goto err;
1596
0
    }
1597
34
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1598
34
                                                             ctx);
1599
1600
34
 err:
1601
34
    BN_CTX_end(ctx);
1602
34
    EC_POINT_free(generator);
1603
34
    OPENSSL_free(secrets);
1604
34
    OPENSSL_free(pre_comp);
1605
34
    OPENSSL_free(tmp_felems);
1606
34
    return ret;
1607
34
}
1608
1609
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1610
0
{
1611
0
    int ret = 0;
1612
0
    NISTP224_PRE_COMP *pre = NULL;
1613
0
    int i, j;
1614
0
    BIGNUM *x, *y;
1615
0
    EC_POINT *generator = NULL;
1616
0
    felem tmp_felems[32];
1617
0
#ifndef FIPS_MODULE
1618
0
    BN_CTX *new_ctx = NULL;
1619
0
#endif
1620
1621
    /* throw away old precomputation */
1622
0
    EC_pre_comp_free(group);
1623
1624
0
#ifndef FIPS_MODULE
1625
0
    if (ctx == NULL)
1626
0
        ctx = new_ctx = BN_CTX_new();
1627
0
#endif
1628
0
    if (ctx == NULL)
1629
0
        return 0;
1630
1631
0
    BN_CTX_start(ctx);
1632
0
    x = BN_CTX_get(ctx);
1633
0
    y = BN_CTX_get(ctx);
1634
0
    if (y == NULL)
1635
0
        goto err;
1636
    /* get the generator */
1637
0
    if (group->generator == NULL)
1638
0
        goto err;
1639
0
    generator = EC_POINT_new(group);
1640
0
    if (generator == NULL)
1641
0
        goto err;
1642
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1643
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1644
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1645
0
        goto err;
1646
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1647
0
        goto err;
1648
    /*
1649
     * if the generator is the standard one, use built-in precomputation
1650
     */
1651
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1652
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1653
0
        goto done;
1654
0
    }
1655
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1656
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1657
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1658
0
        goto err;
1659
    /*
1660
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1661
     * 2^140*G, 2^196*G for the second one
1662
     */
1663
0
    for (i = 1; i <= 8; i <<= 1) {
1664
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1665
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1666
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1667
0
        for (j = 0; j < 27; ++j) {
1668
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1669
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1670
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1671
0
        }
1672
0
        if (i == 8)
1673
0
            break;
1674
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1675
0
                     pre->g_pre_comp[0][2 * i][1],
1676
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1677
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1678
0
        for (j = 0; j < 27; ++j) {
1679
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1680
0
                         pre->g_pre_comp[0][2 * i][1],
1681
0
                         pre->g_pre_comp[0][2 * i][2],
1682
0
                         pre->g_pre_comp[0][2 * i][0],
1683
0
                         pre->g_pre_comp[0][2 * i][1],
1684
0
                         pre->g_pre_comp[0][2 * i][2]);
1685
0
        }
1686
0
    }
1687
0
    for (i = 0; i < 2; i++) {
1688
        /* g_pre_comp[i][0] is the point at infinity */
1689
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1690
        /* the remaining multiples */
1691
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1692
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1693
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1694
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1695
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1696
0
                  pre->g_pre_comp[i][2][2]);
1697
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1698
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1699
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1700
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1701
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702
0
                  pre->g_pre_comp[i][2][2]);
1703
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1704
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1705
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1706
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1707
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1708
0
                  pre->g_pre_comp[i][4][2]);
1709
        /*
1710
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1711
         */
1712
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1713
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1714
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1715
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1716
0
                  pre->g_pre_comp[i][2][2]);
1717
0
        for (j = 1; j < 8; ++j) {
1718
            /* odd multiples: add G resp. 2^28*G */
1719
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1720
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1721
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1722
0
                      pre->g_pre_comp[i][2 * j][0],
1723
0
                      pre->g_pre_comp[i][2 * j][1],
1724
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1725
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1726
0
                      pre->g_pre_comp[i][1][2]);
1727
0
        }
1728
0
    }
1729
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1730
1731
0
 done:
1732
0
    SETPRECOMP(group, nistp224, pre);
1733
0
    pre = NULL;
1734
0
    ret = 1;
1735
0
 err:
1736
0
    BN_CTX_end(ctx);
1737
0
    EC_POINT_free(generator);
1738
0
#ifndef FIPS_MODULE
1739
0
    BN_CTX_free(new_ctx);
1740
0
#endif
1741
0
    EC_nistp224_pre_comp_free(pre);
1742
0
    return ret;
1743
0
}
1744
1745
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1746
0
{
1747
0
    return HAVEPRECOMP(group, nistp224);
1748
0
}