Coverage Report

Created: 2024-07-27 06:39

/src/openssl32/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
};
242
243
const EC_METHOD *EC_GFp_nistp224_method(void)
244
28.2k
{
245
28.2k
    static const EC_METHOD ret = {
246
28.2k
        EC_FLAGS_DEFAULT_OCT,
247
28.2k
        NID_X9_62_prime_field,
248
28.2k
        ossl_ec_GFp_nistp224_group_init,
249
28.2k
        ossl_ec_GFp_simple_group_finish,
250
28.2k
        ossl_ec_GFp_simple_group_clear_finish,
251
28.2k
        ossl_ec_GFp_nist_group_copy,
252
28.2k
        ossl_ec_GFp_nistp224_group_set_curve,
253
28.2k
        ossl_ec_GFp_simple_group_get_curve,
254
28.2k
        ossl_ec_GFp_simple_group_get_degree,
255
28.2k
        ossl_ec_group_simple_order_bits,
256
28.2k
        ossl_ec_GFp_simple_group_check_discriminant,
257
28.2k
        ossl_ec_GFp_simple_point_init,
258
28.2k
        ossl_ec_GFp_simple_point_finish,
259
28.2k
        ossl_ec_GFp_simple_point_clear_finish,
260
28.2k
        ossl_ec_GFp_simple_point_copy,
261
28.2k
        ossl_ec_GFp_simple_point_set_to_infinity,
262
28.2k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
263
28.2k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
264
28.2k
        0 /* point_set_compressed_coordinates */ ,
265
28.2k
        0 /* point2oct */ ,
266
28.2k
        0 /* oct2point */ ,
267
28.2k
        ossl_ec_GFp_simple_add,
268
28.2k
        ossl_ec_GFp_simple_dbl,
269
28.2k
        ossl_ec_GFp_simple_invert,
270
28.2k
        ossl_ec_GFp_simple_is_at_infinity,
271
28.2k
        ossl_ec_GFp_simple_is_on_curve,
272
28.2k
        ossl_ec_GFp_simple_cmp,
273
28.2k
        ossl_ec_GFp_simple_make_affine,
274
28.2k
        ossl_ec_GFp_simple_points_make_affine,
275
28.2k
        ossl_ec_GFp_nistp224_points_mul,
276
28.2k
        ossl_ec_GFp_nistp224_precompute_mult,
277
28.2k
        ossl_ec_GFp_nistp224_have_precompute_mult,
278
28.2k
        ossl_ec_GFp_nist_field_mul,
279
28.2k
        ossl_ec_GFp_nist_field_sqr,
280
28.2k
        0 /* field_div */ ,
281
28.2k
        ossl_ec_GFp_simple_field_inv,
282
28.2k
        0 /* field_encode */ ,
283
28.2k
        0 /* field_decode */ ,
284
28.2k
        0,                      /* field_set_to_one */
285
28.2k
        ossl_ec_key_simple_priv2oct,
286
28.2k
        ossl_ec_key_simple_oct2priv,
287
28.2k
        0, /* set private */
288
28.2k
        ossl_ec_key_simple_generate_key,
289
28.2k
        ossl_ec_key_simple_check_key,
290
28.2k
        ossl_ec_key_simple_generate_public_key,
291
28.2k
        0, /* keycopy */
292
28.2k
        0, /* keyfinish */
293
28.2k
        ossl_ecdh_simple_compute_key,
294
28.2k
        ossl_ecdsa_simple_sign_setup,
295
28.2k
        ossl_ecdsa_simple_sign_sig,
296
28.2k
        ossl_ecdsa_simple_verify_sig,
297
28.2k
        0, /* field_inverse_mod_ord */
298
28.2k
        0, /* blind_coordinates */
299
28.2k
        0, /* ladder_pre */
300
28.2k
        0, /* ladder_step */
301
28.2k
        0  /* ladder_post */
302
28.2k
    };
303
304
28.2k
    return &ret;
305
28.2k
}
306
307
/*
308
 * Helper functions to convert field elements to/from internal representation
309
 */
310
static void bin28_to_felem(felem out, const u8 in[28])
311
9.87k
{
312
9.87k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
313
9.87k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
314
9.87k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
315
9.87k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
316
9.87k
}
317
318
static void felem_to_bin28(u8 out[28], const felem in)
319
12.5k
{
320
12.5k
    unsigned i;
321
100k
    for (i = 0; i < 7; ++i) {
322
87.9k
        out[i] = in[0] >> (8 * i);
323
87.9k
        out[i + 7] = in[1] >> (8 * i);
324
87.9k
        out[i + 14] = in[2] >> (8 * i);
325
87.9k
        out[i + 21] = in[3] >> (8 * i);
326
87.9k
    }
327
12.5k
}
328
329
/* From OpenSSL BIGNUM to internal representation */
330
static int BN_to_felem(felem out, const BIGNUM *bn)
331
9.87k
{
332
9.87k
    felem_bytearray b_out;
333
9.87k
    int num_bytes;
334
335
9.87k
    if (BN_is_negative(bn)) {
336
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
337
0
        return 0;
338
0
    }
339
9.87k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
340
9.87k
    if (num_bytes < 0) {
341
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
342
0
        return 0;
343
0
    }
344
9.87k
    bin28_to_felem(out, b_out);
345
9.87k
    return 1;
346
9.87k
}
347
348
/* From internal representation to OpenSSL BIGNUM */
349
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
350
12.5k
{
351
12.5k
    felem_bytearray b_out;
352
12.5k
    felem_to_bin28(b_out, in);
353
12.5k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
354
12.5k
}
355
356
/******************************************************************************/
357
/*-
358
 *                              FIELD OPERATIONS
359
 *
360
 * Field operations, using the internal representation of field elements.
361
 * NB! These operations are specific to our point multiplication and cannot be
362
 * expected to be correct in general - e.g., multiplication with a large scalar
363
 * will cause an overflow.
364
 *
365
 */
366
367
static void felem_one(felem out)
368
0
{
369
0
    out[0] = 1;
370
0
    out[1] = 0;
371
0
    out[2] = 0;
372
0
    out[3] = 0;
373
0
}
374
375
static void felem_assign(felem out, const felem in)
376
800k
{
377
800k
    out[0] = in[0];
378
800k
    out[1] = in[1];
379
800k
    out[2] = in[2];
380
800k
    out[3] = in[3];
381
800k
}
382
383
/* Sum two field elements: out += in */
384
static void felem_sum(felem out, const felem in)
385
236k
{
386
236k
    out[0] += in[0];
387
236k
    out[1] += in[1];
388
236k
    out[2] += in[2];
389
236k
    out[3] += in[3];
390
236k
}
391
392
/* Subtract field elements: out -= in */
393
/* Assumes in[i] < 2^57 */
394
static void felem_diff(felem out, const felem in)
395
222k
{
396
222k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
397
222k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
398
222k
    static const limb two58m42m2 = (((limb) 1) << 58) -
399
222k
        (((limb) 1) << 42) - (((limb) 1) << 2);
400
401
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
402
222k
    out[0] += two58p2;
403
222k
    out[1] += two58m42m2;
404
222k
    out[2] += two58m2;
405
222k
    out[3] += two58m2;
406
407
222k
    out[0] -= in[0];
408
222k
    out[1] -= in[1];
409
222k
    out[2] -= in[2];
410
222k
    out[3] -= in[3];
411
222k
}
412
413
/* Subtract in unreduced 128-bit mode: out -= in */
414
/* Assumes in[i] < 2^119 */
415
static void widefelem_diff(widefelem out, const widefelem in)
416
143k
{
417
143k
    static const widelimb two120 = ((widelimb) 1) << 120;
418
143k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
419
143k
        (((widelimb) 1) << 64);
420
143k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
421
143k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
422
423
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
424
143k
    out[0] += two120;
425
143k
    out[1] += two120m64;
426
143k
    out[2] += two120m64;
427
143k
    out[3] += two120;
428
143k
    out[4] += two120m104m64;
429
143k
    out[5] += two120m64;
430
143k
    out[6] += two120m64;
431
432
143k
    out[0] -= in[0];
433
143k
    out[1] -= in[1];
434
143k
    out[2] -= in[2];
435
143k
    out[3] -= in[3];
436
143k
    out[4] -= in[4];
437
143k
    out[5] -= in[5];
438
143k
    out[6] -= in[6];
439
143k
}
440
441
/* Subtract in mixed mode: out128 -= in64 */
442
/* in[i] < 2^63 */
443
static void felem_diff_128_64(widefelem out, const felem in)
444
426k
{
445
426k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
446
426k
        (((widelimb) 1) << 8);
447
426k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
448
426k
        (((widelimb) 1) << 8);
449
426k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
450
426k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
451
452
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
453
426k
    out[0] += two64p8;
454
426k
    out[1] += two64m48m8;
455
426k
    out[2] += two64m8;
456
426k
    out[3] += two64m8;
457
458
426k
    out[0] -= in[0];
459
426k
    out[1] -= in[1];
460
426k
    out[2] -= in[2];
461
426k
    out[3] -= in[3];
462
426k
}
463
464
/*
465
 * Multiply a field element by a scalar: out = out * scalar The scalars we
466
 * actually use are small, so results fit without overflow
467
 */
468
static void felem_scalar(felem out, const limb scalar)
469
300k
{
470
300k
    out[0] *= scalar;
471
300k
    out[1] *= scalar;
472
300k
    out[2] *= scalar;
473
300k
    out[3] *= scalar;
474
300k
}
475
476
/*
477
 * Multiply an unreduced field element by a scalar: out = out * scalar The
478
 * scalars we actually use are small, so results fit without overflow
479
 */
480
static void widefelem_scalar(widefelem out, const widelimb scalar)
481
78.7k
{
482
78.7k
    out[0] *= scalar;
483
78.7k
    out[1] *= scalar;
484
78.7k
    out[2] *= scalar;
485
78.7k
    out[3] *= scalar;
486
78.7k
    out[4] *= scalar;
487
78.7k
    out[5] *= scalar;
488
78.7k
    out[6] *= scalar;
489
78.7k
}
490
491
/* Square a field element: out = in^2 */
492
static void felem_square(widefelem out, const felem in)
493
1.28M
{
494
1.28M
    limb tmp0, tmp1, tmp2;
495
1.28M
    tmp0 = 2 * in[0];
496
1.28M
    tmp1 = 2 * in[1];
497
1.28M
    tmp2 = 2 * in[2];
498
1.28M
    out[0] = ((widelimb) in[0]) * in[0];
499
1.28M
    out[1] = ((widelimb) in[0]) * tmp1;
500
1.28M
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
501
1.28M
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
502
1.28M
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503
1.28M
    out[5] = ((widelimb) in[3]) * tmp2;
504
1.28M
    out[6] = ((widelimb) in[3]) * in[3];
505
1.28M
}
506
507
/* Multiply two field elements: out = in1 * in2 */
508
static void felem_mul(widefelem out, const felem in1, const felem in2)
509
843k
{
510
843k
    out[0] = ((widelimb) in1[0]) * in2[0];
511
843k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512
843k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
513
843k
             ((widelimb) in1[2]) * in2[0];
514
843k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
515
843k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
516
843k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
517
843k
             ((widelimb) in1[3]) * in2[1];
518
843k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519
843k
    out[6] = ((widelimb) in1[3]) * in2[3];
520
843k
}
521
522
/*-
523
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
524
 * Requires in[i] < 2^126,
525
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
526
static void felem_reduce(felem out, const widefelem in)
527
1.99M
{
528
1.99M
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
529
1.99M
        (((widelimb) 1) << 15);
530
1.99M
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
531
1.99M
        (((widelimb) 1) << 71);
532
1.99M
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
533
1.99M
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
534
1.99M
    widelimb output[5];
535
536
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
537
1.99M
    output[0] = in[0] + two127p15;
538
1.99M
    output[1] = in[1] + two127m71m55;
539
1.99M
    output[2] = in[2] + two127m71;
540
1.99M
    output[3] = in[3];
541
1.99M
    output[4] = in[4];
542
543
    /* Eliminate in[4], in[5], in[6] */
544
1.99M
    output[4] += in[6] >> 16;
545
1.99M
    output[3] += (in[6] & 0xffff) << 40;
546
1.99M
    output[2] -= in[6];
547
548
1.99M
    output[3] += in[5] >> 16;
549
1.99M
    output[2] += (in[5] & 0xffff) << 40;
550
1.99M
    output[1] -= in[5];
551
552
1.99M
    output[2] += output[4] >> 16;
553
1.99M
    output[1] += (output[4] & 0xffff) << 40;
554
1.99M
    output[0] -= output[4];
555
556
    /* Carry 2 -> 3 -> 4 */
557
1.99M
    output[3] += output[2] >> 56;
558
1.99M
    output[2] &= 0x00ffffffffffffff;
559
560
1.99M
    output[4] = output[3] >> 56;
561
1.99M
    output[3] &= 0x00ffffffffffffff;
562
563
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
564
565
    /* Eliminate output[4] */
566
1.99M
    output[2] += output[4] >> 16;
567
    /* output[2] < 2^56 + 2^56 = 2^57 */
568
1.99M
    output[1] += (output[4] & 0xffff) << 40;
569
1.99M
    output[0] -= output[4];
570
571
    /* Carry 0 -> 1 -> 2 -> 3 */
572
1.99M
    output[1] += output[0] >> 56;
573
1.99M
    out[0] = output[0] & 0x00ffffffffffffff;
574
575
1.99M
    output[2] += output[1] >> 56;
576
    /* output[2] < 2^57 + 2^72 */
577
1.99M
    out[1] = output[1] & 0x00ffffffffffffff;
578
1.99M
    output[3] += output[2] >> 56;
579
    /* output[3] <= 2^56 + 2^16 */
580
1.99M
    out[2] = output[2] & 0x00ffffffffffffff;
581
582
    /*-
583
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
584
     * out[3] <= 2^56 + 2^16 (due to final carry),
585
     * so out < 2*p
586
     */
587
1.99M
    out[3] = output[3];
588
1.99M
}
589
590
static void felem_square_reduce(felem out, const felem in)
591
0
{
592
0
    widefelem tmp;
593
0
    felem_square(tmp, in);
594
0
    felem_reduce(out, tmp);
595
0
}
596
597
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
598
0
{
599
0
    widefelem tmp;
600
0
    felem_mul(tmp, in1, in2);
601
0
    felem_reduce(out, tmp);
602
0
}
603
604
/*
605
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
606
 * call felem_reduce first)
607
 */
608
static void felem_contract(felem out, const felem in)
609
9.68k
{
610
9.68k
    static const int64_t two56 = ((limb) 1) << 56;
611
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
612
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
613
9.68k
    int64_t tmp[4], a;
614
9.68k
    tmp[0] = in[0];
615
9.68k
    tmp[1] = in[1];
616
9.68k
    tmp[2] = in[2];
617
9.68k
    tmp[3] = in[3];
618
    /* Case 1: a = 1 iff in >= 2^224 */
619
9.68k
    a = (in[3] >> 56);
620
9.68k
    tmp[0] -= a;
621
9.68k
    tmp[1] += a << 40;
622
9.68k
    tmp[3] &= 0x00ffffffffffffff;
623
    /*
624
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
625
     * and the lower part is non-zero
626
     */
627
9.68k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
628
9.68k
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
629
9.68k
    a &= 0x00ffffffffffffff;
630
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
631
9.68k
    a = (a - 1) >> 63;
632
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
633
9.68k
    tmp[3] &= a ^ 0xffffffffffffffff;
634
9.68k
    tmp[2] &= a ^ 0xffffffffffffffff;
635
9.68k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
636
9.68k
    tmp[0] -= 1 & a;
637
638
    /*
639
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
640
     * non-zero, so we only need one step
641
     */
642
9.68k
    a = tmp[0] >> 63;
643
9.68k
    tmp[0] += two56 & a;
644
9.68k
    tmp[1] -= 1 & a;
645
646
    /* carry 1 -> 2 -> 3 */
647
9.68k
    tmp[2] += tmp[1] >> 56;
648
9.68k
    tmp[1] &= 0x00ffffffffffffff;
649
650
9.68k
    tmp[3] += tmp[2] >> 56;
651
9.68k
    tmp[2] &= 0x00ffffffffffffff;
652
653
    /* Now 0 <= out < p */
654
9.68k
    out[0] = tmp[0];
655
9.68k
    out[1] = tmp[1];
656
9.68k
    out[2] = tmp[2];
657
9.68k
    out[3] = tmp[3];
658
9.68k
}
659
660
/*
661
 * Get negative value: out = -in
662
 * Requires in[i] < 2^63,
663
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
664
 */
665
static void felem_neg(felem out, const felem in)
666
10.4k
{
667
10.4k
    widefelem tmp;
668
669
10.4k
    memset(tmp, 0, sizeof(tmp));
670
10.4k
    felem_diff_128_64(tmp, in);
671
10.4k
    felem_reduce(out, tmp);
672
10.4k
}
673
674
/*
675
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
676
 * elements are reduced to in < 2^225, so we only need to check three cases:
677
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
678
 */
679
static limb felem_is_zero(const felem in)
680
258k
{
681
258k
    limb zero, two224m96p1, two225m97p2;
682
683
258k
    zero = in[0] | in[1] | in[2] | in[3];
684
258k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
685
258k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
686
258k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
687
258k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
688
258k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
689
258k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
690
258k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
691
258k
    return (zero | two224m96p1 | two225m97p2);
692
258k
}
693
694
static int felem_is_zero_int(const void *in)
695
0
{
696
0
    return (int)(felem_is_zero(in) & ((limb) 1));
697
0
}
698
699
/* Invert a field element */
700
/* Computation chain copied from djb's code */
701
static void felem_inv(felem out, const felem in)
702
3.05k
{
703
3.05k
    felem ftmp, ftmp2, ftmp3, ftmp4;
704
3.05k
    widefelem tmp;
705
3.05k
    unsigned i;
706
707
3.05k
    felem_square(tmp, in);
708
3.05k
    felem_reduce(ftmp, tmp);    /* 2 */
709
3.05k
    felem_mul(tmp, in, ftmp);
710
3.05k
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
711
3.05k
    felem_square(tmp, ftmp);
712
3.05k
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
713
3.05k
    felem_mul(tmp, in, ftmp);
714
3.05k
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
715
3.05k
    felem_square(tmp, ftmp);
716
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
717
3.05k
    felem_square(tmp, ftmp2);
718
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
719
3.05k
    felem_square(tmp, ftmp2);
720
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
721
3.05k
    felem_mul(tmp, ftmp2, ftmp);
722
3.05k
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
723
3.05k
    felem_square(tmp, ftmp);
724
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
725
18.3k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
726
15.2k
        felem_square(tmp, ftmp2);
727
15.2k
        felem_reduce(ftmp2, tmp);
728
15.2k
    }
729
3.05k
    felem_mul(tmp, ftmp2, ftmp);
730
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
731
3.05k
    felem_square(tmp, ftmp2);
732
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
733
36.6k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
734
33.6k
        felem_square(tmp, ftmp3);
735
33.6k
        felem_reduce(ftmp3, tmp);
736
33.6k
    }
737
3.05k
    felem_mul(tmp, ftmp3, ftmp2);
738
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
739
3.05k
    felem_square(tmp, ftmp2);
740
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
741
73.3k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
742
70.3k
        felem_square(tmp, ftmp3);
743
70.3k
        felem_reduce(ftmp3, tmp);
744
70.3k
    }
745
3.05k
    felem_mul(tmp, ftmp3, ftmp2);
746
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
747
3.05k
    felem_square(tmp, ftmp3);
748
3.05k
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
749
146k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
750
143k
        felem_square(tmp, ftmp4);
751
143k
        felem_reduce(ftmp4, tmp);
752
143k
    }
753
3.05k
    felem_mul(tmp, ftmp3, ftmp4);
754
3.05k
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
755
3.05k
    felem_square(tmp, ftmp3);
756
3.05k
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
757
73.3k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
758
70.3k
        felem_square(tmp, ftmp4);
759
70.3k
        felem_reduce(ftmp4, tmp);
760
70.3k
    }
761
3.05k
    felem_mul(tmp, ftmp2, ftmp4);
762
3.05k
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
763
21.4k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
764
18.3k
        felem_square(tmp, ftmp2);
765
18.3k
        felem_reduce(ftmp2, tmp);
766
18.3k
    }
767
3.05k
    felem_mul(tmp, ftmp2, ftmp);
768
3.05k
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
769
3.05k
    felem_square(tmp, ftmp);
770
3.05k
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
771
3.05k
    felem_mul(tmp, ftmp, in);
772
3.05k
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
773
299k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
774
296k
        felem_square(tmp, ftmp);
775
296k
        felem_reduce(ftmp, tmp);
776
296k
    }
777
3.05k
    felem_mul(tmp, ftmp, ftmp3);
778
3.05k
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
779
3.05k
}
780
781
/*
782
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
783
 * out to itself.
784
 */
785
static void copy_conditional(felem out, const felem in, limb icopy)
786
397k
{
787
397k
    unsigned i;
788
    /*
789
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
790
     */
791
397k
    const limb copy = -icopy;
792
1.98M
    for (i = 0; i < 4; ++i) {
793
1.59M
        const limb tmp = copy & (in[i] ^ out[i]);
794
1.59M
        out[i] ^= tmp;
795
1.59M
    }
796
397k
}
797
798
/******************************************************************************/
799
/*-
800
 *                       ELLIPTIC CURVE POINT OPERATIONS
801
 *
802
 * Points are represented in Jacobian projective coordinates:
803
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
804
 * or to the point at infinity if Z == 0.
805
 *
806
 */
807
808
/*-
809
 * Double an elliptic curve point:
810
 * (X', Y', Z') = 2 * (X, Y, Z), where
811
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
812
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
813
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
814
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
815
 * while x_out == y_in is not (maybe this works, but it's not tested).
816
 */
817
static void
818
point_double(felem x_out, felem y_out, felem z_out,
819
             const felem x_in, const felem y_in, const felem z_in)
820
78.7k
{
821
78.7k
    widefelem tmp, tmp2;
822
78.7k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
823
824
78.7k
    felem_assign(ftmp, x_in);
825
78.7k
    felem_assign(ftmp2, x_in);
826
827
    /* delta = z^2 */
828
78.7k
    felem_square(tmp, z_in);
829
78.7k
    felem_reduce(delta, tmp);
830
831
    /* gamma = y^2 */
832
78.7k
    felem_square(tmp, y_in);
833
78.7k
    felem_reduce(gamma, tmp);
834
835
    /* beta = x*gamma */
836
78.7k
    felem_mul(tmp, x_in, gamma);
837
78.7k
    felem_reduce(beta, tmp);
838
839
    /* alpha = 3*(x-delta)*(x+delta) */
840
78.7k
    felem_diff(ftmp, delta);
841
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
842
78.7k
    felem_sum(ftmp2, delta);
843
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
844
78.7k
    felem_scalar(ftmp2, 3);
845
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
846
78.7k
    felem_mul(tmp, ftmp, ftmp2);
847
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
848
78.7k
    felem_reduce(alpha, tmp);
849
850
    /* x' = alpha^2 - 8*beta */
851
78.7k
    felem_square(tmp, alpha);
852
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
853
78.7k
    felem_assign(ftmp, beta);
854
78.7k
    felem_scalar(ftmp, 8);
855
    /* ftmp[i] < 8 * 2^57 = 2^60 */
856
78.7k
    felem_diff_128_64(tmp, ftmp);
857
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
858
78.7k
    felem_reduce(x_out, tmp);
859
860
    /* z' = (y + z)^2 - gamma - delta */
861
78.7k
    felem_sum(delta, gamma);
862
    /* delta[i] < 2^57 + 2^57 = 2^58 */
863
78.7k
    felem_assign(ftmp, y_in);
864
78.7k
    felem_sum(ftmp, z_in);
865
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
866
78.7k
    felem_square(tmp, ftmp);
867
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
868
78.7k
    felem_diff_128_64(tmp, delta);
869
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
870
78.7k
    felem_reduce(z_out, tmp);
871
872
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
873
78.7k
    felem_scalar(beta, 4);
874
    /* beta[i] < 4 * 2^57 = 2^59 */
875
78.7k
    felem_diff(beta, x_out);
876
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
877
78.7k
    felem_mul(tmp, alpha, beta);
878
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
879
78.7k
    felem_square(tmp2, gamma);
880
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
881
78.7k
    widefelem_scalar(tmp2, 8);
882
    /* tmp2[i] < 8 * 2^116 = 2^119 */
883
78.7k
    widefelem_diff(tmp, tmp2);
884
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
885
78.7k
    felem_reduce(y_out, tmp);
886
78.7k
}
887
888
/*-
889
 * Add two elliptic curve points:
890
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
891
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
892
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
893
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
894
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
895
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
896
 *
897
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
898
 */
899
900
/*
901
 * This function is not entirely constant-time: it includes a branch for
902
 * checking whether the two input points are equal, (while not equal to the
903
 * point at infinity). This case never happens during single point
904
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
905
 */
906
static void point_add(felem x3, felem y3, felem z3,
907
                      const felem x1, const felem y1, const felem z1,
908
                      const int mixed, const felem x2, const felem y2,
909
                      const felem z2)
910
64.5k
{
911
64.5k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
912
64.5k
    widefelem tmp, tmp2;
913
64.5k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
914
64.5k
    limb points_equal;
915
916
64.5k
    if (!mixed) {
917
        /* ftmp2 = z2^2 */
918
11.8k
        felem_square(tmp, z2);
919
11.8k
        felem_reduce(ftmp2, tmp);
920
921
        /* ftmp4 = z2^3 */
922
11.8k
        felem_mul(tmp, ftmp2, z2);
923
11.8k
        felem_reduce(ftmp4, tmp);
924
925
        /* ftmp4 = z2^3*y1 */
926
11.8k
        felem_mul(tmp2, ftmp4, y1);
927
11.8k
        felem_reduce(ftmp4, tmp2);
928
929
        /* ftmp2 = z2^2*x1 */
930
11.8k
        felem_mul(tmp2, ftmp2, x1);
931
11.8k
        felem_reduce(ftmp2, tmp2);
932
52.7k
    } else {
933
        /*
934
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
935
         */
936
937
        /* ftmp4 = z2^3*y1 */
938
52.7k
        felem_assign(ftmp4, y1);
939
940
        /* ftmp2 = z2^2*x1 */
941
52.7k
        felem_assign(ftmp2, x1);
942
52.7k
    }
943
944
    /* ftmp = z1^2 */
945
64.5k
    felem_square(tmp, z1);
946
64.5k
    felem_reduce(ftmp, tmp);
947
948
    /* ftmp3 = z1^3 */
949
64.5k
    felem_mul(tmp, ftmp, z1);
950
64.5k
    felem_reduce(ftmp3, tmp);
951
952
    /* tmp = z1^3*y2 */
953
64.5k
    felem_mul(tmp, ftmp3, y2);
954
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
955
956
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
957
64.5k
    felem_diff_128_64(tmp, ftmp4);
958
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
959
64.5k
    felem_reduce(ftmp3, tmp);
960
961
    /* tmp = z1^2*x2 */
962
64.5k
    felem_mul(tmp, ftmp, x2);
963
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
964
965
    /* ftmp = z1^2*x2 - z2^2*x1 */
966
64.5k
    felem_diff_128_64(tmp, ftmp2);
967
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
968
64.5k
    felem_reduce(ftmp, tmp);
969
970
    /*
971
     * The formulae are incorrect if the points are equal, in affine coordinates
972
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
973
     * happens.
974
     *
975
     * We use bitwise operations to avoid potential side-channels introduced by
976
     * the short-circuiting behaviour of boolean operators.
977
     */
978
64.5k
    x_equal = felem_is_zero(ftmp);
979
64.5k
    y_equal = felem_is_zero(ftmp3);
980
    /*
981
     * The special case of either point being the point at infinity (z1 and/or
982
     * z2 are zero), is handled separately later on in this function, so we
983
     * avoid jumping to point_double here in those special cases.
984
     */
985
64.5k
    z1_is_zero = felem_is_zero(z1);
986
64.5k
    z2_is_zero = felem_is_zero(z2);
987
988
    /*
989
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
990
     * specific implementation `felem_is_zero()` returns truth as `0x1`
991
     * (rather than `0xff..ff`).
992
     *
993
     * This implies that `~true` in this implementation becomes
994
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
995
     * the if expression, we mask out only the last bit in the next
996
     * line.
997
     */
998
64.5k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
999
1000
64.5k
    if (points_equal) {
1001
        /*
1002
         * This is obviously not constant-time but, as mentioned before, this
1003
         * case never happens during single point multiplication, so there is no
1004
         * timing leak for ECDH or ECDSA signing.
1005
         */
1006
0
        point_double(x3, y3, z3, x1, y1, z1);
1007
0
        return;
1008
0
    }
1009
1010
    /* ftmp5 = z1*z2 */
1011
64.5k
    if (!mixed) {
1012
11.8k
        felem_mul(tmp, z1, z2);
1013
11.8k
        felem_reduce(ftmp5, tmp);
1014
52.7k
    } else {
1015
        /* special case z2 = 0 is handled later */
1016
52.7k
        felem_assign(ftmp5, z1);
1017
52.7k
    }
1018
1019
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1020
64.5k
    felem_mul(tmp, ftmp, ftmp5);
1021
64.5k
    felem_reduce(z_out, tmp);
1022
1023
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1024
64.5k
    felem_assign(ftmp5, ftmp);
1025
64.5k
    felem_square(tmp, ftmp);
1026
64.5k
    felem_reduce(ftmp, tmp);
1027
1028
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1029
64.5k
    felem_mul(tmp, ftmp, ftmp5);
1030
64.5k
    felem_reduce(ftmp5, tmp);
1031
1032
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033
64.5k
    felem_mul(tmp, ftmp2, ftmp);
1034
64.5k
    felem_reduce(ftmp2, tmp);
1035
1036
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1037
64.5k
    felem_mul(tmp, ftmp4, ftmp5);
1038
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1039
1040
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1041
64.5k
    felem_square(tmp2, ftmp3);
1042
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1043
1044
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1045
64.5k
    felem_diff_128_64(tmp2, ftmp5);
1046
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1047
1048
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1049
64.5k
    felem_assign(ftmp5, ftmp2);
1050
64.5k
    felem_scalar(ftmp5, 2);
1051
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1052
1053
    /*-
1054
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1055
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1056
     */
1057
64.5k
    felem_diff_128_64(tmp2, ftmp5);
1058
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1059
64.5k
    felem_reduce(x_out, tmp2);
1060
1061
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1062
64.5k
    felem_diff(ftmp2, x_out);
1063
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1064
1065
    /*
1066
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1067
     */
1068
64.5k
    felem_mul(tmp2, ftmp3, ftmp2);
1069
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1070
1071
    /*-
1072
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1073
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1074
     */
1075
64.5k
    widefelem_diff(tmp2, tmp);
1076
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1077
64.5k
    felem_reduce(y_out, tmp2);
1078
1079
    /*
1080
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1081
     * the point at infinity, so we need to check for this separately
1082
     */
1083
1084
    /*
1085
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1086
     */
1087
64.5k
    copy_conditional(x_out, x2, z1_is_zero);
1088
64.5k
    copy_conditional(x_out, x1, z2_is_zero);
1089
64.5k
    copy_conditional(y_out, y2, z1_is_zero);
1090
64.5k
    copy_conditional(y_out, y1, z2_is_zero);
1091
64.5k
    copy_conditional(z_out, z2, z1_is_zero);
1092
64.5k
    copy_conditional(z_out, z1, z2_is_zero);
1093
64.5k
    felem_assign(x3, x_out);
1094
64.5k
    felem_assign(y3, y_out);
1095
64.5k
    felem_assign(z3, z_out);
1096
64.5k
}
1097
1098
/*
1099
 * select_point selects the |idx|th point from a precomputation table and
1100
 * copies it to out.
1101
 * The pre_comp array argument should be size of |size| argument
1102
 */
1103
static void select_point(const u64 idx, unsigned int size,
1104
                         const felem pre_comp[][3], felem out[3])
1105
64.1k
{
1106
64.1k
    unsigned i, j;
1107
64.1k
    limb *outlimbs = &out[0][0];
1108
1109
64.1k
    memset(out, 0, sizeof(*out) * 3);
1110
1.10M
    for (i = 0; i < size; i++) {
1111
1.03M
        const limb *inlimbs = &pre_comp[i][0][0];
1112
1.03M
        u64 mask = i ^ idx;
1113
1.03M
        mask |= mask >> 4;
1114
1.03M
        mask |= mask >> 2;
1115
1.03M
        mask |= mask >> 1;
1116
1.03M
        mask &= 1;
1117
1.03M
        mask--;
1118
13.4M
        for (j = 0; j < 4 * 3; j++)
1119
12.4M
            outlimbs[j] |= inlimbs[j] & mask;
1120
1.03M
    }
1121
64.1k
}
1122
1123
/* get_bit returns the |i|th bit in |in| */
1124
static char get_bit(const felem_bytearray in, unsigned i)
1125
277k
{
1126
277k
    if (i >= 224)
1127
464
        return 0;
1128
276k
    return (in[i >> 3] >> (i & 7)) & 1;
1129
277k
}
1130
1131
/*
1132
 * Interleaved point multiplication using precomputed point multiples: The
1133
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1134
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1135
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1136
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1137
 */
1138
static void batch_mul(felem x_out, felem y_out, felem z_out,
1139
                      const felem_bytearray scalars[],
1140
                      const unsigned num_points, const u8 *g_scalar,
1141
                      const int mixed, const felem pre_comp[][17][3],
1142
                      const felem g_pre_comp[2][16][3])
1143
1.19k
{
1144
1.19k
    int i, skip;
1145
1.19k
    unsigned num;
1146
1.19k
    unsigned gen_mul = (g_scalar != NULL);
1147
1.19k
    felem nq[3], tmp[4];
1148
1.19k
    u64 bits;
1149
1.19k
    u8 sign, digit;
1150
1151
    /* set nq to the point at infinity */
1152
1.19k
    memset(nq, 0, sizeof(nq));
1153
1154
    /*
1155
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1156
     * of the generator (two in each of the last 28 rounds) and additions of
1157
     * other points multiples (every 5th round).
1158
     */
1159
1.19k
    skip = 1;                   /* save two point operations in the first
1160
                                 * round */
1161
79.3k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1162
        /* double */
1163
78.1k
        if (!skip)
1164
76.9k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1165
1166
        /* add multiples of the generator */
1167
78.1k
        if (gen_mul && (i <= 27)) {
1168
            /* first, look 28 bits upwards */
1169
26.8k
            bits = get_bit(g_scalar, i + 196) << 3;
1170
26.8k
            bits |= get_bit(g_scalar, i + 140) << 2;
1171
26.8k
            bits |= get_bit(g_scalar, i + 84) << 1;
1172
26.8k
            bits |= get_bit(g_scalar, i + 28);
1173
            /* select the point to add, in constant time */
1174
26.8k
            select_point(bits, 16, g_pre_comp[1], tmp);
1175
1176
26.8k
            if (!skip) {
1177
                /* value 1 below is argument for "mixed" */
1178
25.8k
                point_add(nq[0], nq[1], nq[2],
1179
25.8k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1180
25.8k
            } else {
1181
959
                memcpy(nq, tmp, 3 * sizeof(felem));
1182
959
                skip = 0;
1183
959
            }
1184
1185
            /* second, look at the current position */
1186
26.8k
            bits = get_bit(g_scalar, i + 168) << 3;
1187
26.8k
            bits |= get_bit(g_scalar, i + 112) << 2;
1188
26.8k
            bits |= get_bit(g_scalar, i + 56) << 1;
1189
26.8k
            bits |= get_bit(g_scalar, i);
1190
            /* select the point to add, in constant time */
1191
26.8k
            select_point(bits, 16, g_pre_comp[0], tmp);
1192
26.8k
            point_add(nq[0], nq[1], nq[2],
1193
26.8k
                      nq[0], nq[1], nq[2],
1194
26.8k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1195
26.8k
        }
1196
1197
        /* do other additions every 5 doublings */
1198
78.1k
        if (num_points && (i % 5 == 0)) {
1199
            /* loop over all scalars */
1200
20.8k
            for (num = 0; num < num_points; ++num) {
1201
10.4k
                bits = get_bit(scalars[num], i + 4) << 5;
1202
10.4k
                bits |= get_bit(scalars[num], i + 3) << 4;
1203
10.4k
                bits |= get_bit(scalars[num], i + 2) << 3;
1204
10.4k
                bits |= get_bit(scalars[num], i + 1) << 2;
1205
10.4k
                bits |= get_bit(scalars[num], i) << 1;
1206
10.4k
                bits |= get_bit(scalars[num], i - 1);
1207
10.4k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1208
1209
                /* select the point to add or subtract */
1210
10.4k
                select_point(digit, 17, pre_comp[num], tmp);
1211
10.4k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1212
                                            * point */
1213
10.4k
                copy_conditional(tmp[1], tmp[3], sign);
1214
1215
10.4k
                if (!skip) {
1216
10.2k
                    point_add(nq[0], nq[1], nq[2],
1217
10.2k
                              nq[0], nq[1], nq[2],
1218
10.2k
                              mixed, tmp[0], tmp[1], tmp[2]);
1219
10.2k
                } else {
1220
232
                    memcpy(nq, tmp, 3 * sizeof(felem));
1221
232
                    skip = 0;
1222
232
                }
1223
10.4k
            }
1224
10.4k
        }
1225
78.1k
    }
1226
1.19k
    felem_assign(x_out, nq[0]);
1227
1.19k
    felem_assign(y_out, nq[1]);
1228
1.19k
    felem_assign(z_out, nq[2]);
1229
1.19k
}
1230
1231
/******************************************************************************/
1232
/*
1233
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1234
 */
1235
1236
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1237
0
{
1238
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1239
1240
0
    if (ret == NULL)
1241
0
        return ret;
1242
1243
1244
0
    if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1245
0
        OPENSSL_free(ret);
1246
0
        return NULL;
1247
0
    }
1248
0
    return ret;
1249
0
}
1250
1251
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1252
0
{
1253
0
    int i;
1254
0
    if (p != NULL)
1255
0
        CRYPTO_UP_REF(&p->references, &i);
1256
0
    return p;
1257
0
}
1258
1259
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1260
0
{
1261
0
    int i;
1262
1263
0
    if (p == NULL)
1264
0
        return;
1265
1266
0
    CRYPTO_DOWN_REF(&p->references, &i);
1267
0
    REF_PRINT_COUNT("EC_nistp224", p);
1268
0
    if (i > 0)
1269
0
        return;
1270
0
    REF_ASSERT_ISNT(i < 0);
1271
1272
0
    CRYPTO_FREE_REF(&p->references);
1273
0
    OPENSSL_free(p);
1274
0
}
1275
1276
/******************************************************************************/
1277
/*
1278
 * OPENSSL EC_METHOD FUNCTIONS
1279
 */
1280
1281
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1282
54.4k
{
1283
54.4k
    int ret;
1284
54.4k
    ret = ossl_ec_GFp_simple_group_init(group);
1285
54.4k
    group->a_is_minus3 = 1;
1286
54.4k
    return ret;
1287
54.4k
}
1288
1289
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1290
                                         const BIGNUM *a, const BIGNUM *b,
1291
                                         BN_CTX *ctx)
1292
28.2k
{
1293
28.2k
    int ret = 0;
1294
28.2k
    BIGNUM *curve_p, *curve_a, *curve_b;
1295
28.2k
#ifndef FIPS_MODULE
1296
28.2k
    BN_CTX *new_ctx = NULL;
1297
1298
28.2k
    if (ctx == NULL)
1299
0
        ctx = new_ctx = BN_CTX_new();
1300
28.2k
#endif
1301
28.2k
    if (ctx == NULL)
1302
0
        return 0;
1303
1304
28.2k
    BN_CTX_start(ctx);
1305
28.2k
    curve_p = BN_CTX_get(ctx);
1306
28.2k
    curve_a = BN_CTX_get(ctx);
1307
28.2k
    curve_b = BN_CTX_get(ctx);
1308
28.2k
    if (curve_b == NULL)
1309
0
        goto err;
1310
28.2k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1311
28.2k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1312
28.2k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1313
28.2k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1314
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1315
0
        goto err;
1316
0
    }
1317
28.2k
    group->field_mod_func = BN_nist_mod_224;
1318
28.2k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1319
28.2k
 err:
1320
28.2k
    BN_CTX_end(ctx);
1321
28.2k
#ifndef FIPS_MODULE
1322
28.2k
    BN_CTX_free(new_ctx);
1323
28.2k
#endif
1324
28.2k
    return ret;
1325
28.2k
}
1326
1327
/*
1328
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1329
 * (X/Z^2, Y/Z^3)
1330
 */
1331
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1332
                                                      const EC_POINT *point,
1333
                                                      BIGNUM *x, BIGNUM *y,
1334
                                                      BN_CTX *ctx)
1335
3.05k
{
1336
3.05k
    felem z1, z2, x_in, y_in, x_out, y_out;
1337
3.05k
    widefelem tmp;
1338
1339
3.05k
    if (EC_POINT_is_at_infinity(group, point)) {
1340
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1341
0
        return 0;
1342
0
    }
1343
3.05k
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1344
3.05k
        (!BN_to_felem(z1, point->Z)))
1345
0
        return 0;
1346
3.05k
    felem_inv(z2, z1);
1347
3.05k
    felem_square(tmp, z2);
1348
3.05k
    felem_reduce(z1, tmp);
1349
3.05k
    felem_mul(tmp, x_in, z1);
1350
3.05k
    felem_reduce(x_in, tmp);
1351
3.05k
    felem_contract(x_out, x_in);
1352
3.05k
    if (x != NULL) {
1353
3.05k
        if (!felem_to_BN(x, x_out)) {
1354
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1355
0
            return 0;
1356
0
        }
1357
3.05k
    }
1358
3.05k
    felem_mul(tmp, z1, z2);
1359
3.05k
    felem_reduce(z1, tmp);
1360
3.05k
    felem_mul(tmp, y_in, z1);
1361
3.05k
    felem_reduce(y_in, tmp);
1362
3.05k
    felem_contract(y_out, y_in);
1363
3.05k
    if (y != NULL) {
1364
3.05k
        if (!felem_to_BN(y, y_out)) {
1365
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1366
0
            return 0;
1367
0
        }
1368
3.05k
    }
1369
3.05k
    return 1;
1370
3.05k
}
1371
1372
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373
                               felem tmp_felems[ /* num+1 */ ])
1374
0
{
1375
    /*
1376
     * Runs in constant time, unless an input is the point at infinity (which
1377
     * normally shouldn't happen).
1378
     */
1379
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1380
0
                                                  points,
1381
0
                                                  sizeof(felem),
1382
0
                                                  tmp_felems,
1383
0
                                                  (void (*)(void *))felem_one,
1384
0
                                                  felem_is_zero_int,
1385
0
                                                  (void (*)(void *, const void *))
1386
0
                                                  felem_assign,
1387
0
                                                  (void (*)(void *, const void *))
1388
0
                                                  felem_square_reduce, (void (*)
1389
0
                                                                        (void *,
1390
0
                                                                         const void
1391
0
                                                                         *,
1392
0
                                                                         const void
1393
0
                                                                         *))
1394
0
                                                  felem_mul_reduce,
1395
0
                                                  (void (*)(void *, const void *))
1396
0
                                                  felem_inv,
1397
0
                                                  (void (*)(void *, const void *))
1398
0
                                                  felem_contract);
1399
0
}
1400
1401
/*
1402
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403
 * values Result is stored in r (r can equal one of the inputs).
1404
 */
1405
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406
                                    const BIGNUM *scalar, size_t num,
1407
                                    const EC_POINT *points[],
1408
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1409
1.19k
{
1410
1.19k
    int ret = 0;
1411
1.19k
    int j;
1412
1.19k
    unsigned i;
1413
1.19k
    int mixed = 0;
1414
1.19k
    BIGNUM *x, *y, *z, *tmp_scalar;
1415
1.19k
    felem_bytearray g_secret;
1416
1.19k
    felem_bytearray *secrets = NULL;
1417
1.19k
    felem (*pre_comp)[17][3] = NULL;
1418
1.19k
    felem *tmp_felems = NULL;
1419
1.19k
    int num_bytes;
1420
1.19k
    int have_pre_comp = 0;
1421
1.19k
    size_t num_points = num;
1422
1.19k
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1423
1.19k
    NISTP224_PRE_COMP *pre = NULL;
1424
1.19k
    const felem(*g_pre_comp)[16][3] = NULL;
1425
1.19k
    EC_POINT *generator = NULL;
1426
1.19k
    const EC_POINT *p = NULL;
1427
1.19k
    const BIGNUM *p_scalar = NULL;
1428
1429
1.19k
    BN_CTX_start(ctx);
1430
1.19k
    x = BN_CTX_get(ctx);
1431
1.19k
    y = BN_CTX_get(ctx);
1432
1.19k
    z = BN_CTX_get(ctx);
1433
1.19k
    tmp_scalar = BN_CTX_get(ctx);
1434
1.19k
    if (tmp_scalar == NULL)
1435
0
        goto err;
1436
1437
1.19k
    if (scalar != NULL) {
1438
959
        pre = group->pre_comp.nistp224;
1439
959
        if (pre)
1440
            /* we have precomputation, try to use it */
1441
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442
959
        else
1443
            /* try to use the standard precomputation */
1444
959
            g_pre_comp = &gmul[0];
1445
959
        generator = EC_POINT_new(group);
1446
959
        if (generator == NULL)
1447
0
            goto err;
1448
        /* get the generator from precomputation */
1449
959
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450
959
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451
959
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1453
0
            goto err;
1454
0
        }
1455
959
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1456
959
                                                                generator,
1457
959
                                                                x, y, z, ctx))
1458
0
            goto err;
1459
959
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460
            /* precomputation matches generator */
1461
959
            have_pre_comp = 1;
1462
0
        else
1463
            /*
1464
             * we don't have valid precomputation: treat the generator as a
1465
             * random point
1466
             */
1467
0
            num_points = num_points + 1;
1468
959
    }
1469
1470
1.19k
    if (num_points > 0) {
1471
232
        if (num_points >= 3) {
1472
            /*
1473
             * unless we precompute multiples for just one or two points,
1474
             * converting those into affine form is time well spent
1475
             */
1476
0
            mixed = 1;
1477
0
        }
1478
232
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479
232
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480
232
        if (mixed)
1481
0
            tmp_felems =
1482
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483
232
        if ((secrets == NULL) || (pre_comp == NULL)
1484
232
            || (mixed && (tmp_felems == NULL)))
1485
0
            goto err;
1486
1487
        /*
1488
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1489
         * i.e., they contribute nothing to the linear combination
1490
         */
1491
464
        for (i = 0; i < num_points; ++i) {
1492
232
            if (i == num) {
1493
                /* the generator */
1494
0
                p = EC_GROUP_get0_generator(group);
1495
0
                p_scalar = scalar;
1496
232
            } else {
1497
                /* the i^th point */
1498
232
                p = points[i];
1499
232
                p_scalar = scalars[i];
1500
232
            }
1501
232
            if ((p_scalar != NULL) && (p != NULL)) {
1502
                /* reduce scalar to 0 <= scalar < 2^224 */
1503
232
                if ((BN_num_bits(p_scalar) > 224)
1504
232
                    || (BN_is_negative(p_scalar))) {
1505
                    /*
1506
                     * this is an unusual input, and we don't guarantee
1507
                     * constant-timeness
1508
                     */
1509
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1510
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1511
0
                        goto err;
1512
0
                    }
1513
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1514
0
                                               secrets[i], sizeof(secrets[i]));
1515
232
                } else {
1516
232
                    num_bytes = BN_bn2lebinpad(p_scalar,
1517
232
                                               secrets[i], sizeof(secrets[i]));
1518
232
                }
1519
232
                if (num_bytes < 0) {
1520
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1521
0
                    goto err;
1522
0
                }
1523
                /* precompute multiples */
1524
232
                if ((!BN_to_felem(x_out, p->X)) ||
1525
232
                    (!BN_to_felem(y_out, p->Y)) ||
1526
232
                    (!BN_to_felem(z_out, p->Z)))
1527
0
                    goto err;
1528
232
                felem_assign(pre_comp[i][1][0], x_out);
1529
232
                felem_assign(pre_comp[i][1][1], y_out);
1530
232
                felem_assign(pre_comp[i][1][2], z_out);
1531
3.71k
                for (j = 2; j <= 16; ++j) {
1532
3.48k
                    if (j & 1) {
1533
1.62k
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1534
1.62k
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1535
1.62k
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1536
1.62k
                                  pre_comp[i][j - 1][0],
1537
1.62k
                                  pre_comp[i][j - 1][1],
1538
1.62k
                                  pre_comp[i][j - 1][2]);
1539
1.85k
                    } else {
1540
1.85k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1541
1.85k
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1542
1.85k
                                     pre_comp[i][j / 2][1],
1543
1.85k
                                     pre_comp[i][j / 2][2]);
1544
1.85k
                    }
1545
3.48k
                }
1546
232
            }
1547
232
        }
1548
232
        if (mixed)
1549
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1550
232
    }
1551
1552
    /* the scalar for the generator */
1553
1.19k
    if ((scalar != NULL) && (have_pre_comp)) {
1554
959
        memset(g_secret, 0, sizeof(g_secret));
1555
        /* reduce scalar to 0 <= scalar < 2^224 */
1556
959
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1557
            /*
1558
             * this is an unusual input, and we don't guarantee
1559
             * constant-timeness
1560
             */
1561
73
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1562
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1563
0
                goto err;
1564
0
            }
1565
73
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1566
886
        } else {
1567
886
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1568
886
        }
1569
        /* do the multiplication with generator precomputation */
1570
959
        batch_mul(x_out, y_out, z_out,
1571
959
                  (const felem_bytearray(*))secrets, num_points,
1572
959
                  g_secret,
1573
959
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1574
959
    } else {
1575
        /* do the multiplication without generator precomputation */
1576
232
        batch_mul(x_out, y_out, z_out,
1577
232
                  (const felem_bytearray(*))secrets, num_points,
1578
232
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1579
232
    }
1580
    /* reduce the output to its unique minimal representation */
1581
1.19k
    felem_contract(x_in, x_out);
1582
1.19k
    felem_contract(y_in, y_out);
1583
1.19k
    felem_contract(z_in, z_out);
1584
1.19k
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1585
1.19k
        (!felem_to_BN(z, z_in))) {
1586
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1587
0
        goto err;
1588
0
    }
1589
1.19k
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1590
1.19k
                                                             ctx);
1591
1592
1.19k
 err:
1593
1.19k
    BN_CTX_end(ctx);
1594
1.19k
    EC_POINT_free(generator);
1595
1.19k
    OPENSSL_free(secrets);
1596
1.19k
    OPENSSL_free(pre_comp);
1597
1.19k
    OPENSSL_free(tmp_felems);
1598
1.19k
    return ret;
1599
1.19k
}
1600
1601
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1602
0
{
1603
0
    int ret = 0;
1604
0
    NISTP224_PRE_COMP *pre = NULL;
1605
0
    int i, j;
1606
0
    BIGNUM *x, *y;
1607
0
    EC_POINT *generator = NULL;
1608
0
    felem tmp_felems[32];
1609
0
#ifndef FIPS_MODULE
1610
0
    BN_CTX *new_ctx = NULL;
1611
0
#endif
1612
1613
    /* throw away old precomputation */
1614
0
    EC_pre_comp_free(group);
1615
1616
0
#ifndef FIPS_MODULE
1617
0
    if (ctx == NULL)
1618
0
        ctx = new_ctx = BN_CTX_new();
1619
0
#endif
1620
0
    if (ctx == NULL)
1621
0
        return 0;
1622
1623
0
    BN_CTX_start(ctx);
1624
0
    x = BN_CTX_get(ctx);
1625
0
    y = BN_CTX_get(ctx);
1626
0
    if (y == NULL)
1627
0
        goto err;
1628
    /* get the generator */
1629
0
    if (group->generator == NULL)
1630
0
        goto err;
1631
0
    generator = EC_POINT_new(group);
1632
0
    if (generator == NULL)
1633
0
        goto err;
1634
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1635
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1636
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1637
0
        goto err;
1638
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1639
0
        goto err;
1640
    /*
1641
     * if the generator is the standard one, use built-in precomputation
1642
     */
1643
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1644
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1645
0
        goto done;
1646
0
    }
1647
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1648
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1649
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1650
0
        goto err;
1651
    /*
1652
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1653
     * 2^140*G, 2^196*G for the second one
1654
     */
1655
0
    for (i = 1; i <= 8; i <<= 1) {
1656
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1657
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1658
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1659
0
        for (j = 0; j < 27; ++j) {
1660
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1662
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1663
0
        }
1664
0
        if (i == 8)
1665
0
            break;
1666
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1667
0
                     pre->g_pre_comp[0][2 * i][1],
1668
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1669
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1670
0
        for (j = 0; j < 27; ++j) {
1671
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1672
0
                         pre->g_pre_comp[0][2 * i][1],
1673
0
                         pre->g_pre_comp[0][2 * i][2],
1674
0
                         pre->g_pre_comp[0][2 * i][0],
1675
0
                         pre->g_pre_comp[0][2 * i][1],
1676
0
                         pre->g_pre_comp[0][2 * i][2]);
1677
0
        }
1678
0
    }
1679
0
    for (i = 0; i < 2; i++) {
1680
        /* g_pre_comp[i][0] is the point at infinity */
1681
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1682
        /* the remaining multiples */
1683
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1684
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1685
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1686
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1687
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688
0
                  pre->g_pre_comp[i][2][2]);
1689
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1690
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1691
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1692
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694
0
                  pre->g_pre_comp[i][2][2]);
1695
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1696
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1697
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1698
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1699
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1700
0
                  pre->g_pre_comp[i][4][2]);
1701
        /*
1702
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1703
         */
1704
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1705
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1706
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1707
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708
0
                  pre->g_pre_comp[i][2][2]);
1709
0
        for (j = 1; j < 8; ++j) {
1710
            /* odd multiples: add G resp. 2^28*G */
1711
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1712
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1713
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1714
0
                      pre->g_pre_comp[i][2 * j][0],
1715
0
                      pre->g_pre_comp[i][2 * j][1],
1716
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1717
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1718
0
                      pre->g_pre_comp[i][1][2]);
1719
0
        }
1720
0
    }
1721
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1722
1723
0
 done:
1724
0
    SETPRECOMP(group, nistp224, pre);
1725
0
    pre = NULL;
1726
0
    ret = 1;
1727
0
 err:
1728
0
    BN_CTX_end(ctx);
1729
0
    EC_POINT_free(generator);
1730
0
#ifndef FIPS_MODULE
1731
0
    BN_CTX_free(new_ctx);
1732
0
#endif
1733
0
    EC_nistp224_pre_comp_free(pre);
1734
0
    return ret;
1735
0
}
1736
1737
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1738
0
{
1739
0
    return HAVEPRECOMP(group, nistp224);
1740
0
}