Coverage Report

Created: 2025-06-13 06:58

/src/openssl32/crypto/ec/ecp_nistp224.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
# error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
93
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
96
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
99
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
102
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
{{{0, 0, 0, 0},
140
  {0, 0, 0, 0},
141
  {0, 0, 0, 0}},
142
 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
143
  {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
144
  {1, 0, 0, 0}},
145
 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
146
  {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
147
  {1, 0, 0, 0}},
148
 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149
  {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
150
  {1, 0, 0, 0}},
151
 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
152
  {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
153
  {1, 0, 0, 0}},
154
 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
155
  {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
156
  {1, 0, 0, 0}},
157
 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
158
  {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
159
  {1, 0, 0, 0}},
160
 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
161
  {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
162
  {1, 0, 0, 0}},
163
 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
164
  {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
165
  {1, 0, 0, 0}},
166
 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
167
  {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
168
  {1, 0, 0, 0}},
169
 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
170
  {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
171
  {1, 0, 0, 0}},
172
 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
173
  {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
174
  {1, 0, 0, 0}},
175
 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
176
  {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
177
  {1, 0, 0, 0}},
178
 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
179
  {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
180
  {1, 0, 0, 0}},
181
 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
182
  {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
183
  {1, 0, 0, 0}},
184
 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
185
  {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
186
  {1, 0, 0, 0}}},
187
{{{0, 0, 0, 0},
188
  {0, 0, 0, 0},
189
  {0, 0, 0, 0}},
190
 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
191
  {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
192
  {1, 0, 0, 0}},
193
 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
194
  {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
195
  {1, 0, 0, 0}},
196
 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
197
  {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
198
  {1, 0, 0, 0}},
199
 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
200
  {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
201
  {1, 0, 0, 0}},
202
 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
203
  {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
204
  {1, 0, 0, 0}},
205
 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
206
  {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
207
  {1, 0, 0, 0}},
208
 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
209
  {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
210
  {1, 0, 0, 0}},
211
 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
212
  {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
213
  {1, 0, 0, 0}},
214
 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
215
  {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
216
  {1, 0, 0, 0}},
217
 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
218
  {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
219
  {1, 0, 0, 0}},
220
 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
221
  {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
222
  {1, 0, 0, 0}},
223
 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
224
  {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
225
  {1, 0, 0, 0}},
226
 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
227
  {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
228
  {1, 0, 0, 0}},
229
 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
230
  {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
231
  {1, 0, 0, 0}},
232
 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
233
  {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
234
  {1, 0, 0, 0}}}
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
};
242
243
const EC_METHOD *EC_GFp_nistp224_method(void)
244
32.5k
{
245
32.5k
    static const EC_METHOD ret = {
246
32.5k
        EC_FLAGS_DEFAULT_OCT,
247
32.5k
        NID_X9_62_prime_field,
248
32.5k
        ossl_ec_GFp_nistp224_group_init,
249
32.5k
        ossl_ec_GFp_simple_group_finish,
250
32.5k
        ossl_ec_GFp_simple_group_clear_finish,
251
32.5k
        ossl_ec_GFp_nist_group_copy,
252
32.5k
        ossl_ec_GFp_nistp224_group_set_curve,
253
32.5k
        ossl_ec_GFp_simple_group_get_curve,
254
32.5k
        ossl_ec_GFp_simple_group_get_degree,
255
32.5k
        ossl_ec_group_simple_order_bits,
256
32.5k
        ossl_ec_GFp_simple_group_check_discriminant,
257
32.5k
        ossl_ec_GFp_simple_point_init,
258
32.5k
        ossl_ec_GFp_simple_point_finish,
259
32.5k
        ossl_ec_GFp_simple_point_clear_finish,
260
32.5k
        ossl_ec_GFp_simple_point_copy,
261
32.5k
        ossl_ec_GFp_simple_point_set_to_infinity,
262
32.5k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
263
32.5k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
264
32.5k
        0 /* point_set_compressed_coordinates */ ,
265
32.5k
        0 /* point2oct */ ,
266
32.5k
        0 /* oct2point */ ,
267
32.5k
        ossl_ec_GFp_simple_add,
268
32.5k
        ossl_ec_GFp_simple_dbl,
269
32.5k
        ossl_ec_GFp_simple_invert,
270
32.5k
        ossl_ec_GFp_simple_is_at_infinity,
271
32.5k
        ossl_ec_GFp_simple_is_on_curve,
272
32.5k
        ossl_ec_GFp_simple_cmp,
273
32.5k
        ossl_ec_GFp_simple_make_affine,
274
32.5k
        ossl_ec_GFp_simple_points_make_affine,
275
32.5k
        ossl_ec_GFp_nistp224_points_mul,
276
32.5k
        ossl_ec_GFp_nistp224_precompute_mult,
277
32.5k
        ossl_ec_GFp_nistp224_have_precompute_mult,
278
32.5k
        ossl_ec_GFp_nist_field_mul,
279
32.5k
        ossl_ec_GFp_nist_field_sqr,
280
32.5k
        0 /* field_div */ ,
281
32.5k
        ossl_ec_GFp_simple_field_inv,
282
32.5k
        0 /* field_encode */ ,
283
32.5k
        0 /* field_decode */ ,
284
32.5k
        0,                      /* field_set_to_one */
285
32.5k
        ossl_ec_key_simple_priv2oct,
286
32.5k
        ossl_ec_key_simple_oct2priv,
287
32.5k
        0, /* set private */
288
32.5k
        ossl_ec_key_simple_generate_key,
289
32.5k
        ossl_ec_key_simple_check_key,
290
32.5k
        ossl_ec_key_simple_generate_public_key,
291
32.5k
        0, /* keycopy */
292
32.5k
        0, /* keyfinish */
293
32.5k
        ossl_ecdh_simple_compute_key,
294
32.5k
        ossl_ecdsa_simple_sign_setup,
295
32.5k
        ossl_ecdsa_simple_sign_sig,
296
32.5k
        ossl_ecdsa_simple_verify_sig,
297
32.5k
        0, /* field_inverse_mod_ord */
298
32.5k
        0, /* blind_coordinates */
299
32.5k
        0, /* ladder_pre */
300
32.5k
        0, /* ladder_step */
301
32.5k
        0  /* ladder_post */
302
32.5k
    };
303
304
32.5k
    return &ret;
305
32.5k
}
306
307
/*
308
 * Helper functions to convert field elements to/from internal representation
309
 */
310
static void bin28_to_felem(felem out, const u8 in[28])
311
8.63k
{
312
8.63k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
313
8.63k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
314
8.63k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
315
8.63k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
316
8.63k
}
317
318
static void felem_to_bin28(u8 out[28], const felem in)
319
12.3k
{
320
12.3k
    unsigned i;
321
99.1k
    for (i = 0; i < 7; ++i) {
322
86.7k
        out[i] = in[0] >> (8 * i);
323
86.7k
        out[i + 7] = in[1] >> (8 * i);
324
86.7k
        out[i + 14] = in[2] >> (8 * i);
325
86.7k
        out[i + 21] = in[3] >> (8 * i);
326
86.7k
    }
327
12.3k
}
328
329
/* From OpenSSL BIGNUM to internal representation */
330
static int BN_to_felem(felem out, const BIGNUM *bn)
331
8.63k
{
332
8.63k
    felem_bytearray b_out;
333
8.63k
    int num_bytes;
334
335
8.63k
    if (BN_is_negative(bn)) {
336
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
337
0
        return 0;
338
0
    }
339
8.63k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
340
8.63k
    if (num_bytes < 0) {
341
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
342
0
        return 0;
343
0
    }
344
8.63k
    bin28_to_felem(out, b_out);
345
8.63k
    return 1;
346
8.63k
}
347
348
/* From internal representation to OpenSSL BIGNUM */
349
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
350
12.3k
{
351
12.3k
    felem_bytearray b_out;
352
12.3k
    felem_to_bin28(b_out, in);
353
12.3k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
354
12.3k
}
355
356
/******************************************************************************/
357
/*-
358
 *                              FIELD OPERATIONS
359
 *
360
 * Field operations, using the internal representation of field elements.
361
 * NB! These operations are specific to our point multiplication and cannot be
362
 * expected to be correct in general - e.g., multiplication with a large scalar
363
 * will cause an overflow.
364
 *
365
 */
366
367
static void felem_one(felem out)
368
0
{
369
0
    out[0] = 1;
370
0
    out[1] = 0;
371
0
    out[2] = 0;
372
0
    out[3] = 0;
373
0
}
374
375
static void felem_assign(felem out, const felem in)
376
867k
{
377
867k
    out[0] = in[0];
378
867k
    out[1] = in[1];
379
867k
    out[2] = in[2];
380
867k
    out[3] = in[3];
381
867k
}
382
383
/* Sum two field elements: out += in */
384
static void felem_sum(felem out, const felem in)
385
249k
{
386
249k
    out[0] += in[0];
387
249k
    out[1] += in[1];
388
249k
    out[2] += in[2];
389
249k
    out[3] += in[3];
390
249k
}
391
392
/* Subtract field elements: out -= in */
393
/* Assumes in[i] < 2^57 */
394
static void felem_diff(felem out, const felem in)
395
236k
{
396
236k
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
397
236k
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
398
236k
    static const limb two58m42m2 = (((limb) 1) << 58) -
399
236k
        (((limb) 1) << 42) - (((limb) 1) << 2);
400
401
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
402
236k
    out[0] += two58p2;
403
236k
    out[1] += two58m42m2;
404
236k
    out[2] += two58m2;
405
236k
    out[3] += two58m2;
406
407
236k
    out[0] -= in[0];
408
236k
    out[1] -= in[1];
409
236k
    out[2] -= in[2];
410
236k
    out[3] -= in[3];
411
236k
}
412
413
/* Subtract in unreduced 128-bit mode: out -= in */
414
/* Assumes in[i] < 2^119 */
415
static void widefelem_diff(widefelem out, const widefelem in)
416
153k
{
417
153k
    static const widelimb two120 = ((widelimb) 1) << 120;
418
153k
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
419
153k
        (((widelimb) 1) << 64);
420
153k
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
421
153k
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
422
423
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
424
153k
    out[0] += two120;
425
153k
    out[1] += two120m64;
426
153k
    out[2] += two120m64;
427
153k
    out[3] += two120;
428
153k
    out[4] += two120m104m64;
429
153k
    out[5] += two120m64;
430
153k
    out[6] += two120m64;
431
432
153k
    out[0] -= in[0];
433
153k
    out[1] -= in[1];
434
153k
    out[2] -= in[2];
435
153k
    out[3] -= in[3];
436
153k
    out[4] -= in[4];
437
153k
    out[5] -= in[5];
438
153k
    out[6] -= in[6];
439
153k
}
440
441
/* Subtract in mixed mode: out128 -= in64 */
442
/* in[i] < 2^63 */
443
static void felem_diff_128_64(widefelem out, const felem in)
444
460k
{
445
460k
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
446
460k
        (((widelimb) 1) << 8);
447
460k
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
448
460k
        (((widelimb) 1) << 8);
449
460k
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
450
460k
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
451
452
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
453
460k
    out[0] += two64p8;
454
460k
    out[1] += two64m48m8;
455
460k
    out[2] += two64m8;
456
460k
    out[3] += two64m8;
457
458
460k
    out[0] -= in[0];
459
460k
    out[1] -= in[1];
460
460k
    out[2] -= in[2];
461
460k
    out[3] -= in[3];
462
460k
}
463
464
/*
465
 * Multiply a field element by a scalar: out = out * scalar The scalars we
466
 * actually use are small, so results fit without overflow
467
 */
468
static void felem_scalar(felem out, const limb scalar)
469
320k
{
470
320k
    out[0] *= scalar;
471
320k
    out[1] *= scalar;
472
320k
    out[2] *= scalar;
473
320k
    out[3] *= scalar;
474
320k
}
475
476
/*
477
 * Multiply an unreduced field element by a scalar: out = out * scalar The
478
 * scalars we actually use are small, so results fit without overflow
479
 */
480
static void widefelem_scalar(widefelem out, const widelimb scalar)
481
83.0k
{
482
83.0k
    out[0] *= scalar;
483
83.0k
    out[1] *= scalar;
484
83.0k
    out[2] *= scalar;
485
83.0k
    out[3] *= scalar;
486
83.0k
    out[4] *= scalar;
487
83.0k
    out[5] *= scalar;
488
83.0k
    out[6] *= scalar;
489
83.0k
}
490
491
/* Square a field element: out = in^2 */
492
static void felem_square(widefelem out, const felem in)
493
1.23M
{
494
1.23M
    limb tmp0, tmp1, tmp2;
495
1.23M
    tmp0 = 2 * in[0];
496
1.23M
    tmp1 = 2 * in[1];
497
1.23M
    tmp2 = 2 * in[2];
498
1.23M
    out[0] = ((widelimb) in[0]) * in[0];
499
1.23M
    out[1] = ((widelimb) in[0]) * tmp1;
500
1.23M
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
501
1.23M
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
502
1.23M
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
503
1.23M
    out[5] = ((widelimb) in[3]) * tmp2;
504
1.23M
    out[6] = ((widelimb) in[3]) * in[3];
505
1.23M
}
506
507
/* Multiply two field elements: out = in1 * in2 */
508
static void felem_mul(widefelem out, const felem in1, const felem in2)
509
901k
{
510
901k
    out[0] = ((widelimb) in1[0]) * in2[0];
511
901k
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
512
901k
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
513
901k
             ((widelimb) in1[2]) * in2[0];
514
901k
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
515
901k
             ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
516
901k
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
517
901k
             ((widelimb) in1[3]) * in2[1];
518
901k
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
519
901k
    out[6] = ((widelimb) in1[3]) * in2[3];
520
901k
}
521
522
/*-
523
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
524
 * Requires in[i] < 2^126,
525
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
526
static void felem_reduce(felem out, const widefelem in)
527
1.98M
{
528
1.98M
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
529
1.98M
        (((widelimb) 1) << 15);
530
1.98M
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
531
1.98M
        (((widelimb) 1) << 71);
532
1.98M
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
533
1.98M
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
534
1.98M
    widelimb output[5];
535
536
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
537
1.98M
    output[0] = in[0] + two127p15;
538
1.98M
    output[1] = in[1] + two127m71m55;
539
1.98M
    output[2] = in[2] + two127m71;
540
1.98M
    output[3] = in[3];
541
1.98M
    output[4] = in[4];
542
543
    /* Eliminate in[4], in[5], in[6] */
544
1.98M
    output[4] += in[6] >> 16;
545
1.98M
    output[3] += (in[6] & 0xffff) << 40;
546
1.98M
    output[2] -= in[6];
547
548
1.98M
    output[3] += in[5] >> 16;
549
1.98M
    output[2] += (in[5] & 0xffff) << 40;
550
1.98M
    output[1] -= in[5];
551
552
1.98M
    output[2] += output[4] >> 16;
553
1.98M
    output[1] += (output[4] & 0xffff) << 40;
554
1.98M
    output[0] -= output[4];
555
556
    /* Carry 2 -> 3 -> 4 */
557
1.98M
    output[3] += output[2] >> 56;
558
1.98M
    output[2] &= 0x00ffffffffffffff;
559
560
1.98M
    output[4] = output[3] >> 56;
561
1.98M
    output[3] &= 0x00ffffffffffffff;
562
563
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
564
565
    /* Eliminate output[4] */
566
1.98M
    output[2] += output[4] >> 16;
567
    /* output[2] < 2^56 + 2^56 = 2^57 */
568
1.98M
    output[1] += (output[4] & 0xffff) << 40;
569
1.98M
    output[0] -= output[4];
570
571
    /* Carry 0 -> 1 -> 2 -> 3 */
572
1.98M
    output[1] += output[0] >> 56;
573
1.98M
    out[0] = output[0] & 0x00ffffffffffffff;
574
575
1.98M
    output[2] += output[1] >> 56;
576
    /* output[2] < 2^57 + 2^72 */
577
1.98M
    out[1] = output[1] & 0x00ffffffffffffff;
578
1.98M
    output[3] += output[2] >> 56;
579
    /* output[3] <= 2^56 + 2^16 */
580
1.98M
    out[2] = output[2] & 0x00ffffffffffffff;
581
582
    /*-
583
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
584
     * out[3] <= 2^56 + 2^16 (due to final carry),
585
     * so out < 2*p
586
     */
587
1.98M
    out[3] = output[3];
588
1.98M
}
589
590
static void felem_square_reduce(felem out, const felem in)
591
0
{
592
0
    widefelem tmp;
593
0
    felem_square(tmp, in);
594
0
    felem_reduce(out, tmp);
595
0
}
596
597
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
598
0
{
599
0
    widefelem tmp;
600
0
    felem_mul(tmp, in1, in2);
601
0
    felem_reduce(out, tmp);
602
0
}
603
604
/*
605
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
606
 * call felem_reduce first)
607
 */
608
static void felem_contract(felem out, const felem in)
609
9.19k
{
610
9.19k
    static const int64_t two56 = ((limb) 1) << 56;
611
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
612
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
613
9.19k
    int64_t tmp[4], a;
614
9.19k
    tmp[0] = in[0];
615
9.19k
    tmp[1] = in[1];
616
9.19k
    tmp[2] = in[2];
617
9.19k
    tmp[3] = in[3];
618
    /* Case 1: a = 1 iff in >= 2^224 */
619
9.19k
    a = (in[3] >> 56);
620
9.19k
    tmp[0] -= a;
621
9.19k
    tmp[1] += a << 40;
622
9.19k
    tmp[3] &= 0x00ffffffffffffff;
623
    /*
624
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
625
     * and the lower part is non-zero
626
     */
627
9.19k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
628
9.19k
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
629
9.19k
    a &= 0x00ffffffffffffff;
630
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
631
9.19k
    a = (a - 1) >> 63;
632
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
633
9.19k
    tmp[3] &= a ^ 0xffffffffffffffff;
634
9.19k
    tmp[2] &= a ^ 0xffffffffffffffff;
635
9.19k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
636
9.19k
    tmp[0] -= 1 & a;
637
638
    /*
639
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
640
     * non-zero, so we only need one step
641
     */
642
9.19k
    a = tmp[0] >> 63;
643
9.19k
    tmp[0] += two56 & a;
644
9.19k
    tmp[1] -= 1 & a;
645
646
    /* carry 1 -> 2 -> 3 */
647
9.19k
    tmp[2] += tmp[1] >> 56;
648
9.19k
    tmp[1] &= 0x00ffffffffffffff;
649
650
9.19k
    tmp[3] += tmp[2] >> 56;
651
9.19k
    tmp[2] &= 0x00ffffffffffffff;
652
653
    /* Now 0 <= out < p */
654
9.19k
    out[0] = tmp[0];
655
9.19k
    out[1] = tmp[1];
656
9.19k
    out[2] = tmp[2];
657
9.19k
    out[3] = tmp[3];
658
9.19k
}
659
660
/*
661
 * Get negative value: out = -in
662
 * Requires in[i] < 2^63,
663
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
664
 */
665
static void felem_neg(felem out, const felem in)
666
10.7k
{
667
10.7k
    widefelem tmp;
668
669
10.7k
    memset(tmp, 0, sizeof(tmp));
670
10.7k
    felem_diff_128_64(tmp, in);
671
10.7k
    felem_reduce(out, tmp);
672
10.7k
}
673
674
/*
675
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
676
 * elements are reduced to in < 2^225, so we only need to check three cases:
677
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
678
 */
679
static limb felem_is_zero(const felem in)
680
283k
{
681
283k
    limb zero, two224m96p1, two225m97p2;
682
683
283k
    zero = in[0] | in[1] | in[2] | in[3];
684
283k
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
685
283k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
686
283k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
687
283k
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
688
283k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
689
283k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
690
283k
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
691
283k
    return (zero | two224m96p1 | two225m97p2);
692
283k
}
693
694
static int felem_is_zero_int(const void *in)
695
0
{
696
0
    return (int)(felem_is_zero(in) & ((limb) 1));
697
0
}
698
699
/* Invert a field element */
700
/* Computation chain copied from djb's code */
701
static void felem_inv(felem out, const felem in)
702
2.64k
{
703
2.64k
    felem ftmp, ftmp2, ftmp3, ftmp4;
704
2.64k
    widefelem tmp;
705
2.64k
    unsigned i;
706
707
2.64k
    felem_square(tmp, in);
708
2.64k
    felem_reduce(ftmp, tmp);    /* 2 */
709
2.64k
    felem_mul(tmp, in, ftmp);
710
2.64k
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
711
2.64k
    felem_square(tmp, ftmp);
712
2.64k
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
713
2.64k
    felem_mul(tmp, in, ftmp);
714
2.64k
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
715
2.64k
    felem_square(tmp, ftmp);
716
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
717
2.64k
    felem_square(tmp, ftmp2);
718
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
719
2.64k
    felem_square(tmp, ftmp2);
720
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
721
2.64k
    felem_mul(tmp, ftmp2, ftmp);
722
2.64k
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
723
2.64k
    felem_square(tmp, ftmp);
724
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
725
15.8k
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
726
13.2k
        felem_square(tmp, ftmp2);
727
13.2k
        felem_reduce(ftmp2, tmp);
728
13.2k
    }
729
2.64k
    felem_mul(tmp, ftmp2, ftmp);
730
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
731
2.64k
    felem_square(tmp, ftmp2);
732
2.64k
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
733
31.6k
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
734
29.0k
        felem_square(tmp, ftmp3);
735
29.0k
        felem_reduce(ftmp3, tmp);
736
29.0k
    }
737
2.64k
    felem_mul(tmp, ftmp3, ftmp2);
738
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
739
2.64k
    felem_square(tmp, ftmp2);
740
2.64k
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
741
63.3k
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
742
60.7k
        felem_square(tmp, ftmp3);
743
60.7k
        felem_reduce(ftmp3, tmp);
744
60.7k
    }
745
2.64k
    felem_mul(tmp, ftmp3, ftmp2);
746
2.64k
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
747
2.64k
    felem_square(tmp, ftmp3);
748
2.64k
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
749
126k
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
750
124k
        felem_square(tmp, ftmp4);
751
124k
        felem_reduce(ftmp4, tmp);
752
124k
    }
753
2.64k
    felem_mul(tmp, ftmp3, ftmp4);
754
2.64k
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
755
2.64k
    felem_square(tmp, ftmp3);
756
2.64k
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
757
63.3k
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
758
60.7k
        felem_square(tmp, ftmp4);
759
60.7k
        felem_reduce(ftmp4, tmp);
760
60.7k
    }
761
2.64k
    felem_mul(tmp, ftmp2, ftmp4);
762
2.64k
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
763
18.4k
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
764
15.8k
        felem_square(tmp, ftmp2);
765
15.8k
        felem_reduce(ftmp2, tmp);
766
15.8k
    }
767
2.64k
    felem_mul(tmp, ftmp2, ftmp);
768
2.64k
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
769
2.64k
    felem_square(tmp, ftmp);
770
2.64k
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
771
2.64k
    felem_mul(tmp, ftmp, in);
772
2.64k
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
773
258k
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
774
256k
        felem_square(tmp, ftmp);
775
256k
        felem_reduce(ftmp, tmp);
776
256k
    }
777
2.64k
    felem_mul(tmp, ftmp, ftmp3);
778
2.64k
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
779
2.64k
}
780
781
/*
782
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
783
 * out to itself.
784
 */
785
static void copy_conditional(felem out, const felem in, limb icopy)
786
435k
{
787
435k
    unsigned i;
788
    /*
789
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
790
     */
791
435k
    const limb copy = -icopy;
792
2.17M
    for (i = 0; i < 4; ++i) {
793
1.74M
        const limb tmp = copy & (in[i] ^ out[i]);
794
1.74M
        out[i] ^= tmp;
795
1.74M
    }
796
435k
}
797
798
/******************************************************************************/
799
/*-
800
 *                       ELLIPTIC CURVE POINT OPERATIONS
801
 *
802
 * Points are represented in Jacobian projective coordinates:
803
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
804
 * or to the point at infinity if Z == 0.
805
 *
806
 */
807
808
/*-
809
 * Double an elliptic curve point:
810
 * (X', Y', Z') = 2 * (X, Y, Z), where
811
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
812
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
813
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
814
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
815
 * while x_out == y_in is not (maybe this works, but it's not tested).
816
 */
817
static void
818
point_double(felem x_out, felem y_out, felem z_out,
819
             const felem x_in, const felem y_in, const felem z_in)
820
83.0k
{
821
83.0k
    widefelem tmp, tmp2;
822
83.0k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
823
824
83.0k
    felem_assign(ftmp, x_in);
825
83.0k
    felem_assign(ftmp2, x_in);
826
827
    /* delta = z^2 */
828
83.0k
    felem_square(tmp, z_in);
829
83.0k
    felem_reduce(delta, tmp);
830
831
    /* gamma = y^2 */
832
83.0k
    felem_square(tmp, y_in);
833
83.0k
    felem_reduce(gamma, tmp);
834
835
    /* beta = x*gamma */
836
83.0k
    felem_mul(tmp, x_in, gamma);
837
83.0k
    felem_reduce(beta, tmp);
838
839
    /* alpha = 3*(x-delta)*(x+delta) */
840
83.0k
    felem_diff(ftmp, delta);
841
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
842
83.0k
    felem_sum(ftmp2, delta);
843
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
844
83.0k
    felem_scalar(ftmp2, 3);
845
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
846
83.0k
    felem_mul(tmp, ftmp, ftmp2);
847
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
848
83.0k
    felem_reduce(alpha, tmp);
849
850
    /* x' = alpha^2 - 8*beta */
851
83.0k
    felem_square(tmp, alpha);
852
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
853
83.0k
    felem_assign(ftmp, beta);
854
83.0k
    felem_scalar(ftmp, 8);
855
    /* ftmp[i] < 8 * 2^57 = 2^60 */
856
83.0k
    felem_diff_128_64(tmp, ftmp);
857
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
858
83.0k
    felem_reduce(x_out, tmp);
859
860
    /* z' = (y + z)^2 - gamma - delta */
861
83.0k
    felem_sum(delta, gamma);
862
    /* delta[i] < 2^57 + 2^57 = 2^58 */
863
83.0k
    felem_assign(ftmp, y_in);
864
83.0k
    felem_sum(ftmp, z_in);
865
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
866
83.0k
    felem_square(tmp, ftmp);
867
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
868
83.0k
    felem_diff_128_64(tmp, delta);
869
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
870
83.0k
    felem_reduce(z_out, tmp);
871
872
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
873
83.0k
    felem_scalar(beta, 4);
874
    /* beta[i] < 4 * 2^57 = 2^59 */
875
83.0k
    felem_diff(beta, x_out);
876
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
877
83.0k
    felem_mul(tmp, alpha, beta);
878
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
879
83.0k
    felem_square(tmp2, gamma);
880
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
881
83.0k
    widefelem_scalar(tmp2, 8);
882
    /* tmp2[i] < 8 * 2^116 = 2^119 */
883
83.0k
    widefelem_diff(tmp, tmp2);
884
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
885
83.0k
    felem_reduce(y_out, tmp);
886
83.0k
}
887
888
/*-
889
 * Add two elliptic curve points:
890
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
891
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
892
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
893
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
894
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
895
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
896
 *
897
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
898
 */
899
900
/*
901
 * This function is not entirely constant-time: it includes a branch for
902
 * checking whether the two input points are equal, (while not equal to the
903
 * point at infinity). This case never happens during single point
904
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
905
 */
906
static void point_add(felem x3, felem y3, felem z3,
907
                      const felem x1, const felem y1, const felem z1,
908
                      const int mixed, const felem x2, const felem y2,
909
                      const felem z2)
910
70.8k
{
911
70.8k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
912
70.8k
    widefelem tmp, tmp2;
913
70.8k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
914
70.8k
    limb points_equal;
915
916
70.8k
    if (!mixed) {
917
        /* ftmp2 = z2^2 */
918
12.1k
        felem_square(tmp, z2);
919
12.1k
        felem_reduce(ftmp2, tmp);
920
921
        /* ftmp4 = z2^3 */
922
12.1k
        felem_mul(tmp, ftmp2, z2);
923
12.1k
        felem_reduce(ftmp4, tmp);
924
925
        /* ftmp4 = z2^3*y1 */
926
12.1k
        felem_mul(tmp2, ftmp4, y1);
927
12.1k
        felem_reduce(ftmp4, tmp2);
928
929
        /* ftmp2 = z2^2*x1 */
930
12.1k
        felem_mul(tmp2, ftmp2, x1);
931
12.1k
        felem_reduce(ftmp2, tmp2);
932
58.6k
    } else {
933
        /*
934
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
935
         */
936
937
        /* ftmp4 = z2^3*y1 */
938
58.6k
        felem_assign(ftmp4, y1);
939
940
        /* ftmp2 = z2^2*x1 */
941
58.6k
        felem_assign(ftmp2, x1);
942
58.6k
    }
943
944
    /* ftmp = z1^2 */
945
70.8k
    felem_square(tmp, z1);
946
70.8k
    felem_reduce(ftmp, tmp);
947
948
    /* ftmp3 = z1^3 */
949
70.8k
    felem_mul(tmp, ftmp, z1);
950
70.8k
    felem_reduce(ftmp3, tmp);
951
952
    /* tmp = z1^3*y2 */
953
70.8k
    felem_mul(tmp, ftmp3, y2);
954
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
955
956
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
957
70.8k
    felem_diff_128_64(tmp, ftmp4);
958
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
959
70.8k
    felem_reduce(ftmp3, tmp);
960
961
    /* tmp = z1^2*x2 */
962
70.8k
    felem_mul(tmp, ftmp, x2);
963
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
964
965
    /* ftmp = z1^2*x2 - z2^2*x1 */
966
70.8k
    felem_diff_128_64(tmp, ftmp2);
967
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
968
70.8k
    felem_reduce(ftmp, tmp);
969
970
    /*
971
     * The formulae are incorrect if the points are equal, in affine coordinates
972
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
973
     * happens.
974
     *
975
     * We use bitwise operations to avoid potential side-channels introduced by
976
     * the short-circuiting behaviour of boolean operators.
977
     */
978
70.8k
    x_equal = felem_is_zero(ftmp);
979
70.8k
    y_equal = felem_is_zero(ftmp3);
980
    /*
981
     * The special case of either point being the point at infinity (z1 and/or
982
     * z2 are zero), is handled separately later on in this function, so we
983
     * avoid jumping to point_double here in those special cases.
984
     */
985
70.8k
    z1_is_zero = felem_is_zero(z1);
986
70.8k
    z2_is_zero = felem_is_zero(z2);
987
988
    /*
989
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
990
     * specific implementation `felem_is_zero()` returns truth as `0x1`
991
     * (rather than `0xff..ff`).
992
     *
993
     * This implies that `~true` in this implementation becomes
994
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
995
     * the if expression, we mask out only the last bit in the next
996
     * line.
997
     */
998
70.8k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
999
1000
70.8k
    if (points_equal) {
1001
        /*
1002
         * This is obviously not constant-time but, as mentioned before, this
1003
         * case never happens during single point multiplication, so there is no
1004
         * timing leak for ECDH or ECDSA signing.
1005
         */
1006
0
        point_double(x3, y3, z3, x1, y1, z1);
1007
0
        return;
1008
0
    }
1009
1010
    /* ftmp5 = z1*z2 */
1011
70.8k
    if (!mixed) {
1012
12.1k
        felem_mul(tmp, z1, z2);
1013
12.1k
        felem_reduce(ftmp5, tmp);
1014
58.6k
    } else {
1015
        /* special case z2 = 0 is handled later */
1016
58.6k
        felem_assign(ftmp5, z1);
1017
58.6k
    }
1018
1019
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1020
70.8k
    felem_mul(tmp, ftmp, ftmp5);
1021
70.8k
    felem_reduce(z_out, tmp);
1022
1023
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1024
70.8k
    felem_assign(ftmp5, ftmp);
1025
70.8k
    felem_square(tmp, ftmp);
1026
70.8k
    felem_reduce(ftmp, tmp);
1027
1028
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1029
70.8k
    felem_mul(tmp, ftmp, ftmp5);
1030
70.8k
    felem_reduce(ftmp5, tmp);
1031
1032
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1033
70.8k
    felem_mul(tmp, ftmp2, ftmp);
1034
70.8k
    felem_reduce(ftmp2, tmp);
1035
1036
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1037
70.8k
    felem_mul(tmp, ftmp4, ftmp5);
1038
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1039
1040
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1041
70.8k
    felem_square(tmp2, ftmp3);
1042
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1043
1044
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1045
70.8k
    felem_diff_128_64(tmp2, ftmp5);
1046
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1047
1048
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1049
70.8k
    felem_assign(ftmp5, ftmp2);
1050
70.8k
    felem_scalar(ftmp5, 2);
1051
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1052
1053
    /*-
1054
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1055
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1056
     */
1057
70.8k
    felem_diff_128_64(tmp2, ftmp5);
1058
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1059
70.8k
    felem_reduce(x_out, tmp2);
1060
1061
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1062
70.8k
    felem_diff(ftmp2, x_out);
1063
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1064
1065
    /*
1066
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1067
     */
1068
70.8k
    felem_mul(tmp2, ftmp3, ftmp2);
1069
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1070
1071
    /*-
1072
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1073
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1074
     */
1075
70.8k
    widefelem_diff(tmp2, tmp);
1076
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1077
70.8k
    felem_reduce(y_out, tmp2);
1078
1079
    /*
1080
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1081
     * the point at infinity, so we need to check for this separately
1082
     */
1083
1084
    /*
1085
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1086
     */
1087
70.8k
    copy_conditional(x_out, x2, z1_is_zero);
1088
70.8k
    copy_conditional(x_out, x1, z2_is_zero);
1089
70.8k
    copy_conditional(y_out, y2, z1_is_zero);
1090
70.8k
    copy_conditional(y_out, y1, z2_is_zero);
1091
70.8k
    copy_conditional(z_out, z2, z1_is_zero);
1092
70.8k
    copy_conditional(z_out, z1, z2_is_zero);
1093
70.8k
    felem_assign(x3, x_out);
1094
70.8k
    felem_assign(y3, y_out);
1095
70.8k
    felem_assign(z3, z_out);
1096
70.8k
}
1097
1098
/*
1099
 * select_point selects the |idx|th point from a precomputation table and
1100
 * copies it to out.
1101
 * The pre_comp array argument should be size of |size| argument
1102
 */
1103
static void select_point(const u64 idx, unsigned int size,
1104
                         const felem pre_comp[][3], felem out[3])
1105
70.4k
{
1106
70.4k
    unsigned i, j;
1107
70.4k
    limb *outlimbs = &out[0][0];
1108
1109
70.4k
    memset(out, 0, sizeof(*out) * 3);
1110
1.20M
    for (i = 0; i < size; i++) {
1111
1.13M
        const limb *inlimbs = &pre_comp[i][0][0];
1112
1.13M
        u64 mask = i ^ idx;
1113
1.13M
        mask |= mask >> 4;
1114
1.13M
        mask |= mask >> 2;
1115
1.13M
        mask |= mask >> 1;
1116
1.13M
        mask &= 1;
1117
1.13M
        mask--;
1118
14.7M
        for (j = 0; j < 4 * 3; j++)
1119
13.6M
            outlimbs[j] |= inlimbs[j] & mask;
1120
1.13M
    }
1121
70.4k
}
1122
1123
/* get_bit returns the |i|th bit in |in| */
1124
static char get_bit(const felem_bytearray in, unsigned i)
1125
303k
{
1126
303k
    if (i >= 224)
1127
476
        return 0;
1128
302k
    return (in[i >> 3] >> (i & 7)) & 1;
1129
303k
}
1130
1131
/*
1132
 * Interleaved point multiplication using precomputed point multiples: The
1133
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1134
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1135
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1136
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1137
 */
1138
static void batch_mul(felem x_out, felem y_out, felem z_out,
1139
                      const felem_bytearray scalars[],
1140
                      const unsigned num_points, const u8 *g_scalar,
1141
                      const int mixed, const felem pre_comp[][17][3],
1142
                      const felem g_pre_comp[2][16][3])
1143
1.30k
{
1144
1.30k
    int i, skip;
1145
1.30k
    unsigned num;
1146
1.30k
    unsigned gen_mul = (g_scalar != NULL);
1147
1.30k
    felem nq[3], tmp[4];
1148
1.30k
    u64 bits;
1149
1.30k
    u8 sign, digit;
1150
1151
    /* set nq to the point at infinity */
1152
1.30k
    memset(nq, 0, sizeof(nq));
1153
1154
    /*
1155
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1156
     * of the generator (two in each of the last 28 rounds) and additions of
1157
     * other points multiples (every 5th round).
1158
     */
1159
1.30k
    skip = 1;                   /* save two point operations in the first
1160
                                 * round */
1161
83.7k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1162
        /* double */
1163
82.4k
        if (!skip)
1164
81.1k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1165
1166
        /* add multiples of the generator */
1167
82.4k
        if (gen_mul && (i <= 27)) {
1168
            /* first, look 28 bits upwards */
1169
29.8k
            bits = get_bit(g_scalar, i + 196) << 3;
1170
29.8k
            bits |= get_bit(g_scalar, i + 140) << 2;
1171
29.8k
            bits |= get_bit(g_scalar, i + 84) << 1;
1172
29.8k
            bits |= get_bit(g_scalar, i + 28);
1173
            /* select the point to add, in constant time */
1174
29.8k
            select_point(bits, 16, g_pre_comp[1], tmp);
1175
1176
29.8k
            if (!skip) {
1177
                /* value 1 below is argument for "mixed" */
1178
28.8k
                point_add(nq[0], nq[1], nq[2],
1179
28.8k
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1180
28.8k
            } else {
1181
1.06k
                memcpy(nq, tmp, 3 * sizeof(felem));
1182
1.06k
                skip = 0;
1183
1.06k
            }
1184
1185
            /* second, look at the current position */
1186
29.8k
            bits = get_bit(g_scalar, i + 168) << 3;
1187
29.8k
            bits |= get_bit(g_scalar, i + 112) << 2;
1188
29.8k
            bits |= get_bit(g_scalar, i + 56) << 1;
1189
29.8k
            bits |= get_bit(g_scalar, i);
1190
            /* select the point to add, in constant time */
1191
29.8k
            select_point(bits, 16, g_pre_comp[0], tmp);
1192
29.8k
            point_add(nq[0], nq[1], nq[2],
1193
29.8k
                      nq[0], nq[1], nq[2],
1194
29.8k
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1195
29.8k
        }
1196
1197
        /* do other additions every 5 doublings */
1198
82.4k
        if (num_points && (i % 5 == 0)) {
1199
            /* loop over all scalars */
1200
21.4k
            for (num = 0; num < num_points; ++num) {
1201
10.7k
                bits = get_bit(scalars[num], i + 4) << 5;
1202
10.7k
                bits |= get_bit(scalars[num], i + 3) << 4;
1203
10.7k
                bits |= get_bit(scalars[num], i + 2) << 3;
1204
10.7k
                bits |= get_bit(scalars[num], i + 1) << 2;
1205
10.7k
                bits |= get_bit(scalars[num], i) << 1;
1206
10.7k
                bits |= get_bit(scalars[num], i - 1);
1207
10.7k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1208
1209
                /* select the point to add or subtract */
1210
10.7k
                select_point(digit, 17, pre_comp[num], tmp);
1211
10.7k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1212
                                            * point */
1213
10.7k
                copy_conditional(tmp[1], tmp[3], sign);
1214
1215
10.7k
                if (!skip) {
1216
10.4k
                    point_add(nq[0], nq[1], nq[2],
1217
10.4k
                              nq[0], nq[1], nq[2],
1218
10.4k
                              mixed, tmp[0], tmp[1], tmp[2]);
1219
10.4k
                } else {
1220
238
                    memcpy(nq, tmp, 3 * sizeof(felem));
1221
238
                    skip = 0;
1222
238
                }
1223
10.7k
            }
1224
10.7k
        }
1225
82.4k
    }
1226
1.30k
    felem_assign(x_out, nq[0]);
1227
1.30k
    felem_assign(y_out, nq[1]);
1228
1.30k
    felem_assign(z_out, nq[2]);
1229
1.30k
}
1230
1231
/******************************************************************************/
1232
/*
1233
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1234
 */
1235
1236
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1237
0
{
1238
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1239
1240
0
    if (ret == NULL)
1241
0
        return ret;
1242
1243
1244
0
    if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1245
0
        OPENSSL_free(ret);
1246
0
        return NULL;
1247
0
    }
1248
0
    return ret;
1249
0
}
1250
1251
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1252
0
{
1253
0
    int i;
1254
0
    if (p != NULL)
1255
0
        CRYPTO_UP_REF(&p->references, &i);
1256
0
    return p;
1257
0
}
1258
1259
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1260
0
{
1261
0
    int i;
1262
1263
0
    if (p == NULL)
1264
0
        return;
1265
1266
0
    CRYPTO_DOWN_REF(&p->references, &i);
1267
0
    REF_PRINT_COUNT("EC_nistp224", i, p);
1268
0
    if (i > 0)
1269
0
        return;
1270
0
    REF_ASSERT_ISNT(i < 0);
1271
1272
0
    CRYPTO_FREE_REF(&p->references);
1273
0
    OPENSSL_free(p);
1274
0
}
1275
1276
/******************************************************************************/
1277
/*
1278
 * OPENSSL EC_METHOD FUNCTIONS
1279
 */
1280
1281
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1282
63.2k
{
1283
63.2k
    int ret;
1284
63.2k
    ret = ossl_ec_GFp_simple_group_init(group);
1285
63.2k
    group->a_is_minus3 = 1;
1286
63.2k
    return ret;
1287
63.2k
}
1288
1289
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1290
                                         const BIGNUM *a, const BIGNUM *b,
1291
                                         BN_CTX *ctx)
1292
32.5k
{
1293
32.5k
    int ret = 0;
1294
32.5k
    BIGNUM *curve_p, *curve_a, *curve_b;
1295
32.5k
#ifndef FIPS_MODULE
1296
32.5k
    BN_CTX *new_ctx = NULL;
1297
1298
32.5k
    if (ctx == NULL)
1299
0
        ctx = new_ctx = BN_CTX_new();
1300
32.5k
#endif
1301
32.5k
    if (ctx == NULL)
1302
0
        return 0;
1303
1304
32.5k
    BN_CTX_start(ctx);
1305
32.5k
    curve_p = BN_CTX_get(ctx);
1306
32.5k
    curve_a = BN_CTX_get(ctx);
1307
32.5k
    curve_b = BN_CTX_get(ctx);
1308
32.5k
    if (curve_b == NULL)
1309
0
        goto err;
1310
32.5k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1311
32.5k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1312
32.5k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1313
32.5k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1314
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1315
0
        goto err;
1316
0
    }
1317
32.5k
    group->field_mod_func = BN_nist_mod_224;
1318
32.5k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1319
32.5k
 err:
1320
32.5k
    BN_CTX_end(ctx);
1321
32.5k
#ifndef FIPS_MODULE
1322
32.5k
    BN_CTX_free(new_ctx);
1323
32.5k
#endif
1324
32.5k
    return ret;
1325
32.5k
}
1326
1327
/*
1328
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1329
 * (X/Z^2, Y/Z^3)
1330
 */
1331
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1332
                                                      const EC_POINT *point,
1333
                                                      BIGNUM *x, BIGNUM *y,
1334
                                                      BN_CTX *ctx)
1335
2.64k
{
1336
2.64k
    felem z1, z2, x_in, y_in, x_out, y_out;
1337
2.64k
    widefelem tmp;
1338
1339
2.64k
    if (EC_POINT_is_at_infinity(group, point)) {
1340
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1341
0
        return 0;
1342
0
    }
1343
2.64k
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1344
2.64k
        (!BN_to_felem(z1, point->Z)))
1345
0
        return 0;
1346
2.64k
    felem_inv(z2, z1);
1347
2.64k
    felem_square(tmp, z2);
1348
2.64k
    felem_reduce(z1, tmp);
1349
2.64k
    felem_mul(tmp, x_in, z1);
1350
2.64k
    felem_reduce(x_in, tmp);
1351
2.64k
    felem_contract(x_out, x_in);
1352
2.64k
    if (x != NULL) {
1353
2.64k
        if (!felem_to_BN(x, x_out)) {
1354
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1355
0
            return 0;
1356
0
        }
1357
2.64k
    }
1358
2.64k
    felem_mul(tmp, z1, z2);
1359
2.64k
    felem_reduce(z1, tmp);
1360
2.64k
    felem_mul(tmp, y_in, z1);
1361
2.64k
    felem_reduce(y_in, tmp);
1362
2.64k
    felem_contract(y_out, y_in);
1363
2.64k
    if (y != NULL) {
1364
2.64k
        if (!felem_to_BN(y, y_out)) {
1365
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1366
0
            return 0;
1367
0
        }
1368
2.64k
    }
1369
2.64k
    return 1;
1370
2.64k
}
1371
1372
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373
                               felem tmp_felems[ /* num+1 */ ])
1374
0
{
1375
    /*
1376
     * Runs in constant time, unless an input is the point at infinity (which
1377
     * normally shouldn't happen).
1378
     */
1379
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1380
0
                                                  points,
1381
0
                                                  sizeof(felem),
1382
0
                                                  tmp_felems,
1383
0
                                                  (void (*)(void *))felem_one,
1384
0
                                                  felem_is_zero_int,
1385
0
                                                  (void (*)(void *, const void *))
1386
0
                                                  felem_assign,
1387
0
                                                  (void (*)(void *, const void *))
1388
0
                                                  felem_square_reduce, (void (*)
1389
0
                                                                        (void *,
1390
0
                                                                         const void
1391
0
                                                                         *,
1392
0
                                                                         const void
1393
0
                                                                         *))
1394
0
                                                  felem_mul_reduce,
1395
0
                                                  (void (*)(void *, const void *))
1396
0
                                                  felem_inv,
1397
0
                                                  (void (*)(void *, const void *))
1398
0
                                                  felem_contract);
1399
0
}
1400
1401
/*
1402
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403
 * values Result is stored in r (r can equal one of the inputs).
1404
 */
1405
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406
                                    const BIGNUM *scalar, size_t num,
1407
                                    const EC_POINT *points[],
1408
                                    const BIGNUM *scalars[], BN_CTX *ctx)
1409
1.30k
{
1410
1.30k
    int ret = 0;
1411
1.30k
    int j;
1412
1.30k
    unsigned i;
1413
1.30k
    int mixed = 0;
1414
1.30k
    BIGNUM *x, *y, *z, *tmp_scalar;
1415
1.30k
    felem_bytearray g_secret;
1416
1.30k
    felem_bytearray *secrets = NULL;
1417
1.30k
    felem (*pre_comp)[17][3] = NULL;
1418
1.30k
    felem *tmp_felems = NULL;
1419
1.30k
    int num_bytes;
1420
1.30k
    int have_pre_comp = 0;
1421
1.30k
    size_t num_points = num;
1422
1.30k
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1423
1.30k
    NISTP224_PRE_COMP *pre = NULL;
1424
1.30k
    const felem(*g_pre_comp)[16][3] = NULL;
1425
1.30k
    EC_POINT *generator = NULL;
1426
1.30k
    const EC_POINT *p = NULL;
1427
1.30k
    const BIGNUM *p_scalar = NULL;
1428
1429
1.30k
    BN_CTX_start(ctx);
1430
1.30k
    x = BN_CTX_get(ctx);
1431
1.30k
    y = BN_CTX_get(ctx);
1432
1.30k
    z = BN_CTX_get(ctx);
1433
1.30k
    tmp_scalar = BN_CTX_get(ctx);
1434
1.30k
    if (tmp_scalar == NULL)
1435
0
        goto err;
1436
1437
1.30k
    if (scalar != NULL) {
1438
1.06k
        pre = group->pre_comp.nistp224;
1439
1.06k
        if (pre)
1440
            /* we have precomputation, try to use it */
1441
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442
1.06k
        else
1443
            /* try to use the standard precomputation */
1444
1.06k
            g_pre_comp = &gmul[0];
1445
1.06k
        generator = EC_POINT_new(group);
1446
1.06k
        if (generator == NULL)
1447
0
            goto err;
1448
        /* get the generator from precomputation */
1449
1.06k
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450
1.06k
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451
1.06k
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1453
0
            goto err;
1454
0
        }
1455
1.06k
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1456
1.06k
                                                                generator,
1457
1.06k
                                                                x, y, z, ctx))
1458
0
            goto err;
1459
1.06k
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460
            /* precomputation matches generator */
1461
1.06k
            have_pre_comp = 1;
1462
0
        else
1463
            /*
1464
             * we don't have valid precomputation: treat the generator as a
1465
             * random point
1466
             */
1467
0
            num_points = num_points + 1;
1468
1.06k
    }
1469
1470
1.30k
    if (num_points > 0) {
1471
238
        if (num_points >= 3) {
1472
            /*
1473
             * unless we precompute multiples for just one or two points,
1474
             * converting those into affine form is time well spent
1475
             */
1476
0
            mixed = 1;
1477
0
        }
1478
238
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479
238
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480
238
        if (mixed)
1481
0
            tmp_felems =
1482
0
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483
238
        if ((secrets == NULL) || (pre_comp == NULL)
1484
238
            || (mixed && (tmp_felems == NULL)))
1485
0
            goto err;
1486
1487
        /*
1488
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1489
         * i.e., they contribute nothing to the linear combination
1490
         */
1491
476
        for (i = 0; i < num_points; ++i) {
1492
238
            if (i == num) {
1493
                /* the generator */
1494
0
                p = EC_GROUP_get0_generator(group);
1495
0
                p_scalar = scalar;
1496
238
            } else {
1497
                /* the i^th point */
1498
238
                p = points[i];
1499
238
                p_scalar = scalars[i];
1500
238
            }
1501
238
            if ((p_scalar != NULL) && (p != NULL)) {
1502
                /* reduce scalar to 0 <= scalar < 2^224 */
1503
238
                if ((BN_num_bits(p_scalar) > 224)
1504
238
                    || (BN_is_negative(p_scalar))) {
1505
                    /*
1506
                     * this is an unusual input, and we don't guarantee
1507
                     * constant-timeness
1508
                     */
1509
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1510
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1511
0
                        goto err;
1512
0
                    }
1513
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1514
0
                                               secrets[i], sizeof(secrets[i]));
1515
238
                } else {
1516
238
                    num_bytes = BN_bn2lebinpad(p_scalar,
1517
238
                                               secrets[i], sizeof(secrets[i]));
1518
238
                }
1519
238
                if (num_bytes < 0) {
1520
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1521
0
                    goto err;
1522
0
                }
1523
                /* precompute multiples */
1524
238
                if ((!BN_to_felem(x_out, p->X)) ||
1525
238
                    (!BN_to_felem(y_out, p->Y)) ||
1526
238
                    (!BN_to_felem(z_out, p->Z)))
1527
0
                    goto err;
1528
238
                felem_assign(pre_comp[i][1][0], x_out);
1529
238
                felem_assign(pre_comp[i][1][1], y_out);
1530
238
                felem_assign(pre_comp[i][1][2], z_out);
1531
3.80k
                for (j = 2; j <= 16; ++j) {
1532
3.57k
                    if (j & 1) {
1533
1.66k
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1534
1.66k
                                  pre_comp[i][j][2], pre_comp[i][1][0],
1535
1.66k
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
1536
1.66k
                                  pre_comp[i][j - 1][0],
1537
1.66k
                                  pre_comp[i][j - 1][1],
1538
1.66k
                                  pre_comp[i][j - 1][2]);
1539
1.90k
                    } else {
1540
1.90k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1541
1.90k
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
1542
1.90k
                                     pre_comp[i][j / 2][1],
1543
1.90k
                                     pre_comp[i][j / 2][2]);
1544
1.90k
                    }
1545
3.57k
                }
1546
238
            }
1547
238
        }
1548
238
        if (mixed)
1549
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1550
238
    }
1551
1552
    /* the scalar for the generator */
1553
1.30k
    if ((scalar != NULL) && (have_pre_comp)) {
1554
1.06k
        memset(g_secret, 0, sizeof(g_secret));
1555
        /* reduce scalar to 0 <= scalar < 2^224 */
1556
1.06k
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1557
            /*
1558
             * this is an unusual input, and we don't guarantee
1559
             * constant-timeness
1560
             */
1561
111
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1562
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1563
0
                goto err;
1564
0
            }
1565
111
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1566
956
        } else {
1567
956
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1568
956
        }
1569
        /* do the multiplication with generator precomputation */
1570
1.06k
        batch_mul(x_out, y_out, z_out,
1571
1.06k
                  (const felem_bytearray(*))secrets, num_points,
1572
1.06k
                  g_secret,
1573
1.06k
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1574
1.06k
    } else {
1575
        /* do the multiplication without generator precomputation */
1576
238
        batch_mul(x_out, y_out, z_out,
1577
238
                  (const felem_bytearray(*))secrets, num_points,
1578
238
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1579
238
    }
1580
    /* reduce the output to its unique minimal representation */
1581
1.30k
    felem_contract(x_in, x_out);
1582
1.30k
    felem_contract(y_in, y_out);
1583
1.30k
    felem_contract(z_in, z_out);
1584
1.30k
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1585
1.30k
        (!felem_to_BN(z, z_in))) {
1586
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1587
0
        goto err;
1588
0
    }
1589
1.30k
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1590
1.30k
                                                             ctx);
1591
1592
1.30k
 err:
1593
1.30k
    BN_CTX_end(ctx);
1594
1.30k
    EC_POINT_free(generator);
1595
1.30k
    OPENSSL_free(secrets);
1596
1.30k
    OPENSSL_free(pre_comp);
1597
1.30k
    OPENSSL_free(tmp_felems);
1598
1.30k
    return ret;
1599
1.30k
}
1600
1601
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1602
0
{
1603
0
    int ret = 0;
1604
0
    NISTP224_PRE_COMP *pre = NULL;
1605
0
    int i, j;
1606
0
    BIGNUM *x, *y;
1607
0
    EC_POINT *generator = NULL;
1608
0
    felem tmp_felems[32];
1609
0
#ifndef FIPS_MODULE
1610
0
    BN_CTX *new_ctx = NULL;
1611
0
#endif
1612
1613
    /* throw away old precomputation */
1614
0
    EC_pre_comp_free(group);
1615
1616
0
#ifndef FIPS_MODULE
1617
0
    if (ctx == NULL)
1618
0
        ctx = new_ctx = BN_CTX_new();
1619
0
#endif
1620
0
    if (ctx == NULL)
1621
0
        return 0;
1622
1623
0
    BN_CTX_start(ctx);
1624
0
    x = BN_CTX_get(ctx);
1625
0
    y = BN_CTX_get(ctx);
1626
0
    if (y == NULL)
1627
0
        goto err;
1628
    /* get the generator */
1629
0
    if (group->generator == NULL)
1630
0
        goto err;
1631
0
    generator = EC_POINT_new(group);
1632
0
    if (generator == NULL)
1633
0
        goto err;
1634
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1635
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1636
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1637
0
        goto err;
1638
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1639
0
        goto err;
1640
    /*
1641
     * if the generator is the standard one, use built-in precomputation
1642
     */
1643
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1644
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1645
0
        goto done;
1646
0
    }
1647
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1648
0
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1649
0
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1650
0
        goto err;
1651
    /*
1652
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1653
     * 2^140*G, 2^196*G for the second one
1654
     */
1655
0
    for (i = 1; i <= 8; i <<= 1) {
1656
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1657
0
                     pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1658
0
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1659
0
        for (j = 0; j < 27; ++j) {
1660
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1661
0
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1662
0
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1663
0
        }
1664
0
        if (i == 8)
1665
0
            break;
1666
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1667
0
                     pre->g_pre_comp[0][2 * i][1],
1668
0
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1669
0
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1670
0
        for (j = 0; j < 27; ++j) {
1671
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1672
0
                         pre->g_pre_comp[0][2 * i][1],
1673
0
                         pre->g_pre_comp[0][2 * i][2],
1674
0
                         pre->g_pre_comp[0][2 * i][0],
1675
0
                         pre->g_pre_comp[0][2 * i][1],
1676
0
                         pre->g_pre_comp[0][2 * i][2]);
1677
0
        }
1678
0
    }
1679
0
    for (i = 0; i < 2; i++) {
1680
        /* g_pre_comp[i][0] is the point at infinity */
1681
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1682
        /* the remaining multiples */
1683
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1684
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1685
0
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1686
0
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1687
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688
0
                  pre->g_pre_comp[i][2][2]);
1689
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1690
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1691
0
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1692
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694
0
                  pre->g_pre_comp[i][2][2]);
1695
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1696
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1697
0
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1698
0
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1699
0
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1700
0
                  pre->g_pre_comp[i][4][2]);
1701
        /*
1702
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1703
         */
1704
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1705
0
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1706
0
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1707
0
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1708
0
                  pre->g_pre_comp[i][2][2]);
1709
0
        for (j = 1; j < 8; ++j) {
1710
            /* odd multiples: add G resp. 2^28*G */
1711
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1712
0
                      pre->g_pre_comp[i][2 * j + 1][1],
1713
0
                      pre->g_pre_comp[i][2 * j + 1][2],
1714
0
                      pre->g_pre_comp[i][2 * j][0],
1715
0
                      pre->g_pre_comp[i][2 * j][1],
1716
0
                      pre->g_pre_comp[i][2 * j][2], 0,
1717
0
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1718
0
                      pre->g_pre_comp[i][1][2]);
1719
0
        }
1720
0
    }
1721
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1722
1723
0
 done:
1724
0
    SETPRECOMP(group, nistp224, pre);
1725
0
    pre = NULL;
1726
0
    ret = 1;
1727
0
 err:
1728
0
    BN_CTX_end(ctx);
1729
0
    EC_POINT_free(generator);
1730
0
#ifndef FIPS_MODULE
1731
0
    BN_CTX_free(new_ctx);
1732
0
#endif
1733
0
    EC_nistp224_pre_comp_free(pre);
1734
0
    return ret;
1735
0
}
1736
1737
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1738
0
{
1739
0
    return HAVEPRECOMP(group, nistp224);
1740
0
}