Coverage Report

Created: 2025-12-31 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl30/crypto/ec/ecp_nistp224.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
#error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
93
    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE },
96
    { 0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
        0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
        0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4 },
99
    { 0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
        0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
        0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21 },
102
    { 0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
        0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
        0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34 }
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
    { { { 0, 0, 0, 0 },
140
          { 0, 0, 0, 0 },
141
          { 0, 0, 0, 0 } },
142
        { { 0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf },
143
            { 0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723 },
144
            { 1, 0, 0, 0 } },
145
        { { 0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5 },
146
            { 0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321 },
147
            { 1, 0, 0, 0 } },
148
        { { 0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748 },
149
            { 0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17 },
150
            { 1, 0, 0, 0 } },
151
        { { 0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe },
152
            { 0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b },
153
            { 1, 0, 0, 0 } },
154
        { { 0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3 },
155
            { 0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a },
156
            { 1, 0, 0, 0 } },
157
        { { 0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c },
158
            { 0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244 },
159
            { 1, 0, 0, 0 } },
160
        { { 0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849 },
161
            { 0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112 },
162
            { 1, 0, 0, 0 } },
163
        { { 0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47 },
164
            { 0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394 },
165
            { 1, 0, 0, 0 } },
166
        { { 0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d },
167
            { 0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7 },
168
            { 1, 0, 0, 0 } },
169
        { { 0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24 },
170
            { 0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881 },
171
            { 1, 0, 0, 0 } },
172
        { { 0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984 },
173
            { 0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369 },
174
            { 1, 0, 0, 0 } },
175
        { { 0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3 },
176
            { 0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60 },
177
            { 1, 0, 0, 0 } },
178
        { { 0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057 },
179
            { 0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9 },
180
            { 1, 0, 0, 0 } },
181
        { { 0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9 },
182
            { 0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc },
183
            { 1, 0, 0, 0 } },
184
        { { 0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58 },
185
            { 0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558 },
186
            { 1, 0, 0, 0 } } },
187
    { { { 0, 0, 0, 0 },
188
          { 0, 0, 0, 0 },
189
          { 0, 0, 0, 0 } },
190
        { { 0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31 },
191
            { 0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d },
192
            { 1, 0, 0, 0 } },
193
        { { 0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3 },
194
            { 0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a },
195
            { 1, 0, 0, 0 } },
196
        { { 0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33 },
197
            { 0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100 },
198
            { 1, 0, 0, 0 } },
199
        { { 0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5 },
200
            { 0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea },
201
            { 1, 0, 0, 0 } },
202
        { { 0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be },
203
            { 0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51 },
204
            { 1, 0, 0, 0 } },
205
        { { 0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1 },
206
            { 0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb },
207
            { 1, 0, 0, 0 } },
208
        { { 0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233 },
209
            { 0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def },
210
            { 1, 0, 0, 0 } },
211
        { { 0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae },
212
            { 0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45 },
213
            { 1, 0, 0, 0 } },
214
        { { 0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e },
215
            { 0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb },
216
            { 1, 0, 0, 0 } },
217
        { { 0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de },
218
            { 0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3 },
219
            { 1, 0, 0, 0 } },
220
        { { 0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05 },
221
            { 0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58 },
222
            { 1, 0, 0, 0 } },
223
        { { 0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb },
224
            { 0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0 },
225
            { 1, 0, 0, 0 } },
226
        { { 0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9 },
227
            { 0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea },
228
            { 1, 0, 0, 0 } },
229
        { { 0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba },
230
            { 0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405 },
231
            { 1, 0, 0, 0 } },
232
        { { 0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e },
233
            { 0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e },
234
            { 1, 0, 0, 0 } } }
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
    CRYPTO_RWLOCK *lock;
242
};
243
244
const EC_METHOD *EC_GFp_nistp224_method(void)
245
48.6k
{
246
48.6k
    static const EC_METHOD ret = {
247
48.6k
        EC_FLAGS_DEFAULT_OCT,
248
48.6k
        NID_X9_62_prime_field,
249
48.6k
        ossl_ec_GFp_nistp224_group_init,
250
48.6k
        ossl_ec_GFp_simple_group_finish,
251
48.6k
        ossl_ec_GFp_simple_group_clear_finish,
252
48.6k
        ossl_ec_GFp_nist_group_copy,
253
48.6k
        ossl_ec_GFp_nistp224_group_set_curve,
254
48.6k
        ossl_ec_GFp_simple_group_get_curve,
255
48.6k
        ossl_ec_GFp_simple_group_get_degree,
256
48.6k
        ossl_ec_group_simple_order_bits,
257
48.6k
        ossl_ec_GFp_simple_group_check_discriminant,
258
48.6k
        ossl_ec_GFp_simple_point_init,
259
48.6k
        ossl_ec_GFp_simple_point_finish,
260
48.6k
        ossl_ec_GFp_simple_point_clear_finish,
261
48.6k
        ossl_ec_GFp_simple_point_copy,
262
48.6k
        ossl_ec_GFp_simple_point_set_to_infinity,
263
48.6k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
264
48.6k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
265
48.6k
        0 /* point_set_compressed_coordinates */,
266
48.6k
        0 /* point2oct */,
267
48.6k
        0 /* oct2point */,
268
48.6k
        ossl_ec_GFp_simple_add,
269
48.6k
        ossl_ec_GFp_simple_dbl,
270
48.6k
        ossl_ec_GFp_simple_invert,
271
48.6k
        ossl_ec_GFp_simple_is_at_infinity,
272
48.6k
        ossl_ec_GFp_simple_is_on_curve,
273
48.6k
        ossl_ec_GFp_simple_cmp,
274
48.6k
        ossl_ec_GFp_simple_make_affine,
275
48.6k
        ossl_ec_GFp_simple_points_make_affine,
276
48.6k
        ossl_ec_GFp_nistp224_points_mul,
277
48.6k
        ossl_ec_GFp_nistp224_precompute_mult,
278
48.6k
        ossl_ec_GFp_nistp224_have_precompute_mult,
279
48.6k
        ossl_ec_GFp_nist_field_mul,
280
48.6k
        ossl_ec_GFp_nist_field_sqr,
281
48.6k
        0 /* field_div */,
282
48.6k
        ossl_ec_GFp_simple_field_inv,
283
48.6k
        0 /* field_encode */,
284
48.6k
        0 /* field_decode */,
285
48.6k
        0, /* field_set_to_one */
286
48.6k
        ossl_ec_key_simple_priv2oct,
287
48.6k
        ossl_ec_key_simple_oct2priv,
288
48.6k
        0, /* set private */
289
48.6k
        ossl_ec_key_simple_generate_key,
290
48.6k
        ossl_ec_key_simple_check_key,
291
48.6k
        ossl_ec_key_simple_generate_public_key,
292
48.6k
        0, /* keycopy */
293
48.6k
        0, /* keyfinish */
294
48.6k
        ossl_ecdh_simple_compute_key,
295
48.6k
        ossl_ecdsa_simple_sign_setup,
296
48.6k
        ossl_ecdsa_simple_sign_sig,
297
48.6k
        ossl_ecdsa_simple_verify_sig,
298
48.6k
        0, /* field_inverse_mod_ord */
299
48.6k
        0, /* blind_coordinates */
300
48.6k
        0, /* ladder_pre */
301
48.6k
        0, /* ladder_step */
302
48.6k
        0 /* ladder_post */
303
48.6k
    };
304
305
48.6k
    return &ret;
306
48.6k
}
307
308
/*
309
 * Helper functions to convert field elements to/from internal representation
310
 */
311
static void bin28_to_felem(felem out, const u8 in[28])
312
19.2k
{
313
19.2k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
314
19.2k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
315
19.2k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
316
19.2k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
317
19.2k
}
318
319
static void felem_to_bin28(u8 out[28], const felem in)
320
29.2k
{
321
29.2k
    unsigned i;
322
234k
    for (i = 0; i < 7; ++i) {
323
204k
        out[i] = in[0] >> (8 * i);
324
204k
        out[i + 7] = in[1] >> (8 * i);
325
204k
        out[i + 14] = in[2] >> (8 * i);
326
204k
        out[i + 21] = in[3] >> (8 * i);
327
204k
    }
328
29.2k
}
329
330
/* From OpenSSL BIGNUM to internal representation */
331
static int BN_to_felem(felem out, const BIGNUM *bn)
332
19.2k
{
333
19.2k
    felem_bytearray b_out;
334
19.2k
    int num_bytes;
335
336
19.2k
    if (BN_is_negative(bn)) {
337
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
338
0
        return 0;
339
0
    }
340
19.2k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
341
19.2k
    if (num_bytes < 0) {
342
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
343
0
        return 0;
344
0
    }
345
19.2k
    bin28_to_felem(out, b_out);
346
19.2k
    return 1;
347
19.2k
}
348
349
/* From internal representation to OpenSSL BIGNUM */
350
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
351
29.2k
{
352
29.2k
    felem_bytearray b_out;
353
29.2k
    felem_to_bin28(b_out, in);
354
29.2k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
355
29.2k
}
356
357
/******************************************************************************/
358
/*-
359
 *                              FIELD OPERATIONS
360
 *
361
 * Field operations, using the internal representation of field elements.
362
 * NB! These operations are specific to our point multiplication and cannot be
363
 * expected to be correct in general - e.g., multiplication with a large scalar
364
 * will cause an overflow.
365
 *
366
 */
367
368
static void felem_one(felem out)
369
0
{
370
0
    out[0] = 1;
371
0
    out[1] = 0;
372
0
    out[2] = 0;
373
0
    out[3] = 0;
374
0
}
375
376
static void felem_assign(felem out, const felem in)
377
1.84M
{
378
1.84M
    out[0] = in[0];
379
1.84M
    out[1] = in[1];
380
1.84M
    out[2] = in[2];
381
1.84M
    out[3] = in[3];
382
1.84M
}
383
384
/* Sum two field elements: out += in */
385
static void felem_sum(felem out, const felem in)
386
430k
{
387
430k
    out[0] += in[0];
388
430k
    out[1] += in[1];
389
430k
    out[2] += in[2];
390
430k
    out[3] += in[3];
391
430k
}
392
393
/* Subtract field elements: out -= in */
394
/* Assumes in[i] < 2^57 */
395
static void felem_diff(felem out, const felem in)
396
450k
{
397
450k
    static const limb two58p2 = (((limb)1) << 58) + (((limb)1) << 2);
398
450k
    static const limb two58m2 = (((limb)1) << 58) - (((limb)1) << 2);
399
450k
    static const limb two58m42m2 = (((limb)1) << 58) - (((limb)1) << 42) - (((limb)1) << 2);
400
401
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
402
450k
    out[0] += two58p2;
403
450k
    out[1] += two58m42m2;
404
450k
    out[2] += two58m2;
405
450k
    out[3] += two58m2;
406
407
450k
    out[0] -= in[0];
408
450k
    out[1] -= in[1];
409
450k
    out[2] -= in[2];
410
450k
    out[3] -= in[3];
411
450k
}
412
413
/* Subtract in unreduced 128-bit mode: out -= in */
414
/* Assumes in[i] < 2^119 */
415
static void widefelem_diff(widefelem out, const widefelem in)
416
307k
{
417
307k
    static const widelimb two120 = ((widelimb)1) << 120;
418
307k
    static const widelimb two120m64 = (((widelimb)1) << 120) - (((widelimb)1) << 64);
419
307k
    static const widelimb two120m104m64 = (((widelimb)1) << 120) - (((widelimb)1) << 104) - (((widelimb)1) << 64);
420
421
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
422
307k
    out[0] += two120;
423
307k
    out[1] += two120m64;
424
307k
    out[2] += two120m64;
425
307k
    out[3] += two120;
426
307k
    out[4] += two120m104m64;
427
307k
    out[5] += two120m64;
428
307k
    out[6] += two120m64;
429
430
307k
    out[0] -= in[0];
431
307k
    out[1] -= in[1];
432
307k
    out[2] -= in[2];
433
307k
    out[3] -= in[3];
434
307k
    out[4] -= in[4];
435
307k
    out[5] -= in[5];
436
307k
    out[6] -= in[6];
437
307k
}
438
439
/* Subtract in mixed mode: out128 -= in64 */
440
/* in[i] < 2^63 */
441
static void felem_diff_128_64(widefelem out, const felem in)
442
955k
{
443
955k
    static const widelimb two64p8 = (((widelimb)1) << 64) + (((widelimb)1) << 8);
444
955k
    static const widelimb two64m8 = (((widelimb)1) << 64) - (((widelimb)1) << 8);
445
955k
    static const widelimb two64m48m8 = (((widelimb)1) << 64) - (((widelimb)1) << 48) - (((widelimb)1) << 8);
446
447
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
448
955k
    out[0] += two64p8;
449
955k
    out[1] += two64m48m8;
450
955k
    out[2] += two64m8;
451
955k
    out[3] += two64m8;
452
453
955k
    out[0] -= in[0];
454
955k
    out[1] -= in[1];
455
955k
    out[2] -= in[2];
456
955k
    out[3] -= in[3];
457
955k
}
458
459
/*
460
 * Multiply a field element by a scalar: out = out * scalar The scalars we
461
 * actually use are small, so results fit without overflow
462
 */
463
static void felem_scalar(felem out, const limb scalar)
464
594k
{
465
594k
    out[0] *= scalar;
466
594k
    out[1] *= scalar;
467
594k
    out[2] *= scalar;
468
594k
    out[3] *= scalar;
469
594k
}
470
471
/*
472
 * Multiply an unreduced field element by a scalar: out = out * scalar The
473
 * scalars we actually use are small, so results fit without overflow
474
 */
475
static void widefelem_scalar(widefelem out, const widelimb scalar)
476
143k
{
477
143k
    out[0] *= scalar;
478
143k
    out[1] *= scalar;
479
143k
    out[2] *= scalar;
480
143k
    out[3] *= scalar;
481
143k
    out[4] *= scalar;
482
143k
    out[5] *= scalar;
483
143k
    out[6] *= scalar;
484
143k
}
485
486
/* Square a field element: out = in^2 */
487
static void felem_square(widefelem out, const felem in)
488
2.59M
{
489
2.59M
    limb tmp0, tmp1, tmp2;
490
2.59M
    tmp0 = 2 * in[0];
491
2.59M
    tmp1 = 2 * in[1];
492
2.59M
    tmp2 = 2 * in[2];
493
2.59M
    out[0] = ((widelimb)in[0]) * in[0];
494
2.59M
    out[1] = ((widelimb)in[0]) * tmp1;
495
2.59M
    out[2] = ((widelimb)in[0]) * tmp2 + ((widelimb)in[1]) * in[1];
496
2.59M
    out[3] = ((widelimb)in[3]) * tmp0 + ((widelimb)in[1]) * tmp2;
497
2.59M
    out[4] = ((widelimb)in[3]) * tmp1 + ((widelimb)in[2]) * in[2];
498
2.59M
    out[5] = ((widelimb)in[3]) * tmp2;
499
2.59M
    out[6] = ((widelimb)in[3]) * in[3];
500
2.59M
}
501
502
/* Multiply two field elements: out = in1 * in2 */
503
static void felem_mul(widefelem out, const felem in1, const felem in2)
504
1.88M
{
505
1.88M
    out[0] = ((widelimb)in1[0]) * in2[0];
506
1.88M
    out[1] = ((widelimb)in1[0]) * in2[1] + ((widelimb)in1[1]) * in2[0];
507
1.88M
    out[2] = ((widelimb)in1[0]) * in2[2] + ((widelimb)in1[1]) * in2[1] + ((widelimb)in1[2]) * in2[0];
508
1.88M
    out[3] = ((widelimb)in1[0]) * in2[3] + ((widelimb)in1[1]) * in2[2] + ((widelimb)in1[2]) * in2[1] + ((widelimb)in1[3]) * in2[0];
509
1.88M
    out[4] = ((widelimb)in1[1]) * in2[3] + ((widelimb)in1[2]) * in2[2] + ((widelimb)in1[3]) * in2[1];
510
1.88M
    out[5] = ((widelimb)in1[2]) * in2[3] + ((widelimb)in1[3]) * in2[2];
511
1.88M
    out[6] = ((widelimb)in1[3]) * in2[3];
512
1.88M
}
513
514
/*-
515
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
516
 * Requires in[i] < 2^126,
517
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
518
static void felem_reduce(felem out, const widefelem in)
519
4.19M
{
520
4.19M
    static const widelimb two127p15 = (((widelimb)1) << 127) + (((widelimb)1) << 15);
521
4.19M
    static const widelimb two127m71 = (((widelimb)1) << 127) - (((widelimb)1) << 71);
522
4.19M
    static const widelimb two127m71m55 = (((widelimb)1) << 127) - (((widelimb)1) << 71) - (((widelimb)1) << 55);
523
4.19M
    widelimb output[5];
524
525
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
526
4.19M
    output[0] = in[0] + two127p15;
527
4.19M
    output[1] = in[1] + two127m71m55;
528
4.19M
    output[2] = in[2] + two127m71;
529
4.19M
    output[3] = in[3];
530
4.19M
    output[4] = in[4];
531
532
    /* Eliminate in[4], in[5], in[6] */
533
4.19M
    output[4] += in[6] >> 16;
534
4.19M
    output[3] += (in[6] & 0xffff) << 40;
535
4.19M
    output[2] -= in[6];
536
537
4.19M
    output[3] += in[5] >> 16;
538
4.19M
    output[2] += (in[5] & 0xffff) << 40;
539
4.19M
    output[1] -= in[5];
540
541
4.19M
    output[2] += output[4] >> 16;
542
4.19M
    output[1] += (output[4] & 0xffff) << 40;
543
4.19M
    output[0] -= output[4];
544
545
    /* Carry 2 -> 3 -> 4 */
546
4.19M
    output[3] += output[2] >> 56;
547
4.19M
    output[2] &= 0x00ffffffffffffff;
548
549
4.19M
    output[4] = output[3] >> 56;
550
4.19M
    output[3] &= 0x00ffffffffffffff;
551
552
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
553
554
    /* Eliminate output[4] */
555
4.19M
    output[2] += output[4] >> 16;
556
    /* output[2] < 2^56 + 2^56 = 2^57 */
557
4.19M
    output[1] += (output[4] & 0xffff) << 40;
558
4.19M
    output[0] -= output[4];
559
560
    /* Carry 0 -> 1 -> 2 -> 3 */
561
4.19M
    output[1] += output[0] >> 56;
562
4.19M
    out[0] = output[0] & 0x00ffffffffffffff;
563
564
4.19M
    output[2] += output[1] >> 56;
565
    /* output[2] < 2^57 + 2^72 */
566
4.19M
    out[1] = output[1] & 0x00ffffffffffffff;
567
4.19M
    output[3] += output[2] >> 56;
568
    /* output[3] <= 2^56 + 2^16 */
569
4.19M
    out[2] = output[2] & 0x00ffffffffffffff;
570
571
    /*-
572
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
573
     * out[3] <= 2^56 + 2^16 (due to final carry),
574
     * so out < 2*p
575
     */
576
4.19M
    out[3] = output[3];
577
4.19M
}
578
579
static void felem_square_reduce(felem out, const felem in)
580
0
{
581
0
    widefelem tmp;
582
0
    felem_square(tmp, in);
583
0
    felem_reduce(out, tmp);
584
0
}
585
586
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
587
0
{
588
0
    widefelem tmp;
589
0
    felem_mul(tmp, in1, in2);
590
0
    felem_reduce(out, tmp);
591
0
}
592
593
/*
594
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
595
 * call felem_reduce first)
596
 */
597
static void felem_contract(felem out, const felem in)
598
21.2k
{
599
21.2k
    static const int64_t two56 = ((limb)1) << 56;
600
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
601
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
602
21.2k
    int64_t tmp[4], a;
603
21.2k
    tmp[0] = in[0];
604
21.2k
    tmp[1] = in[1];
605
21.2k
    tmp[2] = in[2];
606
21.2k
    tmp[3] = in[3];
607
    /* Case 1: a = 1 iff in >= 2^224 */
608
21.2k
    a = (in[3] >> 56);
609
21.2k
    tmp[0] -= a;
610
21.2k
    tmp[1] += a << 40;
611
21.2k
    tmp[3] &= 0x00ffffffffffffff;
612
    /*
613
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
614
     * and the lower part is non-zero
615
     */
616
21.2k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
617
21.2k
    a &= 0x00ffffffffffffff;
618
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
619
21.2k
    a = (a - 1) >> 63;
620
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
621
21.2k
    tmp[3] &= a ^ 0xffffffffffffffff;
622
21.2k
    tmp[2] &= a ^ 0xffffffffffffffff;
623
21.2k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
624
21.2k
    tmp[0] -= 1 & a;
625
626
    /*
627
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
628
     * non-zero, so we only need one step
629
     */
630
21.2k
    a = tmp[0] >> 63;
631
21.2k
    tmp[0] += two56 & a;
632
21.2k
    tmp[1] -= 1 & a;
633
634
    /* carry 1 -> 2 -> 3 */
635
21.2k
    tmp[2] += tmp[1] >> 56;
636
21.2k
    tmp[1] &= 0x00ffffffffffffff;
637
638
21.2k
    tmp[3] += tmp[2] >> 56;
639
21.2k
    tmp[2] &= 0x00ffffffffffffff;
640
641
    /* Now 0 <= out < p */
642
21.2k
    out[0] = tmp[0];
643
21.2k
    out[1] = tmp[1];
644
21.2k
    out[2] = tmp[2];
645
21.2k
    out[3] = tmp[3];
646
21.2k
}
647
648
/*
649
 * Get negative value: out = -in
650
 * Requires in[i] < 2^63,
651
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
652
 */
653
static void felem_neg(felem out, const felem in)
654
14.0k
{
655
14.0k
    widefelem tmp;
656
657
14.0k
    memset(tmp, 0, sizeof(tmp));
658
14.0k
    felem_diff_128_64(tmp, in);
659
14.0k
    felem_reduce(out, tmp);
660
14.0k
}
661
662
/*
663
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
664
 * elements are reduced to in < 2^225, so we only need to check three cases:
665
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
666
 */
667
static limb felem_is_zero(const felem in)
668
654k
{
669
654k
    limb zero, two224m96p1, two225m97p2;
670
671
654k
    zero = in[0] | in[1] | in[2] | in[3];
672
654k
    zero = (((int64_t)(zero)-1) >> 63) & 1;
673
654k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
674
654k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
675
654k
    two224m96p1 = (((int64_t)(two224m96p1)-1) >> 63) & 1;
676
654k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
677
654k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
678
654k
    two225m97p2 = (((int64_t)(two225m97p2)-1) >> 63) & 1;
679
654k
    return (zero | two224m96p1 | two225m97p2);
680
654k
}
681
682
static int felem_is_zero_int(const void *in)
683
0
{
684
0
    return (int)(felem_is_zero(in) & ((limb)1));
685
0
}
686
687
/* Invert a field element */
688
/* Computation chain copied from djb's code */
689
static void felem_inv(felem out, const felem in)
690
6.11k
{
691
6.11k
    felem ftmp, ftmp2, ftmp3, ftmp4;
692
6.11k
    widefelem tmp;
693
6.11k
    unsigned i;
694
695
6.11k
    felem_square(tmp, in);
696
6.11k
    felem_reduce(ftmp, tmp); /* 2 */
697
6.11k
    felem_mul(tmp, in, ftmp);
698
6.11k
    felem_reduce(ftmp, tmp); /* 2^2 - 1 */
699
6.11k
    felem_square(tmp, ftmp);
700
6.11k
    felem_reduce(ftmp, tmp); /* 2^3 - 2 */
701
6.11k
    felem_mul(tmp, in, ftmp);
702
6.11k
    felem_reduce(ftmp, tmp); /* 2^3 - 1 */
703
6.11k
    felem_square(tmp, ftmp);
704
6.11k
    felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
705
6.11k
    felem_square(tmp, ftmp2);
706
6.11k
    felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
707
6.11k
    felem_square(tmp, ftmp2);
708
6.11k
    felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
709
6.11k
    felem_mul(tmp, ftmp2, ftmp);
710
6.11k
    felem_reduce(ftmp, tmp); /* 2^6 - 1 */
711
6.11k
    felem_square(tmp, ftmp);
712
6.11k
    felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
713
36.6k
    for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
714
30.5k
        felem_square(tmp, ftmp2);
715
30.5k
        felem_reduce(ftmp2, tmp);
716
30.5k
    }
717
6.11k
    felem_mul(tmp, ftmp2, ftmp);
718
6.11k
    felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
719
6.11k
    felem_square(tmp, ftmp2);
720
6.11k
    felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
721
73.3k
    for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
722
67.2k
        felem_square(tmp, ftmp3);
723
67.2k
        felem_reduce(ftmp3, tmp);
724
67.2k
    }
725
6.11k
    felem_mul(tmp, ftmp3, ftmp2);
726
6.11k
    felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
727
6.11k
    felem_square(tmp, ftmp2);
728
6.11k
    felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
729
146k
    for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
730
140k
        felem_square(tmp, ftmp3);
731
140k
        felem_reduce(ftmp3, tmp);
732
140k
    }
733
6.11k
    felem_mul(tmp, ftmp3, ftmp2);
734
6.11k
    felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
735
6.11k
    felem_square(tmp, ftmp3);
736
6.11k
    felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
737
293k
    for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
738
287k
        felem_square(tmp, ftmp4);
739
287k
        felem_reduce(ftmp4, tmp);
740
287k
    }
741
6.11k
    felem_mul(tmp, ftmp3, ftmp4);
742
6.11k
    felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
743
6.11k
    felem_square(tmp, ftmp3);
744
6.11k
    felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
745
146k
    for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
746
140k
        felem_square(tmp, ftmp4);
747
140k
        felem_reduce(ftmp4, tmp);
748
140k
    }
749
6.11k
    felem_mul(tmp, ftmp2, ftmp4);
750
6.11k
    felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
751
42.7k
    for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
752
36.6k
        felem_square(tmp, ftmp2);
753
36.6k
        felem_reduce(ftmp2, tmp);
754
36.6k
    }
755
6.11k
    felem_mul(tmp, ftmp2, ftmp);
756
6.11k
    felem_reduce(ftmp, tmp); /* 2^126 - 1 */
757
6.11k
    felem_square(tmp, ftmp);
758
6.11k
    felem_reduce(ftmp, tmp); /* 2^127 - 2 */
759
6.11k
    felem_mul(tmp, ftmp, in);
760
6.11k
    felem_reduce(ftmp, tmp); /* 2^127 - 1 */
761
598k
    for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
762
592k
        felem_square(tmp, ftmp);
763
592k
        felem_reduce(ftmp, tmp);
764
592k
    }
765
6.11k
    felem_mul(tmp, ftmp, ftmp3);
766
6.11k
    felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
767
6.11k
}
768
769
/*
770
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
771
 * out to itself.
772
 */
773
static void copy_conditional(felem out, const felem in, limb icopy)
774
995k
{
775
995k
    unsigned i;
776
    /*
777
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
778
     */
779
995k
    const limb copy = -icopy;
780
4.97M
    for (i = 0; i < 4; ++i) {
781
3.98M
        const limb tmp = copy & (in[i] ^ out[i]);
782
3.98M
        out[i] ^= tmp;
783
3.98M
    }
784
995k
}
785
786
/******************************************************************************/
787
/*-
788
 *                       ELLIPTIC CURVE POINT OPERATIONS
789
 *
790
 * Points are represented in Jacobian projective coordinates:
791
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
792
 * or to the point at infinity if Z == 0.
793
 *
794
 */
795
796
/*-
797
 * Double an elliptic curve point:
798
 * (X', Y', Z') = 2 * (X, Y, Z), where
799
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
800
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
801
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
802
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
803
 * while x_out == y_in is not (maybe this works, but it's not tested).
804
 */
805
static void
806
point_double(felem x_out, felem y_out, felem z_out,
807
    const felem x_in, const felem y_in, const felem z_in)
808
143k
{
809
143k
    widefelem tmp, tmp2;
810
143k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
811
812
143k
    felem_assign(ftmp, x_in);
813
143k
    felem_assign(ftmp2, x_in);
814
815
    /* delta = z^2 */
816
143k
    felem_square(tmp, z_in);
817
143k
    felem_reduce(delta, tmp);
818
819
    /* gamma = y^2 */
820
143k
    felem_square(tmp, y_in);
821
143k
    felem_reduce(gamma, tmp);
822
823
    /* beta = x*gamma */
824
143k
    felem_mul(tmp, x_in, gamma);
825
143k
    felem_reduce(beta, tmp);
826
827
    /* alpha = 3*(x-delta)*(x+delta) */
828
143k
    felem_diff(ftmp, delta);
829
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
830
143k
    felem_sum(ftmp2, delta);
831
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
832
143k
    felem_scalar(ftmp2, 3);
833
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
834
143k
    felem_mul(tmp, ftmp, ftmp2);
835
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
836
143k
    felem_reduce(alpha, tmp);
837
838
    /* x' = alpha^2 - 8*beta */
839
143k
    felem_square(tmp, alpha);
840
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
841
143k
    felem_assign(ftmp, beta);
842
143k
    felem_scalar(ftmp, 8);
843
    /* ftmp[i] < 8 * 2^57 = 2^60 */
844
143k
    felem_diff_128_64(tmp, ftmp);
845
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
846
143k
    felem_reduce(x_out, tmp);
847
848
    /* z' = (y + z)^2 - gamma - delta */
849
143k
    felem_sum(delta, gamma);
850
    /* delta[i] < 2^57 + 2^57 = 2^58 */
851
143k
    felem_assign(ftmp, y_in);
852
143k
    felem_sum(ftmp, z_in);
853
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
854
143k
    felem_square(tmp, ftmp);
855
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
856
143k
    felem_diff_128_64(tmp, delta);
857
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
858
143k
    felem_reduce(z_out, tmp);
859
860
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
861
143k
    felem_scalar(beta, 4);
862
    /* beta[i] < 4 * 2^57 = 2^59 */
863
143k
    felem_diff(beta, x_out);
864
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
865
143k
    felem_mul(tmp, alpha, beta);
866
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
867
143k
    felem_square(tmp2, gamma);
868
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
869
143k
    widefelem_scalar(tmp2, 8);
870
    /* tmp2[i] < 8 * 2^116 = 2^119 */
871
143k
    widefelem_diff(tmp, tmp2);
872
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
873
143k
    felem_reduce(y_out, tmp);
874
143k
}
875
876
/*-
877
 * Add two elliptic curve points:
878
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
879
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
880
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
881
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
882
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
883
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
884
 *
885
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
886
 */
887
888
/*
889
 * This function is not entirely constant-time: it includes a branch for
890
 * checking whether the two input points are equal, (while not equal to the
891
 * point at infinity). This case never happens during single point
892
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
893
 */
894
static void point_add(felem x3, felem y3, felem z3,
895
    const felem x1, const felem y1, const felem z1,
896
    const int mixed, const felem x2, const felem y2,
897
    const felem z2)
898
163k
{
899
163k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
900
163k
    widefelem tmp, tmp2;
901
163k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
902
163k
    limb points_equal;
903
904
163k
    if (!mixed) {
905
        /* ftmp2 = z2^2 */
906
15.9k
        felem_square(tmp, z2);
907
15.9k
        felem_reduce(ftmp2, tmp);
908
909
        /* ftmp4 = z2^3 */
910
15.9k
        felem_mul(tmp, ftmp2, z2);
911
15.9k
        felem_reduce(ftmp4, tmp);
912
913
        /* ftmp4 = z2^3*y1 */
914
15.9k
        felem_mul(tmp2, ftmp4, y1);
915
15.9k
        felem_reduce(ftmp4, tmp2);
916
917
        /* ftmp2 = z2^2*x1 */
918
15.9k
        felem_mul(tmp2, ftmp2, x1);
919
15.9k
        felem_reduce(ftmp2, tmp2);
920
147k
    } else {
921
        /*
922
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
923
         */
924
925
        /* ftmp4 = z2^3*y1 */
926
147k
        felem_assign(ftmp4, y1);
927
928
        /* ftmp2 = z2^2*x1 */
929
147k
        felem_assign(ftmp2, x1);
930
147k
    }
931
932
    /* ftmp = z1^2 */
933
163k
    felem_square(tmp, z1);
934
163k
    felem_reduce(ftmp, tmp);
935
936
    /* ftmp3 = z1^3 */
937
163k
    felem_mul(tmp, ftmp, z1);
938
163k
    felem_reduce(ftmp3, tmp);
939
940
    /* tmp = z1^3*y2 */
941
163k
    felem_mul(tmp, ftmp3, y2);
942
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
943
944
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
945
163k
    felem_diff_128_64(tmp, ftmp4);
946
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
947
163k
    felem_reduce(ftmp3, tmp);
948
949
    /* tmp = z1^2*x2 */
950
163k
    felem_mul(tmp, ftmp, x2);
951
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
952
953
    /* ftmp = z1^2*x2 - z2^2*x1 */
954
163k
    felem_diff_128_64(tmp, ftmp2);
955
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
956
163k
    felem_reduce(ftmp, tmp);
957
958
    /*
959
     * The formulae are incorrect if the points are equal, in affine coordinates
960
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
961
     * happens.
962
     *
963
     * We use bitwise operations to avoid potential side-channels introduced by
964
     * the short-circuiting behaviour of boolean operators.
965
     */
966
163k
    x_equal = felem_is_zero(ftmp);
967
163k
    y_equal = felem_is_zero(ftmp3);
968
    /*
969
     * The special case of either point being the point at infinity (z1 and/or
970
     * z2 are zero), is handled separately later on in this function, so we
971
     * avoid jumping to point_double here in those special cases.
972
     */
973
163k
    z1_is_zero = felem_is_zero(z1);
974
163k
    z2_is_zero = felem_is_zero(z2);
975
976
    /*
977
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
978
     * specific implementation `felem_is_zero()` returns truth as `0x1`
979
     * (rather than `0xff..ff`).
980
     *
981
     * This implies that `~true` in this implementation becomes
982
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
983
     * the if expression, we mask out only the last bit in the next
984
     * line.
985
     */
986
163k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
987
988
163k
    if (points_equal) {
989
        /*
990
         * This is obviously not constant-time but, as mentioned before, this
991
         * case never happens during single point multiplication, so there is no
992
         * timing leak for ECDH or ECDSA signing.
993
         */
994
0
        point_double(x3, y3, z3, x1, y1, z1);
995
0
        return;
996
0
    }
997
998
    /* ftmp5 = z1*z2 */
999
163k
    if (!mixed) {
1000
15.9k
        felem_mul(tmp, z1, z2);
1001
15.9k
        felem_reduce(ftmp5, tmp);
1002
147k
    } else {
1003
        /* special case z2 = 0 is handled later */
1004
147k
        felem_assign(ftmp5, z1);
1005
147k
    }
1006
1007
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1008
163k
    felem_mul(tmp, ftmp, ftmp5);
1009
163k
    felem_reduce(z_out, tmp);
1010
1011
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1012
163k
    felem_assign(ftmp5, ftmp);
1013
163k
    felem_square(tmp, ftmp);
1014
163k
    felem_reduce(ftmp, tmp);
1015
1016
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1017
163k
    felem_mul(tmp, ftmp, ftmp5);
1018
163k
    felem_reduce(ftmp5, tmp);
1019
1020
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1021
163k
    felem_mul(tmp, ftmp2, ftmp);
1022
163k
    felem_reduce(ftmp2, tmp);
1023
1024
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1025
163k
    felem_mul(tmp, ftmp4, ftmp5);
1026
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1027
1028
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1029
163k
    felem_square(tmp2, ftmp3);
1030
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1031
1032
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1033
163k
    felem_diff_128_64(tmp2, ftmp5);
1034
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1035
1036
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1037
163k
    felem_assign(ftmp5, ftmp2);
1038
163k
    felem_scalar(ftmp5, 2);
1039
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1040
1041
    /*-
1042
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1043
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1044
     */
1045
163k
    felem_diff_128_64(tmp2, ftmp5);
1046
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1047
163k
    felem_reduce(x_out, tmp2);
1048
1049
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1050
163k
    felem_diff(ftmp2, x_out);
1051
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1052
1053
    /*
1054
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1055
     */
1056
163k
    felem_mul(tmp2, ftmp3, ftmp2);
1057
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1058
1059
    /*-
1060
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1061
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1062
     */
1063
163k
    widefelem_diff(tmp2, tmp);
1064
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1065
163k
    felem_reduce(y_out, tmp2);
1066
1067
    /*
1068
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1069
     * the point at infinity, so we need to check for this separately
1070
     */
1071
1072
    /*
1073
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1074
     */
1075
163k
    copy_conditional(x_out, x2, z1_is_zero);
1076
163k
    copy_conditional(x_out, x1, z2_is_zero);
1077
163k
    copy_conditional(y_out, y2, z1_is_zero);
1078
163k
    copy_conditional(y_out, y1, z2_is_zero);
1079
163k
    copy_conditional(z_out, z2, z1_is_zero);
1080
163k
    copy_conditional(z_out, z1, z2_is_zero);
1081
163k
    felem_assign(x3, x_out);
1082
163k
    felem_assign(y3, y_out);
1083
163k
    felem_assign(z3, z_out);
1084
163k
}
1085
1086
/*
1087
 * select_point selects the |idx|th point from a precomputation table and
1088
 * copies it to out.
1089
 * The pre_comp array argument should be size of |size| argument
1090
 */
1091
static void select_point(const u64 idx, unsigned int size,
1092
    const felem pre_comp[][3], felem out[3])
1093
164k
{
1094
164k
    unsigned i, j;
1095
164k
    limb *outlimbs = &out[0][0];
1096
1097
164k
    memset(out, 0, sizeof(*out) * 3);
1098
2.80M
    for (i = 0; i < size; i++) {
1099
2.64M
        const limb *inlimbs = &pre_comp[i][0][0];
1100
2.64M
        u64 mask = i ^ idx;
1101
2.64M
        mask |= mask >> 4;
1102
2.64M
        mask |= mask >> 2;
1103
2.64M
        mask |= mask >> 1;
1104
2.64M
        mask &= 1;
1105
2.64M
        mask--;
1106
34.3M
        for (j = 0; j < 4 * 3; j++)
1107
31.7M
            outlimbs[j] |= inlimbs[j] & mask;
1108
2.64M
    }
1109
164k
}
1110
1111
/* get_bit returns the |i|th bit in |in| */
1112
static char get_bit(const felem_bytearray in, unsigned i)
1113
685k
{
1114
685k
    if (i >= 224)
1115
624
        return 0;
1116
685k
    return (in[i >> 3] >> (i & 7)) & 1;
1117
685k
}
1118
1119
/*
1120
 * Interleaved point multiplication using precomputed point multiples: The
1121
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1122
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1123
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1124
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1125
 */
1126
static void batch_mul(felem x_out, felem y_out, felem z_out,
1127
    const felem_bytearray scalars[],
1128
    const unsigned num_points, const u8 *g_scalar,
1129
    const int mixed, const felem pre_comp[][17][3],
1130
    const felem g_pre_comp[2][16][3])
1131
2.99k
{
1132
2.99k
    int i, skip;
1133
2.99k
    unsigned num;
1134
2.99k
    unsigned gen_mul = (g_scalar != NULL);
1135
2.99k
    felem nq[3], tmp[4];
1136
2.99k
    u64 bits;
1137
2.99k
    u8 sign, digit;
1138
1139
    /* set nq to the point at infinity */
1140
2.99k
    memset(nq, 0, sizeof(nq));
1141
1142
    /*
1143
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1144
     * of the generator (two in each of the last 28 rounds) and additions of
1145
     * other points multiples (every 5th round).
1146
     */
1147
2.99k
    skip = 1; /* save two point operations in the first
1148
               * round */
1149
147k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1150
        /* double */
1151
144k
        if (!skip)
1152
141k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1153
1154
        /* add multiples of the generator */
1155
144k
        if (gen_mul && (i <= 27)) {
1156
            /* first, look 28 bits upwards */
1157
75.2k
            bits = get_bit(g_scalar, i + 196) << 3;
1158
75.2k
            bits |= get_bit(g_scalar, i + 140) << 2;
1159
75.2k
            bits |= get_bit(g_scalar, i + 84) << 1;
1160
75.2k
            bits |= get_bit(g_scalar, i + 28);
1161
            /* select the point to add, in constant time */
1162
75.2k
            select_point(bits, 16, g_pre_comp[1], tmp);
1163
1164
75.2k
            if (!skip) {
1165
                /* value 1 below is argument for "mixed" */
1166
72.5k
                point_add(nq[0], nq[1], nq[2],
1167
72.5k
                    nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1168
72.5k
            } else {
1169
2.68k
                memcpy(nq, tmp, 3 * sizeof(felem));
1170
2.68k
                skip = 0;
1171
2.68k
            }
1172
1173
            /* second, look at the current position */
1174
75.2k
            bits = get_bit(g_scalar, i + 168) << 3;
1175
75.2k
            bits |= get_bit(g_scalar, i + 112) << 2;
1176
75.2k
            bits |= get_bit(g_scalar, i + 56) << 1;
1177
75.2k
            bits |= get_bit(g_scalar, i);
1178
            /* select the point to add, in constant time */
1179
75.2k
            select_point(bits, 16, g_pre_comp[0], tmp);
1180
75.2k
            point_add(nq[0], nq[1], nq[2],
1181
75.2k
                nq[0], nq[1], nq[2],
1182
75.2k
                1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1183
75.2k
        }
1184
1185
        /* do other additions every 5 doublings */
1186
144k
        if (num_points && (i % 5 == 0)) {
1187
            /* loop over all scalars */
1188
28.0k
            for (num = 0; num < num_points; ++num) {
1189
14.0k
                bits = get_bit(scalars[num], i + 4) << 5;
1190
14.0k
                bits |= get_bit(scalars[num], i + 3) << 4;
1191
14.0k
                bits |= get_bit(scalars[num], i + 2) << 3;
1192
14.0k
                bits |= get_bit(scalars[num], i + 1) << 2;
1193
14.0k
                bits |= get_bit(scalars[num], i) << 1;
1194
14.0k
                bits |= get_bit(scalars[num], i - 1);
1195
14.0k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1196
1197
                /* select the point to add or subtract */
1198
14.0k
                select_point(digit, 17, pre_comp[num], tmp);
1199
14.0k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1200
                                            * point */
1201
14.0k
                copy_conditional(tmp[1], tmp[3], sign);
1202
1203
14.0k
                if (!skip) {
1204
13.7k
                    point_add(nq[0], nq[1], nq[2],
1205
13.7k
                        nq[0], nq[1], nq[2],
1206
13.7k
                        mixed, tmp[0], tmp[1], tmp[2]);
1207
13.7k
                } else {
1208
312
                    memcpy(nq, tmp, 3 * sizeof(felem));
1209
312
                    skip = 0;
1210
312
                }
1211
14.0k
            }
1212
14.0k
        }
1213
144k
    }
1214
2.99k
    felem_assign(x_out, nq[0]);
1215
2.99k
    felem_assign(y_out, nq[1]);
1216
2.99k
    felem_assign(z_out, nq[2]);
1217
2.99k
}
1218
1219
/******************************************************************************/
1220
/*
1221
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1222
 */
1223
1224
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1225
0
{
1226
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1227
1228
0
    if (!ret) {
1229
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1230
0
        return ret;
1231
0
    }
1232
1233
0
    ret->references = 1;
1234
1235
0
    ret->lock = CRYPTO_THREAD_lock_new();
1236
0
    if (ret->lock == NULL) {
1237
0
        ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1238
0
        OPENSSL_free(ret);
1239
0
        return NULL;
1240
0
    }
1241
0
    return ret;
1242
0
}
1243
1244
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1245
0
{
1246
0
    int i;
1247
0
    if (p != NULL)
1248
0
        CRYPTO_UP_REF(&p->references, &i, p->lock);
1249
0
    return p;
1250
0
}
1251
1252
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1253
0
{
1254
0
    int i;
1255
1256
0
    if (p == NULL)
1257
0
        return;
1258
1259
0
    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1260
0
    REF_PRINT_COUNT("EC_nistp224", p);
1261
0
    if (i > 0)
1262
0
        return;
1263
0
    REF_ASSERT_ISNT(i < 0);
1264
1265
0
    CRYPTO_THREAD_lock_free(p->lock);
1266
0
    OPENSSL_free(p);
1267
0
}
1268
1269
/******************************************************************************/
1270
/*
1271
 * OPENSSL EC_METHOD FUNCTIONS
1272
 */
1273
1274
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1275
93.1k
{
1276
93.1k
    int ret;
1277
93.1k
    ret = ossl_ec_GFp_simple_group_init(group);
1278
93.1k
    group->a_is_minus3 = 1;
1279
93.1k
    return ret;
1280
93.1k
}
1281
1282
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1283
    const BIGNUM *a, const BIGNUM *b,
1284
    BN_CTX *ctx)
1285
48.6k
{
1286
48.6k
    int ret = 0;
1287
48.6k
    BIGNUM *curve_p, *curve_a, *curve_b;
1288
48.6k
#ifndef FIPS_MODULE
1289
48.6k
    BN_CTX *new_ctx = NULL;
1290
1291
48.6k
    if (ctx == NULL)
1292
0
        ctx = new_ctx = BN_CTX_new();
1293
48.6k
#endif
1294
48.6k
    if (ctx == NULL)
1295
0
        return 0;
1296
1297
48.6k
    BN_CTX_start(ctx);
1298
48.6k
    curve_p = BN_CTX_get(ctx);
1299
48.6k
    curve_a = BN_CTX_get(ctx);
1300
48.6k
    curve_b = BN_CTX_get(ctx);
1301
48.6k
    if (curve_b == NULL)
1302
0
        goto err;
1303
48.6k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1304
48.6k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1305
48.6k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1306
48.6k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1307
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1308
0
        goto err;
1309
0
    }
1310
48.6k
    group->field_mod_func = BN_nist_mod_224;
1311
48.6k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1312
48.6k
err:
1313
48.6k
    BN_CTX_end(ctx);
1314
48.6k
#ifndef FIPS_MODULE
1315
48.6k
    BN_CTX_free(new_ctx);
1316
48.6k
#endif
1317
48.6k
    return ret;
1318
48.6k
}
1319
1320
/*
1321
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1322
 * (X/Z^2, Y/Z^3)
1323
 */
1324
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1325
    const EC_POINT *point,
1326
    BIGNUM *x, BIGNUM *y,
1327
    BN_CTX *ctx)
1328
6.11k
{
1329
6.11k
    felem z1, z2, x_in, y_in, x_out, y_out;
1330
6.11k
    widefelem tmp;
1331
1332
6.11k
    if (EC_POINT_is_at_infinity(group, point)) {
1333
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1334
0
        return 0;
1335
0
    }
1336
6.11k
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) || (!BN_to_felem(z1, point->Z)))
1337
0
        return 0;
1338
6.11k
    felem_inv(z2, z1);
1339
6.11k
    felem_square(tmp, z2);
1340
6.11k
    felem_reduce(z1, tmp);
1341
6.11k
    felem_mul(tmp, x_in, z1);
1342
6.11k
    felem_reduce(x_in, tmp);
1343
6.11k
    felem_contract(x_out, x_in);
1344
6.11k
    if (x != NULL) {
1345
6.11k
        if (!felem_to_BN(x, x_out)) {
1346
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1347
0
            return 0;
1348
0
        }
1349
6.11k
    }
1350
6.11k
    felem_mul(tmp, z1, z2);
1351
6.11k
    felem_reduce(z1, tmp);
1352
6.11k
    felem_mul(tmp, y_in, z1);
1353
6.11k
    felem_reduce(y_in, tmp);
1354
6.11k
    felem_contract(y_out, y_in);
1355
6.11k
    if (y != NULL) {
1356
6.11k
        if (!felem_to_BN(y, y_out)) {
1357
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1358
0
            return 0;
1359
0
        }
1360
6.11k
    }
1361
6.11k
    return 1;
1362
6.11k
}
1363
1364
static void make_points_affine(size_t num, felem points[/* num */][3],
1365
    felem tmp_felems[/* num+1 */])
1366
0
{
1367
    /*
1368
     * Runs in constant time, unless an input is the point at infinity (which
1369
     * normally shouldn't happen).
1370
     */
1371
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1372
0
        points,
1373
0
        sizeof(felem),
1374
0
        tmp_felems,
1375
0
        (void (*)(void *))felem_one,
1376
0
        felem_is_zero_int,
1377
0
        (void (*)(void *, const void *))
1378
0
            felem_assign,
1379
0
        (void (*)(void *, const void *))
1380
0
            felem_square_reduce,
1381
0
        (void (*)(void *,
1382
0
            const void
1383
0
                *,
1384
0
            const void
1385
0
                *))
1386
0
            felem_mul_reduce,
1387
0
        (void (*)(void *, const void *))
1388
0
            felem_inv,
1389
0
        (void (*)(void *, const void *))
1390
0
            felem_contract);
1391
0
}
1392
1393
/*
1394
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1395
 * values Result is stored in r (r can equal one of the inputs).
1396
 */
1397
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1398
    const BIGNUM *scalar, size_t num,
1399
    const EC_POINT *points[],
1400
    const BIGNUM *scalars[], BN_CTX *ctx)
1401
2.99k
{
1402
2.99k
    int ret = 0;
1403
2.99k
    int j;
1404
2.99k
    unsigned i;
1405
2.99k
    int mixed = 0;
1406
2.99k
    BIGNUM *x, *y, *z, *tmp_scalar;
1407
2.99k
    felem_bytearray g_secret;
1408
2.99k
    felem_bytearray *secrets = NULL;
1409
2.99k
    felem(*pre_comp)[17][3] = NULL;
1410
2.99k
    felem *tmp_felems = NULL;
1411
2.99k
    int num_bytes;
1412
2.99k
    int have_pre_comp = 0;
1413
2.99k
    size_t num_points = num;
1414
2.99k
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1415
2.99k
    NISTP224_PRE_COMP *pre = NULL;
1416
2.99k
    const felem(*g_pre_comp)[16][3] = NULL;
1417
2.99k
    EC_POINT *generator = NULL;
1418
2.99k
    const EC_POINT *p = NULL;
1419
2.99k
    const BIGNUM *p_scalar = NULL;
1420
1421
2.99k
    BN_CTX_start(ctx);
1422
2.99k
    x = BN_CTX_get(ctx);
1423
2.99k
    y = BN_CTX_get(ctx);
1424
2.99k
    z = BN_CTX_get(ctx);
1425
2.99k
    tmp_scalar = BN_CTX_get(ctx);
1426
2.99k
    if (tmp_scalar == NULL)
1427
0
        goto err;
1428
1429
2.99k
    if (scalar != NULL) {
1430
2.68k
        pre = group->pre_comp.nistp224;
1431
2.68k
        if (pre)
1432
            /* we have precomputation, try to use it */
1433
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1434
2.68k
        else
1435
            /* try to use the standard precomputation */
1436
2.68k
            g_pre_comp = &gmul[0];
1437
2.68k
        generator = EC_POINT_new(group);
1438
2.68k
        if (generator == NULL)
1439
0
            goto err;
1440
        /* get the generator from precomputation */
1441
2.68k
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) || !felem_to_BN(y, g_pre_comp[0][1][1]) || !felem_to_BN(z, g_pre_comp[0][1][2])) {
1442
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1443
0
            goto err;
1444
0
        }
1445
2.68k
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1446
2.68k
                generator,
1447
2.68k
                x, y, z, ctx))
1448
0
            goto err;
1449
2.68k
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1450
            /* precomputation matches generator */
1451
2.68k
            have_pre_comp = 1;
1452
0
        else
1453
            /*
1454
             * we don't have valid precomputation: treat the generator as a
1455
             * random point
1456
             */
1457
0
            num_points = num_points + 1;
1458
2.68k
    }
1459
1460
2.99k
    if (num_points > 0) {
1461
312
        if (num_points >= 3) {
1462
            /*
1463
             * unless we precompute multiples for just one or two points,
1464
             * converting those into affine form is time well spent
1465
             */
1466
0
            mixed = 1;
1467
0
        }
1468
312
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1469
312
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1470
312
        if (mixed)
1471
0
            tmp_felems = OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1472
312
        if ((secrets == NULL) || (pre_comp == NULL)
1473
312
            || (mixed && (tmp_felems == NULL))) {
1474
0
            ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
1475
0
            goto err;
1476
0
        }
1477
1478
        /*
1479
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1480
         * i.e., they contribute nothing to the linear combination
1481
         */
1482
624
        for (i = 0; i < num_points; ++i) {
1483
312
            if (i == num) {
1484
                /* the generator */
1485
0
                p = EC_GROUP_get0_generator(group);
1486
0
                p_scalar = scalar;
1487
312
            } else {
1488
                /* the i^th point */
1489
312
                p = points[i];
1490
312
                p_scalar = scalars[i];
1491
312
            }
1492
312
            if ((p_scalar != NULL) && (p != NULL)) {
1493
                /* reduce scalar to 0 <= scalar < 2^224 */
1494
312
                if ((BN_num_bits(p_scalar) > 224)
1495
312
                    || (BN_is_negative(p_scalar))) {
1496
                    /*
1497
                     * this is an unusual input, and we don't guarantee
1498
                     * constant-timeness
1499
                     */
1500
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1501
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1502
0
                        goto err;
1503
0
                    }
1504
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1505
0
                        secrets[i], sizeof(secrets[i]));
1506
312
                } else {
1507
312
                    num_bytes = BN_bn2lebinpad(p_scalar,
1508
312
                        secrets[i], sizeof(secrets[i]));
1509
312
                }
1510
312
                if (num_bytes < 0) {
1511
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1512
0
                    goto err;
1513
0
                }
1514
                /* precompute multiples */
1515
312
                if ((!BN_to_felem(x_out, p->X)) || (!BN_to_felem(y_out, p->Y)) || (!BN_to_felem(z_out, p->Z)))
1516
0
                    goto err;
1517
312
                felem_assign(pre_comp[i][1][0], x_out);
1518
312
                felem_assign(pre_comp[i][1][1], y_out);
1519
312
                felem_assign(pre_comp[i][1][2], z_out);
1520
4.99k
                for (j = 2; j <= 16; ++j) {
1521
4.68k
                    if (j & 1) {
1522
2.18k
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1523
2.18k
                            pre_comp[i][j][2], pre_comp[i][1][0],
1524
2.18k
                            pre_comp[i][1][1], pre_comp[i][1][2], 0,
1525
2.18k
                            pre_comp[i][j - 1][0],
1526
2.18k
                            pre_comp[i][j - 1][1],
1527
2.18k
                            pre_comp[i][j - 1][2]);
1528
2.49k
                    } else {
1529
2.49k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1530
2.49k
                            pre_comp[i][j][2], pre_comp[i][j / 2][0],
1531
2.49k
                            pre_comp[i][j / 2][1],
1532
2.49k
                            pre_comp[i][j / 2][2]);
1533
2.49k
                    }
1534
4.68k
                }
1535
312
            }
1536
312
        }
1537
312
        if (mixed)
1538
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1539
312
    }
1540
1541
    /* the scalar for the generator */
1542
2.99k
    if ((scalar != NULL) && (have_pre_comp)) {
1543
2.68k
        memset(g_secret, 0, sizeof(g_secret));
1544
        /* reduce scalar to 0 <= scalar < 2^224 */
1545
2.68k
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1546
            /*
1547
             * this is an unusual input, and we don't guarantee
1548
             * constant-timeness
1549
             */
1550
634
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1551
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1552
0
                goto err;
1553
0
            }
1554
634
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1555
2.05k
        } else {
1556
2.05k
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1557
2.05k
        }
1558
        /* do the multiplication with generator precomputation */
1559
2.68k
        batch_mul(x_out, y_out, z_out,
1560
2.68k
            (const felem_bytearray(*))secrets, num_points,
1561
2.68k
            g_secret,
1562
2.68k
            mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1563
2.68k
    } else {
1564
        /* do the multiplication without generator precomputation */
1565
312
        batch_mul(x_out, y_out, z_out,
1566
312
            (const felem_bytearray(*))secrets, num_points,
1567
312
            NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1568
312
    }
1569
    /* reduce the output to its unique minimal representation */
1570
2.99k
    felem_contract(x_in, x_out);
1571
2.99k
    felem_contract(y_in, y_out);
1572
2.99k
    felem_contract(z_in, z_out);
1573
2.99k
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || (!felem_to_BN(z, z_in))) {
1574
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1575
0
        goto err;
1576
0
    }
1577
2.99k
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1578
2.99k
        ctx);
1579
1580
2.99k
err:
1581
2.99k
    BN_CTX_end(ctx);
1582
2.99k
    EC_POINT_free(generator);
1583
2.99k
    OPENSSL_free(secrets);
1584
2.99k
    OPENSSL_free(pre_comp);
1585
2.99k
    OPENSSL_free(tmp_felems);
1586
2.99k
    return ret;
1587
2.99k
}
1588
1589
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1590
0
{
1591
0
    int ret = 0;
1592
0
    NISTP224_PRE_COMP *pre = NULL;
1593
0
    int i, j;
1594
0
    BIGNUM *x, *y;
1595
0
    EC_POINT *generator = NULL;
1596
0
    felem tmp_felems[32];
1597
0
#ifndef FIPS_MODULE
1598
0
    BN_CTX *new_ctx = NULL;
1599
0
#endif
1600
1601
    /* throw away old precomputation */
1602
0
    EC_pre_comp_free(group);
1603
1604
0
#ifndef FIPS_MODULE
1605
0
    if (ctx == NULL)
1606
0
        ctx = new_ctx = BN_CTX_new();
1607
0
#endif
1608
0
    if (ctx == NULL)
1609
0
        return 0;
1610
1611
0
    BN_CTX_start(ctx);
1612
0
    x = BN_CTX_get(ctx);
1613
0
    y = BN_CTX_get(ctx);
1614
0
    if (y == NULL)
1615
0
        goto err;
1616
    /* get the generator */
1617
0
    if (group->generator == NULL)
1618
0
        goto err;
1619
0
    generator = EC_POINT_new(group);
1620
0
    if (generator == NULL)
1621
0
        goto err;
1622
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1623
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1624
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1625
0
        goto err;
1626
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1627
0
        goto err;
1628
    /*
1629
     * if the generator is the standard one, use built-in precomputation
1630
     */
1631
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1632
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1633
0
        goto done;
1634
0
    }
1635
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) || (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1636
0
        goto err;
1637
    /*
1638
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1639
     * 2^140*G, 2^196*G for the second one
1640
     */
1641
0
    for (i = 1; i <= 8; i <<= 1) {
1642
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1643
0
            pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1644
0
            pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1645
0
        for (j = 0; j < 27; ++j) {
1646
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1647
0
                pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1648
0
                pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1649
0
        }
1650
0
        if (i == 8)
1651
0
            break;
1652
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1653
0
            pre->g_pre_comp[0][2 * i][1],
1654
0
            pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1655
0
            pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1656
0
        for (j = 0; j < 27; ++j) {
1657
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1658
0
                pre->g_pre_comp[0][2 * i][1],
1659
0
                pre->g_pre_comp[0][2 * i][2],
1660
0
                pre->g_pre_comp[0][2 * i][0],
1661
0
                pre->g_pre_comp[0][2 * i][1],
1662
0
                pre->g_pre_comp[0][2 * i][2]);
1663
0
        }
1664
0
    }
1665
0
    for (i = 0; i < 2; i++) {
1666
        /* g_pre_comp[i][0] is the point at infinity */
1667
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1668
        /* the remaining multiples */
1669
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1670
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1671
0
            pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1672
0
            pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1673
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1674
0
            pre->g_pre_comp[i][2][2]);
1675
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1676
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1677
0
            pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1678
0
            pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1679
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1680
0
            pre->g_pre_comp[i][2][2]);
1681
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1682
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1683
0
            pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1684
0
            pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1685
0
            0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1686
0
            pre->g_pre_comp[i][4][2]);
1687
        /*
1688
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1689
         */
1690
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1691
0
            pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1692
0
            pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1693
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1694
0
            pre->g_pre_comp[i][2][2]);
1695
0
        for (j = 1; j < 8; ++j) {
1696
            /* odd multiples: add G resp. 2^28*G */
1697
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1698
0
                pre->g_pre_comp[i][2 * j + 1][1],
1699
0
                pre->g_pre_comp[i][2 * j + 1][2],
1700
0
                pre->g_pre_comp[i][2 * j][0],
1701
0
                pre->g_pre_comp[i][2 * j][1],
1702
0
                pre->g_pre_comp[i][2 * j][2], 0,
1703
0
                pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1704
0
                pre->g_pre_comp[i][1][2]);
1705
0
        }
1706
0
    }
1707
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1708
1709
0
done:
1710
0
    SETPRECOMP(group, nistp224, pre);
1711
0
    pre = NULL;
1712
0
    ret = 1;
1713
0
err:
1714
0
    BN_CTX_end(ctx);
1715
0
    EC_POINT_free(generator);
1716
0
#ifndef FIPS_MODULE
1717
0
    BN_CTX_free(new_ctx);
1718
0
#endif
1719
0
    EC_nistp224_pre_comp_free(pre);
1720
0
    return ret;
1721
0
}
1722
1723
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1724
0
{
1725
    return HAVEPRECOMP(group, nistp224);
1726
0
}