Coverage Report

Created: 2026-04-09 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openssl35/crypto/ec/ecp_nistp224.c
Line
Count
Source
1
/*
2
 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
3
 *
4
 * Licensed under the Apache License 2.0 (the "License").  You may not use
5
 * this file except in compliance with the License.  You can obtain a copy
6
 * in the file LICENSE in the source distribution or at
7
 * https://www.openssl.org/source/license.html
8
 */
9
10
/* Copyright 2011 Google Inc.
11
 *
12
 * Licensed under the Apache License, Version 2.0 (the "License");
13
 *
14
 * you may not use this file except in compliance with the License.
15
 * You may obtain a copy of the License at
16
 *
17
 *     http://www.apache.org/licenses/LICENSE-2.0
18
 *
19
 *  Unless required by applicable law or agreed to in writing, software
20
 *  distributed under the License is distributed on an "AS IS" BASIS,
21
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
 *  See the License for the specific language governing permissions and
23
 *  limitations under the License.
24
 */
25
26
/*
27
 * ECDSA low level APIs are deprecated for public use, but still ok for
28
 * internal use.
29
 */
30
#include "internal/deprecated.h"
31
32
/*
33
 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
34
 *
35
 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
36
 * and Adam Langley's public domain 64-bit C implementation of curve25519
37
 */
38
39
#include <openssl/opensslconf.h>
40
41
#include <stdint.h>
42
#include <string.h>
43
#include <openssl/err.h>
44
#include "ec_local.h"
45
46
#include "internal/numbers.h"
47
48
#ifndef INT128_MAX
49
#error "Your compiler doesn't appear to support 128-bit integer types"
50
#endif
51
52
typedef uint8_t u8;
53
typedef uint64_t u64;
54
55
/******************************************************************************/
56
/*-
57
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58
 *
59
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60
 * using 64-bit coefficients called 'limbs',
61
 * and sometimes (for multiplication results) as
62
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63
 * using 128-bit coefficients called 'widelimbs'.
64
 * A 4-limb representation is an 'felem';
65
 * a 7-widelimb representation is a 'widefelem'.
66
 * Even within felems, bits of adjacent limbs overlap, and we don't always
67
 * reduce the representations: we ensure that inputs to each felem
68
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69
 * and fit into a 128-bit word without overflow. The coefficients are then
70
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71
 * We only reduce to the unique minimal representation at the end of the
72
 * computation.
73
 */
74
75
typedef uint64_t limb;
76
typedef uint64_t limb_aX __attribute((__aligned__(1)));
77
typedef uint128_t widelimb;
78
79
typedef limb felem[4];
80
typedef widelimb widefelem[7];
81
82
/*
83
 * Field element represented as a byte array. 28*8 = 224 bits is also the
84
 * group order size for the elliptic curve, and we also use this type for
85
 * scalars for point multiplication.
86
 */
87
typedef u8 felem_bytearray[28];
88
89
static const felem_bytearray nistp224_curve_params[5] = {
90
    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
91
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
92
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
93
    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
94
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
95
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE },
96
    { 0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
97
        0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
98
        0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4 },
99
    { 0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
100
        0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
101
        0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21 },
102
    { 0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
103
        0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
104
        0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34 }
105
};
106
107
/*-
108
 * Precomputed multiples of the standard generator
109
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
110
 * (0 for the point at infinity).
111
 * For each field element, slice a_0 is word 0, etc.
112
 *
113
 * The table has 2 * 16 elements, starting with the following:
114
 * index | bits    | point
115
 * ------+---------+------------------------------
116
 *     0 | 0 0 0 0 | 0G
117
 *     1 | 0 0 0 1 | 1G
118
 *     2 | 0 0 1 0 | 2^56G
119
 *     3 | 0 0 1 1 | (2^56 + 1)G
120
 *     4 | 0 1 0 0 | 2^112G
121
 *     5 | 0 1 0 1 | (2^112 + 1)G
122
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
123
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
124
 *     8 | 1 0 0 0 | 2^168G
125
 *     9 | 1 0 0 1 | (2^168 + 1)G
126
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
127
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
128
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
129
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
130
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
131
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
132
 * followed by a copy of this with each element multiplied by 2^28.
133
 *
134
 * The reason for this is so that we can clock bits into four different
135
 * locations when doing simple scalar multiplies against the base point,
136
 * and then another four locations using the second 16 elements.
137
 */
138
static const felem gmul[2][16][3] = {
139
    { { { 0, 0, 0, 0 },
140
          { 0, 0, 0, 0 },
141
          { 0, 0, 0, 0 } },
142
        { { 0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf },
143
            { 0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723 },
144
            { 1, 0, 0, 0 } },
145
        { { 0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5 },
146
            { 0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321 },
147
            { 1, 0, 0, 0 } },
148
        { { 0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748 },
149
            { 0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17 },
150
            { 1, 0, 0, 0 } },
151
        { { 0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe },
152
            { 0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b },
153
            { 1, 0, 0, 0 } },
154
        { { 0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3 },
155
            { 0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a },
156
            { 1, 0, 0, 0 } },
157
        { { 0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c },
158
            { 0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244 },
159
            { 1, 0, 0, 0 } },
160
        { { 0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849 },
161
            { 0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112 },
162
            { 1, 0, 0, 0 } },
163
        { { 0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47 },
164
            { 0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394 },
165
            { 1, 0, 0, 0 } },
166
        { { 0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d },
167
            { 0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7 },
168
            { 1, 0, 0, 0 } },
169
        { { 0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24 },
170
            { 0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881 },
171
            { 1, 0, 0, 0 } },
172
        { { 0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984 },
173
            { 0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369 },
174
            { 1, 0, 0, 0 } },
175
        { { 0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3 },
176
            { 0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60 },
177
            { 1, 0, 0, 0 } },
178
        { { 0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057 },
179
            { 0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9 },
180
            { 1, 0, 0, 0 } },
181
        { { 0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9 },
182
            { 0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc },
183
            { 1, 0, 0, 0 } },
184
        { { 0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58 },
185
            { 0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558 },
186
            { 1, 0, 0, 0 } } },
187
    { { { 0, 0, 0, 0 },
188
          { 0, 0, 0, 0 },
189
          { 0, 0, 0, 0 } },
190
        { { 0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31 },
191
            { 0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d },
192
            { 1, 0, 0, 0 } },
193
        { { 0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3 },
194
            { 0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a },
195
            { 1, 0, 0, 0 } },
196
        { { 0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33 },
197
            { 0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100 },
198
            { 1, 0, 0, 0 } },
199
        { { 0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5 },
200
            { 0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea },
201
            { 1, 0, 0, 0 } },
202
        { { 0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be },
203
            { 0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51 },
204
            { 1, 0, 0, 0 } },
205
        { { 0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1 },
206
            { 0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb },
207
            { 1, 0, 0, 0 } },
208
        { { 0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233 },
209
            { 0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def },
210
            { 1, 0, 0, 0 } },
211
        { { 0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae },
212
            { 0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45 },
213
            { 1, 0, 0, 0 } },
214
        { { 0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e },
215
            { 0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb },
216
            { 1, 0, 0, 0 } },
217
        { { 0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de },
218
            { 0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3 },
219
            { 1, 0, 0, 0 } },
220
        { { 0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05 },
221
            { 0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58 },
222
            { 1, 0, 0, 0 } },
223
        { { 0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb },
224
            { 0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0 },
225
            { 1, 0, 0, 0 } },
226
        { { 0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9 },
227
            { 0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea },
228
            { 1, 0, 0, 0 } },
229
        { { 0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba },
230
            { 0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405 },
231
            { 1, 0, 0, 0 } },
232
        { { 0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e },
233
            { 0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e },
234
            { 1, 0, 0, 0 } } }
235
};
236
237
/* Precomputation for the group generator. */
238
struct nistp224_pre_comp_st {
239
    felem g_pre_comp[2][16][3];
240
    CRYPTO_REF_COUNT references;
241
};
242
243
const EC_METHOD *EC_GFp_nistp224_method(void)
244
57.3k
{
245
57.3k
    static const EC_METHOD ret = {
246
57.3k
        EC_FLAGS_DEFAULT_OCT,
247
57.3k
        NID_X9_62_prime_field,
248
57.3k
        ossl_ec_GFp_nistp224_group_init,
249
57.3k
        ossl_ec_GFp_simple_group_finish,
250
57.3k
        ossl_ec_GFp_simple_group_clear_finish,
251
57.3k
        ossl_ec_GFp_nist_group_copy,
252
57.3k
        ossl_ec_GFp_nistp224_group_set_curve,
253
57.3k
        ossl_ec_GFp_simple_group_get_curve,
254
57.3k
        ossl_ec_GFp_simple_group_get_degree,
255
57.3k
        ossl_ec_group_simple_order_bits,
256
57.3k
        ossl_ec_GFp_simple_group_check_discriminant,
257
57.3k
        ossl_ec_GFp_simple_point_init,
258
57.3k
        ossl_ec_GFp_simple_point_finish,
259
57.3k
        ossl_ec_GFp_simple_point_clear_finish,
260
57.3k
        ossl_ec_GFp_simple_point_copy,
261
57.3k
        ossl_ec_GFp_simple_point_set_to_infinity,
262
57.3k
        ossl_ec_GFp_simple_point_set_affine_coordinates,
263
57.3k
        ossl_ec_GFp_nistp224_point_get_affine_coordinates,
264
57.3k
        0 /* point_set_compressed_coordinates */,
265
57.3k
        0 /* point2oct */,
266
57.3k
        0 /* oct2point */,
267
57.3k
        ossl_ec_GFp_simple_add,
268
57.3k
        ossl_ec_GFp_simple_dbl,
269
57.3k
        ossl_ec_GFp_simple_invert,
270
57.3k
        ossl_ec_GFp_simple_is_at_infinity,
271
57.3k
        ossl_ec_GFp_simple_is_on_curve,
272
57.3k
        ossl_ec_GFp_simple_cmp,
273
57.3k
        ossl_ec_GFp_simple_make_affine,
274
57.3k
        ossl_ec_GFp_simple_points_make_affine,
275
57.3k
        ossl_ec_GFp_nistp224_points_mul,
276
57.3k
        ossl_ec_GFp_nistp224_precompute_mult,
277
57.3k
        ossl_ec_GFp_nistp224_have_precompute_mult,
278
57.3k
        ossl_ec_GFp_nist_field_mul,
279
57.3k
        ossl_ec_GFp_nist_field_sqr,
280
57.3k
        0 /* field_div */,
281
57.3k
        ossl_ec_GFp_simple_field_inv,
282
57.3k
        0 /* field_encode */,
283
57.3k
        0 /* field_decode */,
284
57.3k
        0, /* field_set_to_one */
285
57.3k
        ossl_ec_key_simple_priv2oct,
286
57.3k
        ossl_ec_key_simple_oct2priv,
287
57.3k
        0, /* set private */
288
57.3k
        ossl_ec_key_simple_generate_key,
289
57.3k
        ossl_ec_key_simple_check_key,
290
57.3k
        ossl_ec_key_simple_generate_public_key,
291
57.3k
        0, /* keycopy */
292
57.3k
        0, /* keyfinish */
293
57.3k
        ossl_ecdh_simple_compute_key,
294
57.3k
        ossl_ecdsa_simple_sign_setup,
295
57.3k
        ossl_ecdsa_simple_sign_sig,
296
57.3k
        ossl_ecdsa_simple_verify_sig,
297
57.3k
        0, /* field_inverse_mod_ord */
298
57.3k
        0, /* blind_coordinates */
299
57.3k
        0, /* ladder_pre */
300
57.3k
        0, /* ladder_step */
301
57.3k
        0 /* ladder_post */
302
57.3k
    };
303
304
57.3k
    return &ret;
305
57.3k
}
306
307
/*
308
 * Helper functions to convert field elements to/from internal representation
309
 */
310
static void bin28_to_felem(felem out, const u8 in[28])
311
15.4k
{
312
15.4k
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
313
15.4k
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
314
15.4k
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
315
15.4k
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
316
15.4k
}
317
318
static void felem_to_bin28(u8 out[28], const felem in)
319
25.3k
{
320
25.3k
    unsigned i;
321
202k
    for (i = 0; i < 7; ++i) {
322
177k
        out[i] = in[0] >> (8 * i);
323
177k
        out[i + 7] = in[1] >> (8 * i);
324
177k
        out[i + 14] = in[2] >> (8 * i);
325
177k
        out[i + 21] = in[3] >> (8 * i);
326
177k
    }
327
25.3k
}
328
329
/* From OpenSSL BIGNUM to internal representation */
330
static int BN_to_felem(felem out, const BIGNUM *bn)
331
15.4k
{
332
15.4k
    felem_bytearray b_out;
333
15.4k
    int num_bytes;
334
335
15.4k
    if (BN_is_negative(bn)) {
336
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
337
0
        return 0;
338
0
    }
339
15.4k
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
340
15.4k
    if (num_bytes < 0) {
341
0
        ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
342
0
        return 0;
343
0
    }
344
15.4k
    bin28_to_felem(out, b_out);
345
15.4k
    return 1;
346
15.4k
}
347
348
/* From internal representation to OpenSSL BIGNUM */
349
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
350
25.3k
{
351
25.3k
    felem_bytearray b_out;
352
25.3k
    felem_to_bin28(b_out, in);
353
25.3k
    return BN_lebin2bn(b_out, sizeof(b_out), out);
354
25.3k
}
355
356
/******************************************************************************/
357
/*-
358
 *                              FIELD OPERATIONS
359
 *
360
 * Field operations, using the internal representation of field elements.
361
 * NB! These operations are specific to our point multiplication and cannot be
362
 * expected to be correct in general - e.g., multiplication with a large scalar
363
 * will cause an overflow.
364
 *
365
 */
366
367
static void felem_one(felem out)
368
0
{
369
0
    out[0] = 1;
370
0
    out[1] = 0;
371
0
    out[2] = 0;
372
0
    out[3] = 0;
373
0
}
374
375
static void felem_assign(felem out, const felem in)
376
1.66M
{
377
1.66M
    out[0] = in[0];
378
1.66M
    out[1] = in[1];
379
1.66M
    out[2] = in[2];
380
1.66M
    out[3] = in[3];
381
1.66M
}
382
383
/* Sum two field elements: out += in */
384
static void felem_sum(felem out, const felem in)
385
380k
{
386
380k
    out[0] += in[0];
387
380k
    out[1] += in[1];
388
380k
    out[2] += in[2];
389
380k
    out[3] += in[3];
390
380k
}
391
392
/* Subtract field elements: out -= in */
393
/* Assumes in[i] < 2^57 */
394
static void felem_diff(felem out, const felem in)
395
402k
{
396
402k
    static const limb two58p2 = (((limb)1) << 58) + (((limb)1) << 2);
397
402k
    static const limb two58m2 = (((limb)1) << 58) - (((limb)1) << 2);
398
402k
    static const limb two58m42m2 = (((limb)1) << 58) - (((limb)1) << 42) - (((limb)1) << 2);
399
400
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
401
402k
    out[0] += two58p2;
402
402k
    out[1] += two58m42m2;
403
402k
    out[2] += two58m2;
404
402k
    out[3] += two58m2;
405
406
402k
    out[0] -= in[0];
407
402k
    out[1] -= in[1];
408
402k
    out[2] -= in[2];
409
402k
    out[3] -= in[3];
410
402k
}
411
412
/* Subtract in unreduced 128-bit mode: out -= in */
413
/* Assumes in[i] < 2^119 */
414
static void widefelem_diff(widefelem out, const widefelem in)
415
275k
{
416
275k
    static const widelimb two120 = ((widelimb)1) << 120;
417
275k
    static const widelimb two120m64 = (((widelimb)1) << 120) - (((widelimb)1) << 64);
418
275k
    static const widelimb two120m104m64 = (((widelimb)1) << 120) - (((widelimb)1) << 104) - (((widelimb)1) << 64);
419
420
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
421
275k
    out[0] += two120;
422
275k
    out[1] += two120m64;
423
275k
    out[2] += two120m64;
424
275k
    out[3] += two120;
425
275k
    out[4] += two120m104m64;
426
275k
    out[5] += two120m64;
427
275k
    out[6] += two120m64;
428
429
275k
    out[0] -= in[0];
430
275k
    out[1] -= in[1];
431
275k
    out[2] -= in[2];
432
275k
    out[3] -= in[3];
433
275k
    out[4] -= in[4];
434
275k
    out[5] -= in[5];
435
275k
    out[6] -= in[6];
436
275k
}
437
438
/* Subtract in mixed mode: out128 -= in64 */
439
/* in[i] < 2^63 */
440
static void felem_diff_128_64(widefelem out, const felem in)
441
861k
{
442
861k
    static const widelimb two64p8 = (((widelimb)1) << 64) + (((widelimb)1) << 8);
443
861k
    static const widelimb two64m8 = (((widelimb)1) << 64) - (((widelimb)1) << 8);
444
861k
    static const widelimb two64m48m8 = (((widelimb)1) << 64) - (((widelimb)1) << 48) - (((widelimb)1) << 8);
445
446
    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
447
861k
    out[0] += two64p8;
448
861k
    out[1] += two64m48m8;
449
861k
    out[2] += two64m8;
450
861k
    out[3] += two64m8;
451
452
861k
    out[0] -= in[0];
453
861k
    out[1] -= in[1];
454
861k
    out[2] -= in[2];
455
861k
    out[3] -= in[3];
456
861k
}
457
458
/*
459
 * Multiply a field element by a scalar: out = out * scalar The scalars we
460
 * actually use are small, so results fit without overflow
461
 */
462
static void felem_scalar(felem out, const limb scalar)
463
529k
{
464
529k
    out[0] *= scalar;
465
529k
    out[1] *= scalar;
466
529k
    out[2] *= scalar;
467
529k
    out[3] *= scalar;
468
529k
}
469
470
/*
471
 * Multiply an unreduced field element by a scalar: out = out * scalar The
472
 * scalars we actually use are small, so results fit without overflow
473
 */
474
static void widefelem_scalar(widefelem out, const widelimb scalar)
475
126k
{
476
126k
    out[0] *= scalar;
477
126k
    out[1] *= scalar;
478
126k
    out[2] *= scalar;
479
126k
    out[3] *= scalar;
480
126k
    out[4] *= scalar;
481
126k
    out[5] *= scalar;
482
126k
    out[6] *= scalar;
483
126k
}
484
485
/* Square a field element: out = in^2 */
486
static void felem_square(widefelem out, const felem in)
487
2.18M
{
488
2.18M
    limb tmp0, tmp1, tmp2;
489
2.18M
    tmp0 = 2 * in[0];
490
2.18M
    tmp1 = 2 * in[1];
491
2.18M
    tmp2 = 2 * in[2];
492
2.18M
    out[0] = ((widelimb)in[0]) * in[0];
493
2.18M
    out[1] = ((widelimb)in[0]) * tmp1;
494
2.18M
    out[2] = ((widelimb)in[0]) * tmp2 + ((widelimb)in[1]) * in[1];
495
2.18M
    out[3] = ((widelimb)in[3]) * tmp0 + ((widelimb)in[1]) * tmp2;
496
2.18M
    out[4] = ((widelimb)in[3]) * tmp1 + ((widelimb)in[2]) * in[2];
497
2.18M
    out[5] = ((widelimb)in[3]) * tmp2;
498
2.18M
    out[6] = ((widelimb)in[3]) * in[3];
499
2.18M
}
500
501
/* Multiply two field elements: out = in1 * in2 */
502
static void felem_mul(widefelem out, const felem in1, const felem in2)
503
1.69M
{
504
1.69M
    out[0] = ((widelimb)in1[0]) * in2[0];
505
1.69M
    out[1] = ((widelimb)in1[0]) * in2[1] + ((widelimb)in1[1]) * in2[0];
506
1.69M
    out[2] = ((widelimb)in1[0]) * in2[2] + ((widelimb)in1[1]) * in2[1] + ((widelimb)in1[2]) * in2[0];
507
1.69M
    out[3] = ((widelimb)in1[0]) * in2[3] + ((widelimb)in1[1]) * in2[2] + ((widelimb)in1[2]) * in2[1] + ((widelimb)in1[3]) * in2[0];
508
1.69M
    out[4] = ((widelimb)in1[1]) * in2[3] + ((widelimb)in1[2]) * in2[2] + ((widelimb)in1[3]) * in2[1];
509
1.69M
    out[5] = ((widelimb)in1[2]) * in2[3] + ((widelimb)in1[3]) * in2[2];
510
1.69M
    out[6] = ((widelimb)in1[3]) * in2[3];
511
1.69M
}
512
513
/*-
514
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
515
 * Requires in[i] < 2^126,
516
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
517
static void felem_reduce(felem out, const widefelem in)
518
3.62M
{
519
3.62M
    static const widelimb two127p15 = (((widelimb)1) << 127) + (((widelimb)1) << 15);
520
3.62M
    static const widelimb two127m71 = (((widelimb)1) << 127) - (((widelimb)1) << 71);
521
3.62M
    static const widelimb two127m71m55 = (((widelimb)1) << 127) - (((widelimb)1) << 71) - (((widelimb)1) << 55);
522
3.62M
    widelimb output[5];
523
524
    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
525
3.62M
    output[0] = in[0] + two127p15;
526
3.62M
    output[1] = in[1] + two127m71m55;
527
3.62M
    output[2] = in[2] + two127m71;
528
3.62M
    output[3] = in[3];
529
3.62M
    output[4] = in[4];
530
531
    /* Eliminate in[4], in[5], in[6] */
532
3.62M
    output[4] += in[6] >> 16;
533
3.62M
    output[3] += (in[6] & 0xffff) << 40;
534
3.62M
    output[2] -= in[6];
535
536
3.62M
    output[3] += in[5] >> 16;
537
3.62M
    output[2] += (in[5] & 0xffff) << 40;
538
3.62M
    output[1] -= in[5];
539
540
3.62M
    output[2] += output[4] >> 16;
541
3.62M
    output[1] += (output[4] & 0xffff) << 40;
542
3.62M
    output[0] -= output[4];
543
544
    /* Carry 2 -> 3 -> 4 */
545
3.62M
    output[3] += output[2] >> 56;
546
3.62M
    output[2] &= 0x00ffffffffffffff;
547
548
3.62M
    output[4] = output[3] >> 56;
549
3.62M
    output[3] &= 0x00ffffffffffffff;
550
551
    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
552
553
    /* Eliminate output[4] */
554
3.62M
    output[2] += output[4] >> 16;
555
    /* output[2] < 2^56 + 2^56 = 2^57 */
556
3.62M
    output[1] += (output[4] & 0xffff) << 40;
557
3.62M
    output[0] -= output[4];
558
559
    /* Carry 0 -> 1 -> 2 -> 3 */
560
3.62M
    output[1] += output[0] >> 56;
561
3.62M
    out[0] = output[0] & 0x00ffffffffffffff;
562
563
3.62M
    output[2] += output[1] >> 56;
564
    /* output[2] < 2^57 + 2^72 */
565
3.62M
    out[1] = output[1] & 0x00ffffffffffffff;
566
3.62M
    output[3] += output[2] >> 56;
567
    /* output[3] <= 2^56 + 2^16 */
568
3.62M
    out[2] = output[2] & 0x00ffffffffffffff;
569
570
    /*-
571
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
572
     * out[3] <= 2^56 + 2^16 (due to final carry),
573
     * so out < 2*p
574
     */
575
3.62M
    out[3] = output[3];
576
3.62M
}
577
578
static void felem_square_reduce(felem out, const felem in)
579
0
{
580
0
    widefelem tmp;
581
0
    felem_square(tmp, in);
582
0
    felem_reduce(out, tmp);
583
0
}
584
585
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
586
0
{
587
0
    widefelem tmp;
588
0
    felem_mul(tmp, in1, in2);
589
0
    felem_reduce(out, tmp);
590
0
}
591
592
/*
593
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
594
 * call felem_reduce first)
595
 */
596
static void felem_contract(felem out, const felem in)
597
17.9k
{
598
17.9k
    static const int64_t two56 = ((limb)1) << 56;
599
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
600
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
601
17.9k
    int64_t tmp[4], a;
602
17.9k
    tmp[0] = in[0];
603
17.9k
    tmp[1] = in[1];
604
17.9k
    tmp[2] = in[2];
605
17.9k
    tmp[3] = in[3];
606
    /* Case 1: a = 1 iff in >= 2^224 */
607
17.9k
    a = (in[3] >> 56);
608
17.9k
    tmp[0] -= a;
609
17.9k
    tmp[1] += a << 40;
610
17.9k
    tmp[3] &= 0x00ffffffffffffff;
611
    /*
612
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
613
     * and the lower part is non-zero
614
     */
615
17.9k
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) | (((int64_t)(in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
616
17.9k
    a &= 0x00ffffffffffffff;
617
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
618
17.9k
    a = (a - 1) >> 63;
619
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
620
17.9k
    tmp[3] &= a ^ 0xffffffffffffffff;
621
17.9k
    tmp[2] &= a ^ 0xffffffffffffffff;
622
17.9k
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
623
17.9k
    tmp[0] -= 1 & a;
624
625
    /*
626
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
627
     * non-zero, so we only need one step
628
     */
629
17.9k
    a = tmp[0] >> 63;
630
17.9k
    tmp[0] += two56 & a;
631
17.9k
    tmp[1] -= 1 & a;
632
633
    /* carry 1 -> 2 -> 3 */
634
17.9k
    tmp[2] += tmp[1] >> 56;
635
17.9k
    tmp[1] &= 0x00ffffffffffffff;
636
637
17.9k
    tmp[3] += tmp[2] >> 56;
638
17.9k
    tmp[2] &= 0x00ffffffffffffff;
639
640
    /* Now 0 <= out < p */
641
17.9k
    out[0] = tmp[0];
642
17.9k
    out[1] = tmp[1];
643
17.9k
    out[2] = tmp[2];
644
17.9k
    out[3] = tmp[3];
645
17.9k
}
646
647
/*
648
 * Get negative value: out = -in
649
 * Requires in[i] < 2^63,
650
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
651
 */
652
static void felem_neg(felem out, const felem in)
653
11.8k
{
654
11.8k
    widefelem tmp;
655
656
11.8k
    memset(tmp, 0, sizeof(tmp));
657
11.8k
    felem_diff_128_64(tmp, in);
658
11.8k
    felem_reduce(out, tmp);
659
11.8k
}
660
661
/*
662
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
663
 * elements are reduced to in < 2^225, so we only need to check three cases:
664
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
665
 */
666
static limb felem_is_zero(const felem in)
667
596k
{
668
596k
    limb zero, two224m96p1, two225m97p2;
669
670
596k
    zero = in[0] | in[1] | in[2] | in[3];
671
596k
    zero = (((int64_t)(zero)-1) >> 63) & 1;
672
596k
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
673
596k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
674
596k
    two224m96p1 = (((int64_t)(two224m96p1)-1) >> 63) & 1;
675
596k
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
676
596k
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
677
596k
    two225m97p2 = (((int64_t)(two225m97p2)-1) >> 63) & 1;
678
596k
    return (zero | two224m96p1 | two225m97p2);
679
596k
}
680
681
static int felem_is_zero_int(const void *in)
682
0
{
683
0
    return (int)(felem_is_zero(in) & ((limb)1));
684
0
}
685
686
/* Invert a field element */
687
/* Computation chain copied from djb's code */
688
static void felem_inv(felem out, const felem in)
689
4.88k
{
690
4.88k
    felem ftmp, ftmp2, ftmp3, ftmp4;
691
4.88k
    widefelem tmp;
692
4.88k
    unsigned i;
693
694
4.88k
    felem_square(tmp, in);
695
4.88k
    felem_reduce(ftmp, tmp); /* 2 */
696
4.88k
    felem_mul(tmp, in, ftmp);
697
4.88k
    felem_reduce(ftmp, tmp); /* 2^2 - 1 */
698
4.88k
    felem_square(tmp, ftmp);
699
4.88k
    felem_reduce(ftmp, tmp); /* 2^3 - 2 */
700
4.88k
    felem_mul(tmp, in, ftmp);
701
4.88k
    felem_reduce(ftmp, tmp); /* 2^3 - 1 */
702
4.88k
    felem_square(tmp, ftmp);
703
4.88k
    felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
704
4.88k
    felem_square(tmp, ftmp2);
705
4.88k
    felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
706
4.88k
    felem_square(tmp, ftmp2);
707
4.88k
    felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
708
4.88k
    felem_mul(tmp, ftmp2, ftmp);
709
4.88k
    felem_reduce(ftmp, tmp); /* 2^6 - 1 */
710
4.88k
    felem_square(tmp, ftmp);
711
4.88k
    felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
712
29.3k
    for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
713
24.4k
        felem_square(tmp, ftmp2);
714
24.4k
        felem_reduce(ftmp2, tmp);
715
24.4k
    }
716
4.88k
    felem_mul(tmp, ftmp2, ftmp);
717
4.88k
    felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
718
4.88k
    felem_square(tmp, ftmp2);
719
4.88k
    felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
720
58.6k
    for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
721
53.7k
        felem_square(tmp, ftmp3);
722
53.7k
        felem_reduce(ftmp3, tmp);
723
53.7k
    }
724
4.88k
    felem_mul(tmp, ftmp3, ftmp2);
725
4.88k
    felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
726
4.88k
    felem_square(tmp, ftmp2);
727
4.88k
    felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
728
117k
    for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
729
112k
        felem_square(tmp, ftmp3);
730
112k
        felem_reduce(ftmp3, tmp);
731
112k
    }
732
4.88k
    felem_mul(tmp, ftmp3, ftmp2);
733
4.88k
    felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
734
4.88k
    felem_square(tmp, ftmp3);
735
4.88k
    felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
736
234k
    for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
737
229k
        felem_square(tmp, ftmp4);
738
229k
        felem_reduce(ftmp4, tmp);
739
229k
    }
740
4.88k
    felem_mul(tmp, ftmp3, ftmp4);
741
4.88k
    felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
742
4.88k
    felem_square(tmp, ftmp3);
743
4.88k
    felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
744
117k
    for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
745
112k
        felem_square(tmp, ftmp4);
746
112k
        felem_reduce(ftmp4, tmp);
747
112k
    }
748
4.88k
    felem_mul(tmp, ftmp2, ftmp4);
749
4.88k
    felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
750
34.2k
    for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
751
29.3k
        felem_square(tmp, ftmp2);
752
29.3k
        felem_reduce(ftmp2, tmp);
753
29.3k
    }
754
4.88k
    felem_mul(tmp, ftmp2, ftmp);
755
4.88k
    felem_reduce(ftmp, tmp); /* 2^126 - 1 */
756
4.88k
    felem_square(tmp, ftmp);
757
4.88k
    felem_reduce(ftmp, tmp); /* 2^127 - 2 */
758
4.88k
    felem_mul(tmp, ftmp, in);
759
4.88k
    felem_reduce(ftmp, tmp); /* 2^127 - 1 */
760
478k
    for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
761
473k
        felem_square(tmp, ftmp);
762
473k
        felem_reduce(ftmp, tmp);
763
473k
    }
764
4.88k
    felem_mul(tmp, ftmp, ftmp3);
765
4.88k
    felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
766
4.88k
}
767
768
/*
769
 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
770
 * out to itself.
771
 */
772
static void copy_conditional(felem out, const felem in, limb icopy)
773
906k
{
774
906k
    unsigned i;
775
    /*
776
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
777
     */
778
906k
    const limb copy = -icopy;
779
4.53M
    for (i = 0; i < 4; ++i) {
780
3.62M
        const limb tmp = copy & (in[i] ^ out[i]);
781
3.62M
        out[i] ^= tmp;
782
3.62M
    }
783
906k
}
784
785
/******************************************************************************/
786
/*-
787
 *                       ELLIPTIC CURVE POINT OPERATIONS
788
 *
789
 * Points are represented in Jacobian projective coordinates:
790
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
791
 * or to the point at infinity if Z == 0.
792
 *
793
 */
794
795
/*-
796
 * Double an elliptic curve point:
797
 * (X', Y', Z') = 2 * (X, Y, Z), where
798
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
799
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
800
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
801
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
802
 * while x_out == y_in is not (maybe this works, but it's not tested).
803
 */
804
static void
805
point_double(felem x_out, felem y_out, felem z_out,
806
    const felem x_in, const felem y_in, const felem z_in)
807
126k
{
808
126k
    widefelem tmp, tmp2;
809
126k
    felem delta, gamma, beta, alpha, ftmp, ftmp2;
810
811
126k
    felem_assign(ftmp, x_in);
812
126k
    felem_assign(ftmp2, x_in);
813
814
    /* delta = z^2 */
815
126k
    felem_square(tmp, z_in);
816
126k
    felem_reduce(delta, tmp);
817
818
    /* gamma = y^2 */
819
126k
    felem_square(tmp, y_in);
820
126k
    felem_reduce(gamma, tmp);
821
822
    /* beta = x*gamma */
823
126k
    felem_mul(tmp, x_in, gamma);
824
126k
    felem_reduce(beta, tmp);
825
826
    /* alpha = 3*(x-delta)*(x+delta) */
827
126k
    felem_diff(ftmp, delta);
828
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
829
126k
    felem_sum(ftmp2, delta);
830
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
831
126k
    felem_scalar(ftmp2, 3);
832
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
833
126k
    felem_mul(tmp, ftmp, ftmp2);
834
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
835
126k
    felem_reduce(alpha, tmp);
836
837
    /* x' = alpha^2 - 8*beta */
838
126k
    felem_square(tmp, alpha);
839
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
840
126k
    felem_assign(ftmp, beta);
841
126k
    felem_scalar(ftmp, 8);
842
    /* ftmp[i] < 8 * 2^57 = 2^60 */
843
126k
    felem_diff_128_64(tmp, ftmp);
844
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
845
126k
    felem_reduce(x_out, tmp);
846
847
    /* z' = (y + z)^2 - gamma - delta */
848
126k
    felem_sum(delta, gamma);
849
    /* delta[i] < 2^57 + 2^57 = 2^58 */
850
126k
    felem_assign(ftmp, y_in);
851
126k
    felem_sum(ftmp, z_in);
852
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
853
126k
    felem_square(tmp, ftmp);
854
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
855
126k
    felem_diff_128_64(tmp, delta);
856
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
857
126k
    felem_reduce(z_out, tmp);
858
859
    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
860
126k
    felem_scalar(beta, 4);
861
    /* beta[i] < 4 * 2^57 = 2^59 */
862
126k
    felem_diff(beta, x_out);
863
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
864
126k
    felem_mul(tmp, alpha, beta);
865
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
866
126k
    felem_square(tmp2, gamma);
867
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
868
126k
    widefelem_scalar(tmp2, 8);
869
    /* tmp2[i] < 8 * 2^116 = 2^119 */
870
126k
    widefelem_diff(tmp, tmp2);
871
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
872
126k
    felem_reduce(y_out, tmp);
873
126k
}
874
875
/*-
876
 * Add two elliptic curve points:
877
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
878
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
879
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
880
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
881
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
882
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
883
 *
884
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
885
 */
886
887
/*
888
 * This function is not entirely constant-time: it includes a branch for
889
 * checking whether the two input points are equal, (while not equal to the
890
 * point at infinity). This case never happens during single point
891
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
892
 */
893
static void point_add(felem x3, felem y3, felem z3,
894
    const felem x1, const felem y1, const felem z1,
895
    const int mixed, const felem x2, const felem y2,
896
    const felem z2)
897
149k
{
898
149k
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
899
149k
    widefelem tmp, tmp2;
900
149k
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
901
149k
    limb points_equal;
902
903
149k
    if (!mixed) {
904
        /* ftmp2 = z2^2 */
905
13.4k
        felem_square(tmp, z2);
906
13.4k
        felem_reduce(ftmp2, tmp);
907
908
        /* ftmp4 = z2^3 */
909
13.4k
        felem_mul(tmp, ftmp2, z2);
910
13.4k
        felem_reduce(ftmp4, tmp);
911
912
        /* ftmp4 = z2^3*y1 */
913
13.4k
        felem_mul(tmp2, ftmp4, y1);
914
13.4k
        felem_reduce(ftmp4, tmp2);
915
916
        /* ftmp2 = z2^2*x1 */
917
13.4k
        felem_mul(tmp2, ftmp2, x1);
918
13.4k
        felem_reduce(ftmp2, tmp2);
919
135k
    } else {
920
        /*
921
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
922
         */
923
924
        /* ftmp4 = z2^3*y1 */
925
135k
        felem_assign(ftmp4, y1);
926
927
        /* ftmp2 = z2^2*x1 */
928
135k
        felem_assign(ftmp2, x1);
929
135k
    }
930
931
    /* ftmp = z1^2 */
932
149k
    felem_square(tmp, z1);
933
149k
    felem_reduce(ftmp, tmp);
934
935
    /* ftmp3 = z1^3 */
936
149k
    felem_mul(tmp, ftmp, z1);
937
149k
    felem_reduce(ftmp3, tmp);
938
939
    /* tmp = z1^3*y2 */
940
149k
    felem_mul(tmp, ftmp3, y2);
941
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
942
943
    /* ftmp3 = z1^3*y2 - z2^3*y1 */
944
149k
    felem_diff_128_64(tmp, ftmp4);
945
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
946
149k
    felem_reduce(ftmp3, tmp);
947
948
    /* tmp = z1^2*x2 */
949
149k
    felem_mul(tmp, ftmp, x2);
950
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
951
952
    /* ftmp = z1^2*x2 - z2^2*x1 */
953
149k
    felem_diff_128_64(tmp, ftmp2);
954
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
955
149k
    felem_reduce(ftmp, tmp);
956
957
    /*
958
     * The formulae are incorrect if the points are equal, in affine coordinates
959
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
960
     * happens.
961
     *
962
     * We use bitwise operations to avoid potential side-channels introduced by
963
     * the short-circuiting behaviour of boolean operators.
964
     */
965
149k
    x_equal = felem_is_zero(ftmp);
966
149k
    y_equal = felem_is_zero(ftmp3);
967
    /*
968
     * The special case of either point being the point at infinity (z1 and/or
969
     * z2 are zero), is handled separately later on in this function, so we
970
     * avoid jumping to point_double here in those special cases.
971
     */
972
149k
    z1_is_zero = felem_is_zero(z1);
973
149k
    z2_is_zero = felem_is_zero(z2);
974
975
    /*
976
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
977
     * specific implementation `felem_is_zero()` returns truth as `0x1`
978
     * (rather than `0xff..ff`).
979
     *
980
     * This implies that `~true` in this implementation becomes
981
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
982
     * the if expression, we mask out only the last bit in the next
983
     * line.
984
     */
985
149k
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
986
987
149k
    if (points_equal) {
988
        /*
989
         * This is obviously not constant-time but, as mentioned before, this
990
         * case never happens during single point multiplication, so there is no
991
         * timing leak for ECDH or ECDSA signing.
992
         */
993
0
        point_double(x3, y3, z3, x1, y1, z1);
994
0
        return;
995
0
    }
996
997
    /* ftmp5 = z1*z2 */
998
149k
    if (!mixed) {
999
13.4k
        felem_mul(tmp, z1, z2);
1000
13.4k
        felem_reduce(ftmp5, tmp);
1001
135k
    } else {
1002
        /* special case z2 = 0 is handled later */
1003
135k
        felem_assign(ftmp5, z1);
1004
135k
    }
1005
1006
    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1007
149k
    felem_mul(tmp, ftmp, ftmp5);
1008
149k
    felem_reduce(z_out, tmp);
1009
1010
    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1011
149k
    felem_assign(ftmp5, ftmp);
1012
149k
    felem_square(tmp, ftmp);
1013
149k
    felem_reduce(ftmp, tmp);
1014
1015
    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1016
149k
    felem_mul(tmp, ftmp, ftmp5);
1017
149k
    felem_reduce(ftmp5, tmp);
1018
1019
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1020
149k
    felem_mul(tmp, ftmp2, ftmp);
1021
149k
    felem_reduce(ftmp2, tmp);
1022
1023
    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1024
149k
    felem_mul(tmp, ftmp4, ftmp5);
1025
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1026
1027
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1028
149k
    felem_square(tmp2, ftmp3);
1029
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1030
1031
    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1032
149k
    felem_diff_128_64(tmp2, ftmp5);
1033
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1034
1035
    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1036
149k
    felem_assign(ftmp5, ftmp2);
1037
149k
    felem_scalar(ftmp5, 2);
1038
    /* ftmp5[i] < 2 * 2^57 = 2^58 */
1039
1040
    /*-
1041
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1042
     *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1043
     */
1044
149k
    felem_diff_128_64(tmp2, ftmp5);
1045
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1046
149k
    felem_reduce(x_out, tmp2);
1047
1048
    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1049
149k
    felem_diff(ftmp2, x_out);
1050
    /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1051
1052
    /*
1053
     * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1054
     */
1055
149k
    felem_mul(tmp2, ftmp3, ftmp2);
1056
    /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1057
1058
    /*-
1059
     * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1060
     *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1061
     */
1062
149k
    widefelem_diff(tmp2, tmp);
1063
    /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1064
149k
    felem_reduce(y_out, tmp2);
1065
1066
    /*
1067
     * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1068
     * the point at infinity, so we need to check for this separately
1069
     */
1070
1071
    /*
1072
     * if point 1 is at infinity, copy point 2 to output, and vice versa
1073
     */
1074
149k
    copy_conditional(x_out, x2, z1_is_zero);
1075
149k
    copy_conditional(x_out, x1, z2_is_zero);
1076
149k
    copy_conditional(y_out, y2, z1_is_zero);
1077
149k
    copy_conditional(y_out, y1, z2_is_zero);
1078
149k
    copy_conditional(z_out, z2, z1_is_zero);
1079
149k
    copy_conditional(z_out, z1, z2_is_zero);
1080
149k
    felem_assign(x3, x_out);
1081
149k
    felem_assign(y3, y_out);
1082
149k
    felem_assign(z3, z_out);
1083
149k
}
1084
1085
/*
1086
 * select_point selects the |idx|th point from a precomputation table and
1087
 * copies it to out.
1088
 * The pre_comp array argument should be size of |size| argument
1089
 */
1090
static void select_point(const u64 idx, unsigned int size,
1091
    const felem pre_comp[][3], felem out[3])
1092
149k
{
1093
149k
    unsigned i, j;
1094
149k
    limb *outlimbs = &out[0][0];
1095
1096
149k
    memset(out, 0, sizeof(*out) * 3);
1097
2.56M
    for (i = 0; i < size; i++) {
1098
2.41M
        const limb *inlimbs = &pre_comp[i][0][0];
1099
2.41M
        u64 mask = i ^ idx;
1100
2.41M
        mask |= mask >> 4;
1101
2.41M
        mask |= mask >> 2;
1102
2.41M
        mask |= mask >> 1;
1103
2.41M
        mask &= 1;
1104
2.41M
        mask--;
1105
31.3M
        for (j = 0; j < 4 * 3; j++)
1106
28.9M
            outlimbs[j] |= inlimbs[j] & mask;
1107
2.41M
    }
1108
149k
}
1109
1110
/* get_bit returns the |i|th bit in |in| */
1111
static char get_bit(const felem_bytearray in, unsigned i)
1112
623k
{
1113
623k
    if (i >= 224)
1114
528
        return 0;
1115
623k
    return (in[i >> 3] >> (i & 7)) & 1;
1116
623k
}
1117
1118
/*
1119
 * Interleaved point multiplication using precomputed point multiples: The
1120
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1121
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1122
 * generator, using certain (large) precomputed multiples in g_pre_comp.
1123
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1124
 */
1125
static void batch_mul(felem x_out, felem y_out, felem z_out,
1126
    const felem_bytearray scalars[],
1127
    const unsigned num_points, const u8 *g_scalar,
1128
    const int mixed, const felem pre_comp[][17][3],
1129
    const felem g_pre_comp[2][16][3])
1130
2.73k
{
1131
2.73k
    int i, skip;
1132
2.73k
    unsigned num;
1133
2.73k
    unsigned gen_mul = (g_scalar != NULL);
1134
2.73k
    felem nq[3], tmp[4];
1135
2.73k
    u64 bits;
1136
2.73k
    u8 sign, digit;
1137
1138
    /* set nq to the point at infinity */
1139
2.73k
    memset(nq, 0, sizeof(nq));
1140
1141
    /*
1142
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1143
     * of the generator (two in each of the last 28 rounds) and additions of
1144
     * other points multiples (every 5th round).
1145
     */
1146
2.73k
    skip = 1; /* save two point operations in the first
1147
               * round */
1148
130k
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
1149
        /* double */
1150
127k
        if (!skip)
1151
124k
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1152
1153
        /* add multiples of the generator */
1154
127k
        if (gen_mul && (i <= 27)) {
1155
            /* first, look 28 bits upwards */
1156
69.0k
            bits = get_bit(g_scalar, i + 196) << 3;
1157
69.0k
            bits |= get_bit(g_scalar, i + 140) << 2;
1158
69.0k
            bits |= get_bit(g_scalar, i + 84) << 1;
1159
69.0k
            bits |= get_bit(g_scalar, i + 28);
1160
            /* select the point to add, in constant time */
1161
69.0k
            select_point(bits, 16, g_pre_comp[1], tmp);
1162
1163
69.0k
            if (!skip) {
1164
                /* value 1 below is argument for "mixed" */
1165
66.5k
                point_add(nq[0], nq[1], nq[2],
1166
66.5k
                    nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1167
66.5k
            } else {
1168
2.46k
                memcpy(nq, tmp, 3 * sizeof(felem));
1169
2.46k
                skip = 0;
1170
2.46k
            }
1171
1172
            /* second, look at the current position */
1173
69.0k
            bits = get_bit(g_scalar, i + 168) << 3;
1174
69.0k
            bits |= get_bit(g_scalar, i + 112) << 2;
1175
69.0k
            bits |= get_bit(g_scalar, i + 56) << 1;
1176
69.0k
            bits |= get_bit(g_scalar, i);
1177
            /* select the point to add, in constant time */
1178
69.0k
            select_point(bits, 16, g_pre_comp[0], tmp);
1179
69.0k
            point_add(nq[0], nq[1], nq[2],
1180
69.0k
                nq[0], nq[1], nq[2],
1181
69.0k
                1 /* mixed */, tmp[0], tmp[1], tmp[2]);
1182
69.0k
        }
1183
1184
        /* do other additions every 5 doublings */
1185
127k
        if (num_points && (i % 5 == 0)) {
1186
            /* loop over all scalars */
1187
23.7k
            for (num = 0; num < num_points; ++num) {
1188
11.8k
                bits = get_bit(scalars[num], i + 4) << 5;
1189
11.8k
                bits |= get_bit(scalars[num], i + 3) << 4;
1190
11.8k
                bits |= get_bit(scalars[num], i + 2) << 3;
1191
11.8k
                bits |= get_bit(scalars[num], i + 1) << 2;
1192
11.8k
                bits |= get_bit(scalars[num], i) << 1;
1193
11.8k
                bits |= get_bit(scalars[num], i - 1);
1194
11.8k
                ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1195
1196
                /* select the point to add or subtract */
1197
11.8k
                select_point(digit, 17, pre_comp[num], tmp);
1198
11.8k
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1199
                                            * point */
1200
11.8k
                copy_conditional(tmp[1], tmp[3], sign);
1201
1202
11.8k
                if (!skip) {
1203
11.6k
                    point_add(nq[0], nq[1], nq[2],
1204
11.6k
                        nq[0], nq[1], nq[2],
1205
11.6k
                        mixed, tmp[0], tmp[1], tmp[2]);
1206
11.6k
                } else {
1207
264
                    memcpy(nq, tmp, 3 * sizeof(felem));
1208
264
                    skip = 0;
1209
264
                }
1210
11.8k
            }
1211
11.8k
        }
1212
127k
    }
1213
2.73k
    felem_assign(x_out, nq[0]);
1214
2.73k
    felem_assign(y_out, nq[1]);
1215
2.73k
    felem_assign(z_out, nq[2]);
1216
2.73k
}
1217
1218
/******************************************************************************/
1219
/*
1220
 * FUNCTIONS TO MANAGE PRECOMPUTATION
1221
 */
1222
1223
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1224
0
{
1225
0
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1226
1227
0
    if (ret == NULL)
1228
0
        return ret;
1229
1230
0
    if (!CRYPTO_NEW_REF(&ret->references, 1)) {
1231
0
        OPENSSL_free(ret);
1232
0
        return NULL;
1233
0
    }
1234
0
    return ret;
1235
0
}
1236
1237
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1238
0
{
1239
0
    int i;
1240
0
    if (p != NULL)
1241
0
        CRYPTO_UP_REF(&p->references, &i);
1242
0
    return p;
1243
0
}
1244
1245
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1246
0
{
1247
0
    int i;
1248
1249
0
    if (p == NULL)
1250
0
        return;
1251
1252
0
    CRYPTO_DOWN_REF(&p->references, &i);
1253
0
    REF_PRINT_COUNT("EC_nistp224", i, p);
1254
0
    if (i > 0)
1255
0
        return;
1256
0
    REF_ASSERT_ISNT(i < 0);
1257
1258
0
    CRYPTO_FREE_REF(&p->references);
1259
0
    OPENSSL_free(p);
1260
0
}
1261
1262
/******************************************************************************/
1263
/*
1264
 * OPENSSL EC_METHOD FUNCTIONS
1265
 */
1266
1267
int ossl_ec_GFp_nistp224_group_init(EC_GROUP *group)
1268
110k
{
1269
110k
    int ret;
1270
110k
    ret = ossl_ec_GFp_simple_group_init(group);
1271
110k
    group->a_is_minus3 = 1;
1272
110k
    return ret;
1273
110k
}
1274
1275
int ossl_ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1276
    const BIGNUM *a, const BIGNUM *b,
1277
    BN_CTX *ctx)
1278
57.3k
{
1279
57.3k
    int ret = 0;
1280
57.3k
    BIGNUM *curve_p, *curve_a, *curve_b;
1281
57.3k
#ifndef FIPS_MODULE
1282
57.3k
    BN_CTX *new_ctx = NULL;
1283
1284
57.3k
    if (ctx == NULL)
1285
0
        ctx = new_ctx = BN_CTX_new();
1286
57.3k
#endif
1287
57.3k
    if (ctx == NULL)
1288
0
        return 0;
1289
1290
57.3k
    BN_CTX_start(ctx);
1291
57.3k
    curve_p = BN_CTX_get(ctx);
1292
57.3k
    curve_a = BN_CTX_get(ctx);
1293
57.3k
    curve_b = BN_CTX_get(ctx);
1294
57.3k
    if (curve_b == NULL)
1295
0
        goto err;
1296
57.3k
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1297
57.3k
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1298
57.3k
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1299
57.3k
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1300
0
        ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
1301
0
        goto err;
1302
0
    }
1303
57.3k
    group->field_mod_func = BN_nist_mod_224;
1304
57.3k
    ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1305
57.3k
err:
1306
57.3k
    BN_CTX_end(ctx);
1307
57.3k
#ifndef FIPS_MODULE
1308
57.3k
    BN_CTX_free(new_ctx);
1309
57.3k
#endif
1310
57.3k
    return ret;
1311
57.3k
}
1312
1313
/*
1314
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1315
 * (X/Z^2, Y/Z^3)
1316
 */
1317
int ossl_ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1318
    const EC_POINT *point,
1319
    BIGNUM *x, BIGNUM *y,
1320
    BN_CTX *ctx)
1321
4.88k
{
1322
4.88k
    felem z1, z2, x_in, y_in, x_out, y_out;
1323
4.88k
    widefelem tmp;
1324
1325
4.88k
    if (EC_POINT_is_at_infinity(group, point)) {
1326
0
        ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
1327
0
        return 0;
1328
0
    }
1329
4.88k
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) || (!BN_to_felem(z1, point->Z)))
1330
0
        return 0;
1331
4.88k
    felem_inv(z2, z1);
1332
4.88k
    felem_square(tmp, z2);
1333
4.88k
    felem_reduce(z1, tmp);
1334
4.88k
    felem_mul(tmp, x_in, z1);
1335
4.88k
    felem_reduce(x_in, tmp);
1336
4.88k
    felem_contract(x_out, x_in);
1337
4.88k
    if (x != NULL) {
1338
4.88k
        if (!felem_to_BN(x, x_out)) {
1339
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1340
0
            return 0;
1341
0
        }
1342
4.88k
    }
1343
4.88k
    felem_mul(tmp, z1, z2);
1344
4.88k
    felem_reduce(z1, tmp);
1345
4.88k
    felem_mul(tmp, y_in, z1);
1346
4.88k
    felem_reduce(y_in, tmp);
1347
4.88k
    felem_contract(y_out, y_in);
1348
4.88k
    if (y != NULL) {
1349
4.88k
        if (!felem_to_BN(y, y_out)) {
1350
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1351
0
            return 0;
1352
0
        }
1353
4.88k
    }
1354
4.88k
    return 1;
1355
4.88k
}
1356
1357
static void make_points_affine(size_t num, felem points[/* num */][3],
1358
    felem tmp_felems[/* num+1 */])
1359
0
{
1360
    /*
1361
     * Runs in constant time, unless an input is the point at infinity (which
1362
     * normally shouldn't happen).
1363
     */
1364
0
    ossl_ec_GFp_nistp_points_make_affine_internal(num,
1365
0
        points,
1366
0
        sizeof(felem),
1367
0
        tmp_felems,
1368
0
        (void (*)(void *))felem_one,
1369
0
        felem_is_zero_int,
1370
0
        (void (*)(void *, const void *))
1371
0
            felem_assign,
1372
0
        (void (*)(void *, const void *))
1373
0
            felem_square_reduce,
1374
0
        (void (*)(void *,
1375
0
            const void
1376
0
                *,
1377
0
            const void
1378
0
                *))
1379
0
            felem_mul_reduce,
1380
0
        (void (*)(void *, const void *))
1381
0
            felem_inv,
1382
0
        (void (*)(void *, const void *))
1383
0
            felem_contract);
1384
0
}
1385
1386
/*
1387
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1388
 * values Result is stored in r (r can equal one of the inputs).
1389
 */
1390
int ossl_ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1391
    const BIGNUM *scalar, size_t num,
1392
    const EC_POINT *points[],
1393
    const BIGNUM *scalars[], BN_CTX *ctx)
1394
2.73k
{
1395
2.73k
    int ret = 0;
1396
2.73k
    int j;
1397
2.73k
    unsigned i;
1398
2.73k
    int mixed = 0;
1399
2.73k
    BIGNUM *x, *y, *z, *tmp_scalar;
1400
2.73k
    felem_bytearray g_secret;
1401
2.73k
    felem_bytearray *secrets = NULL;
1402
2.73k
    felem(*pre_comp)[17][3] = NULL;
1403
2.73k
    felem *tmp_felems = NULL;
1404
2.73k
    int num_bytes;
1405
2.73k
    int have_pre_comp = 0;
1406
2.73k
    size_t num_points = num;
1407
2.73k
    felem x_in, y_in, z_in, x_out, y_out, z_out;
1408
2.73k
    NISTP224_PRE_COMP *pre = NULL;
1409
2.73k
    const felem(*g_pre_comp)[16][3] = NULL;
1410
2.73k
    EC_POINT *generator = NULL;
1411
2.73k
    const EC_POINT *p = NULL;
1412
2.73k
    const BIGNUM *p_scalar = NULL;
1413
1414
2.73k
    BN_CTX_start(ctx);
1415
2.73k
    x = BN_CTX_get(ctx);
1416
2.73k
    y = BN_CTX_get(ctx);
1417
2.73k
    z = BN_CTX_get(ctx);
1418
2.73k
    tmp_scalar = BN_CTX_get(ctx);
1419
2.73k
    if (tmp_scalar == NULL)
1420
0
        goto err;
1421
1422
2.73k
    if (scalar != NULL) {
1423
2.46k
        pre = group->pre_comp.nistp224;
1424
2.46k
        if (pre)
1425
            /* we have precomputation, try to use it */
1426
0
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1427
2.46k
        else
1428
            /* try to use the standard precomputation */
1429
2.46k
            g_pre_comp = &gmul[0];
1430
2.46k
        generator = EC_POINT_new(group);
1431
2.46k
        if (generator == NULL)
1432
0
            goto err;
1433
        /* get the generator from precomputation */
1434
2.46k
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) || !felem_to_BN(y, g_pre_comp[0][1][1]) || !felem_to_BN(z, g_pre_comp[0][1][2])) {
1435
0
            ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1436
0
            goto err;
1437
0
        }
1438
2.46k
        if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
1439
2.46k
                generator,
1440
2.46k
                x, y, z, ctx))
1441
0
            goto err;
1442
2.46k
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1443
            /* precomputation matches generator */
1444
2.46k
            have_pre_comp = 1;
1445
0
        else
1446
            /*
1447
             * we don't have valid precomputation: treat the generator as a
1448
             * random point
1449
             */
1450
0
            num_points = num_points + 1;
1451
2.46k
    }
1452
1453
2.73k
    if (num_points > 0) {
1454
264
        if (num_points >= 3) {
1455
            /*
1456
             * unless we precompute multiples for just one or two points,
1457
             * converting those into affine form is time well spent
1458
             */
1459
0
            mixed = 1;
1460
0
        }
1461
264
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1462
264
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1463
264
        if (mixed)
1464
0
            tmp_felems = OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1465
264
        if ((secrets == NULL) || (pre_comp == NULL)
1466
264
            || (mixed && (tmp_felems == NULL)))
1467
0
            goto err;
1468
1469
        /*
1470
         * we treat NULL scalars as 0, and NULL points as points at infinity,
1471
         * i.e., they contribute nothing to the linear combination
1472
         */
1473
528
        for (i = 0; i < num_points; ++i) {
1474
264
            if (i == num) {
1475
                /* the generator */
1476
0
                p = EC_GROUP_get0_generator(group);
1477
0
                p_scalar = scalar;
1478
264
            } else {
1479
                /* the i^th point */
1480
264
                p = points[i];
1481
264
                p_scalar = scalars[i];
1482
264
            }
1483
264
            if ((p_scalar != NULL) && (p != NULL)) {
1484
                /* reduce scalar to 0 <= scalar < 2^224 */
1485
264
                if ((BN_num_bits(p_scalar) > 224)
1486
264
                    || (BN_is_negative(p_scalar))) {
1487
                    /*
1488
                     * this is an unusual input, and we don't guarantee
1489
                     * constant-timeness
1490
                     */
1491
0
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1492
0
                        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1493
0
                        goto err;
1494
0
                    }
1495
0
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
1496
0
                        secrets[i], sizeof(secrets[i]));
1497
264
                } else {
1498
264
                    num_bytes = BN_bn2lebinpad(p_scalar,
1499
264
                        secrets[i], sizeof(secrets[i]));
1500
264
                }
1501
264
                if (num_bytes < 0) {
1502
0
                    ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1503
0
                    goto err;
1504
0
                }
1505
                /* precompute multiples */
1506
264
                if ((!BN_to_felem(x_out, p->X)) || (!BN_to_felem(y_out, p->Y)) || (!BN_to_felem(z_out, p->Z)))
1507
0
                    goto err;
1508
264
                felem_assign(pre_comp[i][1][0], x_out);
1509
264
                felem_assign(pre_comp[i][1][1], y_out);
1510
264
                felem_assign(pre_comp[i][1][2], z_out);
1511
4.22k
                for (j = 2; j <= 16; ++j) {
1512
3.96k
                    if (j & 1) {
1513
1.84k
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1514
1.84k
                            pre_comp[i][j][2], pre_comp[i][1][0],
1515
1.84k
                            pre_comp[i][1][1], pre_comp[i][1][2], 0,
1516
1.84k
                            pre_comp[i][j - 1][0],
1517
1.84k
                            pre_comp[i][j - 1][1],
1518
1.84k
                            pre_comp[i][j - 1][2]);
1519
2.11k
                    } else {
1520
2.11k
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1521
2.11k
                            pre_comp[i][j][2], pre_comp[i][j / 2][0],
1522
2.11k
                            pre_comp[i][j / 2][1],
1523
2.11k
                            pre_comp[i][j / 2][2]);
1524
2.11k
                    }
1525
3.96k
                }
1526
264
            }
1527
264
        }
1528
264
        if (mixed)
1529
0
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1530
264
    }
1531
1532
    /* the scalar for the generator */
1533
2.73k
    if ((scalar != NULL) && (have_pre_comp)) {
1534
2.46k
        memset(g_secret, 0, sizeof(g_secret));
1535
        /* reduce scalar to 0 <= scalar < 2^224 */
1536
2.46k
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1537
            /*
1538
             * this is an unusual input, and we don't guarantee
1539
             * constant-timeness
1540
             */
1541
552
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1542
0
                ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1543
0
                goto err;
1544
0
            }
1545
552
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1546
1.91k
        } else {
1547
1.91k
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1548
1.91k
        }
1549
        /* do the multiplication with generator precomputation */
1550
2.46k
        batch_mul(x_out, y_out, z_out,
1551
2.46k
            (const felem_bytearray(*))secrets, num_points,
1552
2.46k
            g_secret,
1553
2.46k
            mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1554
2.46k
    } else {
1555
        /* do the multiplication without generator precomputation */
1556
264
        batch_mul(x_out, y_out, z_out,
1557
264
            (const felem_bytearray(*))secrets, num_points,
1558
264
            NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1559
264
    }
1560
    /* reduce the output to its unique minimal representation */
1561
2.73k
    felem_contract(x_in, x_out);
1562
2.73k
    felem_contract(y_in, y_out);
1563
2.73k
    felem_contract(z_in, z_out);
1564
2.73k
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || (!felem_to_BN(z, z_in))) {
1565
0
        ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
1566
0
        goto err;
1567
0
    }
1568
2.73k
    ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
1569
2.73k
        ctx);
1570
1571
2.73k
err:
1572
2.73k
    BN_CTX_end(ctx);
1573
2.73k
    EC_POINT_free(generator);
1574
2.73k
    OPENSSL_free(secrets);
1575
2.73k
    OPENSSL_free(pre_comp);
1576
2.73k
    OPENSSL_free(tmp_felems);
1577
2.73k
    return ret;
1578
2.73k
}
1579
1580
int ossl_ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1581
0
{
1582
0
    int ret = 0;
1583
0
    NISTP224_PRE_COMP *pre = NULL;
1584
0
    int i, j;
1585
0
    BIGNUM *x, *y;
1586
0
    EC_POINT *generator = NULL;
1587
0
    felem tmp_felems[32];
1588
0
#ifndef FIPS_MODULE
1589
0
    BN_CTX *new_ctx = NULL;
1590
0
#endif
1591
1592
    /* throw away old precomputation */
1593
0
    EC_pre_comp_free(group);
1594
1595
0
#ifndef FIPS_MODULE
1596
0
    if (ctx == NULL)
1597
0
        ctx = new_ctx = BN_CTX_new();
1598
0
#endif
1599
0
    if (ctx == NULL)
1600
0
        return 0;
1601
1602
0
    BN_CTX_start(ctx);
1603
0
    x = BN_CTX_get(ctx);
1604
0
    y = BN_CTX_get(ctx);
1605
0
    if (y == NULL)
1606
0
        goto err;
1607
    /* get the generator */
1608
0
    if (group->generator == NULL)
1609
0
        goto err;
1610
0
    generator = EC_POINT_new(group);
1611
0
    if (generator == NULL)
1612
0
        goto err;
1613
0
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1614
0
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1615
0
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1616
0
        goto err;
1617
0
    if ((pre = nistp224_pre_comp_new()) == NULL)
1618
0
        goto err;
1619
    /*
1620
     * if the generator is the standard one, use built-in precomputation
1621
     */
1622
0
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1623
0
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1624
0
        goto done;
1625
0
    }
1626
0
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) || (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) || (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1627
0
        goto err;
1628
    /*
1629
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1630
     * 2^140*G, 2^196*G for the second one
1631
     */
1632
0
    for (i = 1; i <= 8; i <<= 1) {
1633
0
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1634
0
            pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1635
0
            pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1636
0
        for (j = 0; j < 27; ++j) {
1637
0
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1638
0
                pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1639
0
                pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1640
0
        }
1641
0
        if (i == 8)
1642
0
            break;
1643
0
        point_double(pre->g_pre_comp[0][2 * i][0],
1644
0
            pre->g_pre_comp[0][2 * i][1],
1645
0
            pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1646
0
            pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1647
0
        for (j = 0; j < 27; ++j) {
1648
0
            point_double(pre->g_pre_comp[0][2 * i][0],
1649
0
                pre->g_pre_comp[0][2 * i][1],
1650
0
                pre->g_pre_comp[0][2 * i][2],
1651
0
                pre->g_pre_comp[0][2 * i][0],
1652
0
                pre->g_pre_comp[0][2 * i][1],
1653
0
                pre->g_pre_comp[0][2 * i][2]);
1654
0
        }
1655
0
    }
1656
0
    for (i = 0; i < 2; i++) {
1657
        /* g_pre_comp[i][0] is the point at infinity */
1658
0
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1659
        /* the remaining multiples */
1660
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1661
0
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1662
0
            pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1663
0
            pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1664
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1665
0
            pre->g_pre_comp[i][2][2]);
1666
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1667
0
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1668
0
            pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1669
0
            pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1670
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1671
0
            pre->g_pre_comp[i][2][2]);
1672
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1673
0
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1674
0
            pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1675
0
            pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1676
0
            0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1677
0
            pre->g_pre_comp[i][4][2]);
1678
        /*
1679
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1680
         */
1681
0
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1682
0
            pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1683
0
            pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1684
0
            0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1685
0
            pre->g_pre_comp[i][2][2]);
1686
0
        for (j = 1; j < 8; ++j) {
1687
            /* odd multiples: add G resp. 2^28*G */
1688
0
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
1689
0
                pre->g_pre_comp[i][2 * j + 1][1],
1690
0
                pre->g_pre_comp[i][2 * j + 1][2],
1691
0
                pre->g_pre_comp[i][2 * j][0],
1692
0
                pre->g_pre_comp[i][2 * j][1],
1693
0
                pre->g_pre_comp[i][2 * j][2], 0,
1694
0
                pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1695
0
                pre->g_pre_comp[i][1][2]);
1696
0
        }
1697
0
    }
1698
0
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1699
1700
0
done:
1701
0
    SETPRECOMP(group, nistp224, pre);
1702
0
    pre = NULL;
1703
0
    ret = 1;
1704
0
err:
1705
0
    BN_CTX_end(ctx);
1706
0
    EC_POINT_free(generator);
1707
0
#ifndef FIPS_MODULE
1708
0
    BN_CTX_free(new_ctx);
1709
0
#endif
1710
0
    EC_nistp224_pre_comp_free(pre);
1711
0
    return ret;
1712
0
}
1713
1714
int ossl_ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1715
0
{
1716
    return HAVEPRECOMP(group, nistp224);
1717
0
}