Coverage Report

Created: 2024-11-21 07:03

/src/boringssl/crypto/fipsmodule/ec/p256.c.inc
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2020, Google Inc.
2
 *
3
 * Permission to use, copy, modify, and/or distribute this software for any
4
 * purpose with or without fee is hereby granted, provided that the above
5
 * copyright notice and this permission notice appear in all copies.
6
 *
7
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15
// An implementation of the NIST P-256 elliptic curve point multiplication.
16
// 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by
17
// Fiat, which lives in //third_party/fiat.
18
19
#include <openssl/base.h>
20
21
#include <openssl/bn.h>
22
#include <openssl/ec.h>
23
#include <openssl/err.h>
24
#include <openssl/mem.h>
25
26
#include <assert.h>
27
#include <string.h>
28
29
#include "../../internal.h"
30
#include "../delocate.h"
31
#include "./internal.h"
32
33
#if defined(BORINGSSL_HAS_UINT128)
34
#include "../../../third_party/fiat/p256_64.h"
35
#elif defined(OPENSSL_64_BIT)
36
#include "../../../third_party/fiat/p256_64_msvc.h"
37
#else
38
#include "../../../third_party/fiat/p256_32.h"
39
#endif
40
41
42
// utility functions, handwritten
43
44
#if defined(OPENSSL_64_BIT)
45
7.24k
#define FIAT_P256_NLIMBS 4
46
typedef uint64_t fiat_p256_limb_t;
47
typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS];
48
static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000,
49
                                              0xffffffffffffffff, 0xfffffffe};
50
#else  // 64BIT; else 32BIT
51
#define FIAT_P256_NLIMBS 8
52
typedef uint32_t fiat_p256_limb_t;
53
typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS];
54
static const fiat_p256_felem fiat_p256_one = {
55
    0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0};
56
#endif  // 64BIT
57
58
59
static fiat_p256_limb_t fiat_p256_nz(
60
5.10k
    const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
61
5.10k
  fiat_p256_limb_t ret;
62
5.10k
  fiat_p256_nonzero(&ret, in1);
63
5.10k
  return ret;
64
5.10k
}
65
66
static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
67
1.44k
                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
68
7.24k
  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
69
5.79k
    out[i] = in1[i];
70
5.79k
  }
71
1.44k
}
72
73
static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
74
                              fiat_p256_limb_t t,
75
                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
76
56.5k
                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
77
56.5k
  fiat_p256_selectznz(out, !!t, z, nz);
78
56.5k
}
79
80
static void fiat_p256_from_words(fiat_p256_felem out,
81
115
                                 const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
82
  // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
83
  // 64-bit platforms without |uint128_t|, they are different. However, on
84
  // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same
85
  // layout.
86
115
  OPENSSL_memcpy(out, in, 32);
87
115
}
88
89
115
static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
90
115
  fiat_p256_from_words(out, in->words);
91
115
}
92
93
116
static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
94
  // See |fiat_p256_from_words|.
95
116
  OPENSSL_memcpy(out->words, in, 32);
96
116
}
97
98
// fiat_p256_inv_square calculates |out| = |in|^{-2}
99
//
100
// Based on Fermat's Little Theorem:
101
//   a^p = a (mod p)
102
//   a^{p-1} = 1 (mod p)
103
//   a^{p-3} = a^{-2} (mod p)
104
static void fiat_p256_inv_square(fiat_p256_felem out,
105
23
                                 const fiat_p256_felem in) {
106
  // This implements the addition chain described in
107
  // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
108
23
  fiat_p256_felem x2, x3, x6, x12, x15, x30, x32;
109
23
  fiat_p256_square(x2, in);   // 2^2 - 2^1
110
23
  fiat_p256_mul(x2, x2, in);  // 2^2 - 2^0
111
112
23
  fiat_p256_square(x3, x2);   // 2^3 - 2^1
113
23
  fiat_p256_mul(x3, x3, in);  // 2^3 - 2^0
114
115
23
  fiat_p256_square(x6, x3);
116
69
  for (int i = 1; i < 3; i++) {
117
46
    fiat_p256_square(x6, x6);
118
46
  }                           // 2^6 - 2^3
119
23
  fiat_p256_mul(x6, x6, x3);  // 2^6 - 2^0
120
121
23
  fiat_p256_square(x12, x6);
122
138
  for (int i = 1; i < 6; i++) {
123
115
    fiat_p256_square(x12, x12);
124
115
  }                             // 2^12 - 2^6
125
23
  fiat_p256_mul(x12, x12, x6);  // 2^12 - 2^0
126
127
23
  fiat_p256_square(x15, x12);
128
69
  for (int i = 1; i < 3; i++) {
129
46
    fiat_p256_square(x15, x15);
130
46
  }                             // 2^15 - 2^3
131
23
  fiat_p256_mul(x15, x15, x3);  // 2^15 - 2^0
132
133
23
  fiat_p256_square(x30, x15);
134
345
  for (int i = 1; i < 15; i++) {
135
322
    fiat_p256_square(x30, x30);
136
322
  }                              // 2^30 - 2^15
137
23
  fiat_p256_mul(x30, x30, x15);  // 2^30 - 2^0
138
139
23
  fiat_p256_square(x32, x30);
140
23
  fiat_p256_square(x32, x32);   // 2^32 - 2^2
141
23
  fiat_p256_mul(x32, x32, x2);  // 2^32 - 2^0
142
143
23
  fiat_p256_felem ret;
144
23
  fiat_p256_square(ret, x32);
145
736
  for (int i = 1; i < 31 + 1; i++) {
146
713
    fiat_p256_square(ret, ret);
147
713
  }                             // 2^64 - 2^32
148
23
  fiat_p256_mul(ret, ret, in);  // 2^64 - 2^32 + 2^0
149
150
2.96k
  for (int i = 0; i < 96 + 32; i++) {
151
2.94k
    fiat_p256_square(ret, ret);
152
2.94k
  }                              // 2^192 - 2^160 + 2^128
153
23
  fiat_p256_mul(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
154
155
759
  for (int i = 0; i < 32; i++) {
156
736
    fiat_p256_square(ret, ret);
157
736
  }                              // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
158
23
  fiat_p256_mul(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
159
160
713
  for (int i = 0; i < 30; i++) {
161
690
    fiat_p256_square(ret, ret);
162
690
  }                              // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
163
23
  fiat_p256_mul(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
164
165
23
  fiat_p256_square(ret, ret);
166
23
  fiat_p256_square(out, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
167
23
}
168
169
// Group operations
170
// ----------------
171
//
172
// Building on top of the field operations we have the operations on the
173
// elliptic curve group itself. Points on the curve are represented in Jacobian
174
// coordinates.
175
//
176
// Both operations were transcribed to Coq and proven to correspond to naive
177
// implementations using Affine coordinates, for all suitable fields.  In the
178
// Coq proofs, issues of constant-time execution and memory layout (aliasing)
179
// conventions were not considered. Specification of affine coordinates:
180
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28>
181
// As a sanity check, a proof that these points form a commutative group:
182
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33>
183
184
// fiat_p256_point_double calculates 2*(x_in, y_in, z_in)
185
//
186
// The method is taken from:
187
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
188
//
189
// Coq transcription and correctness proof:
190
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
191
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
192
//
193
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
194
// while x_out == y_in is not (maybe this works, but it's not tested).
195
static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
196
                                   fiat_p256_felem z_out,
197
                                   const fiat_p256_felem x_in,
198
                                   const fiat_p256_felem y_in,
199
2.97k
                                   const fiat_p256_felem z_in) {
200
2.97k
  fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
201
  // delta = z^2
202
2.97k
  fiat_p256_square(delta, z_in);
203
  // gamma = y^2
204
2.97k
  fiat_p256_square(gamma, y_in);
205
  // beta = x*gamma
206
2.97k
  fiat_p256_mul(beta, x_in, gamma);
207
208
  // alpha = 3*(x-delta)*(x+delta)
209
2.97k
  fiat_p256_sub(ftmp, x_in, delta);
210
2.97k
  fiat_p256_add(ftmp2, x_in, delta);
211
212
2.97k
  fiat_p256_add(tmptmp, ftmp2, ftmp2);
213
2.97k
  fiat_p256_add(ftmp2, ftmp2, tmptmp);
214
2.97k
  fiat_p256_mul(alpha, ftmp, ftmp2);
215
216
  // x' = alpha^2 - 8*beta
217
2.97k
  fiat_p256_square(x_out, alpha);
218
2.97k
  fiat_p256_add(fourbeta, beta, beta);
219
2.97k
  fiat_p256_add(fourbeta, fourbeta, fourbeta);
220
2.97k
  fiat_p256_add(tmptmp, fourbeta, fourbeta);
221
2.97k
  fiat_p256_sub(x_out, x_out, tmptmp);
222
223
  // z' = (y + z)^2 - gamma - delta
224
2.97k
  fiat_p256_add(delta, gamma, delta);
225
2.97k
  fiat_p256_add(ftmp, y_in, z_in);
226
2.97k
  fiat_p256_square(z_out, ftmp);
227
2.97k
  fiat_p256_sub(z_out, z_out, delta);
228
229
  // y' = alpha*(4*beta - x') - 8*gamma^2
230
2.97k
  fiat_p256_sub(y_out, fourbeta, x_out);
231
2.97k
  fiat_p256_add(gamma, gamma, gamma);
232
2.97k
  fiat_p256_square(gamma, gamma);
233
2.97k
  fiat_p256_mul(y_out, alpha, y_out);
234
2.97k
  fiat_p256_add(gamma, gamma, gamma);
235
2.97k
  fiat_p256_sub(y_out, y_out, gamma);
236
2.97k
}
237
238
// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2)
239
//
240
// The method is taken from:
241
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
242
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
243
//
244
// Coq transcription and correctness proof:
245
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
246
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
247
//
248
// This function includes a branch for checking whether the two input points
249
// are equal, (while not equal to the point at infinity). This case never
250
// happens during single point multiplication, so there is no timing leak for
251
// ECDH or ECDSA signing.
252
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
253
                                fiat_p256_felem z3, const fiat_p256_felem x1,
254
                                const fiat_p256_felem y1,
255
                                const fiat_p256_felem z1, const int mixed,
256
                                const fiat_p256_felem x2,
257
                                const fiat_p256_felem y2,
258
1.27k
                                const fiat_p256_felem z2) {
259
1.27k
  fiat_p256_felem x_out, y_out, z_out;
260
1.27k
  fiat_p256_limb_t z1nz = fiat_p256_nz(z1);
261
1.27k
  fiat_p256_limb_t z2nz = fiat_p256_nz(z2);
262
263
  // z1z1 = z1z1 = z1**2
264
1.27k
  fiat_p256_felem z1z1;
265
1.27k
  fiat_p256_square(z1z1, z1);
266
267
1.27k
  fiat_p256_felem u1, s1, two_z1z2;
268
1.27k
  if (!mixed) {
269
    // z2z2 = z2**2
270
583
    fiat_p256_felem z2z2;
271
583
    fiat_p256_square(z2z2, z2);
272
273
    // u1 = x1*z2z2
274
583
    fiat_p256_mul(u1, x1, z2z2);
275
276
    // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
277
583
    fiat_p256_add(two_z1z2, z1, z2);
278
583
    fiat_p256_square(two_z1z2, two_z1z2);
279
583
    fiat_p256_sub(two_z1z2, two_z1z2, z1z1);
280
583
    fiat_p256_sub(two_z1z2, two_z1z2, z2z2);
281
282
    // s1 = y1 * z2**3
283
583
    fiat_p256_mul(s1, z2, z2z2);
284
583
    fiat_p256_mul(s1, s1, y1);
285
693
  } else {
286
    // We'll assume z2 = 1 (special case z2 = 0 is handled later).
287
288
    // u1 = x1*z2z2
289
693
    fiat_p256_copy(u1, x1);
290
    // two_z1z2 = 2z1z2
291
693
    fiat_p256_add(two_z1z2, z1, z1);
292
    // s1 = y1 * z2**3
293
693
    fiat_p256_copy(s1, y1);
294
693
  }
295
296
  // u2 = x2*z1z1
297
1.27k
  fiat_p256_felem u2;
298
1.27k
  fiat_p256_mul(u2, x2, z1z1);
299
300
  // h = u2 - u1
301
1.27k
  fiat_p256_felem h;
302
1.27k
  fiat_p256_sub(h, u2, u1);
303
304
1.27k
  fiat_p256_limb_t xneq = fiat_p256_nz(h);
305
306
  // z_out = two_z1z2 * h
307
1.27k
  fiat_p256_mul(z_out, h, two_z1z2);
308
309
  // z1z1z1 = z1 * z1z1
310
1.27k
  fiat_p256_felem z1z1z1;
311
1.27k
  fiat_p256_mul(z1z1z1, z1, z1z1);
312
313
  // s2 = y2 * z1**3
314
1.27k
  fiat_p256_felem s2;
315
1.27k
  fiat_p256_mul(s2, y2, z1z1z1);
316
317
  // r = (s2 - s1)*2
318
1.27k
  fiat_p256_felem r;
319
1.27k
  fiat_p256_sub(r, s2, s1);
320
1.27k
  fiat_p256_add(r, r, r);
321
322
1.27k
  fiat_p256_limb_t yneq = fiat_p256_nz(r);
323
324
1.27k
  fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
325
1.27k
                                          ~constant_time_is_zero_w(z1nz) &
326
1.27k
                                          ~constant_time_is_zero_w(z2nz);
327
1.27k
  if (constant_time_declassify_w(is_nontrivial_double)) {
328
0
    fiat_p256_point_double(x3, y3, z3, x1, y1, z1);
329
0
    return;
330
0
  }
331
332
  // I = (2h)**2
333
1.27k
  fiat_p256_felem i;
334
1.27k
  fiat_p256_add(i, h, h);
335
1.27k
  fiat_p256_square(i, i);
336
337
  // J = h * I
338
1.27k
  fiat_p256_felem j;
339
1.27k
  fiat_p256_mul(j, h, i);
340
341
  // V = U1 * I
342
1.27k
  fiat_p256_felem v;
343
1.27k
  fiat_p256_mul(v, u1, i);
344
345
  // x_out = r**2 - J - 2V
346
1.27k
  fiat_p256_square(x_out, r);
347
1.27k
  fiat_p256_sub(x_out, x_out, j);
348
1.27k
  fiat_p256_sub(x_out, x_out, v);
349
1.27k
  fiat_p256_sub(x_out, x_out, v);
350
351
  // y_out = r(V-x_out) - 2 * s1 * J
352
1.27k
  fiat_p256_sub(y_out, v, x_out);
353
1.27k
  fiat_p256_mul(y_out, y_out, r);
354
1.27k
  fiat_p256_felem s1j;
355
1.27k
  fiat_p256_mul(s1j, s1, j);
356
1.27k
  fiat_p256_sub(y_out, y_out, s1j);
357
1.27k
  fiat_p256_sub(y_out, y_out, s1j);
358
359
1.27k
  fiat_p256_cmovznz(x_out, z1nz, x2, x_out);
360
1.27k
  fiat_p256_cmovznz(x3, z2nz, x1, x_out);
361
1.27k
  fiat_p256_cmovznz(y_out, z1nz, y2, y_out);
362
1.27k
  fiat_p256_cmovznz(y3, z2nz, y1, y_out);
363
1.27k
  fiat_p256_cmovznz(z_out, z1nz, z2, z_out);
364
1.27k
  fiat_p256_cmovznz(z3, z2nz, z1, z_out);
365
1.27k
}
366
367
#include "./p256_table.h"
368
369
// fiat_p256_select_point_affine selects the |idx-1|th point from a
370
// precomputation table and copies it to out. If |idx| is zero, the output is
371
// the point at infinity.
372
static void fiat_p256_select_point_affine(
373
    const fiat_p256_limb_t idx, size_t size,
374
704
    const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
375
704
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
376
11.2k
  for (size_t i = 0; i < size; i++) {
377
10.5k
    fiat_p256_limb_t mismatch = i ^ (idx - 1);
378
10.5k
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
379
10.5k
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
380
10.5k
  }
381
704
  fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
382
704
}
383
384
// fiat_p256_select_point selects the |idx|th point from a precomputation table
385
// and copies it to out.
386
static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
387
                                   const fiat_p256_felem pre_comp[/*size*/][3],
388
520
                                   fiat_p256_felem out[3]) {
389
520
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
390
9.36k
  for (size_t i = 0; i < size; i++) {
391
8.84k
    fiat_p256_limb_t mismatch = i ^ idx;
392
8.84k
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
393
8.84k
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
394
8.84k
    fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]);
395
8.84k
  }
396
520
}
397
398
// fiat_p256_get_bit returns the |i|th bit in |in|.
399
5.93k
static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
400
5.93k
  if (i < 0 || i >= 256) {
401
50
    return 0;
402
50
  }
403
5.88k
#if defined(OPENSSL_64_BIT)
404
5.88k
  static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit");
405
5.88k
  return (in->words[i >> 6] >> (i & 63)) & 1;
406
#else
407
  static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit");
408
  return (in->words[i >> 5] >> (i & 31)) & 1;
409
#endif
410
5.93k
}
411
412
// OPENSSL EC_METHOD FUNCTIONS
413
414
// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
415
// (X/Z^2, Y/Z^3).
416
static int ec_GFp_nistp256_point_get_affine_coordinates(
417
    const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out,
418
23
    EC_FELEM *y_out) {
419
23
  if (constant_time_declassify_int(
420
23
          ec_GFp_simple_is_at_infinity(group, point))) {
421
0
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
422
0
    return 0;
423
0
  }
424
425
23
  fiat_p256_felem z1, z2;
426
23
  fiat_p256_from_generic(z1, &point->Z);
427
23
  fiat_p256_inv_square(z2, z1);
428
429
23
  if (x_out != NULL) {
430
23
    fiat_p256_felem x;
431
23
    fiat_p256_from_generic(x, &point->X);
432
23
    fiat_p256_mul(x, x, z2);
433
23
    fiat_p256_to_generic(x_out, x);
434
23
  }
435
436
23
  if (y_out != NULL) {
437
21
    fiat_p256_felem y;
438
21
    fiat_p256_from_generic(y, &point->Y);
439
21
    fiat_p256_square(z2, z2);  // z^-4
440
21
    fiat_p256_mul(y, y, z1);   // y * z
441
21
    fiat_p256_mul(y, y, z2);   // y * z^-3
442
21
    fiat_p256_to_generic(y_out, y);
443
21
  }
444
445
23
  return 1;
446
23
}
447
448
static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r,
449
3
                                const EC_JACOBIAN *a, const EC_JACOBIAN *b) {
450
3
  fiat_p256_felem x1, y1, z1, x2, y2, z2;
451
3
  fiat_p256_from_generic(x1, &a->X);
452
3
  fiat_p256_from_generic(y1, &a->Y);
453
3
  fiat_p256_from_generic(z1, &a->Z);
454
3
  fiat_p256_from_generic(x2, &b->X);
455
3
  fiat_p256_from_generic(y2, &b->Y);
456
3
  fiat_p256_from_generic(z2, &b->Z);
457
3
  fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2,
458
3
                      z2);
459
3
  fiat_p256_to_generic(&r->X, x1);
460
3
  fiat_p256_to_generic(&r->Y, y1);
461
3
  fiat_p256_to_generic(&r->Z, z1);
462
3
}
463
464
static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
465
0
                                const EC_JACOBIAN *a) {
466
0
  fiat_p256_felem x, y, z;
467
0
  fiat_p256_from_generic(x, &a->X);
468
0
  fiat_p256_from_generic(y, &a->Y);
469
0
  fiat_p256_from_generic(z, &a->Z);
470
0
  fiat_p256_point_double(x, y, z, x, y, z);
471
0
  fiat_p256_to_generic(&r->X, x);
472
0
  fiat_p256_to_generic(&r->Y, y);
473
0
  fiat_p256_to_generic(&r->Z, z);
474
0
}
475
476
static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
477
                                      const EC_JACOBIAN *p,
478
10
                                      const EC_SCALAR *scalar) {
479
10
  fiat_p256_felem p_pre_comp[17][3];
480
10
  OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
481
  // Precompute multiples.
482
10
  fiat_p256_from_generic(p_pre_comp[1][0], &p->X);
483
10
  fiat_p256_from_generic(p_pre_comp[1][1], &p->Y);
484
10
  fiat_p256_from_generic(p_pre_comp[1][2], &p->Z);
485
160
  for (size_t j = 2; j <= 16; ++j) {
486
150
    if (j & 1) {
487
70
      fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
488
70
                          p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
489
70
                          0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
490
70
                          p_pre_comp[j - 1][2]);
491
80
    } else {
492
80
      fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
493
80
                             p_pre_comp[j][2], p_pre_comp[j / 2][0],
494
80
                             p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
495
80
    }
496
150
  }
497
498
  // Set nq to the point at infinity.
499
10
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3];
500
501
  // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round.
502
10
  int skip = 1;  // Save two point operations in the first round.
503
2.57k
  for (size_t i = 255; i < 256; i--) {
504
    // double
505
2.56k
    if (!skip) {
506
2.55k
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
507
2.55k
    }
508
509
    // do other additions every 5 doublings
510
2.56k
    if (i % 5 == 0) {
511
520
      crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5;
512
520
      bits |= fiat_p256_get_bit(scalar, i + 3) << 4;
513
520
      bits |= fiat_p256_get_bit(scalar, i + 2) << 3;
514
520
      bits |= fiat_p256_get_bit(scalar, i + 1) << 2;
515
520
      bits |= fiat_p256_get_bit(scalar, i) << 1;
516
520
      bits |= fiat_p256_get_bit(scalar, i - 1);
517
520
      crypto_word_t sign, digit;
518
520
      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
519
520
      // select the point to add or subtract, in constant time.
521
520
      fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
522
520
                             (const fiat_p256_felem(*)[3])p_pre_comp, tmp);
523
520
      fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
524
520
      fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp);
525
526
520
      if (!skip) {
527
510
        fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
528
510
                            0 /* mixed */, tmp[0], tmp[1], tmp[2]);
529
510
      } else {
530
10
        fiat_p256_copy(nq[0], tmp[0]);
531
10
        fiat_p256_copy(nq[1], tmp[1]);
532
10
        fiat_p256_copy(nq[2], tmp[2]);
533
10
        skip = 0;
534
10
      }
535
520
    }
536
2.56k
  }
537
538
10
  fiat_p256_to_generic(&r->X, nq[0]);
539
10
  fiat_p256_to_generic(&r->Y, nq[1]);
540
10
  fiat_p256_to_generic(&r->Z, nq[2]);
541
10
}
542
543
static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
544
                                           EC_JACOBIAN *r,
545
11
                                           const EC_SCALAR *scalar) {
546
  // Set nq to the point at infinity.
547
11
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3];
548
549
11
  int skip = 1;  // Save two point operations in the first round.
550
363
  for (size_t i = 31; i < 32; i--) {
551
352
    if (!skip) {
552
341
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
553
341
    }
554
555
    // First, look 32 bits upwards.
556
352
    crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
557
352
    bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
558
352
    bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
559
352
    bits |= fiat_p256_get_bit(scalar, i + 32);
560
    // Select the point to add, in constant time.
561
352
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
562
352
                                  fiat_p256_g_pre_comp[1], tmp);
563
564
352
    if (!skip) {
565
341
      fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
566
341
                          1 /* mixed */, tmp[0], tmp[1], tmp[2]);
567
341
    } else {
568
11
      fiat_p256_copy(nq[0], tmp[0]);
569
11
      fiat_p256_copy(nq[1], tmp[1]);
570
11
      fiat_p256_copy(nq[2], tmp[2]);
571
11
      skip = 0;
572
11
    }
573
574
    // Second, look at the current position.
575
352
    bits = fiat_p256_get_bit(scalar, i + 192) << 3;
576
352
    bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
577
352
    bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
578
352
    bits |= fiat_p256_get_bit(scalar, i);
579
    // Select the point to add, in constant time.
580
352
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
581
352
                                  fiat_p256_g_pre_comp[0], tmp);
582
352
    fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
583
352
                        tmp[0], tmp[1], tmp[2]);
584
352
  }
585
586
11
  fiat_p256_to_generic(&r->X, nq[0]);
587
11
  fiat_p256_to_generic(&r->Y, nq[1]);
588
11
  fiat_p256_to_generic(&r->Z, nq[2]);
589
11
}
590
591
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
592
                                             EC_JACOBIAN *r,
593
                                             const EC_SCALAR *g_scalar,
594
                                             const EC_JACOBIAN *p,
595
0
                                             const EC_SCALAR *p_scalar) {
596
0
#define P256_WSIZE_PUBLIC 4
597
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
598
0
  fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3];
599
0
  fiat_p256_from_generic(p_pre_comp[0][0], &p->X);
600
0
  fiat_p256_from_generic(p_pre_comp[0][1], &p->Y);
601
0
  fiat_p256_from_generic(p_pre_comp[0][2], &p->Z);
602
0
  fiat_p256_felem p2[3];
603
0
  fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0],
604
0
                         p_pre_comp[0][1], p_pre_comp[0][2]);
605
0
  for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
606
0
    fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
607
0
                        p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
608
0
                        p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1],
609
0
                        p2[2]);
610
0
  }
611
612
  // Set up the coefficients for |p_scalar|.
613
0
  int8_t p_wNAF[257];
614
0
  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
615
616
  // Set |ret| to the point at infinity.
617
0
  int skip = 1;  // Save some point operations.
618
0
  fiat_p256_felem ret[3] = {{0}, {0}, {0}};
619
0
  for (int i = 256; i >= 0; i--) {
620
0
    if (!skip) {
621
0
      fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
622
0
    }
623
624
    // For the |g_scalar|, we use the precomputed table without the
625
    // constant-time lookup.
626
0
    if (i <= 31) {
627
      // First, look 32 bits upwards.
628
0
      crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
629
0
      bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
630
0
      bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
631
0
      bits |= fiat_p256_get_bit(g_scalar, i + 32);
632
0
      if (bits != 0) {
633
0
        size_t index = (size_t)(bits - 1);
634
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
635
0
                            1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
636
0
                            fiat_p256_g_pre_comp[1][index][1],
637
0
                            fiat_p256_one);
638
0
        skip = 0;
639
0
      }
640
641
      // Second, look at the current position.
642
0
      bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
643
0
      bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
644
0
      bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
645
0
      bits |= fiat_p256_get_bit(g_scalar, i);
646
0
      if (bits != 0) {
647
0
        size_t index = (size_t)(bits - 1);
648
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
649
0
                            1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
650
0
                            fiat_p256_g_pre_comp[0][index][1],
651
0
                            fiat_p256_one);
652
0
        skip = 0;
653
0
      }
654
0
    }
655
656
0
    int digit = p_wNAF[i];
657
0
    if (digit != 0) {
658
0
      assert(digit & 1);
659
0
      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
660
0
      fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
661
0
      if (digit < 0) {
662
0
        fiat_p256_opp(tmp, p_pre_comp[idx][1]);
663
0
        y = &tmp;
664
0
      }
665
0
      if (!skip) {
666
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
667
0
                            0 /* not mixed */, p_pre_comp[idx][0], *y,
668
0
                            p_pre_comp[idx][2]);
669
0
      } else {
670
0
        fiat_p256_copy(ret[0], p_pre_comp[idx][0]);
671
0
        fiat_p256_copy(ret[1], *y);
672
0
        fiat_p256_copy(ret[2], p_pre_comp[idx][2]);
673
0
        skip = 0;
674
0
      }
675
0
    }
676
0
  }
677
678
0
  fiat_p256_to_generic(&r->X, ret[0]);
679
0
  fiat_p256_to_generic(&r->Y, ret[1]);
680
0
  fiat_p256_to_generic(&r->Z, ret[2]);
681
0
}
682
683
static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
684
                                            const EC_JACOBIAN *p,
685
0
                                            const EC_SCALAR *r) {
686
0
  if (ec_GFp_simple_is_at_infinity(group, p)) {
687
0
    return 0;
688
0
  }
689
690
  // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
691
  // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
692
  // not.
693
0
  fiat_p256_felem Z2_mont;
694
0
  fiat_p256_from_generic(Z2_mont, &p->Z);
695
0
  fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);
696
697
0
  fiat_p256_felem r_Z2;
698
0
  fiat_p256_from_words(r_Z2, r->words);  // r < order < p, so this is valid.
699
0
  fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
700
701
0
  fiat_p256_felem X;
702
0
  fiat_p256_from_generic(X, &p->X);
703
0
  fiat_p256_from_montgomery(X, X);
704
705
0
  if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
706
0
    return 1;
707
0
  }
708
709
  // During signing the x coefficient is reduced modulo the group order.
710
  // Therefore there is a small possibility, less than 1/2^128, that group_order
711
  // < p.x < P. in that case we need not only to compare against |r| but also to
712
  // compare against r+group_order.
713
0
  assert(group->field.N.width == group->order.N.width);
714
0
  EC_FELEM tmp;
715
0
  BN_ULONG carry =
716
0
      bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width);
717
0
  if (carry == 0 &&
718
0
      bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) {
719
0
    fiat_p256_from_generic(r_Z2, &tmp);
720
0
    fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
721
0
    if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
722
0
      return 1;
723
0
    }
724
0
  }
725
726
0
  return 0;
727
0
}
728
729
1
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
730
1
  out->point_get_affine_coordinates =
731
1
      ec_GFp_nistp256_point_get_affine_coordinates;
732
1
  out->add = ec_GFp_nistp256_add;
733
1
  out->dbl = ec_GFp_nistp256_dbl;
734
1
  out->mul = ec_GFp_nistp256_point_mul;
735
1
  out->mul_base = ec_GFp_nistp256_point_mul_base;
736
1
  out->mul_public = ec_GFp_nistp256_point_mul_public;
737
1
  out->felem_mul = ec_GFp_mont_felem_mul;
738
1
  out->felem_sqr = ec_GFp_mont_felem_sqr;
739
1
  out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
740
1
  out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
741
1
  out->felem_reduce = ec_GFp_mont_felem_reduce;
742
  // TODO(davidben): This should use the specialized field arithmetic
743
  // implementation, rather than the generic one.
744
1
  out->felem_exp = ec_GFp_mont_felem_exp;
745
1
  out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
746
1
  out->scalar_to_montgomery_inv_vartime =
747
1
      ec_simple_scalar_to_montgomery_inv_vartime;
748
1
  out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
749
1
}