Coverage Report

Created: 2024-11-21 07:03

/src/boringssl/crypto/fipsmodule/ec/p256-nistz.c.inc

Line |  Count | Source
   1 |        | /*
   2 |        |  * Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
   3 |        |  * Copyright (c) 2014, Intel Corporation. All Rights Reserved.
   4 |        |  *
   5 |        |  * Licensed under the OpenSSL license (the "License").  You may not use
   6 |        |  * this file except in compliance with the License.  You can obtain a copy
   7 |        |  * in the file LICENSE in the source distribution or at
   8 |        |  * https://www.openssl.org/source/license.html
   9 |        |  *
  10 |        |  * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
  11 |        |  * (1) Intel Corporation, Israel Development Center, Haifa, Israel
  12 |        |  * (2) University of Haifa, Israel
  13 |        |  *
  14 |        |  * Reference:
  15 |        |  * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
  16 |        |  *                          256 Bit Primes"
  17 |        |  */
  18 |        |
  19 |        | #include <openssl/ec.h>
  20 |        |
  21 |        | #include <assert.h>
  22 |        | #include <stdint.h>
  23 |        | #include <string.h>
  24 |        |
  25 |        | #include <openssl/bn.h>
  26 |        | #include <openssl/crypto.h>
  27 |        | #include <openssl/err.h>
  28 |        |
  29 |        | #include "../bn/internal.h"
  30 |        | #include "../delocate.h"
  31 |        | #include "../../internal.h"
  32 |        | #include "internal.h"
  33 |        | #include "p256-nistz.h"
  34 |        |
  35 |        | #if !defined(OPENSSL_NO_ASM) &&  \
  36 |        |     (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) &&    \
  37 |        |     !defined(OPENSSL_SMALL)
  38 |        |
  39 |        | typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
  40 |        |
  41 |        | // One converted into the Montgomery domain
  42 |        | static const BN_ULONG ONE_MONT[P256_LIMBS] = {
  43 |        |     TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
  44 |        |     TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
  45 |        | };
  46 |        |
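A quick sanity check on the constant above (a gloss, not part of the listed source): in the Montgomery representation used throughout this file, a field element a is stored as a * 2^256 mod p, so the encoding of 1 is R mod p with R = 2^256:

\[
p = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1, \qquad
R \bmod p = R - p = 2^{224} - 2^{192} - 2^{96} + 1 .
\]

Written as little-endian 64-bit limbs this is 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, exactly the values the TOBN() pairs in ONE_MONT encode.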
  47 |        | // Precomputed tables for the default generator
  48 |        | #include "p256-nistz-table.h"
  49 |        |
  50 |        | // Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in
  51 |        | // util.c for details
  52 |    884 | static crypto_word_t booth_recode_w5(crypto_word_t in) {
  53 |    884 |   crypto_word_t s, d;
  54 |        |
  55 |    884 |   s = ~((in >> 5) - 1);
  56 |    884 |   d = (1 << 6) - in - 1;
  57 |    884 |   d = (d & s) | (in & ~s);
  58 |    884 |   d = (d >> 1) + (d & 1);
  59 |        |
  60 |    884 |   return (d << 1) + (s & 1);
  61 |    884 | }
  62 |        |
  63 |    407 | static crypto_word_t booth_recode_w7(crypto_word_t in) {
  64 |    407 |   crypto_word_t s, d;
  65 |        |
  66 |    407 |   s = ~((in >> 7) - 1);
  67 |    407 |   d = (1 << 8) - in - 1;
  68 |    407 |   d = (d & s) | (in & ~s);
  69 |    407 |   d = (d >> 1) + (d & 1);
  70 |        |
  71 |    407 |   return (d << 1) + (s & 1);
  72 |    407 | }
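The recoding implemented by booth_recode_w5/booth_recode_w7 above maps an overlapping (w+1)-bit window to a signed digit, returned packed as (magnitude << 1) | sign so callers can index the precomputed table with the magnitude and conditionally negate the point with the sign bit. The sketch below is a standalone illustration of that invariant for w = 5 - the signed digit equals ((in + 1) >> 1) - 32 * (in >> 5); it is not part of the module, and the uint64_t re-declaration of the helper (replacing crypto_word_t) is an assumption made only so the snippet compiles on its own.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local copy of booth_recode_w5 with crypto_word_t replaced by uint64_t; the
 * real function is the one listed above. */
static uint64_t booth_recode_w5(uint64_t in) {
  uint64_t s = ~((in >> 5) - 1);
  uint64_t d = (1 << 6) - in - 1;
  d = (d & s) | (in & ~s);
  d = (d >> 1) + (d & 1);
  return (d << 1) + (s & 1);
}

int main(void) {
  for (uint64_t in = 0; in < 64; in++) {
    uint64_t recoded = booth_recode_w5(in);
    int64_t magnitude = (int64_t)(recoded >> 1); /* table index used by select_w5 */
    int64_t sign = (int64_t)(recoded & 1);       /* 1 means "negate the selected point" */
    int64_t digit = sign ? -magnitude : magnitude;
    /* Signed digit represented by the 6-bit window: ceil(in / 2), minus 32 when
     * the top (carry-out) bit of the window is set. */
    int64_t expected = (int64_t)((in + 1) >> 1) - (int64_t)((in >> 5) << 5);
    assert(digit == expected);
  }
  printf("booth_recode_w5: signed-digit invariant holds for all 64 windows\n");
  return 0;
}

The w = 7 variant used for the base-point tables satisfies the same identity with the shift amounts 5 and 6 replaced by 7 and 8, i.e. with 32 replaced by 128.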
  73 |        |
  74 |        | // copy_conditional copies |src| to |dst| if |move| is one and leaves it as-is
  75 |        | // if |move| is zero.
  76 |        | //
  77 |        | // WARNING: this breaks the usual convention of constant-time functions
  78 |        | // returning masks.
  79 |        | static void copy_conditional(BN_ULONG dst[P256_LIMBS],
  80 |    943 |                              const BN_ULONG src[P256_LIMBS], BN_ULONG move) {
  81 |    943 |   BN_ULONG mask1 = ((BN_ULONG)0) - move;
  82 |    943 |   BN_ULONG mask2 = ~mask1;
  83 |        |
  84 |    943 |   dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
  85 |    943 |   dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
  86 |    943 |   dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
  87 |    943 |   dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
  88 |    943 |   if (P256_LIMBS == 8) {
  89 |      0 |     dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
  90 |      0 |     dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
  91 |      0 |     dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
  92 |      0 |     dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
  93 |      0 |   }
  94 |    943 | }
  95 |        |
  96 |        | // is_not_zero returns one iff in != 0 and zero otherwise.
  97 |        | //
  98 |        | // WARNING: this breaks the usual convention of constant-time functions
  99 |        | // returning masks.
 100 |        | //
 101 |        | // (define-fun is_not_zero ((in (_ BitVec 64))) (_ BitVec 64)
 102 |        | //   (bvlshr (bvor in (bvsub #x0000000000000000 in)) #x000000000000003f)
 103 |        | // )
 104 |        | //
 105 |        | // (declare-fun x () (_ BitVec 64))
 106 |        | //
 107 |        | // (assert (and (= x #x0000000000000000) (= (is_not_zero x) #x0000000000000001)))
 108 |        | // (check-sat)
 109 |        | //
 110 |        | // (assert (and (not (= x #x0000000000000000)) (= (is_not_zero x) #x0000000000000000)))
 111 |        | // (check-sat)
 112 |        | //
 113 |      2 | static BN_ULONG is_not_zero(BN_ULONG in) {
 114 |      2 |   in |= (0 - in);
 115 |      2 |   in >>= BN_BITS2 - 1;
 116 |      2 |   return in;
 117 |      2 | }
 118 |        |
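Both helpers above use the same branch-free pattern: stretch a 0/1 flag into an all-zeros or all-ones mask and combine limbs with AND/XOR instead of branching on secret data. A minimal standalone sketch of the idiom follows, using uint64_t in place of BN_ULONG and a fixed 4-limb width (both assumptions made for self-containment, not code from the module).

#include <assert.h>
#include <stdint.h>

/* 0 -> 0x0000000000000000, 1 -> 0xffffffffffffffff. */
static uint64_t mask_from_bit(uint64_t bit) { return (uint64_t)0 - bit; }

/* Branch-free select in the style of copy_conditional: dst = move ? src : dst. */
static void select_limbs(uint64_t dst[4], const uint64_t src[4], uint64_t move) {
  uint64_t mask1 = mask_from_bit(move);
  uint64_t mask2 = ~mask1;
  for (int i = 0; i < 4; i++) {
    dst[i] = (src[i] & mask1) ^ (dst[i] & mask2);
  }
}

int main(void) {
  uint64_t a[4] = {1, 2, 3, 4};
  const uint64_t b[4] = {5, 6, 7, 8};
  select_limbs(a, b, 0); /* move == 0: a is left as-is */
  assert(a[0] == 1 && a[3] == 4);
  select_limbs(a, b, 1); /* move == 1: a now holds b */
  assert(a[0] == 5 && a[3] == 8);
  return 0;
}

is_not_zero builds its 0/1 result with the same arithmetic trick: in | (0 - in) has its top bit set exactly when in != 0, and shifting that bit down by BN_BITS2 - 1 yields 0 or 1.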
 119 |        | #if defined(OPENSSL_X86_64)
 120 |        | // Dispatch between CPU variations. The "_adx" suffixed functions use MULX in
 121 |        | // addition to ADCX/ADOX. MULX is part of BMI2, not ADX, so we must check both
 122 |        | // capabilities.
 123 |        | static void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
 124 |        |                                   const BN_ULONG a[P256_LIMBS],
 125 |    191 |                                   const BN_ULONG b[P256_LIMBS]) {
 126 |    191 |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 127 |    191 |     ecp_nistz256_mul_mont_adx(res, a, b);
 128 |    191 |   } else {
 129 |      0 |     ecp_nistz256_mul_mont_nohw(res, a, b);
 130 |      0 |   }
 131 |    191 | }
 132 |        |
 133 |        | static void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
 134 |  3.07k |                                   const BN_ULONG a[P256_LIMBS]) {
 135 |  3.07k |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 136 |  3.07k |     ecp_nistz256_sqr_mont_adx(res, a);
 137 |  3.07k |   } else {
 138 |      0 |     ecp_nistz256_sqr_mont_nohw(res, a);
 139 |      0 |   }
 140 |  3.07k | }
 141 |        |
 142 |        | static void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
 143 |        |                                       const BN_ULONG a[P256_LIMBS],
 144 |      0 |                                       const BN_ULONG b[P256_LIMBS]) {
 145 |      0 |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 146 |      0 |     ecp_nistz256_ord_mul_mont_adx(res, a, b);
 147 |      0 |   } else {
 148 |      0 |     ecp_nistz256_ord_mul_mont_nohw(res, a, b);
 149 |      0 |   }
 150 |      0 | }
 151 |        |
 152 |        | static void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
 153 |        |                                       const BN_ULONG a[P256_LIMBS],
 154 |      0 |                                       BN_ULONG rep) {
 155 |      0 |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 156 |      0 |     ecp_nistz256_ord_sqr_mont_adx(res, a, rep);
 157 |      0 |   } else {
 158 |      0 |     ecp_nistz256_ord_sqr_mont_nohw(res, a, rep);
 159 |      0 |   }
 160 |      0 | }
 161 |        |
 162 |        | static void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
 163 |    884 |                                    int index) {
 164 |    884 |   if (CRYPTO_is_AVX2_capable()) {
 165 |    884 |     ecp_nistz256_select_w5_avx2(val, in_t, index);
 166 |    884 |   } else {
 167 |      0 |     ecp_nistz256_select_w5_nohw(val, in_t, index);
 168 |      0 |   }
 169 |    884 | }
 170 |        |
 171 |        | static void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
 172 |        |                                    const P256_POINT_AFFINE in_t[64],
 173 |     74 |                                    int index) {
 174 |     74 |   if (CRYPTO_is_AVX2_capable()) {
 175 |     74 |     ecp_nistz256_select_w7_avx2(val, in_t, index);
 176 |     74 |   } else {
 177 |      0 |     ecp_nistz256_select_w7_nohw(val, in_t, index);
 178 |      0 |   }
 179 |     74 | }
 180 |        |
 181 |  4.47k | static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a) {
 182 |  4.47k |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 183 |  4.47k |     ecp_nistz256_point_double_adx(r, a);
 184 |  4.47k |   } else {
 185 |      0 |     ecp_nistz256_point_double_nohw(r, a);
 186 |      0 |   }
 187 |  4.47k | }
 188 |        |
 189 |        | static void ecp_nistz256_point_add(P256_POINT *r, const P256_POINT *a,
 190 |    996 |                                    const P256_POINT *b) {
 191 |    996 |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 192 |    996 |     ecp_nistz256_point_add_adx(r, a, b);
 193 |    996 |   } else {
 194 |      0 |     ecp_nistz256_point_add_nohw(r, a, b);
 195 |      0 |   }
 196 |    996 | }
 197 |        |
 198 |        | static void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
 199 |    392 |                                           const P256_POINT_AFFINE *b) {
 200 |    392 |   if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
 201 |    392 |     ecp_nistz256_point_add_affine_adx(r, a, b);
 202 |    392 |   } else {
 203 |      0 |     ecp_nistz256_point_add_affine_nohw(r, a, b);
 204 |      0 |   }
 205 |    392 | }
 206 |        | #endif  // OPENSSL_X86_64
 207 |        |
 208 |        | // ecp_nistz256_from_mont sets |res| to |in|, converted from Montgomery domain
 209 |        | // by multiplying with 1.
 210 |        | static void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
 211 |      9 |                                    const BN_ULONG in[P256_LIMBS]) {
 212 |      9 |   static const BN_ULONG ONE[P256_LIMBS] = {1};
 213 |      9 |   ecp_nistz256_mul_mont(res, in, ONE);
 214 |      9 | }
 215 |        |
 216 |        | // ecp_nistz256_mod_inverse_sqr_mont sets |r| to (|in| * 2^-256)^-2 * 2^256 mod
 217 |        | // p. That is, |r| is the modular inverse square of |in| for input and output in
 218 |        | // the Montgomery domain.
 219 |        | static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],
 220 |     12 |                                               const BN_ULONG in[P256_LIMBS]) {
 221 |        |   // This implements the addition chain described in
 222 |        |   // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
 223 |     12 |   BN_ULONG x2[P256_LIMBS], x3[P256_LIMBS], x6[P256_LIMBS], x12[P256_LIMBS],
 224 |     12 |       x15[P256_LIMBS], x30[P256_LIMBS], x32[P256_LIMBS];
 225 |     12 |   ecp_nistz256_sqr_mont(x2, in);      // 2^2 - 2^1
 226 |     12 |   ecp_nistz256_mul_mont(x2, x2, in);  // 2^2 - 2^0
 227 |        |
 228 |     12 |   ecp_nistz256_sqr_mont(x3, x2);      // 2^3 - 2^1
 229 |     12 |   ecp_nistz256_mul_mont(x3, x3, in);  // 2^3 - 2^0
 230 |        |
 231 |     12 |   ecp_nistz256_sqr_mont(x6, x3);
 232 |     36 |   for (int i = 1; i < 3; i++) {
 233 |     24 |     ecp_nistz256_sqr_mont(x6, x6);
 234 |     24 |   }                                   // 2^6 - 2^3
 235 |     12 |   ecp_nistz256_mul_mont(x6, x6, x3);  // 2^6 - 2^0
 236 |        |
 237 |     12 |   ecp_nistz256_sqr_mont(x12, x6);
 238 |     72 |   for (int i = 1; i < 6; i++) {
 239 |     60 |     ecp_nistz256_sqr_mont(x12, x12);
 240 |     60 |   }                                     // 2^12 - 2^6
 241 |     12 |   ecp_nistz256_mul_mont(x12, x12, x6);  // 2^12 - 2^0
 242 |        |
 243 |     12 |   ecp_nistz256_sqr_mont(x15, x12);
 244 |     36 |   for (int i = 1; i < 3; i++) {
 245 |     24 |     ecp_nistz256_sqr_mont(x15, x15);
 246 |     24 |   }                                     // 2^15 - 2^3
 247 |     12 |   ecp_nistz256_mul_mont(x15, x15, x3);  // 2^15 - 2^0
 248 |        |
 249 |     12 |   ecp_nistz256_sqr_mont(x30, x15);
 250 |    180 |   for (int i = 1; i < 15; i++) {
 251 |    168 |     ecp_nistz256_sqr_mont(x30, x30);
 252 |    168 |   }                                      // 2^30 - 2^15
 253 |     12 |   ecp_nistz256_mul_mont(x30, x30, x15);  // 2^30 - 2^0
 254 |        |
 255 |     12 |   ecp_nistz256_sqr_mont(x32, x30);
 256 |     12 |   ecp_nistz256_sqr_mont(x32, x32);      // 2^32 - 2^2
 257 |     12 |   ecp_nistz256_mul_mont(x32, x32, x2);  // 2^32 - 2^0
 258 |        |
 259 |     12 |   BN_ULONG ret[P256_LIMBS];
 260 |     12 |   ecp_nistz256_sqr_mont(ret, x32);
 261 |    384 |   for (int i = 1; i < 31 + 1; i++) {
 262 |    372 |     ecp_nistz256_sqr_mont(ret, ret);
 263 |    372 |   }                                     // 2^64 - 2^32
 264 |     12 |   ecp_nistz256_mul_mont(ret, ret, in);  // 2^64 - 2^32 + 2^0
 265 |        |
 266 |  1.54k |   for (int i = 0; i < 96 + 32; i++) {
 267 |  1.53k |     ecp_nistz256_sqr_mont(ret, ret);
 268 |  1.53k |   }                                      // 2^192 - 2^160 + 2^128
 269 |     12 |   ecp_nistz256_mul_mont(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
 270 |        |
 271 |    396 |   for (int i = 0; i < 32; i++) {
 272 |    384 |     ecp_nistz256_sqr_mont(ret, ret);
 273 |    384 |   }                                      // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
 274 |     12 |   ecp_nistz256_mul_mont(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
 275 |        |
 276 |    372 |   for (int i = 0; i < 30; i++) {
 277 |    360 |     ecp_nistz256_sqr_mont(ret, ret);
 278 |    360 |   }                                      // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
 279 |     12 |   ecp_nistz256_mul_mont(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
 280 |        |
 281 |     12 |   ecp_nistz256_sqr_mont(ret, ret);
 282 |     12 |   ecp_nistz256_sqr_mont(r, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
 283 |     12 | }
 284 |        |
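The chain above is a fixed-exponent exponentiation; the reason it ends at an inverse square is Fermat's little theorem (a gloss, not text from the module). For the P-256 prime p and any nonzero a, a^(p-1) = 1 (mod p), so a^(-2) = a^(p-3), and the exponent the comments track is exactly p - 3:

\[
p = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 1, \qquad
p - 3 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 2^{2},
\]

which matches the annotation on the final ecp_nistz256_sqr_mont call on line 282. Because every step is a Montgomery multiplication, the 2^256 factors carry through so that a Montgomery-form input yields the Montgomery form of the inverse square, as the function comment on lines 216-218 states.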
 285 |        | // r = p * p_scalar
 286 |        | static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
 287 |        |                                       const EC_JACOBIAN *p,
 288 |     17 |                                       const EC_SCALAR *p_scalar) {
 289 |     17 |   assert(p != NULL);
 290 |     17 |   assert(p_scalar != NULL);
 291 |     17 |   assert(group->field.N.width == P256_LIMBS);
 292 |        |
 293 |     17 |   static const size_t kWindowSize = 5;
 294 |     17 |   static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
 295 |        |
 296 |        |   // A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
 297 |        |   // add no more than 63 bytes of overhead. Thus, |table| should require
 298 |        |   // ~1599 ((96 * 16) + 63) bytes of stack space.
 299 |     17 |   alignas(64) P256_POINT table[16];
 300 |     17 |   uint8_t p_str[33];
 301 |     17 |   OPENSSL_memcpy(p_str, p_scalar->words, 32);
 302 |     17 |   p_str[32] = 0;
 303 |        |
 304 |        |   // table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
 305 |        |   // not stored. All other values are actually stored with an offset of -1 in
 306 |        |   // table.
 307 |     17 |   P256_POINT *row = table;
 308 |     17 |   assert(group->field.N.width == P256_LIMBS);
 309 |     17 |   OPENSSL_memcpy(row[1 - 1].X, p->X.words, P256_LIMBS * sizeof(BN_ULONG));
 310 |     17 |   OPENSSL_memcpy(row[1 - 1].Y, p->Y.words, P256_LIMBS * sizeof(BN_ULONG));
 311 |     17 |   OPENSSL_memcpy(row[1 - 1].Z, p->Z.words, P256_LIMBS * sizeof(BN_ULONG));
 312 |        |
 313 |     17 |   ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
 314 |     17 |   ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
 315 |     17 |   ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
 316 |     17 |   ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
 317 |     17 |   ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
 318 |     17 |   ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
 319 |     17 |   ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
 320 |     17 |   ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
 321 |     17 |   ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
 322 |     17 |   ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
 323 |     17 |   ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
 324 |     17 |   ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
 325 |     17 |   ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
 326 |     17 |   ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
 327 |     17 |   ecp_nistz256_point_double(&row[16 - 1], &row[8 - 1]);
 328 |        |
 329 |     17 |   BN_ULONG tmp[P256_LIMBS];
 330 |     17 |   alignas(32) P256_POINT h;
 331 |     17 |   size_t index = 255;
 332 |     17 |   crypto_word_t wvalue = p_str[(index - 1) / 8];
 333 |     17 |   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 334 |        |
 335 |     17 |   ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
 336 |        |
 337 |    884 |   while (index >= 5) {
 338 |    867 |     if (index != 255) {
 339 |    850 |       size_t off = (index - 1) / 8;
 340 |        |
 341 |    850 |       wvalue = (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
 342 |    850 |       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 343 |        |
 344 |    850 |       wvalue = booth_recode_w5(wvalue);
 345 |        |
 346 |    850 |       ecp_nistz256_select_w5(&h, table, wvalue >> 1);
 347 |        |
 348 |    850 |       ecp_nistz256_neg(tmp, h.Y);
 349 |    850 |       copy_conditional(h.Y, tmp, (wvalue & 1));
 350 |        |
 351 |    850 |       ecp_nistz256_point_add(r, r, &h);
 352 |    850 |     }
 353 |        |
 354 |    867 |     index -= kWindowSize;
 355 |        |
 356 |    867 |     ecp_nistz256_point_double(r, r);
 357 |    867 |     ecp_nistz256_point_double(r, r);
 358 |    867 |     ecp_nistz256_point_double(r, r);
 359 |    867 |     ecp_nistz256_point_double(r, r);
 360 |    867 |     ecp_nistz256_point_double(r, r);
 361 |    867 |   }
 362 |        |
 363 |        |   // Final window
 364 |     17 |   wvalue = p_str[0];
 365 |     17 |   wvalue = (wvalue << 1) & kMask;
 366 |        |
 367 |     17 |   wvalue = booth_recode_w5(wvalue);
 368 |        |
 369 |     17 |   ecp_nistz256_select_w5(&h, table, wvalue >> 1);
 370 |        |
 371 |     17 |   ecp_nistz256_neg(tmp, h.Y);
 372 |     17 |   copy_conditional(h.Y, tmp, wvalue & 1);
 373 |        |
 374 |     17 |   ecp_nistz256_point_add(r, r, &h);
 375 |     17 | }
 376 |        |
 377 |     11 | static crypto_word_t calc_first_wvalue(size_t *index, const uint8_t p_str[33]) {
 378 |     11 |   static const size_t kWindowSize = 7;
 379 |     11 |   static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 380 |     11 |   *index = kWindowSize;
 381 |        |
 382 |     11 |   crypto_word_t wvalue = (p_str[0] << 1) & kMask;
 383 |     11 |   return booth_recode_w7(wvalue);
 384 |     11 | }
 385 |        |
 386 |    396 | static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) {
 387 |    396 |   static const size_t kWindowSize = 7;
 388 |    396 |   static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 389 |        |
 390 |    396 |   const size_t off = (*index - 1) / 8;
 391 |    396 |   crypto_word_t wvalue =
 392 |    396 |       (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
 393 |    396 |   wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
 394 |    396 |   *index += kWindowSize;
 395 |        |
 396 |    396 |   return booth_recode_w7(wvalue);
 397 |    396 | }
 398 |        |
 399 |        | static void ecp_nistz256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
 400 |        |                                    const EC_JACOBIAN *p,
 401 |      8 |                                    const EC_SCALAR *scalar) {
 402 |      8 |   alignas(32) P256_POINT out;
 403 |      8 |   ecp_nistz256_windowed_mul(group, &out, p, scalar);
 404 |        |
 405 |      8 |   assert(group->field.N.width == P256_LIMBS);
 406 |      8 |   OPENSSL_memcpy(r->X.words, out.X, P256_LIMBS * sizeof(BN_ULONG));
 407 |      8 |   OPENSSL_memcpy(r->Y.words, out.Y, P256_LIMBS * sizeof(BN_ULONG));
 408 |      8 |   OPENSSL_memcpy(r->Z.words, out.Z, P256_LIMBS * sizeof(BN_ULONG));
 409 |      8 | }
 410 |        |
 411 |        | static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_JACOBIAN *r,
 412 |      2 |                                         const EC_SCALAR *scalar) {
 413 |      2 |   uint8_t p_str[33];
 414 |      2 |   OPENSSL_memcpy(p_str, scalar->words, 32);
 415 |      2 |   p_str[32] = 0;
 416 |        |
 417 |        |   // First window
 418 |      2 |   size_t index = 0;
 419 |      2 |   crypto_word_t wvalue = calc_first_wvalue(&index, p_str);
 420 |        |
 421 |      2 |   alignas(32) P256_POINT_AFFINE t;
 422 |      2 |   alignas(32) P256_POINT p;
 423 |      2 |   ecp_nistz256_select_w7(&t, ecp_nistz256_precomputed[0], wvalue >> 1);
 424 |      2 |   ecp_nistz256_neg(p.Z, t.Y);
 425 |      2 |   copy_conditional(t.Y, p.Z, wvalue & 1);
 426 |        |
 427 |        |   // Convert |t| from affine to Jacobian coordinates. We set Z to zero if |t|
 428 |        |   // is infinity and |ONE_MONT| otherwise. |t| was computed from the table, so
 429 |        |   // it is infinity iff |wvalue >> 1| is zero.
 430 |      2 |   OPENSSL_memcpy(p.X, t.X, sizeof(p.X));
 431 |      2 |   OPENSSL_memcpy(p.Y, t.Y, sizeof(p.Y));
 432 |      2 |   OPENSSL_memset(p.Z, 0, sizeof(p.Z));
 433 |      2 |   copy_conditional(p.Z, ONE_MONT, is_not_zero(wvalue >> 1));
 434 |        |
 435 |     74 |   for (int i = 1; i < 37; i++) {
 436 |     72 |     wvalue = calc_wvalue(&index, p_str);
 437 |        |
 438 |     72 |     ecp_nistz256_select_w7(&t, ecp_nistz256_precomputed[i], wvalue >> 1);
 439 |        |
 440 |     72 |     alignas(32) BN_ULONG neg_Y[P256_LIMBS];
 441 |     72 |     ecp_nistz256_neg(neg_Y, t.Y);
 442 |     72 |     copy_conditional(t.Y, neg_Y, wvalue & 1);
 443 |        |
 444 |        |     // Note |ecp_nistz256_point_add_affine| does not work if |p| and |t| are the
 445 |        |     // same non-infinity point.
 446 |     72 |     ecp_nistz256_point_add_affine(&p, &p, &t);
 447 |     72 |   }
 448 |        |
 449 |      2 |   assert(group->field.N.width == P256_LIMBS);
 450 |      2 |   OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG));
 451 |      2 |   OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG));
 452 |      2 |   OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG));
 453 |      2 | }
 454 |        |
 455 |        | static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
 456 |        |                                            EC_JACOBIAN *r,
 457 |        |                                            const EC_SCALAR *g_scalar,
 458 |        |                                            const EC_JACOBIAN *p_,
 459 |      9 |                                            const EC_SCALAR *p_scalar) {
 460 |      9 |   assert(p_ != NULL && p_scalar != NULL && g_scalar != NULL);
 461 |        |
 462 |      9 |   alignas(32) P256_POINT p;
 463 |      9 |   uint8_t p_str[33];
 464 |      9 |   OPENSSL_memcpy(p_str, g_scalar->words, 32);
 465 |      9 |   p_str[32] = 0;
 466 |        |
 467 |        |   // First window
 468 |      9 |   size_t index = 0;
 469 |      9 |   size_t wvalue = calc_first_wvalue(&index, p_str);
 470 |        |
 471 |        |   // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
 472 |        |   // is infinity and |ONE_MONT| otherwise. |p| was computed from the table, so
 473 |        |   // it is infinity iff |wvalue >> 1| is zero.
 474 |      9 |   if ((wvalue >> 1) != 0) {
 475 |      9 |     OPENSSL_memcpy(p.X, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].X,
 476 |      9 |                    sizeof(p.X));
 477 |      9 |     OPENSSL_memcpy(p.Y, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].Y,
 478 |      9 |                    sizeof(p.Y));
 479 |      9 |     OPENSSL_memcpy(p.Z, ONE_MONT, sizeof(p.Z));
 480 |      9 |   } else {
 481 |      0 |     OPENSSL_memset(p.X, 0, sizeof(p.X));
 482 |      0 |     OPENSSL_memset(p.Y, 0, sizeof(p.Y));
 483 |      0 |     OPENSSL_memset(p.Z, 0, sizeof(p.Z));
 484 |      0 |   }
 485 |        |
 486 |      9 |   if ((wvalue & 1) == 1) {
 487 |      6 |     ecp_nistz256_neg(p.Y, p.Y);
 488 |      6 |   }
 489 |        |
 490 |    333 |   for (int i = 1; i < 37; i++) {
 491 |    324 |     wvalue = calc_wvalue(&index, p_str);
 492 |    324 |     if ((wvalue >> 1) == 0) {
 493 |      4 |       continue;
 494 |      4 |     }
 495 |        |
 496 |    320 |     alignas(32) P256_POINT_AFFINE t;
 497 |    320 |     OPENSSL_memcpy(&t, &ecp_nistz256_precomputed[i][(wvalue >> 1) - 1],
 498 |    320 |                    sizeof(t));
 499 |    320 |     if ((wvalue & 1) == 1) {
 500 |    156 |       ecp_nistz256_neg(t.Y, t.Y);
 501 |    156 |     }
 502 |        |
 503 |        |     // Note |ecp_nistz256_point_add_affine| does not work if |p| and |t| are
 504 |        |     // the same non-infinity point, so it is important that we compute the
 505 |        |     // |g_scalar| term before the |p_scalar| term.
 506 |    320 |     ecp_nistz256_point_add_affine(&p, &p, &t);
 507 |    320 |   }
 508 |        |
 509 |      9 |   alignas(32) P256_POINT tmp;
 510 |      9 |   ecp_nistz256_windowed_mul(group, &tmp, p_, p_scalar);
 511 |      9 |   ecp_nistz256_point_add(&p, &p, &tmp);
 512 |        |
 513 |      9 |   assert(group->field.N.width == P256_LIMBS);
 514 |      9 |   OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG));
 515 |      9 |   OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG));
 516 |      9 |   OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG));
 517 |      9 | }
 518 |        |
 519 |        | static int ecp_nistz256_get_affine(const EC_GROUP *group,
 520 |        |                                    const EC_JACOBIAN *point, EC_FELEM *x,
 521 |     12 |                                    EC_FELEM *y) {
 522 |     12 |   if (constant_time_declassify_int(
 523 |     12 |           ec_GFp_simple_is_at_infinity(group, point))) {
 524 |      0 |     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
 525 |      0 |     return 0;
 526 |      0 |   }
 527 |        |
 528 |     12 |   BN_ULONG z_inv2[P256_LIMBS];
 529 |     12 |   assert(group->field.N.width == P256_LIMBS);
 530 |     12 |   ecp_nistz256_mod_inverse_sqr_mont(z_inv2, point->Z.words);
 531 |        |
 532 |     12 |   if (x != NULL) {
 533 |     12 |     ecp_nistz256_mul_mont(x->words, z_inv2, point->X.words);
 534 |     12 |   }
 535 |        |
 536 |     12 |   if (y != NULL) {
 537 |     10 |     ecp_nistz256_sqr_mont(z_inv2, z_inv2);                            // z^-4
 538 |     10 |     ecp_nistz256_mul_mont(y->words, point->Y.words, point->Z.words);  // y * z
 539 |     10 |     ecp_nistz256_mul_mont(y->words, y->words, z_inv2);  // y * z^-3
 540 |     10 |   }
 541 |        |
 542 |     12 |   return 1;
 543 |     12 | }
 544 |        |
 545 |        | static void ecp_nistz256_add(const EC_GROUP *group, EC_JACOBIAN *r,
 546 |      1 |                              const EC_JACOBIAN *a_, const EC_JACOBIAN *b_) {
 547 |      1 |   P256_POINT a, b;
 548 |      1 |   OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
 549 |      1 |   OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
 550 |      1 |   OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
 551 |      1 |   OPENSSL_memcpy(b.X, b_->X.words, P256_LIMBS * sizeof(BN_ULONG));
 552 |      1 |   OPENSSL_memcpy(b.Y, b_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
 553 |      1 |   OPENSSL_memcpy(b.Z, b_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
 554 |      1 |   ecp_nistz256_point_add(&a, &a, &b);
 555 |      1 |   OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
 556 |      1 |   OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
 557 |      1 |   OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
 558 |      1 | }
 559 |        |
 560 |        | static void ecp_nistz256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
 561 |      1 |                              const EC_JACOBIAN *a_) {
 562 |      1 |   P256_POINT a;
 563 |      1 |   OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
 564 |      1 |   OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
 565 |      1 |   OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
 566 |      1 |   ecp_nistz256_point_double(&a, &a);
 567 |      1 |   OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
 568 |      1 |   OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
 569 |      1 |   OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
 570 |      1 | }
 571 |        |
 572 |        | static void ecp_nistz256_inv0_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
 573 |      0 |                                       const EC_SCALAR *in) {
 574 |        |   // table[i] stores a power of |in| corresponding to the matching enum value.
 575 |      0 |   enum {
 576 |        |     // The following indices specify the power in binary.
 577 |      0 |     i_1 = 0,
 578 |      0 |     i_10,
 579 |      0 |     i_11,
 580 |      0 |     i_101,
 581 |      0 |     i_111,
 582 |      0 |     i_1010,
 583 |      0 |     i_1111,
 584 |      0 |     i_10101,
 585 |      0 |     i_101010,
 586 |      0 |     i_101111,
 587 |        |     // The following indices specify 2^N-1, or N ones in a row.
 588 |      0 |     i_x6,
 589 |      0 |     i_x8,
 590 |      0 |     i_x16,
 591 |      0 |     i_x32
 592 |      0 |   };
 593 |      0 |   BN_ULONG table[15][P256_LIMBS];
 594 |        |
 595 |        |   // https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
 596 |        |   //
 597 |        |   // Even though this code path spares 12 squarings, 4.5%, and 13
 598 |        |   // multiplications, 25%, the overall sign operation is not that much faster,
 599 |        |   // not more than 2%. Most of the performance of this function comes from the
 600 |        |   // scalar operations.
 601 |        |
 602 |        |   // Pre-calculate powers.
 603 |      0 |   OPENSSL_memcpy(table[i_1], in->words, P256_LIMBS * sizeof(BN_ULONG));
 604 |        |
 605 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_10], table[i_1], 1);
 606 |        |
 607 |      0 |   ecp_nistz256_ord_mul_mont(table[i_11], table[i_1], table[i_10]);
 608 |        |
 609 |      0 |   ecp_nistz256_ord_mul_mont(table[i_101], table[i_11], table[i_10]);
 610 |        |
 611 |      0 |   ecp_nistz256_ord_mul_mont(table[i_111], table[i_101], table[i_10]);
 612 |        |
 613 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_1010], table[i_101], 1);
 614 |        |
 615 |      0 |   ecp_nistz256_ord_mul_mont(table[i_1111], table[i_1010], table[i_101]);
 616 |        |
 617 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_10101], table[i_1010], 1);
 618 |      0 |   ecp_nistz256_ord_mul_mont(table[i_10101], table[i_10101], table[i_1]);
 619 |        |
 620 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_101010], table[i_10101], 1);
 621 |        |
 622 |      0 |   ecp_nistz256_ord_mul_mont(table[i_101111], table[i_101010], table[i_101]);
 623 |        |
 624 |      0 |   ecp_nistz256_ord_mul_mont(table[i_x6], table[i_101010], table[i_10101]);
 625 |        |
 626 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_x8], table[i_x6], 2);
 627 |      0 |   ecp_nistz256_ord_mul_mont(table[i_x8], table[i_x8], table[i_11]);
 628 |        |
 629 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_x16], table[i_x8], 8);
 630 |      0 |   ecp_nistz256_ord_mul_mont(table[i_x16], table[i_x16], table[i_x8]);
 631 |        |
 632 |      0 |   ecp_nistz256_ord_sqr_mont(table[i_x32], table[i_x16], 16);
 633 |      0 |   ecp_nistz256_ord_mul_mont(table[i_x32], table[i_x32], table[i_x16]);
 634 |        |
 635 |        |   // Compute |in| raised to the order-2.
 636 |      0 |   ecp_nistz256_ord_sqr_mont(out->words, table[i_x32], 64);
 637 |      0 |   ecp_nistz256_ord_mul_mont(out->words, out->words, table[i_x32]);
 638 |      0 |   static const struct {
 639 |      0 |     uint8_t p, i;
 640 |      0 |   } kChain[27] = {{32, i_x32},    {6, i_101111}, {5, i_111},    {4, i_11},
 641 |      0 |                   {5, i_1111},    {5, i_10101},  {4, i_101},    {3, i_101},
 642 |      0 |                   {3, i_101},     {5, i_111},    {9, i_101111}, {6, i_1111},
 643 |      0 |                   {2, i_1},       {5, i_1},      {6, i_1111},   {5, i_111},
 644 |      0 |                   {4, i_111},     {5, i_111},    {5, i_101},    {3, i_11},
 645 |      0 |                   {10, i_101111}, {2, i_11},     {5, i_11},     {5, i_11},
 646 |      0 |                   {3, i_1},       {7, i_10101},  {6, i_1111}};
 647 |      0 |   for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kChain); i++) {
 648 |      0 |     ecp_nistz256_ord_sqr_mont(out->words, out->words, kChain[i].p);
 649 |      0 |     ecp_nistz256_ord_mul_mont(out->words, out->words, table[kChain[i].i]);
 650 |      0 |   }
 651 |      0 | }
 652 |        |
 653 |        | static int ecp_nistz256_scalar_to_montgomery_inv_vartime(const EC_GROUP *group,
 654 |        |                                                  EC_SCALAR *out,
 655 |      9 |                                                  const EC_SCALAR *in) {
 656 |      9 | #if defined(OPENSSL_X86_64)
 657 |      9 |   if (!CRYPTO_is_AVX_capable()) {
 658 |        |     // No AVX support; fall back to the generic code.
 659 |      0 |     return ec_simple_scalar_to_montgomery_inv_vartime(group, out, in);
 660 |      0 |   }
 661 |      9 | #endif
 662 |        |
 663 |      9 |   assert(group->order.N.width == P256_LIMBS);
 664 |      9 |   if (!beeu_mod_inverse_vartime(out->words, in->words, group->order.N.d)) {
 665 |      0 |     return 0;
 666 |      0 |   }
 667 |        |
 668 |        |   // The result should be returned in the Montgomery domain.
 669 |      9 |   ec_scalar_to_montgomery(group, out, out);
 670 |      9 |   return 1;
 671 |      9 | }
 672 |        |
 673 |        | static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group,
 674 |        |                                          const EC_JACOBIAN *p,
 675 |      9 |                                          const EC_SCALAR *r) {
 676 |      9 |   if (ec_GFp_simple_is_at_infinity(group, p)) {
 677 |      0 |     return 0;
 678 |      0 |   }
 679 |        |
 680 |      9 |   assert(group->order.N.width == P256_LIMBS);
 681 |      9 |   assert(group->field.N.width == P256_LIMBS);
 682 |        |
 683 |        |   // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
 684 |        |   // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
 685 |        |   // not.
 686 |      9 |   BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS], X[P256_LIMBS];
 687 |      9 |   ecp_nistz256_mul_mont(Z2_mont, p->Z.words, p->Z.words);
 688 |      9 |   ecp_nistz256_mul_mont(r_Z2, r->words, Z2_mont);
 689 |      9 |   ecp_nistz256_from_mont(X, p->X.words);
 690 |        |
 691 |      9 |   if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
 692 |      0 |     return 1;
 693 |      0 |   }
 694 |        |
 695 |        |   // During signing the x coordinate is reduced modulo the group order.
 696 |        |   // Therefore there is a small possibility, less than 1/2^128, that group_order
 697 |        |   // < p.x < P. In that case we need not only to compare against |r| but also to
 698 |        |   // compare against r+group_order.
 699 |      9 |   BN_ULONG carry = bn_add_words(r_Z2, r->words, group->order.N.d, P256_LIMBS);
 700 |      9 |   if (carry == 0 && bn_less_than_words(r_Z2, group->field.N.d, P256_LIMBS)) {
 701 |        |     // r + group_order < p, so compare (r + group_order) * Z^2 against X.
 702 |      0 |     ecp_nistz256_mul_mont(r_Z2, r_Z2, Z2_mont);
 703 |      0 |     if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
 704 |      0 |       return 1;
 705 |      0 |     }
 706 |      0 |   }
 707 |        |
 708 |      9 |   return 0;
 709 |      9 | }
 710 |        |
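A compact restatement of what ecp_nistz256_cmp_x_coordinate checks, in the notation of the comments above (a gloss, not text from the module): the affine x-coordinate of the point is X/Z^2, and the function avoids the field inversion by cross-multiplying,

\[
\frac{X}{Z^{2}} \equiv r \pmod{p} \iff X \equiv r \cdot Z^{2} \pmod{p}.
\]

Because ECDSA reduces the affine x-coordinate modulo the group order n, and n < p < 2n, the coordinate that produced r is either r itself or r + n; the second comparison covers the r + n case whenever that sum is still below p.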
 711 |      1 | DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
 712 |      1 |   out->point_get_affine_coordinates = ecp_nistz256_get_affine;
 713 |      1 |   out->add = ecp_nistz256_add;
 714 |      1 |   out->dbl = ecp_nistz256_dbl;
 715 |      1 |   out->mul = ecp_nistz256_point_mul;
 716 |      1 |   out->mul_base = ecp_nistz256_point_mul_base;
 717 |      1 |   out->mul_public = ecp_nistz256_points_mul_public;
 718 |      1 |   out->felem_mul = ec_GFp_mont_felem_mul;
 719 |      1 |   out->felem_sqr = ec_GFp_mont_felem_sqr;
 720 |      1 |   out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
 721 |      1 |   out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
 722 |      1 |   out->felem_reduce = ec_GFp_mont_felem_reduce;
 723 |        |   // TODO(davidben): This should use the specialized field arithmetic
 724 |        |   // implementation, rather than the generic one.
 725 |      1 |   out->felem_exp = ec_GFp_mont_felem_exp;
 726 |      1 |   out->scalar_inv0_montgomery = ecp_nistz256_inv0_mod_ord;
 727 |      1 |   out->scalar_to_montgomery_inv_vartime =
 728 |      1 |       ecp_nistz256_scalar_to_montgomery_inv_vartime;
 729 |      1 |   out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;
 730 |      1 | }
 731 |        |
 732 |        | #endif /* !defined(OPENSSL_NO_ASM) && \
 733 |        |           (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) &&  \
 734 |        |           !defined(OPENSSL_SMALL) */