Coverage Report

Created: 2026-05-11 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boringssl/crypto/fipsmodule/ec/p256-nistz.cc.inc
Line
Count
Source
1
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
2
// Copyright (c) 2014, Intel Corporation. All Rights Reserved.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
//     https://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
//
16
// Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
17
// (1) Intel Corporation, Israel Development Center, Haifa, Israel
18
// (2) University of Haifa, Israel
19
//
20
// Reference:
21
// S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
22
//                          256 Bit Primes"
23
24
#include <openssl/ec.h>
25
26
#include <assert.h>
27
#include <stdint.h>
28
#include <string.h>
29
30
#include <openssl/bn.h>
31
#include <openssl/crypto.h>
32
#include <openssl/err.h>
33
34
#include "../../internal.h"
35
#include "../bn/internal.h"
36
#include "../delocate.h"
37
#include "internal.h"
38
#include "p256-nistz.h"
39
40
41
using namespace bssl;
42
43
#if !defined(OPENSSL_NO_ASM) &&                              \
44
    (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \
45
    !defined(OPENSSL_SMALL)
46
47
// PRECOMP256_ROW is an array of 64 affine points.
// NOTE(review): presumably one row of the precomputed generator table declared
// in p256-nistz-table.h -- confirm against that header.
typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
48
49
// One converted into the Montgomery domain
50
static const BN_ULONG ONE_MONT[P256_LIMBS] = {
51
    TOBN(0x00000000, 0x00000001),
52
    TOBN(0xffffffff, 0x00000000),
53
    TOBN(0xffffffff, 0xffffffff),
54
    TOBN(0x00000000, 0xfffffffe),
55
};
56
57
// Precomputed tables for the default generator
58
#include "p256-nistz-table.h"
59
60
// Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in
61
// util.c for details
62
610k
static crypto_word_t booth_recode_w5(crypto_word_t in) {
63
610k
  crypto_word_t s, d;
64
65
610k
  s = ~((in >> 5) - 1);
66
610k
  d = (1 << 6) - in - 1;
67
610k
  d = (d & s) | (in & ~s);
68
610k
  d = (d >> 1) + (d & 1);
69
70
610k
  return (d << 1) + (s & 1);
71
610k
}
72
73
479k
// booth_recode_w7 is the 8-bit-window counterpart of the w5 recoding: it
// recodes |in| to a signed digit whose magnitude sits in the bits above bit
// zero and whose sign sits in bit zero.
static crypto_word_t booth_recode_w7(crypto_word_t in) {
  // For |in| < 256, |sign_mask| is all ones when bit 7 of |in| is set and zero
  // otherwise.
  const crypto_word_t sign_mask = ~((in >> 7) - 1);
  const crypto_word_t complemented = (1 << 8) - in - 1;
  // Pick the complemented value when the sign bit was set, else |in| itself.
  const crypto_word_t chosen = (complemented & sign_mask) | (in & ~sign_mask);
  // Halve, rounding up.
  const crypto_word_t magnitude = (chosen >> 1) + (chosen & 1);

  return (magnitude << 1) + (sign_mask & 1);
}
83
84
// copy_conditional copies |src| to |dst| if |move| is one and leaves it as-is
85
// if |move| is zero.
86
//
87
// WARNING: this breaks the usual convention of constant-time functions
88
// returning masks.
89
static void copy_conditional(BN_ULONG dst[P256_LIMBS],
90
765k
                             const BN_ULONG src[P256_LIMBS], BN_ULONG move) {
91
765k
  BN_ULONG mask1 = ((BN_ULONG)0) - move;
92
765k
  BN_ULONG mask2 = ~mask1;
93
94
765k
  dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
95
765k
  dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
96
765k
  dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
97
765k
  dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
98
765k
  if (P256_LIMBS == 8) {
99
0
    dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
100
0
    dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
101
0
    dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
102
0
    dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
103
0
  }
104
765k
}
105
106
// is_not_zero returns one iff in != 0 and zero otherwise.
107
//
108
// WARNING: this breaks the usual convention of constant-time functions
109
// returning masks.
110
//
111
// (define-fun is_not_zero ((in (_ BitVec 64))) (_ BitVec 64)
112
//   (bvlshr (bvor in (bvsub #x0000000000000000 in)) #x000000000000003f)
113
// )
114
//
115
// (declare-fun x () (_ BitVec 64))
116
//
117
// (assert (and (= x #x0000000000000000) (= (is_not_zero x)
118
// #x0000000000000001))) (check-sat)
119
//
120
// (assert (and (not (= x #x0000000000000000)) (= (is_not_zero x)
121
// #x0000000000000000))) (check-sat)
122
//
123
4.39k
static BN_ULONG is_not_zero(BN_ULONG in) {
124
4.39k
  in |= (0 - in);
125
4.39k
  in >>= BN_BITS2 - 1;
126
4.39k
  return in;
127
4.39k
}
128
129
#if defined(OPENSSL_X86_64)
130
// Dispatch between CPU variations. The "_adx" suffixed functions use MULX in
131
// addition to ADCX/ADOX. MULX is part of BMI2, not ADX, so we must check both
132
// capabilities.
133
static void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
134
                                  const BN_ULONG a[P256_LIMBS],
135
132k
                                  const BN_ULONG b[P256_LIMBS]) {
136
132k
  if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
137
132k
    ecp_nistz256_mul_mont_adx(res, a, b);
138
132k
  } else {
139
0
    ecp_nistz256_mul_mont_nohw(res, a, b);
140
0
  }
141
132k
}
142
143
static void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
144
2.08M
                                  const BN_ULONG a[P256_LIMBS]) {
145
2.08M
  if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
146
2.08M
    ecp_nistz256_sqr_mont_adx(res, a);
147
2.08M
  } else {
148
0
    ecp_nistz256_sqr_mont_nohw(res, a);
149
0
  }
150
2.08M
}
151
152
static void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
153
                                      const BN_ULONG a[P256_LIMBS],
154
25.1k
                                      const BN_ULONG b[P256_LIMBS]) {
155
25.1k
  if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
156
25.1k
    ecp_nistz256_ord_mul_mont_adx(res, a, b);
157
25.1k
  } else {
158
0
    ecp_nistz256_ord_mul_mont_nohw(res, a, b);
159
0
  }
160
25.1k
}
161
162
static void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
163
                                      const BN_ULONG a[P256_LIMBS],
164
23.1k
                                      BN_ULONG rep) {
165
23.1k
  if (CRYPTO_is_BMI2_capable() && CRYPTO_is_ADX_capable()) {
166
23.1k
    ecp_nistz256_ord_sqr_mont_adx(res, a, rep);
167
23.1k
  } else {
168
0
    ecp_nistz256_ord_sqr_mont_nohw(res, a, rep);
169
0
  }
170
23.1k
}
171
172
static void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
173
610k
                                   int index) {
174
610k
  if (CRYPTO_is_AVX2_capable()) {
175
610k
    ecp_nistz256_select_w5_avx2(val, in_t, index);
176
610k
  } else {
177
0
    ecp_nistz256_select_w5_nohw(val, in_t, index);
178
0
  }
179
610k
}
180
181
static void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
182
                                   const P256_POINT_AFFINE in_t[64],
183
162k
                                   int index) {
184
162k
  if (CRYPTO_is_AVX2_capable()) {
185
162k
    ecp_nistz256_select_w7_avx2(val, in_t, index);
186
162k
  } else {
187
0
    ecp_nistz256_select_w7_nohw(val, in_t, index);
188
0
  }
189
162k
}
190
191
3.08M
// ecp_nistz256_point_double calls the "_adx" point doubling when the CPU
// reports both BMI2 and ADX, and the "_nohw" point doubling otherwise.
static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a) {
  if (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()) {
    ecp_nistz256_point_double_nohw(r, a);
    return;
  }
  ecp_nistz256_point_double_adx(r, a);
}
198
199
// ecp_nistz256_point_add calls the "_adx" point addition when the CPU reports
// both BMI2 and ADX, and the "_nohw" point addition otherwise.
static void ecp_nistz256_point_add(P256_POINT *r, const P256_POINT *a,
                                   const P256_POINT *b) {
  if (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()) {
    ecp_nistz256_point_add_nohw(r, a, b);
    return;
  }
  ecp_nistz256_point_add_adx(r, a, b);
}
207
208
// ecp_nistz256_point_add_affine calls the "_adx" mixed (Jacobian + affine)
// point addition when the CPU reports both BMI2 and ADX, and the "_nohw"
// version otherwise.
static void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
                                          const P256_POINT_AFFINE *b) {
  if (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()) {
    ecp_nistz256_point_add_affine_nohw(r, a, b);
    return;
  }
  ecp_nistz256_point_add_affine_adx(r, a, b);
}
216
#endif  // OPENSSL_X86_64
217
218
// ecp_nistz256_from_mont sets |res| to |in|, converted from Montgomery domain
219
// by multiplying with 1.
220
static void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
221
8.55k
                                   const BN_ULONG in[P256_LIMBS]) {
222
8.55k
  static const BN_ULONG ONE[P256_LIMBS] = {1};
223
8.55k
  ecp_nistz256_mul_mont(res, in, ONE);
224
8.55k
}
225
226
// ecp_nistz256_mod_inverse_sqr_mont sets |r| to (|in| * 2^-256)^-2 * 2^256 mod
227
// p. That is, |r| is the modular inverse square of |in| for input and output in
228
// the Montgomery domain.
229
static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],
230
8.14k
                                              const BN_ULONG in[P256_LIMBS]) {
231
  // This implements the addition chain described in
232
  // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
233
8.14k
  BN_ULONG x2[P256_LIMBS], x3[P256_LIMBS], x6[P256_LIMBS], x12[P256_LIMBS],
234
8.14k
      x15[P256_LIMBS], x30[P256_LIMBS], x32[P256_LIMBS];
235
8.14k
  ecp_nistz256_sqr_mont(x2, in);      // 2^2 - 2^1
236
8.14k
  ecp_nistz256_mul_mont(x2, x2, in);  // 2^2 - 2^0
237
238
8.14k
  ecp_nistz256_sqr_mont(x3, x2);      // 2^3 - 2^1
239
8.14k
  ecp_nistz256_mul_mont(x3, x3, in);  // 2^3 - 2^0
240
241
8.14k
  ecp_nistz256_sqr_mont(x6, x3);
242
24.4k
  for (int i = 1; i < 3; i++) {
243
16.2k
    ecp_nistz256_sqr_mont(x6, x6);
244
16.2k
  }                                   // 2^6 - 2^3
245
8.14k
  ecp_nistz256_mul_mont(x6, x6, x3);  // 2^6 - 2^0
246
247
8.14k
  ecp_nistz256_sqr_mont(x12, x6);
248
48.8k
  for (int i = 1; i < 6; i++) {
249
40.7k
    ecp_nistz256_sqr_mont(x12, x12);
250
40.7k
  }                                     // 2^12 - 2^6
251
8.14k
  ecp_nistz256_mul_mont(x12, x12, x6);  // 2^12 - 2^0
252
253
8.14k
  ecp_nistz256_sqr_mont(x15, x12);
254
24.4k
  for (int i = 1; i < 3; i++) {
255
16.2k
    ecp_nistz256_sqr_mont(x15, x15);
256
16.2k
  }                                     // 2^15 - 2^3
257
8.14k
  ecp_nistz256_mul_mont(x15, x15, x3);  // 2^15 - 2^0
258
259
8.14k
  ecp_nistz256_sqr_mont(x30, x15);
260
122k
  for (int i = 1; i < 15; i++) {
261
114k
    ecp_nistz256_sqr_mont(x30, x30);
262
114k
  }                                      // 2^30 - 2^15
263
8.14k
  ecp_nistz256_mul_mont(x30, x30, x15);  // 2^30 - 2^0
264
265
8.14k
  ecp_nistz256_sqr_mont(x32, x30);
266
8.14k
  ecp_nistz256_sqr_mont(x32, x32);      // 2^32 - 2^2
267
8.14k
  ecp_nistz256_mul_mont(x32, x32, x2);  // 2^32 - 2^0
268
269
8.14k
  BN_ULONG ret[P256_LIMBS];
270
8.14k
  ecp_nistz256_sqr_mont(ret, x32);
271
260k
  for (int i = 1; i < 31 + 1; i++) {
272
252k
    ecp_nistz256_sqr_mont(ret, ret);
273
252k
  }                                     // 2^64 - 2^32
274
8.14k
  ecp_nistz256_mul_mont(ret, ret, in);  // 2^64 - 2^32 + 2^0
275
276
1.05M
  for (int i = 0; i < 96 + 32; i++) {
277
1.04M
    ecp_nistz256_sqr_mont(ret, ret);
278
1.04M
  }                                      // 2^192 - 2^160 + 2^128
279
8.14k
  ecp_nistz256_mul_mont(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
280
281
268k
  for (int i = 0; i < 32; i++) {
282
260k
    ecp_nistz256_sqr_mont(ret, ret);
283
260k
  }                                      // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
284
8.14k
  ecp_nistz256_mul_mont(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
285
286
252k
  for (int i = 0; i < 30; i++) {
287
244k
    ecp_nistz256_sqr_mont(ret, ret);
288
244k
  }                                      // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
289
8.14k
  ecp_nistz256_mul_mont(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
290
291
8.14k
  ecp_nistz256_sqr_mont(ret, ret);
292
8.14k
  ecp_nistz256_sqr_mont(r, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
293
8.14k
}
294
295
// r = p * p_scalar
296
static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
297
                                      const EC_JACOBIAN *p,
298
11.7k
                                      const EC_SCALAR *p_scalar) {
299
11.7k
  assert(p != nullptr);
300
11.7k
  assert(p_scalar != nullptr);
301
11.7k
  assert(group->field.N.width == P256_LIMBS);
302
303
11.7k
  static const size_t kWindowSize = 5;
304
11.7k
  static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
305
306
  // A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
307
  // add no more than 63 bytes of overhead. Thus, |table| should require
308
  // ~1599 ((96 * 16) + 63) bytes of stack space.
309
11.7k
  alignas(64) P256_POINT table[16];
310
11.7k
  uint8_t p_str[33];
311
11.7k
  OPENSSL_memcpy(p_str, p_scalar->words, 32);
312
11.7k
  p_str[32] = 0;
313
314
  // table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
315
  // not stored. All other values are actually stored with an offset of -1 in
316
  // table.
317
11.7k
  P256_POINT *row = table;
318
11.7k
  assert(group->field.N.width == P256_LIMBS);
319
11.7k
  OPENSSL_memcpy(row[1 - 1].X, p->X.words, P256_LIMBS * sizeof(BN_ULONG));
320
11.7k
  OPENSSL_memcpy(row[1 - 1].Y, p->Y.words, P256_LIMBS * sizeof(BN_ULONG));
321
11.7k
  OPENSSL_memcpy(row[1 - 1].Z, p->Z.words, P256_LIMBS * sizeof(BN_ULONG));
322
323
11.7k
  ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
324
11.7k
  ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
325
11.7k
  ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
326
11.7k
  ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
327
11.7k
  ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
328
11.7k
  ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
329
11.7k
  ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
330
11.7k
  ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
331
11.7k
  ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
332
11.7k
  ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
333
11.7k
  ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
334
11.7k
  ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
335
11.7k
  ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
336
11.7k
  ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
337
11.7k
  ecp_nistz256_point_double(&row[16 - 1], &row[8 - 1]);
338
339
11.7k
  BN_ULONG tmp[P256_LIMBS];
340
11.7k
  alignas(32) P256_POINT h;
341
11.7k
  size_t index = 255;
342
11.7k
  crypto_word_t wvalue = p_str[(index - 1) / 8];
343
11.7k
  wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
344
345
11.7k
  ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
346
347
610k
  while (index >= 5) {
348
598k
    if (index != 255) {
349
587k
      size_t off = (index - 1) / 8;
350
351
587k
      wvalue = (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
352
587k
      wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
353
354
587k
      wvalue = booth_recode_w5(wvalue);
355
356
587k
      ecp_nistz256_select_w5(&h, table, wvalue >> 1);
357
358
587k
      ecp_nistz256_neg(tmp, h.Y);
359
587k
      copy_conditional(h.Y, tmp, (wvalue & 1));
360
361
587k
      ecp_nistz256_point_add(r, r, &h);
362
587k
    }
363
364
598k
    index -= kWindowSize;
365
366
598k
    ecp_nistz256_point_double(r, r);
367
598k
    ecp_nistz256_point_double(r, r);
368
598k
    ecp_nistz256_point_double(r, r);
369
598k
    ecp_nistz256_point_double(r, r);
370
598k
    ecp_nistz256_point_double(r, r);
371
598k
  }
372
373
  // Final window
374
11.7k
  wvalue = p_str[0];
375
11.7k
  wvalue = (wvalue << 1) & kMask;
376
377
11.7k
  wvalue = booth_recode_w5(wvalue);
378
379
11.7k
  ecp_nistz256_select_w5(&h, table, wvalue >> 1);
380
381
11.7k
  ecp_nistz256_neg(tmp, h.Y);
382
11.7k
  copy_conditional(h.Y, tmp, wvalue & 1);
383
384
11.7k
  ecp_nistz256_point_add(r, r, &h);
385
11.7k
}
386
387
12.9k
// calc_first_wvalue returns the Booth-recoded value of the lowest 7-bit window
// of |p_str| and sets |*index| to the bit position of the next window.
static crypto_word_t calc_first_wvalue(size_t *index, const uint8_t p_str[33]) {
  static const size_t kWindowSize = 7;
  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;

  *index = kWindowSize;

  // Shift left by one so that bit zero of the window is zero for the first
  // window.
  const crypto_word_t lowest_window = (p_str[0] << 1) & kMask;
  return booth_recode_w7(lowest_window);
}
395
396
466k
// calc_wvalue returns the Booth-recoded value of the window of |p_str| that
// starts one bit below position |*index|, then advances |*index| by the window
// size of seven. It reads the two bytes |p_str[off]| and |p_str[off + 1]|.
static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) {
  static const size_t kWindowSize = 7;
  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;

  const size_t first_bit = *index - 1;
  const size_t off = first_bit / 8;
  const crypto_word_t two_bytes =
      (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
  const crypto_word_t window = (two_bytes >> (first_bit % 8)) & kMask;

  *index += kWindowSize;

  return booth_recode_w7(window);
}
408
409
static void ecp_nistz256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
410
                                   const EC_JACOBIAN *p,
411
3.18k
                                   const EC_SCALAR *scalar) {
412
3.18k
  alignas(32) P256_POINT out;
413
3.18k
  ecp_nistz256_windowed_mul(group, &out, p, scalar);
414
415
3.18k
  assert(group->field.N.width == P256_LIMBS);
416
3.18k
  OPENSSL_memcpy(r->X.words, out.X, P256_LIMBS * sizeof(BN_ULONG));
417
3.18k
  OPENSSL_memcpy(r->Y.words, out.Y, P256_LIMBS * sizeof(BN_ULONG));
418
3.18k
  OPENSSL_memcpy(r->Z.words, out.Z, P256_LIMBS * sizeof(BN_ULONG));
419
3.18k
}
420
421
static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_JACOBIAN *r,
422
4.39k
                                        const EC_SCALAR *scalar) {
423
4.39k
  uint8_t p_str[33];
424
4.39k
  OPENSSL_memcpy(p_str, scalar->words, 32);
425
4.39k
  p_str[32] = 0;
426
427
  // First window
428
4.39k
  size_t index = 0;
429
4.39k
  crypto_word_t wvalue = calc_first_wvalue(&index, p_str);
430
431
4.39k
  alignas(32) P256_POINT_AFFINE t;
432
4.39k
  alignas(32) P256_POINT p;
433
4.39k
  ecp_nistz256_select_w7(&t, ecp_nistz256_precomputed[0], wvalue >> 1);
434
4.39k
  ecp_nistz256_neg(p.Z, t.Y);
435
4.39k
  copy_conditional(t.Y, p.Z, wvalue & 1);
436
437
  // Convert |t| from affine to Jacobian coordinates. We set Z to zero if |t|
438
  // is infinity and |ONE_MONT| otherwise. |t| was computed from the table, so
439
  // it is infinity iff |wvalue >> 1| is zero.
440
4.39k
  OPENSSL_memcpy(p.X, t.X, sizeof(p.X));
441
4.39k
  OPENSSL_memcpy(p.Y, t.Y, sizeof(p.Y));
442
4.39k
  OPENSSL_memset(p.Z, 0, sizeof(p.Z));
443
4.39k
  copy_conditional(p.Z, ONE_MONT, is_not_zero(wvalue >> 1));
444
445
162k
  for (int i = 1; i < 37; i++) {
446
158k
    wvalue = calc_wvalue(&index, p_str);
447
448
158k
    ecp_nistz256_select_w7(&t, ecp_nistz256_precomputed[i], wvalue >> 1);
449
450
158k
    alignas(32) BN_ULONG neg_Y[P256_LIMBS];
451
158k
    ecp_nistz256_neg(neg_Y, t.Y);
452
158k
    copy_conditional(t.Y, neg_Y, wvalue & 1);
453
454
    // Note |ecp_nistz256_point_add_affine| does not work if |p| and |t| are the
455
    // same non-infinity point.
456
158k
    ecp_nistz256_point_add_affine(&p, &p, &t);
457
158k
  }
458
459
4.39k
  assert(group->field.N.width == P256_LIMBS);
460
4.39k
  OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG));
461
4.39k
  OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG));
462
4.39k
  OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG));
463
4.39k
}
464
465
static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
466
                                           EC_JACOBIAN *r,
467
                                           const EC_SCALAR *g_scalar,
468
                                           const EC_JACOBIAN *p_,
469
8.55k
                                           const EC_SCALAR *p_scalar) {
470
8.55k
  assert(p_ != nullptr && p_scalar != nullptr && g_scalar != nullptr);
471
472
8.55k
  alignas(32) P256_POINT p;
473
8.55k
  uint8_t p_str[33];
474
8.55k
  OPENSSL_memcpy(p_str, g_scalar->words, 32);
475
8.55k
  p_str[32] = 0;
476
477
  // First window
478
8.55k
  size_t index = 0;
479
8.55k
  size_t wvalue = calc_first_wvalue(&index, p_str);
480
481
  // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
482
  // is infinity and |ONE_MONT| otherwise. |p| was computed from the table, so
483
  // it is infinity iff |wvalue >> 1| is zero.
484
8.55k
  if ((wvalue >> 1) != 0) {
485
8.48k
    OPENSSL_memcpy(p.X, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].X,
486
8.48k
                   sizeof(p.X));
487
8.48k
    OPENSSL_memcpy(p.Y, &ecp_nistz256_precomputed[0][(wvalue >> 1) - 1].Y,
488
8.48k
                   sizeof(p.Y));
489
8.48k
    OPENSSL_memcpy(p.Z, ONE_MONT, sizeof(p.Z));
490
8.48k
  } else {
491
68
    OPENSSL_memset(p.X, 0, sizeof(p.X));
492
68
    OPENSSL_memset(p.Y, 0, sizeof(p.Y));
493
68
    OPENSSL_memset(p.Z, 0, sizeof(p.Z));
494
68
  }
495
496
8.55k
  if ((wvalue & 1) == 1) {
497
4.31k
    ecp_nistz256_neg(p.Y, p.Y);
498
4.31k
  }
499
500
316k
  for (int i = 1; i < 37; i++) {
501
308k
    wvalue = calc_wvalue(&index, p_str);
502
308k
    if ((wvalue >> 1) == 0) {
503
3.02k
      continue;
504
3.02k
    }
505
506
304k
    alignas(32) P256_POINT_AFFINE t;
507
304k
    OPENSSL_memcpy(&t, &ecp_nistz256_precomputed[i][(wvalue >> 1) - 1],
508
304k
                   sizeof(t));
509
304k
    if ((wvalue & 1) == 1) {
510
147k
      ecp_nistz256_neg(t.Y, t.Y);
511
147k
    }
512
513
    // Note |ecp_nistz256_point_add_affine| does not work if |p| and |t| are
514
    // the same non-infinity point, so it is important that we compute the
515
    // |g_scalar| term before the |p_scalar| term.
516
304k
    ecp_nistz256_point_add_affine(&p, &p, &t);
517
304k
  }
518
519
8.55k
  alignas(32) P256_POINT tmp;
520
8.55k
  ecp_nistz256_windowed_mul(group, &tmp, p_, p_scalar);
521
8.55k
  ecp_nistz256_point_add(&p, &p, &tmp);
522
523
8.55k
  assert(group->field.N.width == P256_LIMBS);
524
8.55k
  OPENSSL_memcpy(r->X.words, p.X, P256_LIMBS * sizeof(BN_ULONG));
525
8.55k
  OPENSSL_memcpy(r->Y.words, p.Y, P256_LIMBS * sizeof(BN_ULONG));
526
8.55k
  OPENSSL_memcpy(r->Z.words, p.Z, P256_LIMBS * sizeof(BN_ULONG));
527
8.55k
}
528
529
static int ecp_nistz256_get_affine(const EC_GROUP *group,
530
                                   const EC_JACOBIAN *point, EC_FELEM *x,
531
8.14k
                                   EC_FELEM *y) {
532
8.14k
  if (constant_time_declassify_int(
533
8.14k
          ec_GFp_simple_is_at_infinity(group, point))) {
534
2
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
535
2
    return 0;
536
2
  }
537
538
8.14k
  BN_ULONG z_inv2[P256_LIMBS];
539
8.14k
  assert(group->field.N.width == P256_LIMBS);
540
8.14k
  ecp_nistz256_mod_inverse_sqr_mont(z_inv2, point->Z.words);
541
542
8.14k
  if (x != nullptr) {
543
8.14k
    ecp_nistz256_mul_mont(x->words, z_inv2, point->X.words);
544
8.14k
  }
545
546
8.14k
  if (y != nullptr) {
547
4.30k
    ecp_nistz256_sqr_mont(z_inv2, z_inv2);                            // z^-4
548
4.30k
    ecp_nistz256_mul_mont(y->words, point->Y.words, point->Z.words);  // y * z
549
4.30k
    ecp_nistz256_mul_mont(y->words, y->words, z_inv2);  // y * z^-3
550
4.30k
  }
551
552
8.14k
  return 1;
553
8.14k
}
554
555
static void ecp_nistz256_add(const EC_GROUP *group, EC_JACOBIAN *r,
556
0
                             const EC_JACOBIAN *a_, const EC_JACOBIAN *b_) {
557
0
  P256_POINT a, b;
558
0
  OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
559
0
  OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
560
0
  OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
561
0
  OPENSSL_memcpy(b.X, b_->X.words, P256_LIMBS * sizeof(BN_ULONG));
562
0
  OPENSSL_memcpy(b.Y, b_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
563
0
  OPENSSL_memcpy(b.Z, b_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
564
0
  ecp_nistz256_point_add(&a, &a, &b);
565
0
  OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
566
0
  OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
567
0
  OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
568
0
}
569
570
static void ecp_nistz256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
571
0
                             const EC_JACOBIAN *a_) {
572
0
  P256_POINT a;
573
0
  OPENSSL_memcpy(a.X, a_->X.words, P256_LIMBS * sizeof(BN_ULONG));
574
0
  OPENSSL_memcpy(a.Y, a_->Y.words, P256_LIMBS * sizeof(BN_ULONG));
575
0
  OPENSSL_memcpy(a.Z, a_->Z.words, P256_LIMBS * sizeof(BN_ULONG));
576
0
  ecp_nistz256_point_double(&a, &a);
577
0
  OPENSSL_memcpy(r->X.words, a.X, P256_LIMBS * sizeof(BN_ULONG));
578
0
  OPENSSL_memcpy(r->Y.words, a.Y, P256_LIMBS * sizeof(BN_ULONG));
579
0
  OPENSSL_memcpy(r->Z.words, a.Z, P256_LIMBS * sizeof(BN_ULONG));
580
0
}
581
582
static void ecp_nistz256_inv0_mod_ord(const EC_GROUP *group, EC_SCALAR *out,
583
661
                                      const EC_SCALAR *in) {
584
  // table[i] stores a power of |in| corresponding to the matching enum value.
585
661
  enum {
586
    // The following indices specify the power in binary.
587
661
    i_1 = 0,
588
661
    i_10,
589
661
    i_11,
590
661
    i_101,
591
661
    i_111,
592
661
    i_1010,
593
661
    i_1111,
594
661
    i_10101,
595
661
    i_101010,
596
661
    i_101111,
597
    // The following indices specify 2^N-1, or N ones in a row.
598
661
    i_x6,
599
661
    i_x8,
600
661
    i_x16,
601
661
    i_x32
602
661
  };
603
661
  BN_ULONG table[15][P256_LIMBS];
604
605
  // https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
606
  //
607
  // Even though this code path spares 12 squarings, 4.5%, and 13
608
  // multiplications, 25%, the overall sign operation is not that much faster,
609
  // not more that 2%. Most of the performance of this function comes from the
610
  // scalar operations.
611
612
  // Pre-calculate powers.
613
661
  OPENSSL_memcpy(table[i_1], in->words, P256_LIMBS * sizeof(BN_ULONG));
614
615
661
  ecp_nistz256_ord_sqr_mont(table[i_10], table[i_1], 1);
616
617
661
  ecp_nistz256_ord_mul_mont(table[i_11], table[i_1], table[i_10]);
618
619
661
  ecp_nistz256_ord_mul_mont(table[i_101], table[i_11], table[i_10]);
620
621
661
  ecp_nistz256_ord_mul_mont(table[i_111], table[i_101], table[i_10]);
622
623
661
  ecp_nistz256_ord_sqr_mont(table[i_1010], table[i_101], 1);
624
625
661
  ecp_nistz256_ord_mul_mont(table[i_1111], table[i_1010], table[i_101]);
626
627
661
  ecp_nistz256_ord_sqr_mont(table[i_10101], table[i_1010], 1);
628
661
  ecp_nistz256_ord_mul_mont(table[i_10101], table[i_10101], table[i_1]);
629
630
661
  ecp_nistz256_ord_sqr_mont(table[i_101010], table[i_10101], 1);
631
632
661
  ecp_nistz256_ord_mul_mont(table[i_101111], table[i_101010], table[i_101]);
633
634
661
  ecp_nistz256_ord_mul_mont(table[i_x6], table[i_101010], table[i_10101]);
635
636
661
  ecp_nistz256_ord_sqr_mont(table[i_x8], table[i_x6], 2);
637
661
  ecp_nistz256_ord_mul_mont(table[i_x8], table[i_x8], table[i_11]);
638
639
661
  ecp_nistz256_ord_sqr_mont(table[i_x16], table[i_x8], 8);
640
661
  ecp_nistz256_ord_mul_mont(table[i_x16], table[i_x16], table[i_x8]);
641
642
661
  ecp_nistz256_ord_sqr_mont(table[i_x32], table[i_x16], 16);
643
661
  ecp_nistz256_ord_mul_mont(table[i_x32], table[i_x32], table[i_x16]);
644
645
  // Compute |in| raised to the order-2.
646
661
  ecp_nistz256_ord_sqr_mont(out->words, table[i_x32], 64);
647
661
  ecp_nistz256_ord_mul_mont(out->words, out->words, table[i_x32]);
648
661
  static const struct {
649
661
    uint8_t p, i;
650
661
  } kChain[27] = {{32, i_x32},    {6, i_101111}, {5, i_111},    {4, i_11},
651
661
                  {5, i_1111},    {5, i_10101},  {4, i_101},    {3, i_101},
652
661
                  {3, i_101},     {5, i_111},    {9, i_101111}, {6, i_1111},
653
661
                  {2, i_1},       {5, i_1},      {6, i_1111},   {5, i_111},
654
661
                  {4, i_111},     {5, i_111},    {5, i_101},    {3, i_11},
655
661
                  {10, i_101111}, {2, i_11},     {5, i_11},     {5, i_11},
656
661
                  {3, i_1},       {7, i_10101},  {6, i_1111}};
657
17.8k
  for (const auto &step : kChain) {
658
17.8k
    ecp_nistz256_ord_sqr_mont(out->words, out->words, step.p);
659
17.8k
    ecp_nistz256_ord_mul_mont(out->words, out->words, table[step.i]);
660
17.8k
  }
661
661
}
662
663
static int ecp_nistz256_scalar_to_montgomery_inv_vartime(const EC_GROUP *group,
664
                                                         EC_SCALAR *out,
665
8.55k
                                                         const EC_SCALAR *in) {
666
8.55k
#if defined(OPENSSL_X86_64)
667
8.55k
  if (!CRYPTO_is_AVX_capable()) {
668
    // No AVX support; fallback to generic code.
669
0
    return ec_simple_scalar_to_montgomery_inv_vartime(group, out, in);
670
0
  }
671
8.55k
#endif
672
673
8.55k
  assert(group->order.N.width == P256_LIMBS);
674
8.55k
  if (!beeu_mod_inverse_vartime(out->words, in->words, group->order.N.d)) {
675
0
    return 0;
676
0
  }
677
678
  // The result should be returned in the Montgomery domain.
679
8.55k
  ec_scalar_to_montgomery(group, out, out);
680
8.55k
  return 1;
681
8.55k
}
682
683
static int ecp_nistz256_cmp_x_coordinate(const EC_GROUP *group,
684
                                         const EC_JACOBIAN *p,
685
8.55k
                                         const EC_SCALAR *r) {
686
8.55k
  if (ec_GFp_simple_is_at_infinity(group, p)) {
687
0
    return 0;
688
0
  }
689
690
8.55k
  assert(group->order.N.width == P256_LIMBS);
691
8.55k
  assert(group->field.N.width == P256_LIMBS);
692
693
  // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
694
  // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
695
  // not.
696
8.55k
  BN_ULONG r_Z2[P256_LIMBS], Z2_mont[P256_LIMBS], X[P256_LIMBS];
697
8.55k
  ecp_nistz256_mul_mont(Z2_mont, p->Z.words, p->Z.words);
698
8.55k
  ecp_nistz256_mul_mont(r_Z2, r->words, Z2_mont);
699
8.55k
  ecp_nistz256_from_mont(X, p->X.words);
700
701
8.55k
  if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
702
13
    return 1;
703
13
  }
704
705
  // During signing the x coefficient is reduced modulo the group order.
706
  // Therefore there is a small possibility, less than 1/2^128, that group_order
707
  // < p.x < P. in that case we need not only to compare against |r| but also to
708
  // compare against r+group_order.
709
8.54k
  BN_ULONG carry = bn_add_words(r_Z2, r->words, group->order.N.d, P256_LIMBS);
710
8.54k
  if (carry == 0 && bn_less_than_words(r_Z2, group->field.N.d, P256_LIMBS)) {
711
    // r + group_order < p, so compare (r + group_order) * Z^2 against X.
712
198
    ecp_nistz256_mul_mont(r_Z2, r_Z2, Z2_mont);
713
198
    if (OPENSSL_memcmp(r_Z2, X, sizeof(r_Z2)) == 0) {
714
0
      return 1;
715
0
    }
716
198
  }
717
718
8.54k
  return 0;
719
8.54k
}
720
721
BSSL_NAMESPACE_BEGIN
722
723
19
// EC_GFp_nistz256_method fills in the |EC_METHOD| table with the P-256
// implementations defined in this file.
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
  // Point arithmetic.
  out->add = ecp_nistz256_add;
  out->dbl = ecp_nistz256_dbl;
  out->point_get_affine_coordinates = ecp_nistz256_get_affine;

  // Scalar multiplication.
  out->mul = ecp_nistz256_point_mul;
  out->mul_base = ecp_nistz256_point_mul_base;
  out->mul_public = ecp_nistz256_points_mul_public;

  // Scalar inversion and x-coordinate comparison.
  out->scalar_inv0_montgomery = ecp_nistz256_inv0_mod_ord;
  out->scalar_to_montgomery_inv_vartime =
      ecp_nistz256_scalar_to_montgomery_inv_vartime;
  out->cmp_x_coordinate = ecp_nistz256_cmp_x_coordinate;
}
735
736
BSSL_NAMESPACE_END
737
738
#endif /* !defined(OPENSSL_NO_ASM) &&                              \
739
          (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \
740
          !defined(OPENSSL_SMALL) */