Coverage Report

Created: 2026-03-19 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boringssl/crypto/fipsmodule/ec/p256.cc.inc
Line
Count
Source
1
// Copyright 2020 The BoringSSL Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// An implementation of the NIST P-256 elliptic curve point multiplication.
16
// 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by
17
// Fiat, which lives in //third_party/fiat.
18
19
#include <openssl/base.h>
20
21
#include <openssl/bn.h>
22
#include <openssl/ec.h>
23
#include <openssl/err.h>
24
#include <openssl/mem.h>
25
26
#include <assert.h>
27
#include <string.h>
28
29
#include <iterator>
30
31
#include "../../internal.h"
32
#include "../delocate.h"
33
#include "./internal.h"
34
35
#include "../../../third_party/fiat/p256_field.c.inc"
36
#include "../../../third_party/fiat/p256_point.br.c.inc"
37
38
39
using namespace bssl;
40
41
// utility functions, handwritten
42
43
#if defined(OPENSSL_64_BIT)
44
0
#define FIAT_P256_NLIMBS 4
45
typedef uint64_t fiat_p256_limb_t;
46
typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS];
47
static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000,
48
                                              0xffffffffffffffff, 0xfffffffe};
49
#else  // 64BIT; else 32BIT
50
#define FIAT_P256_NLIMBS 8
51
typedef uint32_t fiat_p256_limb_t;
52
typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS];
53
static const fiat_p256_felem fiat_p256_one = {
54
    0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0};
55
#endif  // 64BIT
56
57
58
static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
59
0
                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
60
0
  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
61
0
    out[i] = in1[i];
62
0
  }
63
0
}
64
65
static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
66
                              fiat_p256_limb_t t,
67
                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
68
0
                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
69
0
  fiat_p256_selectznz(out, !!t, z, nz);
70
0
}
71
72
static void fiat_p256_from_words(fiat_p256_felem out,
73
0
                                 const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
74
  // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
75
  // 64-bit platforms without |uint128_t|, they are different. However, on
76
  // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same
77
  // layout.
78
0
  OPENSSL_memcpy(out, in, 32);
79
0
}
80
81
0
static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
82
0
  fiat_p256_from_words(out, in->words);
83
0
}
84
85
0
static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
86
  // See |fiat_p256_from_words|.
87
0
  OPENSSL_memcpy(out->words, in, 32);
88
0
}
89
90
// fiat_p256_inv_square calculates |out| = |in|^{-2}
91
//
92
// Based on Fermat's Little Theorem:
93
//   a^p = a (mod p)
94
//   a^{p-1} = 1 (mod p)
95
//   a^{p-3} = a^{-2} (mod p)
96
static void fiat_p256_inv_square(fiat_p256_felem out,
97
0
                                 const fiat_p256_felem in) {
98
  // This implements the addition chain described in
99
  // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
100
0
  fiat_p256_felem x2, x3, x6, x12, x15, x30, x32;
101
0
  fiat_p256_square(x2, in);   // 2^2 - 2^1
102
0
  fiat_p256_mul(x2, x2, in);  // 2^2 - 2^0
103
104
0
  fiat_p256_square(x3, x2);   // 2^3 - 2^1
105
0
  fiat_p256_mul(x3, x3, in);  // 2^3 - 2^0
106
107
0
  fiat_p256_square(x6, x3);
108
0
  for (int i = 1; i < 3; i++) {
109
0
    fiat_p256_square(x6, x6);
110
0
  }                           // 2^6 - 2^3
111
0
  fiat_p256_mul(x6, x6, x3);  // 2^6 - 2^0
112
113
0
  fiat_p256_square(x12, x6);
114
0
  for (int i = 1; i < 6; i++) {
115
0
    fiat_p256_square(x12, x12);
116
0
  }                             // 2^12 - 2^6
117
0
  fiat_p256_mul(x12, x12, x6);  // 2^12 - 2^0
118
119
0
  fiat_p256_square(x15, x12);
120
0
  for (int i = 1; i < 3; i++) {
121
0
    fiat_p256_square(x15, x15);
122
0
  }                             // 2^15 - 2^3
123
0
  fiat_p256_mul(x15, x15, x3);  // 2^15 - 2^0
124
125
0
  fiat_p256_square(x30, x15);
126
0
  for (int i = 1; i < 15; i++) {
127
0
    fiat_p256_square(x30, x30);
128
0
  }                              // 2^30 - 2^15
129
0
  fiat_p256_mul(x30, x30, x15);  // 2^30 - 2^0
130
131
0
  fiat_p256_square(x32, x30);
132
0
  fiat_p256_square(x32, x32);   // 2^32 - 2^2
133
0
  fiat_p256_mul(x32, x32, x2);  // 2^32 - 2^0
134
135
0
  fiat_p256_felem ret;
136
0
  fiat_p256_square(ret, x32);
137
0
  for (int i = 1; i < 31 + 1; i++) {
138
0
    fiat_p256_square(ret, ret);
139
0
  }                             // 2^64 - 2^32
140
0
  fiat_p256_mul(ret, ret, in);  // 2^64 - 2^32 + 2^0
141
142
0
  for (int i = 0; i < 96 + 32; i++) {
143
0
    fiat_p256_square(ret, ret);
144
0
  }                              // 2^192 - 2^160 + 2^128
145
0
  fiat_p256_mul(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
146
147
0
  for (int i = 0; i < 32; i++) {
148
0
    fiat_p256_square(ret, ret);
149
0
  }                              // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
150
0
  fiat_p256_mul(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
151
152
0
  for (int i = 0; i < 30; i++) {
153
0
    fiat_p256_square(ret, ret);
154
0
  }                              // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
155
0
  fiat_p256_mul(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
156
157
0
  fiat_p256_square(ret, ret);
158
0
  fiat_p256_square(out, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
159
0
}
160
161
// Group operations
162
// ----------------
163
//
164
// Building on top of the field operations we have the operations on the
165
// elliptic curve group itself. Points on the curve are represented in Jacobian
166
// coordinates.
167
168
static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
169
                                   fiat_p256_felem z_out,
170
                                   const fiat_p256_felem x_in,
171
                                   const fiat_p256_felem y_in,
172
0
                                   const fiat_p256_felem z_in) {
173
0
  uint8_t out[3*32], in[3*32];
174
0
  static_assert(sizeof(fiat_p256_felem) == 32);
175
0
  OPENSSL_memcpy(&in[0], x_in, 32);
176
0
  OPENSSL_memcpy(&in[32], y_in, 32);
177
0
  OPENSSL_memcpy(&in[64], z_in, 32);
178
0
  p256_point_double((br_word_t)out, (br_word_t)in);
179
0
  OPENSSL_memcpy(x_out, &out[0], 32);
180
0
  OPENSSL_memcpy(y_out, &out[32], 32);
181
0
  OPENSSL_memcpy(z_out, &out[64], 32);
182
0
}
183
184
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
185
                                fiat_p256_felem z3, const fiat_p256_felem x1,
186
                                const fiat_p256_felem y1,
187
                                const fiat_p256_felem z1,
188
                                const fiat_p256_felem x2,
189
                                const fiat_p256_felem y2,
190
0
                                const fiat_p256_felem z2) {
191
0
  uint8_t out[3 * 32], in1[3 * 32], in2[3 * 32];
192
0
  static_assert(sizeof(fiat_p256_felem) == 32);
193
0
  OPENSSL_memcpy(&in1[0], x1, 32);
194
0
  OPENSSL_memcpy(&in1[32], y1, 32);
195
0
  OPENSSL_memcpy(&in1[64], z1, 32);
196
0
  OPENSSL_memcpy(&in2[0], x2, 32);
197
0
  OPENSSL_memcpy(&in2[32], y2, 32);
198
0
  OPENSSL_memcpy(&in2[64], z2, 32);
199
0
  p256_point_add_vartime_if_doubling((br_word_t)out, (br_word_t)in1,
200
0
                                     (br_word_t)in2);
201
0
  OPENSSL_memcpy(x3, &out[0], 32);
202
0
  OPENSSL_memcpy(y3, &out[32], 32);
203
0
  OPENSSL_memcpy(z3, &out[64], 32);
204
0
}
205
#include "./p256_table.h"
206
207
// fiat_p256_select_point_affine selects the |idx-1|th point from a
208
// precomputation table and copies it to out. If |idx| is zero, the output is
209
// the point at infinity.
210
static void fiat_p256_select_point_affine(
211
    const fiat_p256_limb_t idx, size_t size,
212
0
    const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
213
0
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
214
0
  for (size_t i = 0; i < size; i++) {
215
0
    fiat_p256_limb_t mismatch = i ^ (idx - 1);
216
0
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
217
0
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
218
0
  }
219
0
  fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
220
0
}
221
222
// fiat_p256_select_point selects the |idx|th point from a precomputation table
223
// and copies it to out.
224
static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
225
                                   const fiat_p256_felem pre_comp[/*size*/][3],
226
0
                                   fiat_p256_felem out[3]) {
227
0
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
228
0
  for (size_t i = 0; i < size; i++) {
229
0
    fiat_p256_limb_t mismatch = i ^ idx;
230
0
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
231
0
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
232
0
    fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]);
233
0
  }
234
0
}
235
236
// fiat_p256_get_bit returns the |i|th bit in |in|.
237
0
static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
238
0
  if (i < 0 || i >= 256) {
239
0
    return 0;
240
0
  }
241
0
#if defined(OPENSSL_64_BIT)
242
0
  static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit");
243
0
  return (in->words[i >> 6] >> (i & 63)) & 1;
244
#else
245
  static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit");
246
  return (in->words[i >> 5] >> (i & 31)) & 1;
247
#endif
248
0
}
249
250
// OPENSSL EC_METHOD FUNCTIONS
251
252
// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
253
// (X/Z^2, Y/Z^3).
254
static int ec_GFp_nistp256_point_get_affine_coordinates(
255
    const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out,
256
0
    EC_FELEM *y_out) {
257
0
  if (constant_time_declassify_int(
258
0
          ec_GFp_simple_is_at_infinity(group, point))) {
259
0
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
260
0
    return 0;
261
0
  }
262
263
0
  fiat_p256_felem z1, z2;
264
0
  fiat_p256_from_generic(z1, &point->Z);
265
0
  fiat_p256_inv_square(z2, z1);
266
267
0
  if (x_out != nullptr) {
268
0
    fiat_p256_felem x;
269
0
    fiat_p256_from_generic(x, &point->X);
270
0
    fiat_p256_mul(x, x, z2);
271
0
    fiat_p256_to_generic(x_out, x);
272
0
  }
273
274
0
  if (y_out != nullptr) {
275
0
    fiat_p256_felem y;
276
0
    fiat_p256_from_generic(y, &point->Y);
277
0
    fiat_p256_square(z2, z2);  // z^-4
278
0
    fiat_p256_mul(y, y, z1);   // y * z
279
0
    fiat_p256_mul(y, y, z2);   // y * z^-3
280
0
    fiat_p256_to_generic(y_out, y);
281
0
  }
282
283
0
  return 1;
284
0
}
285
286
static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r,
287
0
                                const EC_JACOBIAN *a, const EC_JACOBIAN *b) {
288
0
  fiat_p256_felem x1, y1, z1, x2, y2, z2;
289
0
  fiat_p256_from_generic(x1, &a->X);
290
0
  fiat_p256_from_generic(y1, &a->Y);
291
0
  fiat_p256_from_generic(z1, &a->Z);
292
0
  fiat_p256_from_generic(x2, &b->X);
293
0
  fiat_p256_from_generic(y2, &b->Y);
294
0
  fiat_p256_from_generic(z2, &b->Z);
295
0
  fiat_p256_point_add(x1, y1, z1, x1, y1, z1, x2, y2, z2);
296
0
  fiat_p256_to_generic(&r->X, x1);
297
0
  fiat_p256_to_generic(&r->Y, y1);
298
0
  fiat_p256_to_generic(&r->Z, z1);
299
0
}
300
301
static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
302
0
                                const EC_JACOBIAN *a) {
303
0
  fiat_p256_felem x, y, z;
304
0
  fiat_p256_from_generic(x, &a->X);
305
0
  fiat_p256_from_generic(y, &a->Y);
306
0
  fiat_p256_from_generic(z, &a->Z);
307
0
  fiat_p256_point_double(x, y, z, x, y, z);
308
0
  fiat_p256_to_generic(&r->X, x);
309
0
  fiat_p256_to_generic(&r->Y, y);
310
0
  fiat_p256_to_generic(&r->Z, z);
311
0
}
312
313
static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
314
                                      const EC_JACOBIAN *p,
315
0
                                      const EC_SCALAR *scalar) {
316
0
  fiat_p256_felem p_pre_comp[17][3];
317
0
  OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
318
  // Precompute multiples.
319
0
  fiat_p256_from_generic(p_pre_comp[1][0], &p->X);
320
0
  fiat_p256_from_generic(p_pre_comp[1][1], &p->Y);
321
0
  fiat_p256_from_generic(p_pre_comp[1][2], &p->Z);
322
0
  for (size_t j = 2; j <= 16; ++j) {
323
0
    if (j & 1) {
324
0
      fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
325
0
                          p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
326
0
                          p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
327
0
                          p_pre_comp[j - 1][2]);
328
0
    } else {
329
0
      fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
330
0
                             p_pre_comp[j][2], p_pre_comp[j / 2][0],
331
0
                             p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
332
0
    }
333
0
  }
334
335
  // Set nq to the point at infinity.
336
0
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3];
337
338
  // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round.
339
0
  int skip = 1;  // Save two point operations in the first round.
340
0
  for (size_t i = 255; i < 256; i--) {
341
    // double
342
0
    if (!skip) {
343
0
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
344
0
    }
345
346
    // do other additions every 5 doublings
347
0
    if (i % 5 == 0) {
348
0
      crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5;
349
0
      bits |= fiat_p256_get_bit(scalar, i + 3) << 4;
350
0
      bits |= fiat_p256_get_bit(scalar, i + 2) << 3;
351
0
      bits |= fiat_p256_get_bit(scalar, i + 1) << 2;
352
0
      bits |= fiat_p256_get_bit(scalar, i) << 1;
353
0
      bits |= fiat_p256_get_bit(scalar, i - 1);
354
0
      crypto_word_t sign, digit;
355
0
      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
356
357
      // select the point to add or subtract, in constant time.
358
0
      fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
359
0
                             (const fiat_p256_felem(*)[3])p_pre_comp, tmp);
360
0
      fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
361
0
      fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp);
362
363
0
      if (!skip) {
364
0
        fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0],
365
0
                            tmp[1], tmp[2]);
366
0
      } else {
367
0
        fiat_p256_copy(nq[0], tmp[0]);
368
0
        fiat_p256_copy(nq[1], tmp[1]);
369
0
        fiat_p256_copy(nq[2], tmp[2]);
370
0
        skip = 0;
371
0
      }
372
0
    }
373
0
  }
374
375
0
  fiat_p256_to_generic(&r->X, nq[0]);
376
0
  fiat_p256_to_generic(&r->Y, nq[1]);
377
0
  fiat_p256_to_generic(&r->Z, nq[2]);
378
0
}
379
380
static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
381
                                           EC_JACOBIAN *r,
382
0
                                           const EC_SCALAR *scalar) {
383
  // Set nq to the point at infinity.
384
0
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3];
385
386
0
  int skip = 1;  // Save two point operations in the first round.
387
0
  for (size_t i = 31; i < 32; i--) {
388
0
    if (!skip) {
389
0
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
390
0
    }
391
392
    // First, look 32 bits upwards.
393
0
    crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
394
0
    bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
395
0
    bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
396
0
    bits |= fiat_p256_get_bit(scalar, i + 32);
397
    // Select the point to add, in constant time.
398
0
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
399
0
                                  fiat_p256_g_pre_comp[1], tmp);
400
401
0
    if (!skip) {
402
0
      fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0],
403
0
                          tmp[1], tmp[2]);
404
0
    } else {
405
0
      fiat_p256_copy(nq[0], tmp[0]);
406
0
      fiat_p256_copy(nq[1], tmp[1]);
407
0
      fiat_p256_copy(nq[2], tmp[2]);
408
0
      skip = 0;
409
0
    }
410
411
    // Second, look at the current position.
412
0
    bits = fiat_p256_get_bit(scalar, i + 192) << 3;
413
0
    bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
414
0
    bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
415
0
    bits |= fiat_p256_get_bit(scalar, i);
416
    // Select the point to add, in constant time.
417
0
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
418
0
                                  fiat_p256_g_pre_comp[0], tmp);
419
0
    fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0],
420
0
                        tmp[1], tmp[2]);
421
0
  }
422
423
0
  fiat_p256_to_generic(&r->X, nq[0]);
424
0
  fiat_p256_to_generic(&r->Y, nq[1]);
425
0
  fiat_p256_to_generic(&r->Z, nq[2]);
426
0
}
427
428
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
429
                                             EC_JACOBIAN *r,
430
                                             const EC_SCALAR *g_scalar,
431
                                             const EC_JACOBIAN *p,
432
0
                                             const EC_SCALAR *p_scalar) {
433
0
#define P256_WSIZE_PUBLIC 4
434
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
435
0
  fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3];
436
0
  fiat_p256_from_generic(p_pre_comp[0][0], &p->X);
437
0
  fiat_p256_from_generic(p_pre_comp[0][1], &p->Y);
438
0
  fiat_p256_from_generic(p_pre_comp[0][2], &p->Z);
439
0
  fiat_p256_felem p2[3];
440
0
  fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0],
441
0
                         p_pre_comp[0][1], p_pre_comp[0][2]);
442
0
  for (size_t i = 1; i < std::size(p_pre_comp); i++) {
443
0
    fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
444
0
                        p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
445
0
                        p_pre_comp[i - 1][2], p2[0], p2[1], p2[2]);
446
0
  }
447
448
  // Set up the coefficients for |p_scalar|.
449
0
  int8_t p_wNAF[257];
450
0
  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
451
452
  // Set |ret| to the point at infinity.
453
0
  int skip = 1;  // Save some point operations.
454
0
  fiat_p256_felem ret[3] = {{0}, {0}, {0}};
455
0
  for (int i = 256; i >= 0; i--) {
456
0
    if (!skip) {
457
0
      fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
458
0
    }
459
460
    // For the |g_scalar|, we use the precomputed table without the
461
    // constant-time lookup.
462
0
    if (i <= 31) {
463
      // First, look 32 bits upwards.
464
0
      crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
465
0
      bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
466
0
      bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
467
0
      bits |= fiat_p256_get_bit(g_scalar, i + 32);
468
0
      if (bits != 0) {
469
0
        size_t index = (size_t)(bits - 1);
470
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
471
0
                            fiat_p256_g_pre_comp[1][index][0],
472
0
                            fiat_p256_g_pre_comp[1][index][1], fiat_p256_one);
473
0
        skip = 0;
474
0
      }
475
476
      // Second, look at the current position.
477
0
      bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
478
0
      bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
479
0
      bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
480
0
      bits |= fiat_p256_get_bit(g_scalar, i);
481
0
      if (bits != 0) {
482
0
        size_t index = (size_t)(bits - 1);
483
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
484
0
                            fiat_p256_g_pre_comp[0][index][0],
485
0
                            fiat_p256_g_pre_comp[0][index][1], fiat_p256_one);
486
0
        skip = 0;
487
0
      }
488
0
    }
489
490
0
    int digit = p_wNAF[i];
491
0
    if (digit != 0) {
492
0
      assert(digit & 1);
493
0
      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
494
0
      fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
495
0
      if (digit < 0) {
496
0
        fiat_p256_opp(tmp, p_pre_comp[idx][1]);
497
0
        y = &tmp;
498
0
      }
499
0
      if (!skip) {
500
0
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
501
0
                            p_pre_comp[idx][0], *y, p_pre_comp[idx][2]);
502
0
      } else {
503
0
        fiat_p256_copy(ret[0], p_pre_comp[idx][0]);
504
0
        fiat_p256_copy(ret[1], *y);
505
0
        fiat_p256_copy(ret[2], p_pre_comp[idx][2]);
506
0
        skip = 0;
507
0
      }
508
0
    }
509
0
  }
510
511
0
  fiat_p256_to_generic(&r->X, ret[0]);
512
0
  fiat_p256_to_generic(&r->Y, ret[1]);
513
0
  fiat_p256_to_generic(&r->Z, ret[2]);
514
0
}
515
516
static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
517
                                            const EC_JACOBIAN *p,
518
0
                                            const EC_SCALAR *r) {
519
0
  if (ec_GFp_simple_is_at_infinity(group, p)) {
520
0
    return 0;
521
0
  }
522
523
  // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
524
  // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
525
  // not.
526
0
  fiat_p256_felem Z2_mont;
527
0
  fiat_p256_from_generic(Z2_mont, &p->Z);
528
0
  fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);
529
530
0
  fiat_p256_felem r_Z2;
531
0
  fiat_p256_from_words(r_Z2, r->words);  // r < order < p, so this is valid.
532
0
  fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
533
534
0
  fiat_p256_felem X;
535
0
  fiat_p256_from_generic(X, &p->X);
536
0
  fiat_p256_from_montgomery(X, X);
537
538
0
  if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
539
0
    return 1;
540
0
  }
541
542
  // During signing the x coefficient is reduced modulo the group order.
543
  // Therefore there is a small possibility, less than 1/2^128, that group_order
544
  // < p.x < P. in that case we need not only to compare against |r| but also to
545
  // compare against r+group_order.
546
0
  assert(group->field.N.width == group->order.N.width);
547
0
  EC_FELEM tmp;
548
0
  BN_ULONG carry =
549
0
      bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width);
550
0
  if (carry == 0 &&
551
0
      bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) {
552
0
    fiat_p256_from_generic(r_Z2, &tmp);
553
0
    fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
554
0
    if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
555
0
      return 1;
556
0
    }
557
0
  }
558
559
0
  return 0;
560
0
}
561
562
BSSL_NAMESPACE_BEGIN
563
564
0
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
565
0
  out->point_get_affine_coordinates =
566
0
      ec_GFp_nistp256_point_get_affine_coordinates;
567
0
  out->add = ec_GFp_nistp256_add;
568
0
  out->dbl = ec_GFp_nistp256_dbl;
569
0
  out->mul = ec_GFp_nistp256_point_mul;
570
0
  out->mul_base = ec_GFp_nistp256_point_mul_base;
571
0
  out->mul_public = ec_GFp_nistp256_point_mul_public;
572
0
  out->felem_mul = ec_GFp_mont_felem_mul;
573
0
  out->felem_sqr = ec_GFp_mont_felem_sqr;
574
0
  out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
575
0
  out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
576
0
  out->felem_reduce = ec_GFp_mont_felem_reduce;
577
  // TODO(davidben): This should use the specialized field arithmetic
578
  // implementation, rather than the generic one.
579
0
  out->felem_exp = ec_GFp_mont_felem_exp;
580
0
  out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
581
0
  out->scalar_to_montgomery_inv_vartime =
582
0
      ec_simple_scalar_to_montgomery_inv_vartime;
583
0
  out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
584
0
}
585
586
BSSL_NAMESPACE_END