/src/boringssl/crypto/fipsmodule/ec/p256.cc.inc
Line | Count | Source |
1 | | // Copyright 2020 The BoringSSL Authors |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // An implementation of the NIST P-256 elliptic curve point multiplication. |
16 | | // 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by |
17 | | // Fiat, which lives in //third_party/fiat. |
18 | | |
19 | | #include <openssl/base.h> |
20 | | |
21 | | #include <openssl/bn.h> |
22 | | #include <openssl/ec.h> |
23 | | #include <openssl/err.h> |
24 | | #include <openssl/mem.h> |
25 | | |
26 | | #include <assert.h> |
27 | | #include <string.h> |
28 | | |
29 | | #include <iterator> |
30 | | |
31 | | #include "../../internal.h" |
32 | | #include "../delocate.h" |
33 | | #include "./internal.h" |
34 | | |
35 | | #include "../../../third_party/fiat/p256_field.c.inc" |
36 | | #include "../../../third_party/fiat/p256_point.br.c.inc" |
37 | | |
38 | | |
39 | | using namespace bssl; |
40 | | |
41 | | // utility functions, handwritten |
42 | | |
43 | | #if defined(OPENSSL_64_BIT) |
44 | 0 | #define FIAT_P256_NLIMBS 4 |
45 | | typedef uint64_t fiat_p256_limb_t; |
46 | | typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS]; |
47 | | static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000, |
48 | | 0xffffffffffffffff, 0xfffffffe}; |
49 | | #else // 64BIT; else 32BIT |
50 | | #define FIAT_P256_NLIMBS 8 |
51 | | typedef uint32_t fiat_p256_limb_t; |
52 | | typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS]; |
53 | | static const fiat_p256_felem fiat_p256_one = { |
54 | | 0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0}; |
55 | | #endif // 64BIT |
56 | | |
57 | | |
58 | | static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS], |
59 | 0 | const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) { |
60 | 0 | for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) { |
61 | 0 | out[i] = in1[i]; |
62 | 0 | } |
63 | 0 | } |
64 | | |
65 | | static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS], |
66 | | fiat_p256_limb_t t, |
67 | | const fiat_p256_limb_t z[FIAT_P256_NLIMBS], |
68 | 0 | const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) { |
69 | 0 | fiat_p256_selectznz(out, !!t, z, nz); |
70 | 0 | } |
71 | | |
72 | | static void fiat_p256_from_words(fiat_p256_felem out, |
73 | 0 | const BN_ULONG in[32 / sizeof(BN_ULONG)]) { |
74 | | // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on |
75 | | // 64-bit platforms without |uint128_t|, they are different. However, on |
76 | | // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same |
77 | | // layout. |
78 | 0 | OPENSSL_memcpy(out, in, 32); |
79 | 0 | } |
80 | | |
81 | 0 | static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) { |
82 | 0 | fiat_p256_from_words(out, in->words); |
83 | 0 | } |
84 | | |
85 | 0 | static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) { |
86 | | // See |fiat_p256_from_words|. |
87 | 0 | OPENSSL_memcpy(out->words, in, 32); |
88 | 0 | } |
89 | | |
90 | | // fiat_p256_inv_square calculates |out| = |in|^{-2} |
91 | | // |
92 | | // Based on Fermat's Little Theorem: |
93 | | // a^p = a (mod p) |
94 | | // a^{p-1} = 1 (mod p) |
95 | | // a^{p-3} = a^{-2} (mod p) |
96 | | static void fiat_p256_inv_square(fiat_p256_felem out, |
97 | 0 | const fiat_p256_felem in) { |
98 | | // This implements the addition chain described in |
99 | | // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion |
100 | 0 | fiat_p256_felem x2, x3, x6, x12, x15, x30, x32; |
101 | 0 | fiat_p256_square(x2, in); // 2^2 - 2^1 |
102 | 0 | fiat_p256_mul(x2, x2, in); // 2^2 - 2^0 |
103 | |
|
104 | 0 | fiat_p256_square(x3, x2); // 2^3 - 2^1 |
105 | 0 | fiat_p256_mul(x3, x3, in); // 2^3 - 2^0 |
106 | |
|
107 | 0 | fiat_p256_square(x6, x3); |
108 | 0 | for (int i = 1; i < 3; i++) { |
109 | 0 | fiat_p256_square(x6, x6); |
110 | 0 | } // 2^6 - 2^3 |
111 | 0 | fiat_p256_mul(x6, x6, x3); // 2^6 - 2^0 |
112 | |
|
113 | 0 | fiat_p256_square(x12, x6); |
114 | 0 | for (int i = 1; i < 6; i++) { |
115 | 0 | fiat_p256_square(x12, x12); |
116 | 0 | } // 2^12 - 2^6 |
117 | 0 | fiat_p256_mul(x12, x12, x6); // 2^12 - 2^0 |
118 | |
|
119 | 0 | fiat_p256_square(x15, x12); |
120 | 0 | for (int i = 1; i < 3; i++) { |
121 | 0 | fiat_p256_square(x15, x15); |
122 | 0 | } // 2^15 - 2^3 |
123 | 0 | fiat_p256_mul(x15, x15, x3); // 2^15 - 2^0 |
124 | |
|
125 | 0 | fiat_p256_square(x30, x15); |
126 | 0 | for (int i = 1; i < 15; i++) { |
127 | 0 | fiat_p256_square(x30, x30); |
128 | 0 | } // 2^30 - 2^15 |
129 | 0 | fiat_p256_mul(x30, x30, x15); // 2^30 - 2^0 |
130 | |
|
131 | 0 | fiat_p256_square(x32, x30); |
132 | 0 | fiat_p256_square(x32, x32); // 2^32 - 2^2 |
133 | 0 | fiat_p256_mul(x32, x32, x2); // 2^32 - 2^0 |
134 | |
|
135 | 0 | fiat_p256_felem ret; |
136 | 0 | fiat_p256_square(ret, x32); |
137 | 0 | for (int i = 1; i < 31 + 1; i++) { |
138 | 0 | fiat_p256_square(ret, ret); |
139 | 0 | } // 2^64 - 2^32 |
140 | 0 | fiat_p256_mul(ret, ret, in); // 2^64 - 2^32 + 2^0 |
141 | |
|
142 | 0 | for (int i = 0; i < 96 + 32; i++) { |
143 | 0 | fiat_p256_square(ret, ret); |
144 | 0 | } // 2^192 - 2^160 + 2^128 |
145 | 0 | fiat_p256_mul(ret, ret, x32); // 2^192 - 2^160 + 2^128 + 2^32 - 2^0 |
146 | |
|
147 | 0 | for (int i = 0; i < 32; i++) { |
148 | 0 | fiat_p256_square(ret, ret); |
149 | 0 | } // 2^224 - 2^192 + 2^160 + 2^64 - 2^32 |
150 | 0 | fiat_p256_mul(ret, ret, x32); // 2^224 - 2^192 + 2^160 + 2^64 - 2^0 |
151 | |
|
152 | 0 | for (int i = 0; i < 30; i++) { |
153 | 0 | fiat_p256_square(ret, ret); |
154 | 0 | } // 2^254 - 2^222 + 2^190 + 2^94 - 2^30 |
155 | 0 | fiat_p256_mul(ret, ret, x30); // 2^254 - 2^222 + 2^190 + 2^94 - 2^0 |
156 | |
|
157 | 0 | fiat_p256_square(ret, ret); |
158 | 0 | fiat_p256_square(out, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2 |
159 | 0 | } |
160 | | |
161 | | // Group operations |
162 | | // ---------------- |
163 | | // |
164 | | // Building on top of the field operations we have the operations on the |
165 | | // elliptic curve group itself. Points on the curve are represented in Jacobian |
166 | | // coordinates. |
167 | | |
168 | | static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out, |
169 | | fiat_p256_felem z_out, |
170 | | const fiat_p256_felem x_in, |
171 | | const fiat_p256_felem y_in, |
172 | 0 | const fiat_p256_felem z_in) { |
173 | 0 | uint8_t out[3*32], in[3*32]; |
174 | 0 | static_assert(sizeof(fiat_p256_felem) == 32); |
175 | 0 | OPENSSL_memcpy(&in[0], x_in, 32); |
176 | 0 | OPENSSL_memcpy(&in[32], y_in, 32); |
177 | 0 | OPENSSL_memcpy(&in[64], z_in, 32); |
178 | 0 | p256_point_double((br_word_t)out, (br_word_t)in); |
179 | 0 | OPENSSL_memcpy(x_out, &out[0], 32); |
180 | 0 | OPENSSL_memcpy(y_out, &out[32], 32); |
181 | 0 | OPENSSL_memcpy(z_out, &out[64], 32); |
182 | 0 | } |
183 | | |
184 | | static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3, |
185 | | fiat_p256_felem z3, const fiat_p256_felem x1, |
186 | | const fiat_p256_felem y1, |
187 | | const fiat_p256_felem z1, |
188 | | const fiat_p256_felem x2, |
189 | | const fiat_p256_felem y2, |
190 | 0 | const fiat_p256_felem z2) { |
191 | 0 | uint8_t out[3 * 32], in1[3 * 32], in2[3 * 32]; |
192 | 0 | static_assert(sizeof(fiat_p256_felem) == 32); |
193 | 0 | OPENSSL_memcpy(&in1[0], x1, 32); |
194 | 0 | OPENSSL_memcpy(&in1[32], y1, 32); |
195 | 0 | OPENSSL_memcpy(&in1[64], z1, 32); |
196 | 0 | OPENSSL_memcpy(&in2[0], x2, 32); |
197 | 0 | OPENSSL_memcpy(&in2[32], y2, 32); |
198 | 0 | OPENSSL_memcpy(&in2[64], z2, 32); |
199 | 0 | p256_point_add_vartime_if_doubling((br_word_t)out, (br_word_t)in1, |
200 | 0 | (br_word_t)in2); |
201 | 0 | OPENSSL_memcpy(x3, &out[0], 32); |
202 | 0 | OPENSSL_memcpy(y3, &out[32], 32); |
203 | 0 | OPENSSL_memcpy(z3, &out[64], 32); |
204 | 0 | } |
205 | | #include "./p256_table.h" |
206 | | |
207 | | // fiat_p256_select_point_affine selects the |idx-1|th point from a |
208 | | // precomputation table and copies it to out. If |idx| is zero, the output is |
209 | | // the point at infinity. |
210 | | static void fiat_p256_select_point_affine( |
211 | | const fiat_p256_limb_t idx, size_t size, |
212 | 0 | const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) { |
213 | 0 | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); |
214 | 0 | for (size_t i = 0; i < size; i++) { |
215 | 0 | fiat_p256_limb_t mismatch = i ^ (idx - 1); |
216 | 0 | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); |
217 | 0 | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); |
218 | 0 | } |
219 | 0 | fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one); |
220 | 0 | } |
221 | | |
222 | | // fiat_p256_select_point selects the |idx|th point from a precomputation table |
223 | | // and copies it to out. |
224 | | static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size, |
225 | | const fiat_p256_felem pre_comp[/*size*/][3], |
226 | 0 | fiat_p256_felem out[3]) { |
227 | 0 | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); |
228 | 0 | for (size_t i = 0; i < size; i++) { |
229 | 0 | fiat_p256_limb_t mismatch = i ^ idx; |
230 | 0 | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); |
231 | 0 | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); |
232 | 0 | fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]); |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | | // fiat_p256_get_bit returns the |i|th bit in |in|. |
237 | 0 | static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) { |
238 | 0 | if (i < 0 || i >= 256) { |
239 | 0 | return 0; |
240 | 0 | } |
241 | 0 | #if defined(OPENSSL_64_BIT) |
242 | 0 | static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit"); |
243 | 0 | return (in->words[i >> 6] >> (i & 63)) & 1; |
244 | | #else |
245 | | static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit"); |
246 | | return (in->words[i >> 5] >> (i & 31)) & 1; |
247 | | #endif |
248 | 0 | } |
249 | | |
250 | | // OPENSSL EC_METHOD FUNCTIONS |
251 | | |
252 | | // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = |
253 | | // (X/Z^2, Y/Z^3). |
254 | | static int ec_GFp_nistp256_point_get_affine_coordinates( |
255 | | const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out, |
256 | 0 | EC_FELEM *y_out) { |
257 | 0 | if (constant_time_declassify_int( |
258 | 0 | ec_GFp_simple_is_at_infinity(group, point))) { |
259 | 0 | OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY); |
260 | 0 | return 0; |
261 | 0 | } |
262 | | |
263 | 0 | fiat_p256_felem z1, z2; |
264 | 0 | fiat_p256_from_generic(z1, &point->Z); |
265 | 0 | fiat_p256_inv_square(z2, z1); |
266 | |
|
267 | 0 | if (x_out != nullptr) { |
268 | 0 | fiat_p256_felem x; |
269 | 0 | fiat_p256_from_generic(x, &point->X); |
270 | 0 | fiat_p256_mul(x, x, z2); |
271 | 0 | fiat_p256_to_generic(x_out, x); |
272 | 0 | } |
273 | |
|
274 | 0 | if (y_out != nullptr) { |
275 | 0 | fiat_p256_felem y; |
276 | 0 | fiat_p256_from_generic(y, &point->Y); |
277 | 0 | fiat_p256_square(z2, z2); // z^-4 |
278 | 0 | fiat_p256_mul(y, y, z1); // y * z |
279 | 0 | fiat_p256_mul(y, y, z2); // y * z^-3 |
280 | 0 | fiat_p256_to_generic(y_out, y); |
281 | 0 | } |
282 | |
|
283 | 0 | return 1; |
284 | 0 | } |
285 | | |
286 | | static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r, |
287 | 0 | const EC_JACOBIAN *a, const EC_JACOBIAN *b) { |
288 | 0 | fiat_p256_felem x1, y1, z1, x2, y2, z2; |
289 | 0 | fiat_p256_from_generic(x1, &a->X); |
290 | 0 | fiat_p256_from_generic(y1, &a->Y); |
291 | 0 | fiat_p256_from_generic(z1, &a->Z); |
292 | 0 | fiat_p256_from_generic(x2, &b->X); |
293 | 0 | fiat_p256_from_generic(y2, &b->Y); |
294 | 0 | fiat_p256_from_generic(z2, &b->Z); |
295 | 0 | fiat_p256_point_add(x1, y1, z1, x1, y1, z1, x2, y2, z2); |
296 | 0 | fiat_p256_to_generic(&r->X, x1); |
297 | 0 | fiat_p256_to_generic(&r->Y, y1); |
298 | 0 | fiat_p256_to_generic(&r->Z, z1); |
299 | 0 | } |
300 | | |
301 | | static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r, |
302 | 0 | const EC_JACOBIAN *a) { |
303 | 0 | fiat_p256_felem x, y, z; |
304 | 0 | fiat_p256_from_generic(x, &a->X); |
305 | 0 | fiat_p256_from_generic(y, &a->Y); |
306 | 0 | fiat_p256_from_generic(z, &a->Z); |
307 | 0 | fiat_p256_point_double(x, y, z, x, y, z); |
308 | 0 | fiat_p256_to_generic(&r->X, x); |
309 | 0 | fiat_p256_to_generic(&r->Y, y); |
310 | 0 | fiat_p256_to_generic(&r->Z, z); |
311 | 0 | } |
312 | | |
313 | | static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, |
314 | | const EC_JACOBIAN *p, |
315 | 0 | const EC_SCALAR *scalar) { |
316 | 0 | fiat_p256_felem p_pre_comp[17][3]; |
317 | 0 | OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); |
318 | | // Precompute multiples. |
319 | 0 | fiat_p256_from_generic(p_pre_comp[1][0], &p->X); |
320 | 0 | fiat_p256_from_generic(p_pre_comp[1][1], &p->Y); |
321 | 0 | fiat_p256_from_generic(p_pre_comp[1][2], &p->Z); |
322 | 0 | for (size_t j = 2; j <= 16; ++j) { |
323 | 0 | if (j & 1) { |
324 | 0 | fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], |
325 | 0 | p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], |
326 | 0 | p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], |
327 | 0 | p_pre_comp[j - 1][2]); |
328 | 0 | } else { |
329 | 0 | fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1], |
330 | 0 | p_pre_comp[j][2], p_pre_comp[j / 2][0], |
331 | 0 | p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); |
332 | 0 | } |
333 | 0 | } |
334 | | |
335 | | // Set nq to the point at infinity. |
336 | 0 | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; |
337 | | |
338 | | // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. |
339 | 0 | int skip = 1; // Save two point operations in the first round. |
340 | 0 | for (size_t i = 255; i < 256; i--) { |
341 | | // double |
342 | 0 | if (!skip) { |
343 | 0 | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); |
344 | 0 | } |
345 | | |
346 | | // do other additions every 5 doublings |
347 | 0 | if (i % 5 == 0) { |
348 | 0 | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5; |
349 | 0 | bits |= fiat_p256_get_bit(scalar, i + 3) << 4; |
350 | 0 | bits |= fiat_p256_get_bit(scalar, i + 2) << 3; |
351 | 0 | bits |= fiat_p256_get_bit(scalar, i + 1) << 2; |
352 | 0 | bits |= fiat_p256_get_bit(scalar, i) << 1; |
353 | 0 | bits |= fiat_p256_get_bit(scalar, i - 1); |
354 | 0 | crypto_word_t sign, digit; |
355 | 0 | ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); |
356 | | |
357 | | // select the point to add or subtract, in constant time. |
358 | 0 | fiat_p256_select_point((fiat_p256_limb_t)digit, 17, |
359 | 0 | (const fiat_p256_felem(*)[3])p_pre_comp, tmp); |
360 | 0 | fiat_p256_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. |
361 | 0 | fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp); |
362 | |
|
363 | 0 | if (!skip) { |
364 | 0 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], |
365 | 0 | tmp[1], tmp[2]); |
366 | 0 | } else { |
367 | 0 | fiat_p256_copy(nq[0], tmp[0]); |
368 | 0 | fiat_p256_copy(nq[1], tmp[1]); |
369 | 0 | fiat_p256_copy(nq[2], tmp[2]); |
370 | 0 | skip = 0; |
371 | 0 | } |
372 | 0 | } |
373 | 0 | } |
374 | |
|
375 | 0 | fiat_p256_to_generic(&r->X, nq[0]); |
376 | 0 | fiat_p256_to_generic(&r->Y, nq[1]); |
377 | 0 | fiat_p256_to_generic(&r->Z, nq[2]); |
378 | 0 | } |
379 | | |
380 | | static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, |
381 | | EC_JACOBIAN *r, |
382 | 0 | const EC_SCALAR *scalar) { |
383 | | // Set nq to the point at infinity. |
384 | 0 | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3]; |
385 | |
|
386 | 0 | int skip = 1; // Save two point operations in the first round. |
387 | 0 | for (size_t i = 31; i < 32; i--) { |
388 | 0 | if (!skip) { |
389 | 0 | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); |
390 | 0 | } |
391 | | |
392 | | // First, look 32 bits upwards. |
393 | 0 | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3; |
394 | 0 | bits |= fiat_p256_get_bit(scalar, i + 160) << 2; |
395 | 0 | bits |= fiat_p256_get_bit(scalar, i + 96) << 1; |
396 | 0 | bits |= fiat_p256_get_bit(scalar, i + 32); |
397 | | // Select the point to add, in constant time. |
398 | 0 | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, |
399 | 0 | fiat_p256_g_pre_comp[1], tmp); |
400 | |
|
401 | 0 | if (!skip) { |
402 | 0 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], |
403 | 0 | tmp[1], tmp[2]); |
404 | 0 | } else { |
405 | 0 | fiat_p256_copy(nq[0], tmp[0]); |
406 | 0 | fiat_p256_copy(nq[1], tmp[1]); |
407 | 0 | fiat_p256_copy(nq[2], tmp[2]); |
408 | 0 | skip = 0; |
409 | 0 | } |
410 | | |
411 | | // Second, look at the current position. |
412 | 0 | bits = fiat_p256_get_bit(scalar, i + 192) << 3; |
413 | 0 | bits |= fiat_p256_get_bit(scalar, i + 128) << 2; |
414 | 0 | bits |= fiat_p256_get_bit(scalar, i + 64) << 1; |
415 | 0 | bits |= fiat_p256_get_bit(scalar, i); |
416 | | // Select the point to add, in constant time. |
417 | 0 | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, |
418 | 0 | fiat_p256_g_pre_comp[0], tmp); |
419 | 0 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], tmp[0], |
420 | 0 | tmp[1], tmp[2]); |
421 | 0 | } |
422 | |
|
423 | 0 | fiat_p256_to_generic(&r->X, nq[0]); |
424 | 0 | fiat_p256_to_generic(&r->Y, nq[1]); |
425 | 0 | fiat_p256_to_generic(&r->Z, nq[2]); |
426 | 0 | } |
427 | | |
428 | | static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, |
429 | | EC_JACOBIAN *r, |
430 | | const EC_SCALAR *g_scalar, |
431 | | const EC_JACOBIAN *p, |
432 | 0 | const EC_SCALAR *p_scalar) { |
433 | 0 | #define P256_WSIZE_PUBLIC 4 |
434 | | // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. |
435 | 0 | fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; |
436 | 0 | fiat_p256_from_generic(p_pre_comp[0][0], &p->X); |
437 | 0 | fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); |
438 | 0 | fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); |
439 | 0 | fiat_p256_felem p2[3]; |
440 | 0 | fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], |
441 | 0 | p_pre_comp[0][1], p_pre_comp[0][2]); |
442 | 0 | for (size_t i = 1; i < std::size(p_pre_comp); i++) { |
443 | 0 | fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], |
444 | 0 | p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], |
445 | 0 | p_pre_comp[i - 1][2], p2[0], p2[1], p2[2]); |
446 | 0 | } |
447 | | |
448 | | // Set up the coefficients for |p_scalar|. |
449 | 0 | int8_t p_wNAF[257]; |
450 | 0 | ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); |
451 | | |
452 | | // Set |ret| to the point at infinity. |
453 | 0 | int skip = 1; // Save some point operations. |
454 | 0 | fiat_p256_felem ret[3] = {{0}, {0}, {0}}; |
455 | 0 | for (int i = 256; i >= 0; i--) { |
456 | 0 | if (!skip) { |
457 | 0 | fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]); |
458 | 0 | } |
459 | | |
460 | | // For the |g_scalar|, we use the precomputed table without the |
461 | | // constant-time lookup. |
462 | 0 | if (i <= 31) { |
463 | | // First, look 32 bits upwards. |
464 | 0 | crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3; |
465 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2; |
466 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1; |
467 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 32); |
468 | 0 | if (bits != 0) { |
469 | 0 | size_t index = (size_t)(bits - 1); |
470 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
471 | 0 | fiat_p256_g_pre_comp[1][index][0], |
472 | 0 | fiat_p256_g_pre_comp[1][index][1], fiat_p256_one); |
473 | 0 | skip = 0; |
474 | 0 | } |
475 | | |
476 | | // Second, look at the current position. |
477 | 0 | bits = fiat_p256_get_bit(g_scalar, i + 192) << 3; |
478 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2; |
479 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1; |
480 | 0 | bits |= fiat_p256_get_bit(g_scalar, i); |
481 | 0 | if (bits != 0) { |
482 | 0 | size_t index = (size_t)(bits - 1); |
483 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
484 | 0 | fiat_p256_g_pre_comp[0][index][0], |
485 | 0 | fiat_p256_g_pre_comp[0][index][1], fiat_p256_one); |
486 | 0 | skip = 0; |
487 | 0 | } |
488 | 0 | } |
489 | |
|
490 | 0 | int digit = p_wNAF[i]; |
491 | 0 | if (digit != 0) { |
492 | 0 | assert(digit & 1); |
493 | 0 | size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1); |
494 | 0 | fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; |
495 | 0 | if (digit < 0) { |
496 | 0 | fiat_p256_opp(tmp, p_pre_comp[idx][1]); |
497 | 0 | y = &tmp; |
498 | 0 | } |
499 | 0 | if (!skip) { |
500 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
501 | 0 | p_pre_comp[idx][0], *y, p_pre_comp[idx][2]); |
502 | 0 | } else { |
503 | 0 | fiat_p256_copy(ret[0], p_pre_comp[idx][0]); |
504 | 0 | fiat_p256_copy(ret[1], *y); |
505 | 0 | fiat_p256_copy(ret[2], p_pre_comp[idx][2]); |
506 | 0 | skip = 0; |
507 | 0 | } |
508 | 0 | } |
509 | 0 | } |
510 | | |
511 | 0 | fiat_p256_to_generic(&r->X, ret[0]); |
512 | 0 | fiat_p256_to_generic(&r->Y, ret[1]); |
513 | 0 | fiat_p256_to_generic(&r->Z, ret[2]); |
514 | 0 | } |
515 | | |
516 | | static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group, |
517 | | const EC_JACOBIAN *p, |
518 | 0 | const EC_SCALAR *r) { |
519 | 0 | if (ec_GFp_simple_is_at_infinity(group, p)) { |
520 | 0 | return 0; |
521 | 0 | } |
522 | | |
523 | | // We wish to compare X/Z^2 with r. This is equivalent to comparing X with |
524 | | // r*Z^2. Note that X and Z are represented in Montgomery form, while r is |
525 | | // not. |
526 | 0 | fiat_p256_felem Z2_mont; |
527 | 0 | fiat_p256_from_generic(Z2_mont, &p->Z); |
528 | 0 | fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont); |
529 | |
|
530 | 0 | fiat_p256_felem r_Z2; |
531 | 0 | fiat_p256_from_words(r_Z2, r->words); // r < order < p, so this is valid. |
532 | 0 | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); |
533 | |
|
534 | 0 | fiat_p256_felem X; |
535 | 0 | fiat_p256_from_generic(X, &p->X); |
536 | 0 | fiat_p256_from_montgomery(X, X); |
537 | |
|
538 | 0 | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { |
539 | 0 | return 1; |
540 | 0 | } |
541 | | |
542 | | // During signing the x coefficient is reduced modulo the group order. |
543 | | // Therefore there is a small possibility, less than 1/2^128, that group_order |
544 | | // < p.x < P. in that case we need not only to compare against |r| but also to |
545 | | // compare against r+group_order. |
546 | 0 | assert(group->field.N.width == group->order.N.width); |
547 | 0 | EC_FELEM tmp; |
548 | 0 | BN_ULONG carry = |
549 | 0 | bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width); |
550 | 0 | if (carry == 0 && |
551 | 0 | bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) { |
552 | 0 | fiat_p256_from_generic(r_Z2, &tmp); |
553 | 0 | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); |
554 | 0 | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { |
555 | 0 | return 1; |
556 | 0 | } |
557 | 0 | } |
558 | | |
559 | 0 | return 0; |
560 | 0 | } |
561 | | |
562 | | BSSL_NAMESPACE_BEGIN |
563 | | |
564 | 0 | DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) { |
565 | 0 | out->point_get_affine_coordinates = |
566 | 0 | ec_GFp_nistp256_point_get_affine_coordinates; |
567 | 0 | out->add = ec_GFp_nistp256_add; |
568 | 0 | out->dbl = ec_GFp_nistp256_dbl; |
569 | 0 | out->mul = ec_GFp_nistp256_point_mul; |
570 | 0 | out->mul_base = ec_GFp_nistp256_point_mul_base; |
571 | 0 | out->mul_public = ec_GFp_nistp256_point_mul_public; |
572 | 0 | out->felem_mul = ec_GFp_mont_felem_mul; |
573 | 0 | out->felem_sqr = ec_GFp_mont_felem_sqr; |
574 | 0 | out->felem_to_bytes = ec_GFp_mont_felem_to_bytes; |
575 | 0 | out->felem_from_bytes = ec_GFp_mont_felem_from_bytes; |
576 | 0 | out->felem_reduce = ec_GFp_mont_felem_reduce; |
577 | | // TODO(davidben): This should use the specialized field arithmetic |
578 | | // implementation, rather than the generic one. |
579 | 0 | out->felem_exp = ec_GFp_mont_felem_exp; |
580 | 0 | out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery; |
581 | 0 | out->scalar_to_montgomery_inv_vartime = |
582 | 0 | ec_simple_scalar_to_montgomery_inv_vartime; |
583 | 0 | out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate; |
584 | 0 | } |
585 | | |
586 | | BSSL_NAMESPACE_END |