/src/boringssl/crypto/fipsmodule/ec/p256.c.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2020, Google Inc. |
2 | | * |
3 | | * Permission to use, copy, modify, and/or distribute this software for any |
4 | | * purpose with or without fee is hereby granted, provided that the above |
5 | | * copyright notice and this permission notice appear in all copies. |
6 | | * |
7 | | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
8 | | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
9 | | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
10 | | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
11 | | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
12 | | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
13 | | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
14 | | |
15 | | // An implementation of the NIST P-256 elliptic curve point multiplication. |
16 | | // 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by |
17 | | // Fiat, which lives in //third_party/fiat. |
18 | | |
19 | | #include <openssl/base.h> |
20 | | |
21 | | #include <openssl/bn.h> |
22 | | #include <openssl/ec.h> |
23 | | #include <openssl/err.h> |
24 | | #include <openssl/mem.h> |
25 | | |
26 | | #include <assert.h> |
27 | | #include <string.h> |
28 | | |
29 | | #include "../../internal.h" |
30 | | #include "../delocate.h" |
31 | | #include "./internal.h" |
32 | | |
33 | | #if defined(BORINGSSL_HAS_UINT128) |
34 | | #include "../../../third_party/fiat/p256_64.h" |
35 | | #elif defined(OPENSSL_64_BIT) |
36 | | #include "../../../third_party/fiat/p256_64_msvc.h" |
37 | | #else |
38 | | #include "../../../third_party/fiat/p256_32.h" |
39 | | #endif |
40 | | |
41 | | |
42 | | // utility functions, handwritten |
43 | | |
44 | | #if defined(OPENSSL_64_BIT) |
45 | 7.24k | #define FIAT_P256_NLIMBS 4 |
46 | | typedef uint64_t fiat_p256_limb_t; |
47 | | typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS]; |
48 | | static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000, |
49 | | 0xffffffffffffffff, 0xfffffffe}; |
50 | | #else // 64BIT; else 32BIT |
51 | | #define FIAT_P256_NLIMBS 8 |
52 | | typedef uint32_t fiat_p256_limb_t; |
53 | | typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS]; |
54 | | static const fiat_p256_felem fiat_p256_one = { |
55 | | 0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0}; |
56 | | #endif // 64BIT |
57 | | |
58 | | |
59 | | static fiat_p256_limb_t fiat_p256_nz( |
60 | 5.10k | const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) { |
61 | 5.10k | fiat_p256_limb_t ret; |
62 | 5.10k | fiat_p256_nonzero(&ret, in1); |
63 | 5.10k | return ret; |
64 | 5.10k | } |
65 | | |
66 | | static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS], |
67 | 1.44k | const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) { |
68 | 7.24k | for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) { |
69 | 5.79k | out[i] = in1[i]; |
70 | 5.79k | } |
71 | 1.44k | } |
72 | | |
73 | | static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS], |
74 | | fiat_p256_limb_t t, |
75 | | const fiat_p256_limb_t z[FIAT_P256_NLIMBS], |
76 | 56.5k | const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) { |
77 | 56.5k | fiat_p256_selectznz(out, !!t, z, nz); |
78 | 56.5k | } |
79 | | |
80 | | static void fiat_p256_from_words(fiat_p256_felem out, |
81 | 115 | const BN_ULONG in[32 / sizeof(BN_ULONG)]) { |
82 | | // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on |
83 | | // 64-bit platforms without |uint128_t|, they are different. However, on |
84 | | // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same |
85 | | // layout. |
86 | 115 | OPENSSL_memcpy(out, in, 32); |
87 | 115 | } |
88 | | |
89 | 115 | static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) { |
90 | 115 | fiat_p256_from_words(out, in->words); |
91 | 115 | } |
92 | | |
93 | 116 | static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) { |
94 | | // See |fiat_p256_from_words|. |
95 | 116 | OPENSSL_memcpy(out->words, in, 32); |
96 | 116 | } |
97 | | |
98 | | // fiat_p256_inv_square calculates |out| = |in|^{-2} |
99 | | // |
100 | | // Based on Fermat's Little Theorem: |
101 | | // a^p = a (mod p) |
102 | | // a^{p-1} = 1 (mod p) |
103 | | // a^{p-3} = a^{-2} (mod p) |
104 | | static void fiat_p256_inv_square(fiat_p256_felem out, |
105 | 23 | const fiat_p256_felem in) { |
106 | | // This implements the addition chain described in |
107 | | // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion |
108 | 23 | fiat_p256_felem x2, x3, x6, x12, x15, x30, x32; |
109 | 23 | fiat_p256_square(x2, in); // 2^2 - 2^1 |
110 | 23 | fiat_p256_mul(x2, x2, in); // 2^2 - 2^0 |
111 | | |
112 | 23 | fiat_p256_square(x3, x2); // 2^3 - 2^1 |
113 | 23 | fiat_p256_mul(x3, x3, in); // 2^3 - 2^0 |
114 | | |
115 | 23 | fiat_p256_square(x6, x3); |
116 | 69 | for (int i = 1; i < 3; i++) { |
117 | 46 | fiat_p256_square(x6, x6); |
118 | 46 | } // 2^6 - 2^3 |
119 | 23 | fiat_p256_mul(x6, x6, x3); // 2^6 - 2^0 |
120 | | |
121 | 23 | fiat_p256_square(x12, x6); |
122 | 138 | for (int i = 1; i < 6; i++) { |
123 | 115 | fiat_p256_square(x12, x12); |
124 | 115 | } // 2^12 - 2^6 |
125 | 23 | fiat_p256_mul(x12, x12, x6); // 2^12 - 2^0 |
126 | | |
127 | 23 | fiat_p256_square(x15, x12); |
128 | 69 | for (int i = 1; i < 3; i++) { |
129 | 46 | fiat_p256_square(x15, x15); |
130 | 46 | } // 2^15 - 2^3 |
131 | 23 | fiat_p256_mul(x15, x15, x3); // 2^15 - 2^0 |
132 | | |
133 | 23 | fiat_p256_square(x30, x15); |
134 | 345 | for (int i = 1; i < 15; i++) { |
135 | 322 | fiat_p256_square(x30, x30); |
136 | 322 | } // 2^30 - 2^15 |
137 | 23 | fiat_p256_mul(x30, x30, x15); // 2^30 - 2^0 |
138 | | |
139 | 23 | fiat_p256_square(x32, x30); |
140 | 23 | fiat_p256_square(x32, x32); // 2^32 - 2^2 |
141 | 23 | fiat_p256_mul(x32, x32, x2); // 2^32 - 2^0 |
142 | | |
143 | 23 | fiat_p256_felem ret; |
144 | 23 | fiat_p256_square(ret, x32); |
145 | 736 | for (int i = 1; i < 31 + 1; i++) { |
146 | 713 | fiat_p256_square(ret, ret); |
147 | 713 | } // 2^64 - 2^32 |
148 | 23 | fiat_p256_mul(ret, ret, in); // 2^64 - 2^32 + 2^0 |
149 | | |
150 | 2.96k | for (int i = 0; i < 96 + 32; i++) { |
151 | 2.94k | fiat_p256_square(ret, ret); |
152 | 2.94k | } // 2^192 - 2^160 + 2^128 |
153 | 23 | fiat_p256_mul(ret, ret, x32); // 2^192 - 2^160 + 2^128 + 2^32 - 2^0 |
154 | | |
155 | 759 | for (int i = 0; i < 32; i++) { |
156 | 736 | fiat_p256_square(ret, ret); |
157 | 736 | } // 2^224 - 2^192 + 2^160 + 2^64 - 2^32 |
158 | 23 | fiat_p256_mul(ret, ret, x32); // 2^224 - 2^192 + 2^160 + 2^64 - 2^0 |
159 | | |
160 | 713 | for (int i = 0; i < 30; i++) { |
161 | 690 | fiat_p256_square(ret, ret); |
162 | 690 | } // 2^254 - 2^222 + 2^190 + 2^94 - 2^30 |
163 | 23 | fiat_p256_mul(ret, ret, x30); // 2^254 - 2^222 + 2^190 + 2^94 - 2^0 |
164 | | |
165 | 23 | fiat_p256_square(ret, ret); |
166 | 23 | fiat_p256_square(out, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2 |
167 | 23 | } |
168 | | |
169 | | // Group operations |
170 | | // ---------------- |
171 | | // |
172 | | // Building on top of the field operations we have the operations on the |
173 | | // elliptic curve group itself. Points on the curve are represented in Jacobian |
174 | | // coordinates. |
175 | | // |
176 | | // Both operations were transcribed to Coq and proven to correspond to naive |
177 | | // implementations using Affine coordinates, for all suitable fields. In the |
178 | | // Coq proofs, issues of constant-time execution and memory layout (aliasing) |
179 | | // conventions were not considered. Specification of affine coordinates: |
180 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28> |
181 | | // As a sanity check, a proof that these points form a commutative group: |
182 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33> |
183 | | |
184 | | // fiat_p256_point_double calculates 2*(x_in, y_in, z_in) |
185 | | // |
186 | | // The method is taken from: |
187 | | // http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b |
188 | | // |
189 | | // Coq transcription and correctness proof: |
190 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93> |
191 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201> |
192 | | // |
193 | | // Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. |
194 | | // while x_out == y_in is not (maybe this works, but it's not tested). |
195 | | static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out, |
196 | | fiat_p256_felem z_out, |
197 | | const fiat_p256_felem x_in, |
198 | | const fiat_p256_felem y_in, |
199 | 2.97k | const fiat_p256_felem z_in) { |
200 | 2.97k | fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta; |
201 | | // delta = z^2 |
202 | 2.97k | fiat_p256_square(delta, z_in); |
203 | | // gamma = y^2 |
204 | 2.97k | fiat_p256_square(gamma, y_in); |
205 | | // beta = x*gamma |
206 | 2.97k | fiat_p256_mul(beta, x_in, gamma); |
207 | | |
208 | | // alpha = 3*(x-delta)*(x+delta) |
209 | 2.97k | fiat_p256_sub(ftmp, x_in, delta); |
210 | 2.97k | fiat_p256_add(ftmp2, x_in, delta); |
211 | | |
212 | 2.97k | fiat_p256_add(tmptmp, ftmp2, ftmp2); |
213 | 2.97k | fiat_p256_add(ftmp2, ftmp2, tmptmp); |
214 | 2.97k | fiat_p256_mul(alpha, ftmp, ftmp2); |
215 | | |
216 | | // x' = alpha^2 - 8*beta |
217 | 2.97k | fiat_p256_square(x_out, alpha); |
218 | 2.97k | fiat_p256_add(fourbeta, beta, beta); |
219 | 2.97k | fiat_p256_add(fourbeta, fourbeta, fourbeta); |
220 | 2.97k | fiat_p256_add(tmptmp, fourbeta, fourbeta); |
221 | 2.97k | fiat_p256_sub(x_out, x_out, tmptmp); |
222 | | |
223 | | // z' = (y + z)^2 - gamma - delta |
224 | 2.97k | fiat_p256_add(delta, gamma, delta); |
225 | 2.97k | fiat_p256_add(ftmp, y_in, z_in); |
226 | 2.97k | fiat_p256_square(z_out, ftmp); |
227 | 2.97k | fiat_p256_sub(z_out, z_out, delta); |
228 | | |
229 | | // y' = alpha*(4*beta - x') - 8*gamma^2 |
230 | 2.97k | fiat_p256_sub(y_out, fourbeta, x_out); |
231 | 2.97k | fiat_p256_add(gamma, gamma, gamma); |
232 | 2.97k | fiat_p256_square(gamma, gamma); |
233 | 2.97k | fiat_p256_mul(y_out, alpha, y_out); |
234 | 2.97k | fiat_p256_add(gamma, gamma, gamma); |
235 | 2.97k | fiat_p256_sub(y_out, y_out, gamma); |
236 | 2.97k | } |
237 | | |
238 | | // fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2) |
239 | | // |
240 | | // The method is taken from: |
241 | | // http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, |
242 | | // adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). |
243 | | // |
244 | | // Coq transcription and correctness proof: |
245 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135> |
246 | | // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205> |
247 | | // |
248 | | // This function includes a branch for checking whether the two input points |
249 | | // are equal, (while not equal to the point at infinity). This case never |
250 | | // happens during single point multiplication, so there is no timing leak for |
251 | | // ECDH or ECDSA signing. |
252 | | static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3, |
253 | | fiat_p256_felem z3, const fiat_p256_felem x1, |
254 | | const fiat_p256_felem y1, |
255 | | const fiat_p256_felem z1, const int mixed, |
256 | | const fiat_p256_felem x2, |
257 | | const fiat_p256_felem y2, |
258 | 1.27k | const fiat_p256_felem z2) { |
259 | 1.27k | fiat_p256_felem x_out, y_out, z_out; |
260 | 1.27k | fiat_p256_limb_t z1nz = fiat_p256_nz(z1); |
261 | 1.27k | fiat_p256_limb_t z2nz = fiat_p256_nz(z2); |
262 | | |
263 | | // z1z1 = z1z1 = z1**2 |
264 | 1.27k | fiat_p256_felem z1z1; |
265 | 1.27k | fiat_p256_square(z1z1, z1); |
266 | | |
267 | 1.27k | fiat_p256_felem u1, s1, two_z1z2; |
268 | 1.27k | if (!mixed) { |
269 | | // z2z2 = z2**2 |
270 | 583 | fiat_p256_felem z2z2; |
271 | 583 | fiat_p256_square(z2z2, z2); |
272 | | |
273 | | // u1 = x1*z2z2 |
274 | 583 | fiat_p256_mul(u1, x1, z2z2); |
275 | | |
276 | | // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 |
277 | 583 | fiat_p256_add(two_z1z2, z1, z2); |
278 | 583 | fiat_p256_square(two_z1z2, two_z1z2); |
279 | 583 | fiat_p256_sub(two_z1z2, two_z1z2, z1z1); |
280 | 583 | fiat_p256_sub(two_z1z2, two_z1z2, z2z2); |
281 | | |
282 | | // s1 = y1 * z2**3 |
283 | 583 | fiat_p256_mul(s1, z2, z2z2); |
284 | 583 | fiat_p256_mul(s1, s1, y1); |
285 | 693 | } else { |
286 | | // We'll assume z2 = 1 (special case z2 = 0 is handled later). |
287 | | |
288 | | // u1 = x1*z2z2 |
289 | 693 | fiat_p256_copy(u1, x1); |
290 | | // two_z1z2 = 2z1z2 |
291 | 693 | fiat_p256_add(two_z1z2, z1, z1); |
292 | | // s1 = y1 * z2**3 |
293 | 693 | fiat_p256_copy(s1, y1); |
294 | 693 | } |
295 | | |
296 | | // u2 = x2*z1z1 |
297 | 1.27k | fiat_p256_felem u2; |
298 | 1.27k | fiat_p256_mul(u2, x2, z1z1); |
299 | | |
300 | | // h = u2 - u1 |
301 | 1.27k | fiat_p256_felem h; |
302 | 1.27k | fiat_p256_sub(h, u2, u1); |
303 | | |
304 | 1.27k | fiat_p256_limb_t xneq = fiat_p256_nz(h); |
305 | | |
306 | | // z_out = two_z1z2 * h |
307 | 1.27k | fiat_p256_mul(z_out, h, two_z1z2); |
308 | | |
309 | | // z1z1z1 = z1 * z1z1 |
310 | 1.27k | fiat_p256_felem z1z1z1; |
311 | 1.27k | fiat_p256_mul(z1z1z1, z1, z1z1); |
312 | | |
313 | | // s2 = y2 * z1**3 |
314 | 1.27k | fiat_p256_felem s2; |
315 | 1.27k | fiat_p256_mul(s2, y2, z1z1z1); |
316 | | |
317 | | // r = (s2 - s1)*2 |
318 | 1.27k | fiat_p256_felem r; |
319 | 1.27k | fiat_p256_sub(r, s2, s1); |
320 | 1.27k | fiat_p256_add(r, r, r); |
321 | | |
322 | 1.27k | fiat_p256_limb_t yneq = fiat_p256_nz(r); |
323 | | |
324 | 1.27k | fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) & |
325 | 1.27k | ~constant_time_is_zero_w(z1nz) & |
326 | 1.27k | ~constant_time_is_zero_w(z2nz); |
327 | 1.27k | if (constant_time_declassify_w(is_nontrivial_double)) { |
328 | 0 | fiat_p256_point_double(x3, y3, z3, x1, y1, z1); |
329 | 0 | return; |
330 | 0 | } |
331 | | |
332 | | // I = (2h)**2 |
333 | 1.27k | fiat_p256_felem i; |
334 | 1.27k | fiat_p256_add(i, h, h); |
335 | 1.27k | fiat_p256_square(i, i); |
336 | | |
337 | | // J = h * I |
338 | 1.27k | fiat_p256_felem j; |
339 | 1.27k | fiat_p256_mul(j, h, i); |
340 | | |
341 | | // V = U1 * I |
342 | 1.27k | fiat_p256_felem v; |
343 | 1.27k | fiat_p256_mul(v, u1, i); |
344 | | |
345 | | // x_out = r**2 - J - 2V |
346 | 1.27k | fiat_p256_square(x_out, r); |
347 | 1.27k | fiat_p256_sub(x_out, x_out, j); |
348 | 1.27k | fiat_p256_sub(x_out, x_out, v); |
349 | 1.27k | fiat_p256_sub(x_out, x_out, v); |
350 | | |
351 | | // y_out = r(V-x_out) - 2 * s1 * J |
352 | 1.27k | fiat_p256_sub(y_out, v, x_out); |
353 | 1.27k | fiat_p256_mul(y_out, y_out, r); |
354 | 1.27k | fiat_p256_felem s1j; |
355 | 1.27k | fiat_p256_mul(s1j, s1, j); |
356 | 1.27k | fiat_p256_sub(y_out, y_out, s1j); |
357 | 1.27k | fiat_p256_sub(y_out, y_out, s1j); |
358 | | |
359 | 1.27k | fiat_p256_cmovznz(x_out, z1nz, x2, x_out); |
360 | 1.27k | fiat_p256_cmovznz(x3, z2nz, x1, x_out); |
361 | 1.27k | fiat_p256_cmovznz(y_out, z1nz, y2, y_out); |
362 | 1.27k | fiat_p256_cmovznz(y3, z2nz, y1, y_out); |
363 | 1.27k | fiat_p256_cmovznz(z_out, z1nz, z2, z_out); |
364 | 1.27k | fiat_p256_cmovznz(z3, z2nz, z1, z_out); |
365 | 1.27k | } |
366 | | |
367 | | #include "./p256_table.h" |
368 | | |
369 | | // fiat_p256_select_point_affine selects the |idx-1|th point from a |
370 | | // precomputation table and copies it to out. If |idx| is zero, the output is |
371 | | // the point at infinity. |
372 | | static void fiat_p256_select_point_affine( |
373 | | const fiat_p256_limb_t idx, size_t size, |
374 | 704 | const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) { |
375 | 704 | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); |
376 | 11.2k | for (size_t i = 0; i < size; i++) { |
377 | 10.5k | fiat_p256_limb_t mismatch = i ^ (idx - 1); |
378 | 10.5k | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); |
379 | 10.5k | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); |
380 | 10.5k | } |
381 | 704 | fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one); |
382 | 704 | } |
383 | | |
384 | | // fiat_p256_select_point selects the |idx|th point from a precomputation table |
385 | | // and copies it to out. |
386 | | static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size, |
387 | | const fiat_p256_felem pre_comp[/*size*/][3], |
388 | 520 | fiat_p256_felem out[3]) { |
389 | 520 | OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); |
390 | 9.36k | for (size_t i = 0; i < size; i++) { |
391 | 8.84k | fiat_p256_limb_t mismatch = i ^ idx; |
392 | 8.84k | fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); |
393 | 8.84k | fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); |
394 | 8.84k | fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]); |
395 | 8.84k | } |
396 | 520 | } |
397 | | |
398 | | // fiat_p256_get_bit returns the |i|th bit in |in|. |
399 | 5.93k | static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) { |
400 | 5.93k | if (i < 0 || i >= 256) { |
401 | 50 | return 0; |
402 | 50 | } |
403 | 5.88k | #if defined(OPENSSL_64_BIT) |
404 | 5.88k | static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit"); |
405 | 5.88k | return (in->words[i >> 6] >> (i & 63)) & 1; |
406 | | #else |
407 | | static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit"); |
408 | | return (in->words[i >> 5] >> (i & 31)) & 1; |
409 | | #endif |
410 | 5.93k | } |
411 | | |
412 | | // OPENSSL EC_METHOD FUNCTIONS |
413 | | |
414 | | // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') = |
415 | | // (X/Z^2, Y/Z^3). |
416 | | static int ec_GFp_nistp256_point_get_affine_coordinates( |
417 | | const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out, |
418 | 23 | EC_FELEM *y_out) { |
419 | 23 | if (constant_time_declassify_int( |
420 | 23 | ec_GFp_simple_is_at_infinity(group, point))) { |
421 | 0 | OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY); |
422 | 0 | return 0; |
423 | 0 | } |
424 | | |
425 | 23 | fiat_p256_felem z1, z2; |
426 | 23 | fiat_p256_from_generic(z1, &point->Z); |
427 | 23 | fiat_p256_inv_square(z2, z1); |
428 | | |
429 | 23 | if (x_out != NULL) { |
430 | 23 | fiat_p256_felem x; |
431 | 23 | fiat_p256_from_generic(x, &point->X); |
432 | 23 | fiat_p256_mul(x, x, z2); |
433 | 23 | fiat_p256_to_generic(x_out, x); |
434 | 23 | } |
435 | | |
436 | 23 | if (y_out != NULL) { |
437 | 21 | fiat_p256_felem y; |
438 | 21 | fiat_p256_from_generic(y, &point->Y); |
439 | 21 | fiat_p256_square(z2, z2); // z^-4 |
440 | 21 | fiat_p256_mul(y, y, z1); // y * z |
441 | 21 | fiat_p256_mul(y, y, z2); // y * z^-3 |
442 | 21 | fiat_p256_to_generic(y_out, y); |
443 | 21 | } |
444 | | |
445 | 23 | return 1; |
446 | 23 | } |
447 | | |
448 | | static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r, |
449 | 3 | const EC_JACOBIAN *a, const EC_JACOBIAN *b) { |
450 | 3 | fiat_p256_felem x1, y1, z1, x2, y2, z2; |
451 | 3 | fiat_p256_from_generic(x1, &a->X); |
452 | 3 | fiat_p256_from_generic(y1, &a->Y); |
453 | 3 | fiat_p256_from_generic(z1, &a->Z); |
454 | 3 | fiat_p256_from_generic(x2, &b->X); |
455 | 3 | fiat_p256_from_generic(y2, &b->Y); |
456 | 3 | fiat_p256_from_generic(z2, &b->Z); |
457 | 3 | fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, |
458 | 3 | z2); |
459 | 3 | fiat_p256_to_generic(&r->X, x1); |
460 | 3 | fiat_p256_to_generic(&r->Y, y1); |
461 | 3 | fiat_p256_to_generic(&r->Z, z1); |
462 | 3 | } |
463 | | |
464 | | static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r, |
465 | 0 | const EC_JACOBIAN *a) { |
466 | 0 | fiat_p256_felem x, y, z; |
467 | 0 | fiat_p256_from_generic(x, &a->X); |
468 | 0 | fiat_p256_from_generic(y, &a->Y); |
469 | 0 | fiat_p256_from_generic(z, &a->Z); |
470 | 0 | fiat_p256_point_double(x, y, z, x, y, z); |
471 | 0 | fiat_p256_to_generic(&r->X, x); |
472 | 0 | fiat_p256_to_generic(&r->Y, y); |
473 | 0 | fiat_p256_to_generic(&r->Z, z); |
474 | 0 | } |
475 | | |
476 | | static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, |
477 | | const EC_JACOBIAN *p, |
478 | 10 | const EC_SCALAR *scalar) { |
479 | 10 | fiat_p256_felem p_pre_comp[17][3]; |
480 | 10 | OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); |
481 | | // Precompute multiples. |
482 | 10 | fiat_p256_from_generic(p_pre_comp[1][0], &p->X); |
483 | 10 | fiat_p256_from_generic(p_pre_comp[1][1], &p->Y); |
484 | 10 | fiat_p256_from_generic(p_pre_comp[1][2], &p->Z); |
485 | 160 | for (size_t j = 2; j <= 16; ++j) { |
486 | 150 | if (j & 1) { |
487 | 70 | fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], |
488 | 70 | p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], |
489 | 70 | 0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], |
490 | 70 | p_pre_comp[j - 1][2]); |
491 | 80 | } else { |
492 | 80 | fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1], |
493 | 80 | p_pre_comp[j][2], p_pre_comp[j / 2][0], |
494 | 80 | p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); |
495 | 80 | } |
496 | 150 | } |
497 | | |
498 | | // Set nq to the point at infinity. |
499 | 10 | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; |
500 | | |
501 | | // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. |
502 | 10 | int skip = 1; // Save two point operations in the first round. |
503 | 2.57k | for (size_t i = 255; i < 256; i--) { |
504 | | // double |
505 | 2.56k | if (!skip) { |
506 | 2.55k | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); |
507 | 2.55k | } |
508 | | |
509 | | // do other additions every 5 doublings |
510 | 2.56k | if (i % 5 == 0) { |
511 | 520 | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5; |
512 | 520 | bits |= fiat_p256_get_bit(scalar, i + 3) << 4; |
513 | 520 | bits |= fiat_p256_get_bit(scalar, i + 2) << 3; |
514 | 520 | bits |= fiat_p256_get_bit(scalar, i + 1) << 2; |
515 | 520 | bits |= fiat_p256_get_bit(scalar, i) << 1; |
516 | 520 | bits |= fiat_p256_get_bit(scalar, i - 1); |
517 | 520 | crypto_word_t sign, digit; |
518 | 520 | ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); |
519 | | |
520 | | // select the point to add or subtract, in constant time. |
521 | 520 | fiat_p256_select_point((fiat_p256_limb_t)digit, 17, |
522 | 520 | (const fiat_p256_felem(*)[3])p_pre_comp, tmp); |
523 | 520 | fiat_p256_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. |
524 | 520 | fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp); |
525 | | |
526 | 520 | if (!skip) { |
527 | 510 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], |
528 | 510 | 0 /* mixed */, tmp[0], tmp[1], tmp[2]); |
529 | 510 | } else { |
530 | 10 | fiat_p256_copy(nq[0], tmp[0]); |
531 | 10 | fiat_p256_copy(nq[1], tmp[1]); |
532 | 10 | fiat_p256_copy(nq[2], tmp[2]); |
533 | 10 | skip = 0; |
534 | 10 | } |
535 | 520 | } |
536 | 2.56k | } |
537 | | |
538 | 10 | fiat_p256_to_generic(&r->X, nq[0]); |
539 | 10 | fiat_p256_to_generic(&r->Y, nq[1]); |
540 | 10 | fiat_p256_to_generic(&r->Z, nq[2]); |
541 | 10 | } |
542 | | |
543 | | static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, |
544 | | EC_JACOBIAN *r, |
545 | 11 | const EC_SCALAR *scalar) { |
546 | | // Set nq to the point at infinity. |
547 | 11 | fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3]; |
548 | | |
549 | 11 | int skip = 1; // Save two point operations in the first round. |
550 | 363 | for (size_t i = 31; i < 32; i--) { |
551 | 352 | if (!skip) { |
552 | 341 | fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); |
553 | 341 | } |
554 | | |
555 | | // First, look 32 bits upwards. |
556 | 352 | crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3; |
557 | 352 | bits |= fiat_p256_get_bit(scalar, i + 160) << 2; |
558 | 352 | bits |= fiat_p256_get_bit(scalar, i + 96) << 1; |
559 | 352 | bits |= fiat_p256_get_bit(scalar, i + 32); |
560 | | // Select the point to add, in constant time. |
561 | 352 | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, |
562 | 352 | fiat_p256_g_pre_comp[1], tmp); |
563 | | |
564 | 352 | if (!skip) { |
565 | 341 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], |
566 | 341 | 1 /* mixed */, tmp[0], tmp[1], tmp[2]); |
567 | 341 | } else { |
568 | 11 | fiat_p256_copy(nq[0], tmp[0]); |
569 | 11 | fiat_p256_copy(nq[1], tmp[1]); |
570 | 11 | fiat_p256_copy(nq[2], tmp[2]); |
571 | 11 | skip = 0; |
572 | 11 | } |
573 | | |
574 | | // Second, look at the current position. |
575 | 352 | bits = fiat_p256_get_bit(scalar, i + 192) << 3; |
576 | 352 | bits |= fiat_p256_get_bit(scalar, i + 128) << 2; |
577 | 352 | bits |= fiat_p256_get_bit(scalar, i + 64) << 1; |
578 | 352 | bits |= fiat_p256_get_bit(scalar, i); |
579 | | // Select the point to add, in constant time. |
580 | 352 | fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15, |
581 | 352 | fiat_p256_g_pre_comp[0], tmp); |
582 | 352 | fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */, |
583 | 352 | tmp[0], tmp[1], tmp[2]); |
584 | 352 | } |
585 | | |
586 | 11 | fiat_p256_to_generic(&r->X, nq[0]); |
587 | 11 | fiat_p256_to_generic(&r->Y, nq[1]); |
588 | 11 | fiat_p256_to_generic(&r->Z, nq[2]); |
589 | 11 | } |
590 | | |
591 | | static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, |
592 | | EC_JACOBIAN *r, |
593 | | const EC_SCALAR *g_scalar, |
594 | | const EC_JACOBIAN *p, |
595 | 0 | const EC_SCALAR *p_scalar) { |
596 | 0 | #define P256_WSIZE_PUBLIC 4 |
597 | | // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. |
598 | 0 | fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; |
599 | 0 | fiat_p256_from_generic(p_pre_comp[0][0], &p->X); |
600 | 0 | fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); |
601 | 0 | fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); |
602 | 0 | fiat_p256_felem p2[3]; |
603 | 0 | fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], |
604 | 0 | p_pre_comp[0][1], p_pre_comp[0][2]); |
605 | 0 | for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) { |
606 | 0 | fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], |
607 | 0 | p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], |
608 | 0 | p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1], |
609 | 0 | p2[2]); |
610 | 0 | } |
611 | | |
612 | | // Set up the coefficients for |p_scalar|. |
613 | 0 | int8_t p_wNAF[257]; |
614 | 0 | ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); |
615 | | |
616 | | // Set |ret| to the point at infinity. |
617 | 0 | int skip = 1; // Save some point operations. |
618 | 0 | fiat_p256_felem ret[3] = {{0}, {0}, {0}}; |
619 | 0 | for (int i = 256; i >= 0; i--) { |
620 | 0 | if (!skip) { |
621 | 0 | fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]); |
622 | 0 | } |
623 | | |
624 | | // For the |g_scalar|, we use the precomputed table without the |
625 | | // constant-time lookup. |
626 | 0 | if (i <= 31) { |
627 | | // First, look 32 bits upwards. |
628 | 0 | crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3; |
629 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2; |
630 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1; |
631 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 32); |
632 | 0 | if (bits != 0) { |
633 | 0 | size_t index = (size_t)(bits - 1); |
634 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
635 | 0 | 1 /* mixed */, fiat_p256_g_pre_comp[1][index][0], |
636 | 0 | fiat_p256_g_pre_comp[1][index][1], |
637 | 0 | fiat_p256_one); |
638 | 0 | skip = 0; |
639 | 0 | } |
640 | | |
641 | | // Second, look at the current position. |
642 | 0 | bits = fiat_p256_get_bit(g_scalar, i + 192) << 3; |
643 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2; |
644 | 0 | bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1; |
645 | 0 | bits |= fiat_p256_get_bit(g_scalar, i); |
646 | 0 | if (bits != 0) { |
647 | 0 | size_t index = (size_t)(bits - 1); |
648 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
649 | 0 | 1 /* mixed */, fiat_p256_g_pre_comp[0][index][0], |
650 | 0 | fiat_p256_g_pre_comp[0][index][1], |
651 | 0 | fiat_p256_one); |
652 | 0 | skip = 0; |
653 | 0 | } |
654 | 0 | } |
655 | |
|
656 | 0 | int digit = p_wNAF[i]; |
657 | 0 | if (digit != 0) { |
658 | 0 | assert(digit & 1); |
659 | 0 | size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1); |
660 | 0 | fiat_p256_felem *y = &p_pre_comp[idx][1], tmp; |
661 | 0 | if (digit < 0) { |
662 | 0 | fiat_p256_opp(tmp, p_pre_comp[idx][1]); |
663 | 0 | y = &tmp; |
664 | 0 | } |
665 | 0 | if (!skip) { |
666 | 0 | fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2], |
667 | 0 | 0 /* not mixed */, p_pre_comp[idx][0], *y, |
668 | 0 | p_pre_comp[idx][2]); |
669 | 0 | } else { |
670 | 0 | fiat_p256_copy(ret[0], p_pre_comp[idx][0]); |
671 | 0 | fiat_p256_copy(ret[1], *y); |
672 | 0 | fiat_p256_copy(ret[2], p_pre_comp[idx][2]); |
673 | 0 | skip = 0; |
674 | 0 | } |
675 | 0 | } |
676 | 0 | } |
677 | | |
678 | 0 | fiat_p256_to_generic(&r->X, ret[0]); |
679 | 0 | fiat_p256_to_generic(&r->Y, ret[1]); |
680 | 0 | fiat_p256_to_generic(&r->Z, ret[2]); |
681 | 0 | } |
682 | | |
683 | | static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group, |
684 | | const EC_JACOBIAN *p, |
685 | 0 | const EC_SCALAR *r) { |
686 | 0 | if (ec_GFp_simple_is_at_infinity(group, p)) { |
687 | 0 | return 0; |
688 | 0 | } |
689 | | |
690 | | // We wish to compare X/Z^2 with r. This is equivalent to comparing X with |
691 | | // r*Z^2. Note that X and Z are represented in Montgomery form, while r is |
692 | | // not. |
693 | 0 | fiat_p256_felem Z2_mont; |
694 | 0 | fiat_p256_from_generic(Z2_mont, &p->Z); |
695 | 0 | fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont); |
696 | |
|
697 | 0 | fiat_p256_felem r_Z2; |
698 | 0 | fiat_p256_from_words(r_Z2, r->words); // r < order < p, so this is valid. |
699 | 0 | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); |
700 | |
|
701 | 0 | fiat_p256_felem X; |
702 | 0 | fiat_p256_from_generic(X, &p->X); |
703 | 0 | fiat_p256_from_montgomery(X, X); |
704 | |
|
705 | 0 | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { |
706 | 0 | return 1; |
707 | 0 | } |
708 | | |
709 | | // During signing the x coefficient is reduced modulo the group order. |
710 | | // Therefore there is a small possibility, less than 1/2^128, that group_order |
711 | | // < p.x < P. in that case we need not only to compare against |r| but also to |
712 | | // compare against r+group_order. |
713 | 0 | assert(group->field.N.width == group->order.N.width); |
714 | 0 | EC_FELEM tmp; |
715 | 0 | BN_ULONG carry = |
716 | 0 | bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width); |
717 | 0 | if (carry == 0 && |
718 | 0 | bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) { |
719 | 0 | fiat_p256_from_generic(r_Z2, &tmp); |
720 | 0 | fiat_p256_mul(r_Z2, r_Z2, Z2_mont); |
721 | 0 | if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) { |
722 | 0 | return 1; |
723 | 0 | } |
724 | 0 | } |
725 | | |
726 | 0 | return 0; |
727 | 0 | } |
728 | | |
729 | 1 | DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) { |
730 | 1 | out->point_get_affine_coordinates = |
731 | 1 | ec_GFp_nistp256_point_get_affine_coordinates; |
732 | 1 | out->add = ec_GFp_nistp256_add; |
733 | 1 | out->dbl = ec_GFp_nistp256_dbl; |
734 | 1 | out->mul = ec_GFp_nistp256_point_mul; |
735 | 1 | out->mul_base = ec_GFp_nistp256_point_mul_base; |
736 | 1 | out->mul_public = ec_GFp_nistp256_point_mul_public; |
737 | 1 | out->felem_mul = ec_GFp_mont_felem_mul; |
738 | 1 | out->felem_sqr = ec_GFp_mont_felem_sqr; |
739 | 1 | out->felem_to_bytes = ec_GFp_mont_felem_to_bytes; |
740 | 1 | out->felem_from_bytes = ec_GFp_mont_felem_from_bytes; |
741 | 1 | out->felem_reduce = ec_GFp_mont_felem_reduce; |
742 | | // TODO(davidben): This should use the specialized field arithmetic |
743 | | // implementation, rather than the generic one. |
744 | 1 | out->felem_exp = ec_GFp_mont_felem_exp; |
745 | 1 | out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery; |
746 | 1 | out->scalar_to_montgomery_inv_vartime = |
747 | 1 | ec_simple_scalar_to_montgomery_inv_vartime; |
748 | 1 | out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate; |
749 | 1 | } |