/src/nettle/ecc-secp256r1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* ecc-secp256r1.c |
2 | | |
3 | | Compile time constant (but machine dependent) tables. |
4 | | |
5 | | Copyright (C) 2013, 2014 Niels Möller |
6 | | |
7 | | This file is part of GNU Nettle. |
8 | | |
9 | | GNU Nettle is free software: you can redistribute it and/or |
10 | | modify it under the terms of either: |
11 | | |
12 | | * the GNU Lesser General Public License as published by the Free |
13 | | Software Foundation; either version 3 of the License, or (at your |
14 | | option) any later version. |
15 | | |
16 | | or |
17 | | |
18 | | * the GNU General Public License as published by the Free |
19 | | Software Foundation; either version 2 of the License, or (at your |
20 | | option) any later version. |
21 | | |
22 | | or both in parallel, as here. |
23 | | |
24 | | GNU Nettle is distributed in the hope that it will be useful, |
25 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
26 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
27 | | General Public License for more details. |
28 | | |
29 | | You should have received copies of the GNU General Public License and |
30 | | the GNU Lesser General Public License along with this program. If |
31 | | not, see http://www.gnu.org/licenses/. |
32 | | */ |
33 | | |
34 | | /* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ |
35 | | |
36 | | #if HAVE_CONFIG_H |
37 | | # include "config.h" |
38 | | #endif |
39 | | |
40 | | #include <assert.h> |
41 | | |
42 | | #include "ecc-internal.h" |
43 | | |
44 | | #if HAVE_NATIVE_ecc_secp256r1_redc |
45 | | # define USE_REDC 1 |
46 | | #else |
47 | | # define USE_REDC (ECC_REDC_SIZE != 0) |
48 | | #endif |
49 | | |
50 | | #include "ecc-secp256r1.h" |
51 | | |
52 | | #if HAVE_NATIVE_ecc_secp256r1_redc |
53 | | # define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc |
54 | | void |
55 | | ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp); |
56 | | #else /* !HAVE_NATIVE_ecc_secp256r1_redc */ |
57 | | # if ECC_REDC_SIZE > 0 |
58 | | # define ecc_secp256r1_redc ecc_pp1_redc |
59 | | # elif ECC_REDC_SIZE == 0 |
60 | | # define ecc_secp256r1_redc NULL |
61 | | # else |
62 | | # error Configuration error |
63 | | # endif |
64 | | #endif /* !HAVE_NATIVE_ecc_secp256r1_redc */ |
65 | | |
66 | | #if ECC_BMODP_SIZE < ECC_LIMB_SIZE |
67 | | #define ecc_secp256r1_modp ecc_mod |
68 | | #define ecc_secp256r1_modq ecc_mod |
69 | | #elif GMP_NUMB_BITS == 64 |
70 | | |
71 | | static void |
72 | | ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp) |
73 | 0 | { |
74 | 0 | mp_limb_t d1, u1, cy; |
75 | 0 | mp_size_t n; |
76 | | |
77 | | /* Reduce to < B^4 p up front, to avoid first quotient overflowing a limb. */ |
78 | 0 | cy = mpn_sub_n (xp + 4, xp + 4, p->m, p->size); |
79 | 0 | mpn_cnd_add_n (cy, xp + 4, xp + 4, p->m, p->size); |
80 | |
|
81 | 0 | d1 = UINT64_C(0xffffffff00000001); |
82 | 0 | for (n = 2*p->size, u1 = xp[--n] ;; n--) |
83 | 0 | { |
84 | 0 | mp_limb_t u0, q1, q0, qmax, r, t, mask; |
85 | 0 | u0 = xp[n-1]; |
86 | | |
87 | | /* Since d0 == 0, 2/1 division gives a good enough quotient |
88 | | approximation. |
89 | | |
90 | | <q1, q0> = v * u1 + <u1,u0>, with v = 2^32 - 1: |
91 | | |
92 | | +---+---+ |
93 | | | u1| u0| |
94 | | +---+---+ |
95 | | |-u1| |
96 | | +-+-+-+ |
97 | | | u1| |
98 | | +-+-+-+-+ |
99 | | | q1| q0| |
100 | | +---+---+ |
101 | | */ |
102 | 0 | q1 = u1 - (u1 > u0); |
103 | 0 | q0 = u0 - u1; |
104 | 0 | t = u1 << 32; |
105 | 0 | q0 += t; |
106 | 0 | q1 += (u1 >> 32) + (q0 < t) + 1; |
107 | | |
108 | | /* Force q = B-1 when u1 == d1 */ |
109 | 0 | qmax = - (mp_limb_t) (u1 >= d1); |
110 | | |
111 | | /* Candidate remainder r = u0 - q d1 (mod B), and 2/1 division |
112 | | adjustments. */ |
113 | 0 | r = u0 + (q1 << 32) - q1; |
114 | 0 | mask = - (mp_limb_t) (r > q0); |
115 | 0 | q1 += mask; |
116 | 0 | r += (mask & d1); |
117 | 0 | mask = - (mp_limb_t) (r >= d1); |
118 | 0 | q1 -= mask; |
119 | 0 | r -= (mask & d1); |
120 | | |
121 | | /* In the case that u1 == d1, we get q1 == 0, r == 0 here (and |
122 | | correct 2/1 quotient would be B). Replace with q1 = B-1, r = |
123 | | d1. */ |
124 | 0 | q1 |= qmax; |
125 | 0 | r += d1 & qmax; |
126 | |
|
127 | 0 | cy = mpn_submul_1 (xp + n - 4, p->m, 3, q1); |
128 | 0 | mask = - (mp_limb_t) (r < cy); |
129 | 0 | if (n == p->size) |
130 | 0 | { |
131 | 0 | rp[3] = r - cy + (mask & d1) + mpn_cnd_add_n (mask, rp, xp, p->m, 3); |
132 | 0 | return; |
133 | 0 | } |
134 | 0 | u1 = r - cy + (mask & d1) + mpn_cnd_add_n (mask, xp + n - 4, xp + n- 4, p->m, 3); |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | | static void |
139 | | ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp) |
140 | 0 | { |
141 | 0 | mp_limb_t d1, cy; |
142 | 0 | mp_size_t n; |
143 | | |
144 | | /* Reduce to < B^4 p up front, to avoid first quotient overflowing a limb. */ |
145 | 0 | cy = mpn_sub_n (xp + 4, xp + 4, q->m, q->size); |
146 | 0 | mpn_cnd_add_n (cy, xp + 4, xp + 4, q->m, q->size); |
147 | |
|
148 | 0 | d1 = UINT64_C(0xffffffff00000000); |
149 | 0 | n = 2*q->size; |
150 | 0 | for (;;) |
151 | 0 | { |
152 | 0 | mp_limb_t u1, u0, q1, q0, r, t, qmax, mask; |
153 | 0 | u1 = xp[--n]; |
154 | 0 | u0 = xp[n-1]; |
155 | | |
156 | | /* divappr2, specialized for d1 = 2^64 - 2^32, d0 = 2^64-1. |
157 | | |
158 | | <q1, q0> = v * u1 + <u1,u0>, with v = 2^32 - 1: |
159 | | |
160 | | +---+---+ |
161 | | | u1| u0| |
162 | | +---+---+ |
163 | | |-u1| |
164 | | +-+-+-+ |
165 | | | u1| |
166 | | +-+-+-+-+ |
167 | | | q1| q0| |
168 | | +---+---+ |
169 | | */ |
170 | 0 | q1 = u1 - (u1 > u0); |
171 | 0 | q0 = u0 - u1; |
172 | 0 | t = u1 << 32; |
173 | 0 | q0 += t; |
174 | 0 | q1 += (q0 < t); |
175 | 0 | t = u1 >> 32; |
176 | | /* The divappr2 algorithm handles only q < B - 1. If we check |
177 | | for u1 >= d1 = 2^{64}-2^{32}, we cover all cases where q = |
178 | | 2^64-1, and some when q = 2^64-2. The latter case is |
179 | | corrected by the final adjustment. */ |
180 | 0 | qmax = - (mp_limb_t) (t == 0xffffffff); |
181 | 0 | q1 += t + 1; |
182 | | |
183 | | /* Candidate remainder r = u0 - q (d1 + 1) (mod B), and divappr2 |
184 | | adjustments. |
185 | | |
186 | | For general divappr2, the expression is |
187 | | |
188 | | r = u_0 - q1 d1 - floor(q1 d0 / B) - 1 |
189 | | |
190 | | but in our case floor(q1 d0 / B) simplifies to q1 - 1. |
191 | | */ |
192 | 0 | r = u0 + (q1 << 32) - q1; |
193 | 0 | mask = - (mp_limb_t) (r >= q0); |
194 | 0 | q1 += mask; |
195 | 0 | r += (mask & (d1 + 1)); |
196 | 0 | q1 += (r >= d1 - 1); |
197 | | |
198 | | /* Replace by qmax, when that is needed */ |
199 | 0 | q1 |= qmax; |
200 | | |
201 | | /* Subtract, may underflow. */ |
202 | 0 | cy = mpn_submul_1 (xp + n - 4, q->m, 4, q1); |
203 | 0 | if (n == q->size) |
204 | 0 | { |
205 | 0 | mpn_cnd_add_n (cy > u1, rp, xp, q->m, 4); |
206 | 0 | return; |
207 | 0 | } |
208 | 0 | mpn_cnd_add_n (cy > u1, xp + n - 4, xp + n- 4, q->m, 4); |
209 | 0 | } |
210 | 0 | } |
211 | | |
212 | | #else |
213 | | #error Unsupported parameters |
214 | | #endif |
215 | | |
216 | | #define ECC_SECP256R1_INV_ITCH (4*ECC_LIMB_SIZE) |
217 | | |
218 | | static void |
219 | | ecc_secp256r1_inv (const struct ecc_modulo *p, |
220 | | mp_limb_t *rp, const mp_limb_t *ap, |
221 | | mp_limb_t *scratch) |
222 | 0 | { |
223 | 0 | #define a5m1 scratch |
224 | 0 | #define t0 (scratch + ECC_LIMB_SIZE) |
225 | 0 | #define a15m1 t0 |
226 | 0 | #define a32m1 a5m1 |
227 | 0 | #define tp (scratch + 2*ECC_LIMB_SIZE) |
228 | | /* |
229 | | Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3 |
230 | | |
231 | | 2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1) 4 S + 3 M |
232 | | 2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5) 10 S + 2 M |
233 | | 2^{16} - 1 = 1 + 2 (2^{15} - 1) S + M |
234 | | 2^{32} - 1 = (2^{16} + 1) (2^{16} - 1) 16 S + M |
235 | | 2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1 32 S + M |
236 | | 2^{192} - 2^{160} + 2^{128} + 2^{32} - 1 |
237 | | = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1 128 S + M |
238 | | 2^{224} - 2^{192} + 2^{160} + 2^{64} - 1 |
239 | | = 2^{32} (...) + 2^{32} - 1 32 S + M |
240 | | 2^{239} - 2^{207} + 2^{175} + 2^{79} - 1 |
241 | | = 2^{15} (...) + 2^{15} - 1 15 S + M |
242 | | 2^{254} - 2^{222} + 2^{190} + 2^{94} - 1 |
243 | | = 2^{15} (...) + 2^{15} - 1 15 S + M |
244 | | p - 2 = 2^2 (...) + 1 2 S M |
245 | | --------------- |
246 | | 255 S + 13 M |
247 | | */ |
248 | 0 | ecc_mod_sqr (p, rp, ap, tp); /* a^2 */ |
249 | 0 | ecc_mod_mul (p, rp, rp, ap, tp); /* a^3 */ |
250 | 0 | ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^{2^4 - 1} */ |
251 | 0 | ecc_mod_sqr (p, rp, t0, tp); /* a^{2^5 - 2} */ |
252 | 0 | ecc_mod_mul (p, a5m1, rp, ap, tp); /* a^{2^5 - 1}, a5m1 */ |
253 | |
|
254 | 0 | ecc_mod_pow_2kp1 (p, rp, a5m1, 5, tp); /* a^{2^{10} - 1, a5m1*/ |
255 | 0 | ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, tp); /* a^{2^{15} - 1}, a5m1 a15m1 */ |
256 | 0 | ecc_mod_sqr (p, rp, a15m1, tp); /* a^{2^{16} - 2}, a15m1 */ |
257 | 0 | ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^{16} - 1}, a15m1 */ |
258 | 0 | ecc_mod_pow_2kp1 (p, a32m1, rp, 16, tp); /* a^{2^{32} - 1}, a15m1, a32m1 */ |
259 | |
|
260 | 0 | ecc_mod_pow_2k_mul (p, rp, a32m1, 32, ap, tp);/* a^{2^{64} - 2^{32} + 1 */ |
261 | 0 | ecc_mod_pow_2k_mul (p, rp, rp, 128, a32m1, tp); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */ |
262 | 0 | ecc_mod_pow_2k_mul (p, rp, rp, 32, a32m1, tp);/* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */ |
263 | 0 | ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */ |
264 | 0 | ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */ |
265 | 0 | ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp); /* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */ |
266 | |
|
267 | 0 | #undef a5m1 |
268 | 0 | #undef t0 |
269 | 0 | #undef a15m1 |
270 | 0 | #undef a32m1 |
271 | 0 | #undef tp |
272 | 0 | } |
273 | | |
274 | | /* To guarantee that inputs to ecc_mod_zero_p are in the required range. */ |
275 | | #if ECC_LIMB_SIZE * GMP_NUMB_BITS != 256 |
276 | | #error Unsupported limb size |
277 | | #endif |
278 | | |
279 | | #define ECC_SECP256R1_SQRT_ITCH (3*ECC_LIMB_SIZE) |
280 | | |
281 | | static int |
282 | | ecc_secp256r1_sqrt (const struct ecc_modulo *m, |
283 | | mp_limb_t *rp, |
284 | | const mp_limb_t *cp, |
285 | | mp_limb_t *scratch) |
286 | 0 | { |
287 | | /* This computes the square root modulo p256 using the identity: |
288 | | |
289 | | sqrt(c) = c^(2^254 − 2^222 + 2^190 + 2^94) (mod P-256) |
290 | | |
291 | | which can be seen as a special case of Tonelli-Shanks with e=1. |
292 | | |
293 | | It would be nice to share part of the addition chain between inverse and sqrt. |
294 | | |
295 | | We need |
296 | | |
297 | | p-2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3 (inverse) |
298 | | |
299 | | and |
300 | | |
301 | | (p+1)/4 = 2^{254} − 2^{222} + 2^{190} + 2^{94} (sqrt) |
302 | | |
303 | | which we can both get conveniently from |
304 | | |
305 | | (p-3)/4 = 2^{254} − 2^{222} + 2^{190} + 2^{94} - 1 |
306 | | |
307 | | But addition chain for 2^{94} - 1 appears to cost a few more mul |
308 | | operations than the current, separate, chains. */ |
309 | |
|
310 | 0 | #define t0 scratch |
311 | 0 | #define tp (scratch + ECC_LIMB_SIZE) |
312 | |
|
313 | 0 | ecc_mod_sqr (m, rp, cp, tp); /* c^2 */ |
314 | 0 | ecc_mod_mul (m, t0, rp, cp, tp); /* c^3 */ |
315 | 0 | ecc_mod_pow_2kp1 (m, rp, t0, 2, tp); /* c^(2^4 - 1) */ |
316 | 0 | ecc_mod_pow_2kp1 (m, t0, rp, 4, tp); /* c^(2^8 - 1) */ |
317 | 0 | ecc_mod_pow_2kp1 (m, rp, t0, 8, tp); /* c^(2^16 - 1) */ |
318 | 0 | ecc_mod_pow_2kp1 (m, t0, rp, 16, tp); /* c^(2^32 - 1) */ |
319 | 0 | ecc_mod_pow_2k_mul (m, rp, t0, 32, cp, tp); /* c^(2^64 - 2^32 + 1) */ |
320 | 0 | ecc_mod_pow_2k_mul (m, t0, rp, 96, cp, tp); /* c^(2^160 - 2^128 + 2^96 + 1) */ |
321 | 0 | ecc_mod_pow_2k (m, rp, t0, 94, tp); /* c^(2^254 - 2^222 + 2^190 + 2^94) */ |
322 | |
|
323 | 0 | ecc_mod_sqr (m, t0, rp, tp); |
324 | 0 | ecc_mod_sub (m, t0, t0, cp); |
325 | |
|
326 | 0 | return ecc_mod_zero_p (m, t0); |
327 | 0 | #undef t0 |
328 | 0 | #undef tp |
329 | |
|
330 | 0 | } |
331 | | |
332 | | const struct ecc_curve _nettle_secp_256r1 = |
333 | | { |
334 | | { |
335 | | 256, |
336 | | ECC_LIMB_SIZE, |
337 | | ECC_BMODP_SIZE, |
338 | | ECC_REDC_SIZE, |
339 | | ECC_SECP256R1_INV_ITCH, |
340 | | ECC_SECP256R1_SQRT_ITCH, |
341 | | 0, |
342 | | |
343 | | ecc_p, |
344 | | ecc_Bmodp, |
345 | | ecc_Bmodp_shifted, |
346 | | ecc_Bm2p, |
347 | | ecc_redc_ppm1, |
348 | | ecc_pp1h, |
349 | | |
350 | | ecc_secp256r1_modp, |
351 | | USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp, |
352 | | ecc_secp256r1_inv, |
353 | | ecc_secp256r1_sqrt, |
354 | | NULL, |
355 | | }, |
356 | | { |
357 | | 256, |
358 | | ECC_LIMB_SIZE, |
359 | | ECC_BMODQ_SIZE, |
360 | | 0, |
361 | | ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), |
362 | | 0, |
363 | | 0, |
364 | | |
365 | | ecc_q, |
366 | | ecc_Bmodq, |
367 | | ecc_Bmodq_shifted, |
368 | | ecc_Bm2q, |
369 | | NULL, |
370 | | ecc_qp1h, |
371 | | |
372 | | ecc_secp256r1_modq, |
373 | | ecc_secp256r1_modq, |
374 | | ecc_mod_inv, |
375 | | NULL, |
376 | | NULL, |
377 | | }, |
378 | | |
379 | | USE_REDC, |
380 | | ECC_PIPPENGER_K, |
381 | | ECC_PIPPENGER_C, |
382 | | |
383 | | ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE), |
384 | | ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE), |
385 | | ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), |
386 | | ECC_MUL_A_ITCH (ECC_LIMB_SIZE), |
387 | | ECC_MUL_G_ITCH (ECC_LIMB_SIZE), |
388 | | ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP256R1_INV_ITCH), |
389 | | |
390 | | ecc_add_jja, |
391 | | ecc_add_jjj, |
392 | | ecc_dup_jj, |
393 | | ecc_mul_a, |
394 | | ecc_mul_g, |
395 | | ecc_j_to_a, |
396 | | |
397 | | ecc_b, |
398 | | ecc_unit, |
399 | | ecc_table |
400 | | }; |
401 | | |
402 | | const struct ecc_curve *nettle_get_secp_256r1(void) |
403 | 0 | { |
404 | 0 | return &_nettle_secp_256r1; |
405 | 0 | } |