/src/relic/src/fp/relic_fp_inv.c
Line | Count | Source |
1 | | /* |
2 | | * RELIC is an Efficient LIbrary for Cryptography |
3 | | * Copyright (c) 2009 RELIC Authors |
4 | | * |
5 | | * This file is part of RELIC. RELIC is legal property of its developers, |
6 | | * whose names are not listed here. Please refer to the COPYRIGHT file |
7 | | * for contact information. |
8 | | * |
9 | | * RELIC is free software; you can redistribute it and/or modify it under the |
10 | | * terms of the version 2.1 (or later) of the GNU Lesser General Public License |
11 | | * as published by the Free Software Foundation; or version 2.0 of the Apache |
12 | | * License as published by the Apache Software Foundation. See the LICENSE files |
13 | | * for more details. |
14 | | * |
15 | | * RELIC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | | * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
17 | | * A PARTICULAR PURPOSE. See the LICENSE files for more details. |
18 | | * |
19 | | * You should have received a copy of the GNU Lesser General Public or the |
20 | | * Apache License along with RELIC. If not, see <https://www.gnu.org/licenses/> |
21 | | * or <https://www.apache.org/licenses/>. |
22 | | */ |
23 | | |
24 | | /** |
25 | | * @file |
26 | | * |
27 | | * Implementation of the prime field inversion functions. |
28 | | * |
29 | | * @ingroup fp |
30 | | */ |
31 | | |
32 | | #include "relic_core.h" |
33 | | #include "relic_fp_low.h" |
34 | | #include "relic_bn_low.h" |
35 | | |
36 | | /*============================================================================*/ |
37 | | /* Private definitions */ |
38 | | /*============================================================================*/ |
39 | | |
40 | | #if FP_INV == JMPDS || !defined(STRIP) |
41 | | |
42 | | #ifdef RLC_FP_ROOM |
43 | | |
44 | | static void bn_mul2_low(dig_t *c, const dig_t *a, dis_t digit, size_t size) { |
45 | | int sd = digit >> (RLC_DIG - 1); |
46 | | digit = (digit ^ sd) - sd; |
47 | | c[size] = bn_mul1_low(c, a, digit, size); |
48 | | } |
49 | | |
50 | | #endif /* RLC_FP_ROOM */ |
51 | | |
52 | | #if WSIZE == 8 |
53 | | static int jumpdivstep(dis_t m[4], int delta, dig_t f, dig_t g, int s) { |
54 | | #else |
55 | 848k | static dis_t jumpdivstep(dis_t m[4], dis_t delta, dig_t f, dig_t g, int s) { |
56 | 848k | #endif |
57 | 848k | dig_t u = 1, v = 0, q = 0, r = 1, c0, c1; |
58 | | |
59 | | /* This is actually faster than my previous version, several tricks from |
60 | | * https://github.com/bitcoin-core/secp256k1/blob/master/src/modinv64_impl.h |
61 | | */ |
62 | 50.9M | for (s--; s >= 0; s--) { |
63 | | /* First handle the else part: if delta < 0, compute -(f,u,v). */ |
64 | 50.1M | c0 = delta >> (8 * sizeof(delta) - 1); |
65 | 50.1M | c1 = -(g & 1); |
66 | 50.1M | c0 &= c1; |
67 | | /* Conditionally add -(f,u,v) to (g,q,r) */ |
68 | 50.1M | g += ((f ^ c0) - c0) & c1; |
69 | 50.1M | q += ((u ^ c0) - c0) & c1; |
70 | 50.1M | r += ((v ^ c0) - c0) & c1; |
71 | | /* Now handle the 'if' part, so c0 will be (delta < 0) && (g & 1)) */ |
72 | | /* delta = RLC_SEL(delta, -delta, c0 & 1) - 2 (for half-divstep), thus |
73 | | * delta = - delta - 2 or delta - 1 */ |
74 | | #if WSIZE == 8 |
75 | | delta = RLC_SEL(delta, -delta, c0 & 1) - 2; |
76 | | #else |
77 | 50.1M | delta = (delta ^ c0) - 1; |
78 | 50.1M | #endif |
79 | 50.1M | f = f + (g & c0); |
80 | 50.1M | u = u + (q & c0); |
81 | 50.1M | v = v + (r & c0); |
82 | 50.1M | g >>= 1; |
83 | 50.1M | u += u; |
84 | 50.1M | v += v; |
85 | 50.1M | } |
86 | 848k | m[0] = u; |
87 | 848k | m[1] = v; |
88 | 848k | m[2] = q; |
89 | 848k | m[3] = r; |
90 | 848k | return delta; |
91 | 848k | } |
92 | | |
93 | | #endif |
94 | | |
95 | | /*============================================================================*/ |
96 | | /* Public definitions */ |
97 | | /*============================================================================*/ |
98 | | |
99 | | #if FP_INV == BASIC || !defined(STRIP) |
100 | | |
101 | 0 | void fp_inv_basic(fp_t c, const fp_t a) { |
102 | 0 | bn_t e; |
103 | |
|
104 | 0 | bn_null(e); |
105 | |
|
106 | 0 | if (fp_is_zero(a)) { |
107 | 0 | RLC_THROW(ERR_NO_VALID); |
108 | 0 | return; |
109 | 0 | } |
110 | | |
111 | 0 | RLC_TRY { |
112 | 0 | bn_new(e); |
113 | |
|
114 | 0 | e->used = RLC_FP_DIGS; |
115 | 0 | dv_copy(e->dp, fp_prime_get(), RLC_FP_DIGS); |
116 | 0 | bn_sub_dig(e, e, 2); |
117 | |
|
118 | 0 | fp_exp(c, a, e); |
119 | 0 | } |
120 | 0 | RLC_CATCH_ANY { |
121 | 0 | RLC_THROW(ERR_CAUGHT); |
122 | 0 | } |
123 | 0 | RLC_FINALLY { |
124 | 0 | bn_free(e); |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | | #endif |
129 | | |
130 | | #if FP_INV == BINAR || !defined(STRIP) |
131 | | |
132 | 0 | void fp_inv_binar(fp_t c, const fp_t a) { |
133 | 0 | bn_t u, v, g1, g2, p; |
134 | |
|
135 | 0 | bn_null(u); |
136 | 0 | bn_null(v); |
137 | 0 | bn_null(g1); |
138 | 0 | bn_null(g2); |
139 | 0 | bn_null(p); |
140 | |
|
141 | 0 | if (fp_is_zero(a)) { |
142 | 0 | RLC_THROW(ERR_NO_VALID); |
143 | 0 | return; |
144 | 0 | } |
145 | | |
146 | 0 | RLC_TRY { |
147 | 0 | bn_new(u); |
148 | 0 | bn_new(v); |
149 | 0 | bn_new(g1); |
150 | 0 | bn_new(g2); |
151 | 0 | bn_new(p); |
152 | | |
153 | | /* u = a, v = p, g1 = 1, g2 = 0. */ |
154 | 0 | fp_prime_back(u, a); |
155 | 0 | p->used = RLC_FP_DIGS; |
156 | 0 | dv_copy(p->dp, fp_prime_get(), RLC_FP_DIGS); |
157 | 0 | bn_copy(v, p); |
158 | 0 | bn_set_dig(g1, 1); |
159 | 0 | bn_zero(g2); |
160 | | |
161 | | /* While (u != 1 && v != 1). */ |
162 | 0 | while (1) { |
163 | | /* While u is even do. */ |
164 | 0 | while (!(u->dp[0] & 1)) { |
165 | | /* u = u/2. */ |
166 | 0 | fp_rsh1_low(u->dp, u->dp); |
167 | | /* If g1 is even then g1 = g1/2; else g1 = (g1 + p)/2. */ |
168 | 0 | if (g1->dp[0] & 1) { |
169 | 0 | bn_add(g1, g1, p); |
170 | 0 | } |
171 | 0 | bn_hlv(g1, g1); |
172 | 0 | } |
173 | |
|
174 | 0 | while (u->dp[u->used - 1] == 0) { |
175 | 0 | u->used--; |
176 | 0 | } |
177 | 0 | if (u->used == 1 && u->dp[0] == 1) |
178 | 0 | break; |
179 | | |
180 | | /* While z divides v do. */ |
181 | 0 | while (!(v->dp[0] & 1)) { |
182 | | /* v = v/2. */ |
183 | 0 | fp_rsh1_low(v->dp, v->dp); |
184 | | /* If g2 is even then g2 = g2/2; else (g2 = g2 + p)/2. */ |
185 | 0 | if (g2->dp[0] & 1) { |
186 | 0 | bn_add(g2, g2, p); |
187 | 0 | } |
188 | 0 | bn_hlv(g2, g2); |
189 | 0 | } |
190 | |
|
191 | 0 | while (v->dp[v->used - 1] == 0) { |
192 | 0 | v->used--; |
193 | 0 | } |
194 | 0 | if (v->used == 1 && v->dp[0] == 1) |
195 | 0 | break; |
196 | | |
197 | | /* If u > v then u = u - v, g1 = g1 - g2. */ |
198 | 0 | if (bn_cmp(u, v) == RLC_GT) { |
199 | 0 | bn_sub(u, u, v); |
200 | 0 | bn_sub(g1, g1, g2); |
201 | 0 | } else { |
202 | 0 | bn_sub(v, v, u); |
203 | 0 | bn_sub(g2, g2, g1); |
204 | 0 | } |
205 | 0 | } |
206 | | /* If u == 1 then return g1; else return g2. */ |
207 | 0 | if (bn_cmp_dig(u, 1) == RLC_EQ) { |
208 | 0 | while (bn_sign(g1) == RLC_NEG) { |
209 | 0 | bn_add(g1, g1, p); |
210 | 0 | } |
211 | 0 | while (bn_cmp(g1, p) != RLC_LT) { |
212 | 0 | bn_sub(g1, g1, p); |
213 | 0 | } |
214 | 0 | #if FP_RDC == MONTY |
215 | 0 | fp_prime_conv(c, g1); |
216 | | #else |
217 | | dv_copy(c, g1->dp, RLC_FP_DIGS); |
218 | | #endif |
219 | 0 | } else { |
220 | 0 | while (bn_sign(g2) == RLC_NEG) { |
221 | 0 | bn_add(g2, g2, p); |
222 | 0 | } |
223 | 0 | while (bn_cmp(g2, p) != RLC_LT) { |
224 | 0 | bn_sub(g2, g2, p); |
225 | 0 | } |
226 | 0 | #if FP_RDC == MONTY |
227 | 0 | fp_prime_conv(c, g2); |
228 | | #else |
229 | | dv_copy(c, g2->dp, RLC_FP_DIGS); |
230 | | #endif |
231 | 0 | } |
232 | 0 | } |
233 | 0 | RLC_CATCH_ANY { |
234 | 0 | RLC_THROW(ERR_CAUGHT); |
235 | 0 | } |
236 | 0 | RLC_FINALLY { |
237 | 0 | bn_free(u); |
238 | 0 | bn_free(v); |
239 | 0 | bn_free(g1); |
240 | 0 | bn_free(g2); |
241 | 0 | bn_free(p); |
242 | 0 | } |
243 | 0 | } |
244 | | |
245 | | #endif |
246 | | |
247 | | #if FP_INV == MONTY || !defined(STRIP) |
248 | | |
249 | 0 | void fp_inv_monty(fp_t c, const fp_t a) { |
250 | 0 | bn_t _a, _p, u, v, x1, x2; |
251 | 0 | const dig_t *p = NULL; |
252 | 0 | dig_t carry; |
253 | 0 | int i, k; |
254 | |
|
255 | 0 | bn_null(_a); |
256 | 0 | bn_null(_p); |
257 | 0 | bn_null(u); |
258 | 0 | bn_null(v); |
259 | 0 | bn_null(x1); |
260 | 0 | bn_null(x2); |
261 | |
|
262 | 0 | if (fp_is_zero(a)) { |
263 | 0 | RLC_THROW(ERR_NO_VALID); |
264 | 0 | return; |
265 | 0 | } |
266 | | |
267 | 0 | RLC_TRY { |
268 | 0 | bn_new(_a); |
269 | 0 | bn_new(_p); |
270 | 0 | bn_new(u); |
271 | 0 | bn_new(v); |
272 | 0 | bn_new(x1); |
273 | 0 | bn_new(x2); |
274 | |
|
275 | 0 | p = fp_prime_get(); |
276 | | |
277 | | /* u = a, v = p, x1 = 1, x2 = 0, k = 0. */ |
278 | 0 | k = 0; |
279 | 0 | bn_set_dig(x1, 1); |
280 | 0 | bn_zero(x2); |
281 | |
|
282 | | #if FP_RDC != MONTY |
283 | | bn_read_raw(_a, a, RLC_FP_DIGS); |
284 | | bn_read_raw(_p, p, RLC_FP_DIGS); |
285 | | bn_mod_monty_conv(u, _a, _p); |
286 | | #else |
287 | 0 | bn_read_raw(u, a, RLC_FP_DIGS); |
288 | 0 | #endif |
289 | 0 | bn_read_raw(v, p, RLC_FP_DIGS); |
290 | |
|
291 | 0 | while (!bn_is_zero(v)) { |
292 | | /* If v is even then v = v/2, x1 = 2 * x1. */ |
293 | 0 | if (!(v->dp[0] & 1)) { |
294 | 0 | fp_rsh1_low(v->dp, v->dp); |
295 | 0 | bn_dbl(x1, x1); |
296 | 0 | } else { |
297 | | /* If u is even then u = u/2, x2 = 2 * x2. */ |
298 | 0 | if (!(u->dp[0] & 1)) { |
299 | 0 | fp_rsh1_low(u->dp, u->dp); |
300 | 0 | bn_dbl(x2, x2); |
301 | | /* If v >= u,then v = (v - u)/2, x2 += x1, x1 = 2 * x1. */ |
302 | 0 | } else { |
303 | 0 | if (bn_cmp(v, u) != RLC_LT) { |
304 | 0 | fp_subn_low(v->dp, v->dp, u->dp); |
305 | 0 | fp_rsh1_low(v->dp, v->dp); |
306 | 0 | bn_add(x2, x2, x1); |
307 | 0 | bn_dbl(x1, x1); |
308 | 0 | } else { |
309 | | /* u = (u - v)/2, x1 += x2, x2 = 2 * x2. */ |
310 | 0 | fp_subn_low(u->dp, u->dp, v->dp); |
311 | 0 | fp_rsh1_low(u->dp, u->dp); |
312 | 0 | bn_add(x1, x1, x2); |
313 | 0 | bn_dbl(x2, x2); |
314 | 0 | } |
315 | 0 | } |
316 | 0 | } |
317 | 0 | bn_trim(u); |
318 | 0 | bn_trim(v); |
319 | 0 | k++; |
320 | 0 | } |
321 | | |
322 | | /* If x1 > p then x1 = x1 - p. */ |
323 | 0 | for (i = x1->used; i < RLC_FP_DIGS; i++) { |
324 | 0 | x1->dp[i] = 0; |
325 | 0 | } |
326 | |
|
327 | 0 | while (x1->used > RLC_FP_DIGS) { |
328 | 0 | carry = bn_subn_low(x1->dp, x1->dp, fp_prime_get(), RLC_FP_DIGS); |
329 | 0 | bn_sub1_low(x1->dp + RLC_FP_DIGS, x1->dp + RLC_FP_DIGS, carry, |
330 | 0 | x1->used - RLC_FP_DIGS); |
331 | 0 | bn_trim(x1); |
332 | 0 | } |
333 | 0 | if (dv_cmp(x1->dp, fp_prime_get(), RLC_FP_DIGS) == RLC_GT) { |
334 | 0 | fp_subn_low(x1->dp, x1->dp, fp_prime_get()); |
335 | 0 | } |
336 | | |
337 | | /* If k < Wt then x1 = x1 * R^2 * R^{-1} mod p. */ |
338 | 0 | if (k <= RLC_FP_DIGS * RLC_DIG) { |
339 | 0 | k = k + RLC_FP_DIGS * RLC_DIG; |
340 | 0 | #if FP_RDC == MONTY |
341 | 0 | fp_mul(x1->dp, x1->dp, fp_prime_get_conv()); |
342 | 0 | #endif |
343 | 0 | } |
344 | |
|
345 | 0 | #if FP_RDC == MONTY |
346 | | /* x1 = x1 * R^2 * R^{-1} mod p. */ |
347 | 0 | fp_mul(x1->dp, x1->dp, fp_prime_get_conv()); |
348 | 0 | #endif |
349 | | /* c = x1 * 2^(2Wt - k) * R^{-1} mod p. */ |
350 | 0 | fp_copy(c, x1->dp); |
351 | 0 | dv_zero(x1->dp, RLC_FP_DIGS); |
352 | 0 | bn_set_2b(x1, 2 * RLC_FP_DIGS * RLC_DIG - k); |
353 | 0 | fp_mul(c, c, x1->dp); |
354 | |
|
355 | | #if FP_RDC != MONTY |
356 | | /* |
357 | | * If we do not use Montgomery reduction, convert back. |
358 | | */ |
359 | | bn_read_raw(_a, c, RLC_FP_DIGS); |
360 | | bn_mod_monty_back(_a, _a, _p); |
361 | | |
362 | | fp_zero(c); |
363 | | dv_copy(c, _a->dp, _a->used); |
364 | | #endif |
365 | 0 | } |
366 | 0 | RLC_CATCH_ANY { |
367 | 0 | RLC_THROW(ERR_CAUGHT); |
368 | 0 | } |
369 | 0 | RLC_FINALLY { |
370 | 0 | bn_free(_a); |
371 | 0 | bn_free(_p); |
372 | 0 | bn_free(u); |
373 | 0 | bn_free(v); |
374 | 0 | bn_free(x1); |
375 | 0 | bn_free(x2); |
376 | 0 | } |
377 | 0 | } |
378 | | |
379 | | #endif |
380 | | |
381 | | #if FP_INV == EXGCD || !defined(STRIP) |
382 | | |
383 | 0 | void fp_inv_exgcd(fp_t c, const fp_t a) { |
384 | 0 | bn_t u, v, g1, g2, p, q, r; |
385 | |
|
386 | 0 | bn_null(u); |
387 | 0 | bn_null(v); |
388 | 0 | bn_null(g1); |
389 | 0 | bn_null(g2); |
390 | 0 | bn_null(p); |
391 | 0 | bn_null(q); |
392 | 0 | bn_null(r); |
393 | |
|
394 | 0 | if (fp_is_zero(a)) { |
395 | 0 | RLC_THROW(ERR_NO_VALID); |
396 | 0 | return; |
397 | 0 | } |
398 | | |
399 | 0 | RLC_TRY { |
400 | 0 | bn_new(u); |
401 | 0 | bn_new(v); |
402 | 0 | bn_new(g1); |
403 | 0 | bn_new(g2); |
404 | 0 | bn_new(p); |
405 | 0 | bn_new(q); |
406 | 0 | bn_new(r); |
407 | | |
408 | | /* u = a, v = p, g1 = 1, g2 = 0. */ |
409 | 0 | fp_prime_back(u, a); |
410 | 0 | p->used = RLC_FP_DIGS; |
411 | 0 | dv_copy(p->dp, fp_prime_get(), RLC_FP_DIGS); |
412 | 0 | bn_copy(v, p); |
413 | 0 | bn_set_dig(g1, 1); |
414 | 0 | bn_zero(g2); |
415 | | |
416 | | /* While (u != 1. */ |
417 | 0 | while (bn_cmp_dig(u, 1) != RLC_EQ) { |
418 | | /* q = [v/u], r = v mod u. */ |
419 | 0 | bn_div_rem(q, r, v, u); |
420 | | /* v = u, u = r. */ |
421 | 0 | bn_copy(v, u); |
422 | 0 | bn_copy(u, r); |
423 | | /* r = g2 - q * g1. */ |
424 | 0 | bn_mul(r, q, g1); |
425 | 0 | bn_sub(r, g2, r); |
426 | | /* g2 = g1, g1 = r. */ |
427 | 0 | bn_copy(g2, g1); |
428 | 0 | bn_copy(g1, r); |
429 | 0 | } |
430 | |
|
431 | 0 | if (bn_sign(g1) == RLC_NEG) { |
432 | 0 | bn_add(g1, g1, p); |
433 | 0 | } |
434 | 0 | fp_prime_conv(c, g1); |
435 | 0 | } |
436 | 0 | RLC_CATCH_ANY { |
437 | 0 | RLC_THROW(ERR_CAUGHT); |
438 | 0 | } |
439 | 0 | RLC_FINALLY { |
440 | 0 | bn_free(u); |
441 | 0 | bn_free(v); |
442 | 0 | bn_free(g1); |
443 | 0 | bn_free(g2); |
444 | 0 | bn_free(p); |
445 | 0 | bn_free(q); |
446 | 0 | bn_free(r); |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | | #endif |
451 | | |
452 | | #if FP_INV == DIVST || !defined(STRIP) |
453 | | |
454 | 0 | void fp_inv_divst(fp_t c, const fp_t a) { |
455 | | /* Compute number of iterations based on modulus size. */ |
456 | | #if FP_PRIME < 46 |
457 | | const int d = (49 * FP_PRIME + 80) / 17; |
458 | | #else |
459 | 0 | const int d = (49 * FP_PRIME + 57) / 17; |
460 | 0 | #endif |
461 | 0 | int g0, d0; |
462 | 0 | dig_t fs, gs, delta = 1; |
463 | 0 | bn_t _t; |
464 | 0 | fp_t f, g, u, v, r; |
465 | 0 | dv_t t; |
466 | |
|
467 | 0 | bn_null(_t); |
468 | 0 | dv_null(t); |
469 | 0 | fp_null(f); |
470 | 0 | fp_null(g); |
471 | 0 | fp_null(u); |
472 | 0 | fp_null(v); |
473 | 0 | fp_null(r); |
474 | |
|
475 | 0 | if (fp_is_zero(a)) { |
476 | 0 | RLC_THROW(ERR_NO_VALID); |
477 | 0 | return; |
478 | 0 | } |
479 | | |
480 | 0 | RLC_TRY { |
481 | 0 | bn_new(_t); |
482 | 0 | dv_new(t); |
483 | 0 | fp_new(f); |
484 | 0 | fp_new(g); |
485 | 0 | fp_new(u); |
486 | 0 | fp_new(v); |
487 | 0 | fp_new(r); |
488 | |
|
489 | 0 | fp_zero(v); |
490 | 0 | fp_set_dig(r, 1); |
491 | 0 | dv_copy(f, fp_prime_get(), RLC_FP_DIGS); |
492 | 0 | #if FP_RDC == MONTY |
493 | | /* Convert a from Montgomery form. */ |
494 | 0 | dv_zero(t, 2 * RLC_FP_DIGS); |
495 | 0 | fp_copy(t, a); |
496 | 0 | fp_rdcn_low(g, t); |
497 | | #else |
498 | | fp_copy(g, a); |
499 | | #endif |
500 | 0 | fs = gs = RLC_POS; |
501 | |
|
502 | 0 | for (int i = 0; i < d; i++) { |
503 | 0 | g0 = g[0] & 1; |
504 | 0 | d0 = delta >> (RLC_DIG - 1); |
505 | 0 | d0 = g0 & ~d0; |
506 | | /* Conditionally negate delta if d0 is set. */ |
507 | 0 | delta = (delta ^ -d0) + d0; |
508 | | /* Conditionally swap based on d0. */ |
509 | 0 | dv_swap_sec(r, v, RLC_FP_DIGS, d0); |
510 | 0 | fp_negm_low(t, r); |
511 | 0 | dv_swap_sec(f, g, RLC_FP_DIGS, d0); |
512 | 0 | dv_copy_sec(r, t, RLC_FP_DIGS, d0); |
513 | 0 | for (int j = 0; j < RLC_FP_DIGS; j++) { |
514 | 0 | g[j] = RLC_SEL(g[j], ~g[j], d0); |
515 | 0 | } |
516 | 0 | fp_add1_low(g, g, d0); |
517 | 0 | t[0] = (fs ^ gs) & (-d0); |
518 | 0 | fs ^= t[0]; |
519 | 0 | gs ^= t[0] ^ d0; |
520 | |
|
521 | 0 | delta++; |
522 | 0 | g0 = g[0] & 1; |
523 | 0 | for (int j = 0; j < RLC_FP_DIGS; j++) { |
524 | 0 | t[j] = v[j] & (-g0); |
525 | 0 | u[j] = f[j] & (-g0); |
526 | 0 | } |
527 | 0 | fp_addm_low(r, r, t); |
528 | 0 | fp_dblm_low(v, v); |
529 | | |
530 | | /* Compute g = (g + g0*f) div 2 by conditionally copying f to u and |
531 | | * updating the sign of g. */ |
532 | 0 | gs ^= g0 & (fs ^ bn_addn_low(g, g, u, RLC_FP_DIGS)); |
533 | | /* Shift and restore the sign. */ |
534 | 0 | fp_rsh1_low(g, g); |
535 | 0 | g[RLC_FP_DIGS - 1] |= (dig_t)gs << (RLC_DIG - 1); |
536 | 0 | } |
537 | 0 | fp_neg(t, v); |
538 | 0 | fp_copy_sec(v, t, fs); |
539 | |
|
540 | 0 | dv_copy(t, fp_prime_get(), RLC_FP_DIGS); |
541 | 0 | fp_add_dig(t, t, 1); |
542 | 0 | fp_hlv(t, t); |
543 | | #if WSIZE == 8 |
544 | | bn_set_dig(_t, d >> 8); |
545 | | bn_lsh(_t, _t, 8); |
546 | | bn_add_dig(_t, _t, d & 0xFF); |
547 | | #else |
548 | 0 | bn_set_dig(_t, d); |
549 | 0 | #endif |
550 | 0 | fp_exp(t, t, _t); |
551 | |
|
552 | 0 | fp_mul(c, v, t); |
553 | 0 | } RLC_CATCH_ANY { |
554 | 0 | RLC_THROW(ERR_CAUGHT) |
555 | 0 | } RLC_FINALLY { |
556 | 0 | bn_free(_t); |
557 | 0 | dv_free(t); |
558 | 0 | fp_free(f); |
559 | 0 | fp_free(g); |
560 | 0 | fp_free(u); |
561 | 0 | fp_free(v); |
562 | 0 | fp_free(r); |
563 | 0 | } |
564 | 0 | } |
565 | | |
566 | | #endif |
567 | | |
568 | | #if FP_INV == JMPDS || !defined(STRIP) |
569 | | |
570 | 84.8k | void fp_inv_jmpds(fp_t c, const fp_t a) { |
571 | 84.8k | dis_t m[4]; |
572 | | /* Compute number of iterations based on modulus size. */ |
573 | | /* Iterations taken directly from https://github.com/sipa/safegcd-bounds */ |
574 | 84.8k | const int iterations = (45907 * FP_PRIME + 26313) / 19929; |
575 | 84.8k | int loops, i, j = 0, s = RLC_DIG - 2; |
576 | 84.8k | dv_t f, g, t, p, t0, t1, u0, u1, v0, v1, p01, p11; |
577 | 84.8k | dig_t sf, sg; |
578 | 84.8k | fp_t pre; |
579 | | #if WSIZE == 8 |
580 | | int d = -1; |
581 | | #else |
582 | 84.8k | dis_t d = -1; |
583 | 84.8k | #endif |
584 | | |
585 | 84.8k | if (fp_is_zero(a)) { |
586 | 0 | RLC_THROW(ERR_NO_VALID); |
587 | 0 | return; |
588 | 0 | } |
589 | | |
590 | 84.8k | dv_null(f); |
591 | 84.8k | dv_null(g); |
592 | 84.8k | dv_null(t); |
593 | 84.8k | dv_null(p); |
594 | 84.8k | dv_null(t0); |
595 | 84.8k | dv_null(t1); |
596 | 84.8k | dv_null(u0); |
597 | 84.8k | dv_null(u1); |
598 | 84.8k | dv_null(v0); |
599 | 84.8k | dv_null(v1); |
600 | 84.8k | dv_null(p01); |
601 | 84.8k | dv_null(p11); |
602 | 84.8k | fp_null(pre); |
603 | | |
604 | 84.8k | RLC_TRY { |
605 | 84.8k | dv_new(t0); |
606 | 84.8k | dv_new(f); |
607 | 84.8k | dv_new(t); |
608 | 84.8k | dv_new(p); |
609 | 84.8k | dv_new(g); |
610 | 84.8k | dv_new(t1); |
611 | 84.8k | dv_new(u0); |
612 | 84.8k | dv_new(u1); |
613 | 84.8k | dv_new(v0); |
614 | 84.8k | dv_new(v1); |
615 | 84.8k | dv_new(p01); |
616 | 84.8k | dv_new(p11); |
617 | 84.8k | fp_new(pre); |
618 | | |
619 | 84.8k | fp_copy(pre, core_get()->inv.dp); |
620 | 84.8k | dv_zero(t, 2 * RLC_FP_DIGS); |
621 | 84.8k | dv_zero(p, 2 * RLC_FP_DIGS); |
622 | 84.8k | dv_zero(u0, 2 * RLC_FP_DIGS); |
623 | 84.8k | dv_zero(u1, 2 * RLC_FP_DIGS); |
624 | 84.8k | dv_zero(v0, 2 * RLC_FP_DIGS); |
625 | 84.8k | dv_zero(v1, 2 * RLC_FP_DIGS); |
626 | 84.8k | dv_copy(f, fp_prime_get(), RLC_FP_DIGS); |
627 | 84.8k | #if FP_RDC == MONTY |
628 | | /* Convert a from Montgomery form. */ |
629 | 84.8k | fp_copy(p, a); |
630 | 84.8k | fp_rdcn_low(g, p); |
631 | | #else |
632 | | fp_copy(g, a); |
633 | | #endif |
634 | 84.8k | f[RLC_FP_DIGS] = g[RLC_FP_DIGS] = 0; |
635 | 84.8k | d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s); |
636 | | |
637 | 84.8k | t0[RLC_FP_DIGS] = bn_muls_low(t0, f, RLC_POS, m[0], RLC_FP_DIGS); |
638 | 84.8k | t1[RLC_FP_DIGS] = bn_muls_low(t1, g, RLC_POS, m[1], RLC_FP_DIGS); |
639 | 84.8k | bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1); |
640 | | |
641 | 84.8k | f[RLC_FP_DIGS] = bn_muls_low(f, f, RLC_POS, m[2], RLC_FP_DIGS); |
642 | 84.8k | t1[RLC_FP_DIGS] = bn_muls_low(t1, g, RLC_POS, m[3], RLC_FP_DIGS); |
643 | 84.8k | bn_addn_low(t1, t1, f, RLC_FP_DIGS + 1); |
644 | | |
645 | | /* Update f and g. */ |
646 | 84.8k | bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s); |
647 | 84.8k | bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s); |
648 | | |
649 | | /* Update column vector below. */ |
650 | 84.8k | v1[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1])); |
651 | 84.8k | fp_negm_low(t1, v1); |
652 | 84.8k | fp_copy_sec(v1, t1, RLC_SIGN(m[1])); |
653 | 84.8k | u1[0] = RLC_SEL(m[3], -m[3], RLC_SIGN(m[3])); |
654 | 84.8k | fp_negm_low(t1, u1); |
655 | 84.8k | fp_copy_sec(u1, t1, RLC_SIGN(m[3])); |
656 | | |
657 | 84.8k | dv_copy(p01, v1, 2 * RLC_FP_DIGS); |
658 | 84.8k | dv_copy(p11, u1, 2 * RLC_FP_DIGS); |
659 | | |
660 | 84.8k | loops = iterations / s; |
661 | 84.8k | loops = (iterations % s == 0 ? loops - 1 : loops); |
662 | | |
663 | 763k | for (i = 1; i < loops; i++) { |
664 | 678k | d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s); |
665 | | |
666 | 678k | sf = RLC_SIGN(f[RLC_FP_DIGS]); |
667 | 678k | sg = RLC_SIGN(g[RLC_FP_DIGS]); |
668 | 678k | bn_negs_low(u0, f, sf, RLC_FP_DIGS); |
669 | 678k | bn_negs_low(u1, g, sg, RLC_FP_DIGS); |
670 | | |
671 | 678k | t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[0], RLC_FP_DIGS); |
672 | 678k | t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[1], RLC_FP_DIGS); |
673 | 678k | bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1); |
674 | 678k | bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s); |
675 | | |
676 | 678k | t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[2], RLC_FP_DIGS); |
677 | 678k | t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[3], RLC_FP_DIGS); |
678 | 678k | bn_addn_low(t1, t1, t0, RLC_FP_DIGS + 1); |
679 | 678k | bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s); |
680 | | |
681 | | #ifdef RLC_FP_ROOM |
682 | | p[j] = 0; |
683 | | dv_copy(p + j + 1, fp_prime_get(), RLC_FP_DIGS); |
684 | | |
685 | | /* Update column vector below. */ |
686 | | bn_mul2_low(v0, p01, m[0], RLC_FP_DIGS + j); |
687 | | fp_subd_low(t, p, v0); |
688 | | dv_copy_sec(v0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[0])); |
689 | | |
690 | | bn_mul2_low(v1, p11, m[1], RLC_FP_DIGS + j); |
691 | | fp_subd_low(t, p, v1); |
692 | | dv_copy_sec(v1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[1])); |
693 | | |
694 | | bn_mul2_low(u0, p01, m[2], RLC_FP_DIGS + j); |
695 | | fp_subd_low(t, p, u0); |
696 | | dv_copy_sec(u0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[2])); |
697 | | |
698 | | bn_mul2_low(u1, p11, m[3], RLC_FP_DIGS + j); |
699 | | fp_subd_low(t, p, u1); |
700 | | dv_copy_sec(u1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[3])); |
701 | | |
702 | | j = i % RLC_FP_DIGS; |
703 | | if (j == 0) { |
704 | | fp_addd_low(t, u0, u1); |
705 | | fp_rdc(p11, t); |
706 | | fp_addd_low(t, v0, v1); |
707 | | fp_rdc(p01, t); |
708 | | dv_zero(v0, 2 * RLC_FP_DIGS); |
709 | | dv_zero(v1, 2 * RLC_FP_DIGS); |
710 | | } else { |
711 | | fp_addd_low(p11, u0, u1); |
712 | | fp_addd_low(p01, v0, v1); |
713 | | } |
714 | | #else |
715 | | /* Update column vector below. */ |
716 | 678k | t[0] = RLC_SEL(m[0], -m[0], RLC_SIGN(m[0])); |
717 | 678k | fp_mul(v0, p01, t); |
718 | 678k | fp_neg(t0, v0); |
719 | 678k | fp_copy_sec(v0, t0, RLC_SIGN(m[0])); |
720 | | |
721 | 678k | t[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1])); |
722 | 678k | fp_mul(v1, p11, t); |
723 | 678k | fp_neg(t1, v1); |
724 | 678k | fp_copy_sec(v1, t1, RLC_SIGN(m[1])); |
725 | | |
726 | 678k | t[0] = RLC_SEL(m[2], -m[2], RLC_SIGN(m[2])); |
727 | 678k | fp_mul(u0, p01, t); |
728 | 678k | fp_neg(t0, u0); |
729 | 678k | fp_copy_sec(u0, t0, RLC_SIGN(m[2])); |
730 | | |
731 | 678k | t[0] = RLC_SEL(m[3], -m[3], RLC_SIGN(m[3])); |
732 | 678k | fp_mul(u1, p11, t); |
733 | 678k | fp_neg(t1, u1); |
734 | 678k | fp_copy_sec(u1, t1, RLC_SIGN(m[3])); |
735 | | |
736 | 678k | fp_add(p11, u0, u1); |
737 | 678k | fp_add(p01, v0, v1); |
738 | 678k | #endif |
739 | 678k | } |
740 | | |
741 | 84.8k | s = iterations - loops * s; |
742 | 84.8k | d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s); |
743 | | |
744 | 84.8k | sf = RLC_SIGN(f[RLC_FP_DIGS]); |
745 | 84.8k | sg = RLC_SIGN(g[RLC_FP_DIGS]); |
746 | 84.8k | bn_negs_low(u0, f, sf, RLC_FP_DIGS); |
747 | 84.8k | bn_negs_low(u1, g, sg, RLC_FP_DIGS); |
748 | | |
749 | 84.8k | t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[0], RLC_FP_DIGS); |
750 | 84.8k | t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[1], RLC_FP_DIGS); |
751 | 84.8k | bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1); |
752 | 84.8k | bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s); |
753 | | |
754 | 84.8k | t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[2], RLC_FP_DIGS); |
755 | 84.8k | t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[3], RLC_FP_DIGS); |
756 | 84.8k | bn_addn_low(t1, t1, t0, RLC_FP_DIGS + 1); |
757 | 84.8k | bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s); |
758 | | |
759 | | #ifdef RLC_FP_ROOM |
760 | | p[j] = 0; |
761 | | dv_copy(p + j + 1, fp_prime_get(), RLC_FP_DIGS); |
762 | | |
763 | | /* Update column vector below. */ |
764 | | bn_mul2_low(v0, p01, m[0], RLC_FP_DIGS + j); |
765 | | fp_subd_low(t, p, v0); |
766 | | dv_copy_sec(v0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[0])); |
767 | | |
768 | | bn_mul2_low(v1, p11, m[1], RLC_FP_DIGS + j); |
769 | | fp_subd_low(t, p, v1); |
770 | | dv_copy_sec(v1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[1])); |
771 | | |
772 | | fp_addd_low(t, v0, v1); |
773 | | fp_rdc(p01, t); |
774 | | #else |
775 | 84.8k | (void)j; |
776 | | |
777 | 84.8k | t[0] = RLC_SEL(m[0], -m[0], RLC_SIGN(m[0])); |
778 | 84.8k | fp_mul(v0, p01, t); |
779 | 84.8k | fp_neg(t0, v0); |
780 | 84.8k | fp_copy_sec(v0, t0, RLC_SIGN(m[0])); |
781 | | |
782 | 84.8k | t[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1])); |
783 | 84.8k | fp_mul(v1, p11, t); |
784 | 84.8k | fp_neg(t1, v1); |
785 | 84.8k | fp_copy_sec(v1, t1, RLC_SIGN(m[1])); |
786 | | |
787 | 84.8k | fp_add(p01, v0, v1); |
788 | 84.8k | #endif |
789 | | /* Negate based on sign of f at the end. */ |
790 | 84.8k | fp_negm_low(t, p01); |
791 | 84.8k | dv_copy_sec(p01, t, RLC_FP_DIGS, f[RLC_FP_DIGS] >> (RLC_DIG - 1)); |
792 | | /* Multiply by (precomp * R^j) % p, one for each iteration of the loop, |
793 | | * one for the constant, one more to be removed by reduction. */ |
794 | 84.8k | fp_mul(c, p01, pre); |
795 | 84.8k | } |
796 | 169k | RLC_CATCH_ANY { |
797 | 0 | RLC_THROW(ERR_CAUGHT); |
798 | 0 | } |
799 | 169k | RLC_FINALLY { |
800 | 84.8k | dv_free(t0); |
801 | 84.8k | dv_free(f); |
802 | 84.8k | dv_free(t); |
803 | 84.8k | dv_free(p); |
804 | 84.8k | dv_free(g); |
805 | 84.8k | dv_free(t1); |
806 | 84.8k | dv_free(u0); |
807 | 84.8k | dv_free(u1); |
808 | 84.8k | dv_free(v0); |
809 | 84.8k | dv_free(v1); |
810 | 84.8k | dv_free(p01); |
811 | 84.8k | dv_free(p11); |
812 | 84.8k | fp_free(pre); |
813 | 84.8k | } |
814 | 84.8k | } |
815 | | |
816 | | #endif |
817 | | |
818 | | #if FP_INV == LOWER || !defined(STRIP) |
819 | | |
820 | 0 | void fp_inv_lower(fp_t c, const fp_t a) { |
821 | 0 | if (fp_is_zero(a)) { |
822 | 0 | RLC_THROW(ERR_NO_VALID); |
823 | 0 | return; |
824 | 0 | } |
825 | | |
826 | 0 | fp_invm_low(c, a); |
827 | 0 | } |
828 | | |
829 | | #endif |
830 | | |
831 | 7.48k | void fp_inv_sim(fp_t *c, const fp_t *a, int n) { |
832 | 7.48k | int i; |
833 | 7.48k | fp_t u, *t = RLC_ALLOCA(fp_t, n); |
834 | | |
835 | 7.48k | fp_null(u); |
836 | | |
837 | 7.48k | RLC_TRY { |
838 | 7.48k | if (t == NULL) { |
839 | 0 | RLC_THROW(ERR_NO_MEMORY); |
840 | 0 | } |
841 | 154k | for (i = 0; i < n; i++) { |
842 | 147k | fp_null(t[i]); |
843 | 147k | fp_new(t[i]); |
844 | 147k | } |
845 | 7.48k | fp_new(u); |
846 | | |
847 | 7.48k | fp_copy(t[0], a[0]); |
848 | 7.48k | fp_copy(c[0], a[0]); |
849 | | |
850 | 147k | for (i = 1; i < n; i++) { |
851 | 139k | fp_copy(t[i], a[i]); |
852 | 139k | fp_mul(c[i], c[i - 1], a[i]); |
853 | 139k | } |
854 | | |
855 | 7.48k | fp_inv(u, c[n - 1]); |
856 | | |
857 | 147k | for (i = n - 1; i > 0; i--) { |
858 | 139k | fp_mul(c[i], u, c[i - 1]); |
859 | 139k | fp_mul(u, u, t[i]); |
860 | 139k | } |
861 | 7.48k | fp_copy(c[0], u); |
862 | 7.48k | } |
863 | 14.9k | RLC_CATCH_ANY { |
864 | 0 | RLC_THROW(ERR_CAUGHT); |
865 | 0 | } |
866 | 14.9k | RLC_FINALLY { |
867 | 154k | for (i = 0; i < n; i++) { |
868 | 147k | fp_free(t[i]); |
869 | 147k | } |
870 | 7.48k | fp_free(u); |
871 | | RLC_FREE(t); |
872 | 7.48k | } |
873 | 7.48k | } |