Coverage Report

Created: 2026-02-07 07:14

/src/relic/src/fp/relic_fp_inv.c
Line |  Count | Source
   1 |        | /*
   2 |        |  * RELIC is an Efficient LIbrary for Cryptography
   3 |        |  * Copyright (c) 2009 RELIC Authors
   4 |        |  *
   5 |        |  * This file is part of RELIC. RELIC is legal property of its developers,
   6 |        |  * whose names are not listed here. Please refer to the COPYRIGHT file
   7 |        |  * for contact information.
   8 |        |  *
   9 |        |  * RELIC is free software; you can redistribute it and/or modify it under the
  10 |        |  * terms of the version 2.1 (or later) of the GNU Lesser General Public License
  11 |        |  * as published by the Free Software Foundation; or version 2.0 of the Apache
  12 |        |  * License as published by the Apache Software Foundation. See the LICENSE files
  13 |        |  * for more details.
  14 |        |  *
  15 |        |  * RELIC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 |        |  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  17 |        |  * A PARTICULAR PURPOSE. See the LICENSE files for more details.
  18 |        |  *
  19 |        |  * You should have received a copy of the GNU Lesser General Public or the
  20 |        |  * Apache License along with RELIC. If not, see <https://www.gnu.org/licenses/>
  21 |        |  * or <https://www.apache.org/licenses/>.
  22 |        |  */
  23 |        |
  24 |        | /**
  25 |        |  * @file
  26 |        |  *
  27 |        |  * Implementation of the prime field inversion functions.
  28 |        |  *
  29 |        |  * @ingroup fp
  30 |        |  */
  31 |        |
  32 |        | #include "relic_core.h"
  33 |        | #include "relic_fp_low.h"
  34 |        | #include "relic_bn_low.h"
  35 |        |
  36 |        | /*============================================================================*/
  37 |        | /* Private definitions                                                        */
  38 |        | /*============================================================================*/
  39 |        |
  40 |        | #if FP_INV == JMPDS || !defined(STRIP)
  41 |        |
  42 |        | #ifdef RLC_FP_ROOM
  43 |        |
  44 |        | static void bn_mul2_low(dig_t *c, const dig_t *a, dis_t digit, size_t size) {
  45 |        |   int sd = digit >> (RLC_DIG - 1);
  46 |        |   digit = (digit ^ sd) - sd;
  47 |        |   c[size] = bn_mul1_low(c, a, digit, size);
  48 |        | }
  49 |        |
  50 |        | #endif /* RLC_FP_ROOM */
  51 |        |
  52 |        | #if WSIZE == 8
  53 |        | static int jumpdivstep(dis_t m[4], int delta, dig_t f, dig_t g, int s) {
  54 |        | #else
  55 |   848k | static dis_t jumpdivstep(dis_t m[4], dis_t delta, dig_t f, dig_t g, int s) {
  56 |   848k | #endif
  57 |   848k |   dig_t u = 1, v = 0, q = 0, r = 1, c0, c1;
  58 |        |
  59 |        |   /* This is actually faster than my previous version, several tricks from
  60 |        |    * https://github.com/bitcoin-core/secp256k1/blob/master/src/modinv64_impl.h
  61 |        |    */
  62 |  50.9M |   for (s--; s >= 0; s--) {
  63 |        |     /* First handle the else part: if delta < 0, compute -(f,u,v). */
  64 |  50.1M |     c0 = delta >> (8 * sizeof(delta) - 1);
  65 |  50.1M |     c1 = -(g & 1);
  66 |  50.1M |     c0 &= c1;
  67 |        |     /* Conditionally add -(f,u,v) to (g,q,r) */
  68 |  50.1M |     g += ((f ^ c0) - c0) & c1;
  69 |  50.1M |     q += ((u ^ c0) - c0) & c1;
  70 |  50.1M |     r += ((v ^ c0) - c0) & c1;
  71 |        |     /* Now handle the 'if' part, so c0 will be (delta < 0) && (g & 1) */
  72 |        |     /* delta = RLC_SEL(delta, -delta, c0 & 1) - 2 (for half-divstep), thus
  73 |        |      * delta = - delta - 2 or delta - 1 */
  74 |        | #if WSIZE == 8
  75 |        |     delta = RLC_SEL(delta, -delta, c0 & 1) - 2;
  76 |        | #else
  77 |  50.1M |     delta = (delta ^ c0) - 1;
  78 |  50.1M | #endif
  79 |  50.1M |     f = f + (g & c0);
  80 |  50.1M |     u = u + (q & c0);
  81 |  50.1M |     v = v + (r & c0);
  82 |  50.1M |     g >>= 1;
  83 |  50.1M |     u += u;
  84 |  50.1M |     v += v;
  85 |  50.1M |   }
  86 |   848k |   m[0] = u;
  87 |   848k |   m[1] = v;
  88 |   848k |   m[2] = q;
  89 |   848k |   m[3] = r;
  90 |   848k |   return delta;
  91 |   848k | }
  92 |        |
  93 |        | #endif
  94 |        |
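
For reference, a minimal standalone sketch (not part of relic_fp_inv.c, and using word-sized
integers rather than fp_t) of the plain divstep recurrence from the Bernstein-Yang "safegcd"
paper. jumpdivstep() above batches s such steps into the signed 2x2 transition matrix m[4] and
evaluates them branchlessly; it also tracks a shifted delta (the "half-divstep" mentioned in its
comments), so its constants differ from the textbook form below.

#include <assert.h>
#include <stdint.h>

/* One divstep: f stays odd and g loses one low bit per step. */
static void divstep(int64_t *delta, int64_t *f, int64_t *g) {
	if (*delta > 0 && *g % 2 != 0) {
		/* Swap and subtract: (delta, f, g) <- (1 - delta, g, (g - f)/2). */
		int64_t t = *f;
		*delta = 1 - *delta;
		*f = *g;
		*g = (*g - t) / 2;
	} else {
		/* Keep f: (delta, f, g) <- (1 + delta, f, (g + (g mod 2)*f)/2). */
		*delta = 1 + *delta;
		*g = (*g + (*g % 2 != 0 ? *f : 0)) / 2;
	}
}

int main(void) {
	int64_t delta = 1, f = 23, g = 9;	/* f odd, gcd(23, 9) = 1 */

	for (int i = 0; i < 32 && g != 0; i++) {
		divstep(&delta, &f, &g);
	}
	/* Once g reaches 0, f holds the gcd up to sign. */
	assert(g == 0 && (f == 1 || f == -1));
	return 0;
}
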
  95 |        | /*============================================================================*/
  96 |        | /* Public definitions                                                         */
  97 |        | /*============================================================================*/
  98 |        |
  99 |        | #if FP_INV == BASIC || !defined(STRIP)
 100 |        |
 101 |      0 | void fp_inv_basic(fp_t c, const fp_t a) {
 102 |      0 |   bn_t e;
 103 |        |
 104 |      0 |   bn_null(e);
 105 |        |
 106 |      0 |   if (fp_is_zero(a)) {
 107 |      0 |     RLC_THROW(ERR_NO_VALID);
 108 |      0 |     return;
 109 |      0 |   }
 110 |        |
 111 |      0 |   RLC_TRY {
 112 |      0 |     bn_new(e);
 113 |        |
 114 |      0 |     e->used = RLC_FP_DIGS;
 115 |      0 |     dv_copy(e->dp, fp_prime_get(), RLC_FP_DIGS);
 116 |      0 |     bn_sub_dig(e, e, 2);
 117 |        |
 118 |      0 |     fp_exp(c, a, e);
 119 |      0 |   }
 120 |      0 |   RLC_CATCH_ANY {
 121 |      0 |     RLC_THROW(ERR_CAUGHT);
 122 |      0 |   }
 123 |      0 |   RLC_FINALLY {
 124 |      0 |     bn_free(e);
 125 |      0 |   }
 126 |      0 | }
 127 |        |
 128 |        | #endif
 129 |        |
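
fp_inv_basic() is inversion by Fermat's little theorem: for a prime p and a != 0,
a^(p-2) = a^(-1) (mod p), so it exponentiates by p - 2 with fp_exp(). A minimal standalone
sketch of the same idea on machine words (not RELIC code; the prime 65537 is chosen only so
that the products fit in 64 bits):

#include <assert.h>
#include <stdint.h>

static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
	return (a * b) % p;	/* no overflow as long as p < 2^32 */
}

/* Right-to-left square-and-multiply, the word-sized analogue of fp_exp(). */
static uint64_t powmod(uint64_t a, uint64_t e, uint64_t p) {
	uint64_t r = 1;

	for (a %= p; e > 0; e >>= 1) {
		if (e & 1) {
			r = mulmod(r, a, p);
		}
		a = mulmod(a, a, p);
	}
	return r;
}

int main(void) {
	uint64_t p = 65537, a = 1234;
	uint64_t inv = powmod(a, p - 2, p);

	assert(mulmod(a, inv, p) == 1);	/* a * a^(-1) = 1 (mod p) */
	return 0;
}
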
 130 |        | #if FP_INV == BINAR || !defined(STRIP)
 131 |        |
 132 |      0 | void fp_inv_binar(fp_t c, const fp_t a) {
 133 |      0 |   bn_t u, v, g1, g2, p;
 134 |        |
 135 |      0 |   bn_null(u);
 136 |      0 |   bn_null(v);
 137 |      0 |   bn_null(g1);
 138 |      0 |   bn_null(g2);
 139 |      0 |   bn_null(p);
 140 |        |
 141 |      0 |   if (fp_is_zero(a)) {
 142 |      0 |     RLC_THROW(ERR_NO_VALID);
 143 |      0 |     return;
 144 |      0 |   }
 145 |        |
 146 |      0 |   RLC_TRY {
 147 |      0 |     bn_new(u);
 148 |      0 |     bn_new(v);
 149 |      0 |     bn_new(g1);
 150 |      0 |     bn_new(g2);
 151 |      0 |     bn_new(p);
 152 |        |
 153 |        |     /* u = a, v = p, g1 = 1, g2 = 0. */
 154 |      0 |     fp_prime_back(u, a);
 155 |      0 |     p->used = RLC_FP_DIGS;
 156 |      0 |     dv_copy(p->dp, fp_prime_get(), RLC_FP_DIGS);
 157 |      0 |     bn_copy(v, p);
 158 |      0 |     bn_set_dig(g1, 1);
 159 |      0 |     bn_zero(g2);
 160 |        |
 161 |        |     /* While (u != 1 && v != 1). */
 162 |      0 |     while (1) {
 163 |        |       /* While u is even do. */
 164 |      0 |       while (!(u->dp[0] & 1)) {
 165 |        |         /* u = u/2. */
 166 |      0 |         fp_rsh1_low(u->dp, u->dp);
 167 |        |         /* If g1 is even then g1 = g1/2; else g1 = (g1 + p)/2. */
 168 |      0 |         if (g1->dp[0] & 1) {
 169 |      0 |           bn_add(g1, g1, p);
 170 |      0 |         }
 171 |      0 |         bn_hlv(g1, g1);
 172 |      0 |       }
 173 |        |
 174 |      0 |       while (u->dp[u->used - 1] == 0) {
 175 |      0 |         u->used--;
 176 |      0 |       }
 177 |      0 |       if (u->used == 1 && u->dp[0] == 1)
 178 |      0 |         break;
 179 |        |
 180 |        |       /* While v is even do. */
 181 |      0 |       while (!(v->dp[0] & 1)) {
 182 |        |         /* v = v/2. */
 183 |      0 |         fp_rsh1_low(v->dp, v->dp);
 184 |        |         /* If g2 is even then g2 = g2/2; else g2 = (g2 + p)/2. */
 185 |      0 |         if (g2->dp[0] & 1) {
 186 |      0 |           bn_add(g2, g2, p);
 187 |      0 |         }
 188 |      0 |         bn_hlv(g2, g2);
 189 |      0 |       }
 190 |        |
 191 |      0 |       while (v->dp[v->used - 1] == 0) {
 192 |      0 |         v->used--;
 193 |      0 |       }
 194 |      0 |       if (v->used == 1 && v->dp[0] == 1)
 195 |      0 |         break;
 196 |        |
 197 |        |       /* If u > v then u = u - v, g1 = g1 - g2. */
 198 |      0 |       if (bn_cmp(u, v) == RLC_GT) {
 199 |      0 |         bn_sub(u, u, v);
 200 |      0 |         bn_sub(g1, g1, g2);
 201 |      0 |       } else {
 202 |      0 |         bn_sub(v, v, u);
 203 |      0 |         bn_sub(g2, g2, g1);
 204 |      0 |       }
 205 |      0 |     }
 206 |        |     /* If u == 1 then return g1; else return g2. */
 207 |      0 |     if (bn_cmp_dig(u, 1) == RLC_EQ) {
 208 |      0 |       while (bn_sign(g1) == RLC_NEG) {
 209 |      0 |         bn_add(g1, g1, p);
 210 |      0 |       }
 211 |      0 |       while (bn_cmp(g1, p) != RLC_LT) {
 212 |      0 |         bn_sub(g1, g1, p);
 213 |      0 |       }
 214 |      0 | #if FP_RDC == MONTY
 215 |      0 |       fp_prime_conv(c, g1);
 216 |        | #else
 217 |        |       dv_copy(c, g1->dp, RLC_FP_DIGS);
 218 |        | #endif
 219 |      0 |     } else {
 220 |      0 |       while (bn_sign(g2) == RLC_NEG) {
 221 |      0 |         bn_add(g2, g2, p);
 222 |      0 |       }
 223 |      0 |       while (bn_cmp(g2, p) != RLC_LT) {
 224 |      0 |         bn_sub(g2, g2, p);
 225 |      0 |       }
 226 |      0 | #if FP_RDC == MONTY
 227 |      0 |       fp_prime_conv(c, g2);
 228 |        | #else
 229 |        |       dv_copy(c, g2->dp, RLC_FP_DIGS);
 230 |        | #endif
 231 |      0 |     }
 232 |      0 |   }
 233 |      0 |   RLC_CATCH_ANY {
 234 |      0 |     RLC_THROW(ERR_CAUGHT);
 235 |      0 |   }
 236 |      0 |   RLC_FINALLY {
 237 |      0 |     bn_free(u);
 238 |      0 |     bn_free(v);
 239 |      0 |     bn_free(g1);
 240 |      0 |     bn_free(g2);
 241 |      0 |     bn_free(p);
 242 |      0 |   }
 243 |      0 | }
 244 |        |
 245 |        | #endif
 246 |        |
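
fp_inv_binar() above is the binary extended Euclidean algorithm: it keeps the invariants
g1 * a = u (mod p) and g2 * a = v (mod p), halving a coefficient modulo p (adding p first when
it is odd) whenever u or v is halved, so the matching coefficient is the inverse as soon as u
or v reaches 1. A minimal standalone sketch on machine words (not RELIC code; p = 65537 is
illustrative only):

#include <assert.h>
#include <stdint.h>

static int64_t inv_binar(int64_t a, int64_t p) {	/* p odd prime, 0 < a < p */
	int64_t u = a, v = p, g1 = 1, g2 = 0, r;

	while (1) {
		while (u % 2 == 0) {		/* u = u/2, so g1 = g1/2 (mod p) */
			u /= 2;
			g1 = (g1 % 2 != 0) ? (g1 + p) / 2 : g1 / 2;
		}
		if (u == 1) {
			break;
		}
		while (v % 2 == 0) {		/* v = v/2, so g2 = g2/2 (mod p) */
			v /= 2;
			g2 = (g2 % 2 != 0) ? (g2 + p) / 2 : g2 / 2;
		}
		if (v == 1) {
			break;
		}
		if (u > v) {			/* subtract the smaller from the larger */
			u -= v;
			g1 -= g2;
		} else {
			v -= u;
			g2 -= g1;
		}
	}
	r = (u == 1) ? g1 : g2;
	while (r < 0) {				/* normalize into [0, p), as above */
		r += p;
	}
	while (r >= p) {
		r -= p;
	}
	return r;
}

int main(void) {
	int64_t p = 65537, a = 12345;

	assert((a * inv_binar(a, p)) % p == 1);
	return 0;
}
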
 247 |        | #if FP_INV == MONTY || !defined(STRIP)
 248 |        |
 249 |      0 | void fp_inv_monty(fp_t c, const fp_t a) {
 250 |      0 |   bn_t _a, _p, u, v, x1, x2;
 251 |      0 |   const dig_t *p = NULL;
 252 |      0 |   dig_t carry;
 253 |      0 |   int i, k;
 254 |        |
 255 |      0 |   bn_null(_a);
 256 |      0 |   bn_null(_p);
 257 |      0 |   bn_null(u);
 258 |      0 |   bn_null(v);
 259 |      0 |   bn_null(x1);
 260 |      0 |   bn_null(x2);
 261 |        |
 262 |      0 |   if (fp_is_zero(a)) {
 263 |      0 |     RLC_THROW(ERR_NO_VALID);
 264 |      0 |     return;
 265 |      0 |   }
 266 |        |
 267 |      0 |   RLC_TRY {
 268 |      0 |     bn_new(_a);
 269 |      0 |     bn_new(_p);
 270 |      0 |     bn_new(u);
 271 |      0 |     bn_new(v);
 272 |      0 |     bn_new(x1);
 273 |      0 |     bn_new(x2);
 274 |        |
 275 |      0 |     p = fp_prime_get();
 276 |        |
 277 |        |     /* u = a, v = p, x1 = 1, x2 = 0, k = 0. */
 278 |      0 |     k = 0;
 279 |      0 |     bn_set_dig(x1, 1);
 280 |      0 |     bn_zero(x2);
 281 |        |
 282 |        | #if FP_RDC != MONTY
 283 |        |     bn_read_raw(_a, a, RLC_FP_DIGS);
 284 |        |     bn_read_raw(_p, p, RLC_FP_DIGS);
 285 |        |     bn_mod_monty_conv(u, _a, _p);
 286 |        | #else
 287 |      0 |     bn_read_raw(u, a, RLC_FP_DIGS);
 288 |      0 | #endif
 289 |      0 |     bn_read_raw(v, p, RLC_FP_DIGS);
 290 |        |
 291 |      0 |     while (!bn_is_zero(v)) {
 292 |        |       /* If v is even then v = v/2, x1 = 2 * x1. */
 293 |      0 |       if (!(v->dp[0] & 1)) {
 294 |      0 |         fp_rsh1_low(v->dp, v->dp);
 295 |      0 |         bn_dbl(x1, x1);
 296 |      0 |       } else {
 297 |        |         /* If u is even then u = u/2, x2 = 2 * x2. */
 298 |      0 |         if (!(u->dp[0] & 1)) {
 299 |      0 |           fp_rsh1_low(u->dp, u->dp);
 300 |      0 |           bn_dbl(x2, x2);
 301 |        |           /* If v >= u, then v = (v - u)/2, x2 += x1, x1 = 2 * x1. */
 302 |      0 |         } else {
 303 |      0 |           if (bn_cmp(v, u) != RLC_LT) {
 304 |      0 |             fp_subn_low(v->dp, v->dp, u->dp);
 305 |      0 |             fp_rsh1_low(v->dp, v->dp);
 306 |      0 |             bn_add(x2, x2, x1);
 307 |      0 |             bn_dbl(x1, x1);
 308 |      0 |           } else {
 309 |        |             /* u = (u - v)/2, x1 += x2, x2 = 2 * x2. */
 310 |      0 |             fp_subn_low(u->dp, u->dp, v->dp);
 311 |      0 |             fp_rsh1_low(u->dp, u->dp);
 312 |      0 |             bn_add(x1, x1, x2);
 313 |      0 |             bn_dbl(x2, x2);
 314 |      0 |           }
 315 |      0 |         }
 316 |      0 |       }
 317 |      0 |       bn_trim(u);
 318 |      0 |       bn_trim(v);
 319 |      0 |       k++;
 320 |      0 |     }
 321 |        |
 322 |        |     /* If x1 > p then x1 = x1 - p. */
 323 |      0 |     for (i = x1->used; i < RLC_FP_DIGS; i++) {
 324 |      0 |       x1->dp[i] = 0;
 325 |      0 |     }
 326 |        |
 327 |      0 |     while (x1->used > RLC_FP_DIGS) {
 328 |      0 |       carry = bn_subn_low(x1->dp, x1->dp, fp_prime_get(), RLC_FP_DIGS);
 329 |      0 |       bn_sub1_low(x1->dp + RLC_FP_DIGS, x1->dp + RLC_FP_DIGS, carry,
 330 |      0 |           x1->used - RLC_FP_DIGS);
 331 |      0 |       bn_trim(x1);
 332 |      0 |     }
 333 |      0 |     if (dv_cmp(x1->dp, fp_prime_get(), RLC_FP_DIGS) == RLC_GT) {
 334 |      0 |       fp_subn_low(x1->dp, x1->dp, fp_prime_get());
 335 |      0 |     }
 336 |        |
 337 |        |     /* If k < Wt then x1 = x1 * R^2 * R^{-1} mod p. */
 338 |      0 |     if (k <= RLC_FP_DIGS * RLC_DIG) {
 339 |      0 |       k = k + RLC_FP_DIGS * RLC_DIG;
 340 |      0 | #if FP_RDC == MONTY
 341 |      0 |       fp_mul(x1->dp, x1->dp, fp_prime_get_conv());
 342 |      0 | #endif
 343 |      0 |     }
 344 |        |
 345 |      0 | #if FP_RDC == MONTY
 346 |        |     /* x1 = x1 * R^2 * R^{-1} mod p. */
 347 |      0 |     fp_mul(x1->dp, x1->dp, fp_prime_get_conv());
 348 |      0 | #endif
 349 |        |     /* c = x1 * 2^(2Wt - k) * R^{-1} mod p. */
 350 |      0 |     fp_copy(c, x1->dp);
 351 |      0 |     dv_zero(x1->dp, RLC_FP_DIGS);
 352 |      0 |     bn_set_2b(x1, 2 * RLC_FP_DIGS * RLC_DIG - k);
 353 |      0 |     fp_mul(c, c, x1->dp);
 354 |        |
 355 |        | #if FP_RDC != MONTY
 356 |        |     /*
 357 |        |      * If we do not use Montgomery reduction, convert back.
 358 |        |      */
 359 |        |     bn_read_raw(_a, c, RLC_FP_DIGS);
 360 |        |     bn_mod_monty_back(_a, _a, _p);
 361 |        |
 362 |        |     fp_zero(c);
 363 |        |     dv_copy(c, _a->dp, _a->used);
 364 |        | #endif
 365 |      0 |   }
 366 |      0 |   RLC_CATCH_ANY {
 367 |      0 |     RLC_THROW(ERR_CAUGHT);
 368 |      0 |   }
 369 |      0 |   RLC_FINALLY {
 370 |      0 |     bn_free(_a);
 371 |      0 |     bn_free(_p);
 372 |      0 |     bn_free(u);
 373 |      0 |     bn_free(v);
 374 |      0 |     bn_free(x1);
 375 |      0 |     bn_free(x2);
 376 |      0 |   }
 377 |      0 | }
 378 |        |
 379 |        | #endif
 380 |        |
 381 |        | #if FP_INV == EXGCD || !defined(STRIP)
 382 |        |
 383 |      0 | void fp_inv_exgcd(fp_t c, const fp_t a) {
 384 |      0 |   bn_t u, v, g1, g2, p, q, r;
 385 |        |
 386 |      0 |   bn_null(u);
 387 |      0 |   bn_null(v);
 388 |      0 |   bn_null(g1);
 389 |      0 |   bn_null(g2);
 390 |      0 |   bn_null(p);
 391 |      0 |   bn_null(q);
 392 |      0 |   bn_null(r);
 393 |        |
 394 |      0 |   if (fp_is_zero(a)) {
 395 |      0 |     RLC_THROW(ERR_NO_VALID);
 396 |      0 |     return;
 397 |      0 |   }
 398 |        |
 399 |      0 |   RLC_TRY {
 400 |      0 |     bn_new(u);
 401 |      0 |     bn_new(v);
 402 |      0 |     bn_new(g1);
 403 |      0 |     bn_new(g2);
 404 |      0 |     bn_new(p);
 405 |      0 |     bn_new(q);
 406 |      0 |     bn_new(r);
 407 |        |
 408 |        |     /* u = a, v = p, g1 = 1, g2 = 0. */
 409 |      0 |     fp_prime_back(u, a);
 410 |      0 |     p->used = RLC_FP_DIGS;
 411 |      0 |     dv_copy(p->dp, fp_prime_get(), RLC_FP_DIGS);
 412 |      0 |     bn_copy(v, p);
 413 |      0 |     bn_set_dig(g1, 1);
 414 |      0 |     bn_zero(g2);
 415 |        |
 416 |        |     /* While (u != 1). */
 417 |      0 |     while (bn_cmp_dig(u, 1) != RLC_EQ) {
 418 |        |       /* q = [v/u], r = v mod u. */
 419 |      0 |       bn_div_rem(q, r, v, u);
 420 |        |       /* v = u, u = r. */
 421 |      0 |       bn_copy(v, u);
 422 |      0 |       bn_copy(u, r);
 423 |        |       /* r = g2 - q * g1. */
 424 |      0 |       bn_mul(r, q, g1);
 425 |      0 |       bn_sub(r, g2, r);
 426 |        |       /* g2 = g1, g1 = r. */
 427 |      0 |       bn_copy(g2, g1);
 428 |      0 |       bn_copy(g1, r);
 429 |      0 |     }
 430 |        |
 431 |      0 |     if (bn_sign(g1) == RLC_NEG) {
 432 |      0 |       bn_add(g1, g1, p);
 433 |      0 |     }
 434 |      0 |     fp_prime_conv(c, g1);
 435 |      0 |   }
 436 |      0 |   RLC_CATCH_ANY {
 437 |      0 |     RLC_THROW(ERR_CAUGHT);
 438 |      0 |   }
 439 |      0 |   RLC_FINALLY {
 440 |      0 |     bn_free(u);
 441 |      0 |     bn_free(v);
 442 |      0 |     bn_free(g1);
 443 |      0 |     bn_free(g2);
 444 |      0 |     bn_free(p);
 445 |      0 |     bn_free(q);
 446 |      0 |     bn_free(r);
 447 |      0 |   }
 448 |      0 | }
 449 |        |
 450 |        | #endif
 451 |        |
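
fp_inv_exgcd() is the classic extended Euclidean algorithm; the loop maintains
g1 * a = u (mod p), so it can stop as soon as u reaches 1 and return g1. A minimal standalone
sketch on machine words (not RELIC code; p = 65537 is illustrative, and a single final addition
of p suffices because |g1| stays below p):

#include <assert.h>
#include <stdint.h>

static int64_t inv_exgcd(int64_t a, int64_t p) {	/* p prime, 0 < a < p */
	int64_t u = a, v = p, g1 = 1, g2 = 0, q, r;

	while (u != 1) {
		q = v / u;		/* q = [v/u], r = v mod u */
		r = v % u;
		v = u;			/* v = u, u = r */
		u = r;
		r = g2 - q * g1;	/* r = g2 - q * g1 */
		g2 = g1;		/* g2 = g1, g1 = r */
		g1 = r;
	}
	return (g1 < 0) ? g1 + p : g1;
}

int main(void) {
	int64_t p = 65537, a = 12345;

	assert((a * inv_exgcd(a, p)) % p == 1);
	return 0;
}
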
 452 |        | #if FP_INV == DIVST || !defined(STRIP)
 453 |        |
 454 |      0 | void fp_inv_divst(fp_t c, const fp_t a) {
 455 |        |   /* Compute number of iterations based on modulus size. */
 456 |        | #if FP_PRIME < 46
 457 |        |   const int d = (49 * FP_PRIME + 80) / 17;
 458 |        | #else
 459 |      0 |   const int d = (49 * FP_PRIME + 57) / 17;
 460 |      0 | #endif
 461 |      0 |   int g0, d0;
 462 |      0 |   dig_t fs, gs, delta = 1;
 463 |      0 |   bn_t _t;
 464 |      0 |   fp_t f, g, u, v, r;
 465 |      0 |   dv_t t;
 466 |        |
 467 |      0 |   bn_null(_t);
 468 |      0 |   dv_null(t);
 469 |      0 |   fp_null(f);
 470 |      0 |   fp_null(g);
 471 |      0 |   fp_null(u);
 472 |      0 |   fp_null(v);
 473 |      0 |   fp_null(r);
 474 |        |
 475 |      0 |   if (fp_is_zero(a)) {
 476 |      0 |     RLC_THROW(ERR_NO_VALID);
 477 |      0 |     return;
 478 |      0 |   }
 479 |        |
 480 |      0 |   RLC_TRY {
 481 |      0 |     bn_new(_t);
 482 |      0 |     dv_new(t);
 483 |      0 |     fp_new(f);
 484 |      0 |     fp_new(g);
 485 |      0 |     fp_new(u);
 486 |      0 |     fp_new(v);
 487 |      0 |     fp_new(r);
 488 |        |
 489 |      0 |     fp_zero(v);
 490 |      0 |     fp_set_dig(r, 1);
 491 |      0 |     dv_copy(f, fp_prime_get(), RLC_FP_DIGS);
 492 |      0 | #if FP_RDC == MONTY
 493 |        |     /* Convert a from Montgomery form. */
 494 |      0 |     dv_zero(t, 2 * RLC_FP_DIGS);
 495 |      0 |     fp_copy(t, a);
 496 |      0 |     fp_rdcn_low(g, t);
 497 |        | #else
 498 |        |     fp_copy(g, a);
 499 |        | #endif
 500 |      0 |     fs = gs = RLC_POS;
 501 |        |
 502 |      0 |     for (int i = 0; i < d; i++) {
 503 |      0 |       g0 = g[0] & 1;
 504 |      0 |       d0 = delta >> (RLC_DIG - 1);
 505 |      0 |       d0 = g0 & ~d0;
 506 |        |       /* Conditionally negate delta if d0 is set. */
 507 |      0 |       delta = (delta ^ -d0) + d0;
 508 |        |       /* Conditionally swap based on d0. */
 509 |      0 |       dv_swap_sec(r, v, RLC_FP_DIGS, d0);
 510 |      0 |       fp_negm_low(t, r);
 511 |      0 |       dv_swap_sec(f, g, RLC_FP_DIGS, d0);
 512 |      0 |       dv_copy_sec(r, t, RLC_FP_DIGS, d0);
 513 |      0 |       for (int j = 0; j < RLC_FP_DIGS; j++) {
 514 |      0 |         g[j] = RLC_SEL(g[j], ~g[j], d0);
 515 |      0 |       }
 516 |      0 |       fp_add1_low(g, g, d0);
 517 |      0 |       t[0] = (fs ^ gs) & (-d0);
 518 |      0 |       fs ^= t[0];
 519 |      0 |       gs ^= t[0] ^ d0;
 520 |        |
 521 |      0 |       delta++;
 522 |      0 |       g0 = g[0] & 1;
 523 |      0 |       for (int j = 0; j < RLC_FP_DIGS; j++) {
 524 |      0 |         t[j] = v[j] & (-g0);
 525 |      0 |         u[j] = f[j] & (-g0);
 526 |      0 |       }
 527 |      0 |       fp_addm_low(r, r, t);
 528 |      0 |       fp_dblm_low(v, v);
 529 |        |
 530 |        |       /* Compute g = (g + g0*f) div 2 by conditionally copying f to u and
 531 |        |        * updating the sign of g. */
 532 |      0 |       gs ^= g0 & (fs ^ bn_addn_low(g, g, u, RLC_FP_DIGS));
 533 |        |       /* Shift and restore the sign. */
 534 |      0 |       fp_rsh1_low(g, g);
 535 |      0 |       g[RLC_FP_DIGS - 1] |= (dig_t)gs << (RLC_DIG - 1);
 536 |      0 |     }
 537 |      0 |     fp_neg(t, v);
 538 |      0 |     fp_copy_sec(v, t, fs);
 539 |        |
 540 |      0 |     dv_copy(t, fp_prime_get(), RLC_FP_DIGS);
 541 |      0 |     fp_add_dig(t, t, 1);
 542 |      0 |     fp_hlv(t, t);
 543 |        | #if WSIZE == 8
 544 |        |     bn_set_dig(_t, d >> 8);
 545 |        |     bn_lsh(_t, _t, 8);
 546 |        |     bn_add_dig(_t, _t, d & 0xFF);
 547 |        | #else
 548 |      0 |     bn_set_dig(_t, d);
 549 |      0 | #endif
 550 |      0 |     fp_exp(t, t, _t);
 551 |        |
 552 |      0 |     fp_mul(c, v, t);
 553 |      0 |   } RLC_CATCH_ANY {
 554 |      0 |     RLC_THROW(ERR_CAUGHT)
 555 |      0 |   } RLC_FINALLY {
 556 |      0 |     bn_free(_t);
 557 |      0 |     dv_free(t);
 558 |      0 |     fp_free(f);
 559 |      0 |     fp_free(g);
 560 |      0 |     fp_free(u);
 561 |      0 |     fp_free(v);
 562 |      0 |     fp_free(r);
 563 |      0 |   }
 564 |      0 | }
 565 |        |
 566 |        | #endif
 567 |        |
 568 |        | #if FP_INV == JMPDS || !defined(STRIP)
 569 |        |
 570 |  84.8k | void fp_inv_jmpds(fp_t c, const fp_t a) {
 571 |  84.8k |   dis_t m[4];
 572 |        |   /* Compute number of iterations based on modulus size. */
 573 |        |   /* Iterations taken directly from https://github.com/sipa/safegcd-bounds */
 574 |  84.8k |   const int iterations = (45907 * FP_PRIME + 26313) / 19929;
 575 |  84.8k |   int loops, i, j = 0, s = RLC_DIG - 2;
 576 |  84.8k |   dv_t f, g, t, p, t0, t1, u0, u1, v0, v1, p01, p11;
 577 |  84.8k |   dig_t sf, sg;
 578 |  84.8k |   fp_t pre;
 579 |        | #if WSIZE == 8
 580 |        |   int d = -1;
 581 |        | #else
 582 |  84.8k |   dis_t d = -1;
 583 |  84.8k | #endif
 584 |        |
 585 |  84.8k |   if (fp_is_zero(a)) {
 586 |      0 |     RLC_THROW(ERR_NO_VALID);
 587 |      0 |     return;
 588 |      0 |   }
 589 |        |
 590 |  84.8k |   dv_null(f);
 591 |  84.8k |   dv_null(g);
 592 |  84.8k |   dv_null(t);
 593 |  84.8k |   dv_null(p);
 594 |  84.8k |   dv_null(t0);
 595 |  84.8k |   dv_null(t1);
 596 |  84.8k |   dv_null(u0);
 597 |  84.8k |   dv_null(u1);
 598 |  84.8k |   dv_null(v0);
 599 |  84.8k |   dv_null(v1);
 600 |  84.8k |   dv_null(p01);
 601 |  84.8k |   dv_null(p11);
 602 |  84.8k |   fp_null(pre);
 603 |        |
 604 |  84.8k |   RLC_TRY {
 605 |  84.8k |     dv_new(t0);
 606 |  84.8k |     dv_new(f);
 607 |  84.8k |     dv_new(t);
 608 |  84.8k |     dv_new(p);
 609 |  84.8k |     dv_new(g);
 610 |  84.8k |     dv_new(t1);
 611 |  84.8k |     dv_new(u0);
 612 |  84.8k |     dv_new(u1);
 613 |  84.8k |     dv_new(v0);
 614 |  84.8k |     dv_new(v1);
 615 |  84.8k |     dv_new(p01);
 616 |  84.8k |     dv_new(p11);
 617 |  84.8k |     fp_new(pre);
 618 |        |
 619 |  84.8k |     fp_copy(pre, core_get()->inv.dp);
 620 |  84.8k |     dv_zero(t, 2 * RLC_FP_DIGS);
 621 |  84.8k |     dv_zero(p, 2 * RLC_FP_DIGS);
 622 |  84.8k |     dv_zero(u0, 2 * RLC_FP_DIGS);
 623 |  84.8k |     dv_zero(u1, 2 * RLC_FP_DIGS);
 624 |  84.8k |     dv_zero(v0, 2 * RLC_FP_DIGS);
 625 |  84.8k |     dv_zero(v1, 2 * RLC_FP_DIGS);
 626 |  84.8k |     dv_copy(f, fp_prime_get(), RLC_FP_DIGS);
 627 |  84.8k | #if FP_RDC == MONTY
 628 |        |     /* Convert a from Montgomery form. */
 629 |  84.8k |     fp_copy(p, a);
 630 |  84.8k |     fp_rdcn_low(g, p);
 631 |        | #else
 632 |        |     fp_copy(g, a);
 633 |        | #endif
 634 |  84.8k |     f[RLC_FP_DIGS] = g[RLC_FP_DIGS] = 0;
 635 |  84.8k |     d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s);
 636 |        |
 637 |  84.8k |     t0[RLC_FP_DIGS] = bn_muls_low(t0, f, RLC_POS, m[0], RLC_FP_DIGS);
 638 |  84.8k |     t1[RLC_FP_DIGS] = bn_muls_low(t1, g, RLC_POS, m[1], RLC_FP_DIGS);
 639 |  84.8k |     bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1);
 640 |        |
 641 |  84.8k |     f[RLC_FP_DIGS] = bn_muls_low(f, f, RLC_POS, m[2], RLC_FP_DIGS);
 642 |  84.8k |     t1[RLC_FP_DIGS] = bn_muls_low(t1, g, RLC_POS, m[3], RLC_FP_DIGS);
 643 |  84.8k |     bn_addn_low(t1, t1, f, RLC_FP_DIGS + 1);
 644 |        |
 645 |        |     /* Update f and g. */
 646 |  84.8k |     bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s);
 647 |  84.8k |     bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s);
 648 |        |
 649 |        |     /* Update column vector below. */
 650 |  84.8k |     v1[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1]));
 651 |  84.8k |     fp_negm_low(t1, v1);
 652 |  84.8k |     fp_copy_sec(v1, t1, RLC_SIGN(m[1]));
 653 |  84.8k |     u1[0] = RLC_SEL(m[3], -m[3], RLC_SIGN(m[3]));
 654 |  84.8k |     fp_negm_low(t1, u1);
 655 |  84.8k |     fp_copy_sec(u1, t1, RLC_SIGN(m[3]));
 656 |        |
 657 |  84.8k |     dv_copy(p01, v1, 2 * RLC_FP_DIGS);
 658 |  84.8k |     dv_copy(p11, u1, 2 * RLC_FP_DIGS);
 659 |        |
 660 |  84.8k |     loops = iterations / s;
 661 |  84.8k |     loops = (iterations % s == 0 ? loops - 1 : loops);
 662 |        |
 663 |   763k |     for (i = 1; i < loops; i++) {
 664 |   678k |       d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s);
 665 |        |
 666 |   678k |       sf = RLC_SIGN(f[RLC_FP_DIGS]);
 667 |   678k |       sg = RLC_SIGN(g[RLC_FP_DIGS]);
 668 |   678k |       bn_negs_low(u0, f, sf, RLC_FP_DIGS);
 669 |   678k |       bn_negs_low(u1, g, sg, RLC_FP_DIGS);
 670 |        |
 671 |   678k |       t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[0], RLC_FP_DIGS);
 672 |   678k |       t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[1], RLC_FP_DIGS);
 673 |   678k |       bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1);
 674 |   678k |       bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s);
 675 |        |
 676 |   678k |       t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[2], RLC_FP_DIGS);
 677 |   678k |       t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[3], RLC_FP_DIGS);
 678 |   678k |       bn_addn_low(t1, t1, t0, RLC_FP_DIGS + 1);
 679 |   678k |       bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s);
 680 |        |
 681 |        | #ifdef RLC_FP_ROOM
 682 |        |       p[j] = 0;
 683 |        |       dv_copy(p + j + 1, fp_prime_get(), RLC_FP_DIGS);
 684 |        |
 685 |        |       /* Update column vector below. */
 686 |        |       bn_mul2_low(v0, p01, m[0], RLC_FP_DIGS + j);
 687 |        |       fp_subd_low(t, p, v0);
 688 |        |       dv_copy_sec(v0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[0]));
 689 |        |
 690 |        |       bn_mul2_low(v1, p11, m[1], RLC_FP_DIGS + j);
 691 |        |       fp_subd_low(t, p, v1);
 692 |        |       dv_copy_sec(v1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[1]));
 693 |        |
 694 |        |       bn_mul2_low(u0, p01, m[2], RLC_FP_DIGS + j);
 695 |        |       fp_subd_low(t, p, u0);
 696 |        |       dv_copy_sec(u0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[2]));
 697 |        |
 698 |        |       bn_mul2_low(u1, p11, m[3], RLC_FP_DIGS + j);
 699 |        |       fp_subd_low(t, p, u1);
 700 |        |       dv_copy_sec(u1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[3]));
 701 |        |
 702 |        |       j = i % RLC_FP_DIGS;
 703 |        |       if (j == 0) {
 704 |        |         fp_addd_low(t, u0, u1);
 705 |        |         fp_rdc(p11, t);
 706 |        |         fp_addd_low(t, v0, v1);
 707 |        |         fp_rdc(p01, t);
 708 |        |         dv_zero(v0, 2 * RLC_FP_DIGS);
 709 |        |         dv_zero(v1, 2 * RLC_FP_DIGS);
 710 |        |       } else {
 711 |        |         fp_addd_low(p11, u0, u1);
 712 |        |         fp_addd_low(p01, v0, v1);
 713 |        |       }
 714 |        | #else
 715 |        |       /* Update column vector below. */
 716 |   678k |       t[0] = RLC_SEL(m[0], -m[0], RLC_SIGN(m[0]));
 717 |   678k |       fp_mul(v0, p01, t);
 718 |   678k |       fp_neg(t0, v0);
 719 |   678k |       fp_copy_sec(v0, t0, RLC_SIGN(m[0]));
 720 |        |
 721 |   678k |       t[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1]));
 722 |   678k |       fp_mul(v1, p11, t);
 723 |   678k |       fp_neg(t1, v1);
 724 |   678k |       fp_copy_sec(v1, t1, RLC_SIGN(m[1]));
 725 |        |
 726 |   678k |       t[0] = RLC_SEL(m[2], -m[2], RLC_SIGN(m[2]));
 727 |   678k |       fp_mul(u0, p01, t);
 728 |   678k |       fp_neg(t0, u0);
 729 |   678k |       fp_copy_sec(u0, t0, RLC_SIGN(m[2]));
 730 |        |
 731 |   678k |       t[0] = RLC_SEL(m[3], -m[3], RLC_SIGN(m[3]));
 732 |   678k |       fp_mul(u1, p11, t);
 733 |   678k |       fp_neg(t1, u1);
 734 |   678k |       fp_copy_sec(u1, t1, RLC_SIGN(m[3]));
 735 |        |
 736 |   678k |       fp_add(p11, u0, u1);
 737 |   678k |       fp_add(p01, v0, v1);
 738 |   678k | #endif
 739 |   678k |     }
 740 |        |
 741 |  84.8k |     s = iterations - loops * s;
 742 |  84.8k |     d = jumpdivstep(m, d, f[0] & RLC_MASK(s), g[0] & RLC_MASK(s), s);
 743 |        |
 744 |  84.8k |     sf = RLC_SIGN(f[RLC_FP_DIGS]);
 745 |  84.8k |     sg = RLC_SIGN(g[RLC_FP_DIGS]);
 746 |  84.8k |     bn_negs_low(u0, f, sf, RLC_FP_DIGS);
 747 |  84.8k |     bn_negs_low(u1, g, sg, RLC_FP_DIGS);
 748 |        |
 749 |  84.8k |     t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[0], RLC_FP_DIGS);
 750 |  84.8k |     t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[1], RLC_FP_DIGS);
 751 |  84.8k |     bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1);
 752 |  84.8k |     bn_rshs_low(f, t0, RLC_FP_DIGS + 1, s);
 753 |        |
 754 |  84.8k |     t0[RLC_FP_DIGS] = bn_muls_low(t0, u0, sf, m[2], RLC_FP_DIGS);
 755 |  84.8k |     t1[RLC_FP_DIGS] = bn_muls_low(t1, u1, sg, m[3], RLC_FP_DIGS);
 756 |  84.8k |     bn_addn_low(t1, t1, t0, RLC_FP_DIGS + 1);
 757 |  84.8k |     bn_rshs_low(g, t1, RLC_FP_DIGS + 1, s);
 758 |        |
 759 |        | #ifdef RLC_FP_ROOM
 760 |        |     p[j] = 0;
 761 |        |     dv_copy(p + j + 1, fp_prime_get(), RLC_FP_DIGS);
 762 |        |
 763 |        |     /* Update column vector below. */
 764 |        |     bn_mul2_low(v0, p01, m[0], RLC_FP_DIGS + j);
 765 |        |     fp_subd_low(t, p, v0);
 766 |        |     dv_copy_sec(v0, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[0]));
 767 |        |
 768 |        |     bn_mul2_low(v1, p11, m[1], RLC_FP_DIGS + j);
 769 |        |     fp_subd_low(t, p, v1);
 770 |        |     dv_copy_sec(v1, t, RLC_FP_DIGS + j + 1, RLC_SIGN(m[1]));
 771 |        |
 772 |        |     fp_addd_low(t, v0, v1);
 773 |        |     fp_rdc(p01, t);
 774 |        | #else
 775 |  84.8k |     (void)j;
 776 |        |
 777 |  84.8k |     t[0] = RLC_SEL(m[0], -m[0], RLC_SIGN(m[0]));
 778 |  84.8k |     fp_mul(v0, p01, t);
 779 |  84.8k |     fp_neg(t0, v0);
 780 |  84.8k |     fp_copy_sec(v0, t0, RLC_SIGN(m[0]));
 781 |        |
 782 |  84.8k |     t[0] = RLC_SEL(m[1], -m[1], RLC_SIGN(m[1]));
 783 |  84.8k |     fp_mul(v1, p11, t);
 784 |  84.8k |     fp_neg(t1, v1);
 785 |  84.8k |     fp_copy_sec(v1, t1, RLC_SIGN(m[1]));
 786 |        |
 787 |  84.8k |     fp_add(p01, v0, v1);
 788 |  84.8k | #endif
 789 |        |     /* Negate based on sign of f at the end. */
 790 |  84.8k |     fp_negm_low(t, p01);
 791 |  84.8k |     dv_copy_sec(p01, t, RLC_FP_DIGS, f[RLC_FP_DIGS] >> (RLC_DIG - 1));
 792 |        |     /* Multiply by (precomp * R^j) % p, one for each iteration of the loop,
 793 |        |      * one for the constant, one more to be removed by reduction. */
 794 |  84.8k |     fp_mul(c, p01, pre);
 795 |  84.8k |   }
 796 |   169k |   RLC_CATCH_ANY {
 797 |      0 |     RLC_THROW(ERR_CAUGHT);
 798 |      0 |   }
 799 |   169k |   RLC_FINALLY {
 800 |  84.8k |     dv_free(t0);
 801 |  84.8k |     dv_free(f);
 802 |  84.8k |     dv_free(t);
 803 |  84.8k |     dv_free(p);
 804 |  84.8k |     dv_free(g);
 805 |  84.8k |     dv_free(t1);
 806 |  84.8k |     dv_free(u0);
 807 |  84.8k |     dv_free(u1);
 808 |  84.8k |     dv_free(v0);
 809 |  84.8k |     dv_free(v1);
 810 |  84.8k |     dv_free(p01);
 811 |  84.8k |     dv_free(p11);
 812 |  84.8k |     fp_free(pre);
 813 |  84.8k |   }
 814 |  84.8k | }
 815 |        |
 816 |        | #endif
 817 |        |
 818 |        | #if FP_INV == LOWER || !defined(STRIP)
 819 |        |
 820 |      0 | void fp_inv_lower(fp_t c, const fp_t a) {
 821 |      0 |   if (fp_is_zero(a)) {
 822 |      0 |     RLC_THROW(ERR_NO_VALID);
 823 |      0 |     return;
 824 |      0 |   }
 825 |        |
 826 |      0 |   fp_invm_low(c, a);
 827 |      0 | }
 828 |        |
 829 |        | #endif
 830 |        |
 831 |  7.48k | void fp_inv_sim(fp_t *c, const fp_t *a, int n) {
 832 |  7.48k |   int i;
 833 |  7.48k |   fp_t u, *t = RLC_ALLOCA(fp_t, n);
 834 |        |
 835 |  7.48k |   fp_null(u);
 836 |        |
 837 |  7.48k |   RLC_TRY {
 838 |  7.48k |     if (t == NULL) {
 839 |      0 |       RLC_THROW(ERR_NO_MEMORY);
 840 |      0 |     }
 841 |   154k |     for (i = 0; i < n; i++) {
 842 |   147k |       fp_null(t[i]);
 843 |   147k |       fp_new(t[i]);
 844 |   147k |     }
 845 |  7.48k |     fp_new(u);
 846 |        |
 847 |  7.48k |     fp_copy(t[0], a[0]);
 848 |  7.48k |     fp_copy(c[0], a[0]);
 849 |        |
 850 |   147k |     for (i = 1; i < n; i++) {
 851 |   139k |       fp_copy(t[i], a[i]);
 852 |   139k |       fp_mul(c[i], c[i - 1], a[i]);
 853 |   139k |     }
 854 |        |
 855 |  7.48k |     fp_inv(u, c[n - 1]);
 856 |        |
 857 |   147k |     for (i = n - 1; i > 0; i--) {
 858 |   139k |       fp_mul(c[i], u, c[i - 1]);
 859 |   139k |       fp_mul(u, u, t[i]);
 860 |   139k |     }
 861 |  7.48k |     fp_copy(c[0], u);
 862 |  7.48k |   }
 863 |  14.9k |   RLC_CATCH_ANY {
 864 |      0 |     RLC_THROW(ERR_CAUGHT);
 865 |      0 |   }
 866 |  14.9k |   RLC_FINALLY {
 867 |   154k |     for (i = 0; i < n; i++) {
 868 |   147k |       fp_free(t[i]);
 869 |   147k |     }
 870 |  7.48k |     fp_free(u);
 871 |        |     RLC_FREE(t);
 872 |  7.48k |   }
 873 |  7.48k | }
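
fp_inv_sim() is Montgomery's simultaneous-inversion trick: it multiplies the n inputs into a
chain of prefix products, inverts only the final product, and then sweeps backwards, peeling
one inverse off per element, so n inversions cost one inversion plus 3(n - 1) multiplications.
A minimal standalone sketch on machine words (not RELIC code; the single inversion is done
with Fermat's little theorem and p = 65537 is illustrative only):

#include <assert.h>
#include <stdint.h>

#define N 4

static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p) {
	return (a * b) % p;	/* no overflow as long as p < 2^32 */
}

static uint64_t invmod(uint64_t a, uint64_t p) {	/* a^(p-2) mod p, p prime */
	uint64_t r = 1, e = p - 2;

	for (a %= p; e > 0; e >>= 1) {
		if (e & 1) {
			r = mulmod(r, a, p);
		}
		a = mulmod(a, a, p);
	}
	return r;
}

int main(void) {
	const uint64_t p = 65537;
	uint64_t a[N] = { 3, 77, 1000, 65535 }, c[N], u;

	c[0] = a[0];
	for (int i = 1; i < N; i++) {		/* c[i] = a[0] * ... * a[i] */
		c[i] = mulmod(c[i - 1], a[i], p);
	}
	u = invmod(c[N - 1], p);		/* the single inversion */
	for (int i = N - 1; i > 0; i--) {
		c[i] = mulmod(u, c[i - 1], p);	/* now c[i] = a[i]^(-1) */
		u = mulmod(u, a[i], p);		/* strip a[i] from the running inverse */
	}
	c[0] = u;
	for (int i = 0; i < N; i++) {
		assert(mulmod(a[i], c[i], p) == 1);
	}
	return 0;
}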