Coverage Report

Created: 2023-02-22 06:39

/src/libecc/src/nn/nn_div.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (C) 2017 - This file is part of libecc project
3
 *
4
 *  Authors:
5
 *      Ryad BENADJILA <ryadbenadjila@gmail.com>
6
 *      Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
7
 *      Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
8
 *
9
 *  Contributors:
10
 *      Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
11
 *      Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
12
 *
13
 *  This software is licensed under a dual BSD and GPL v2 license.
14
 *  See LICENSE file at the root folder of the project.
15
 */
16
#include "nn_div.h"
17
#include "nn_mul.h"
18
#include "nn_logical.h"
19
#include "nn_add.h"
20
#include "nn.h"
21
22
/*
23
 * Some helper functions to perform operations on an arbitrary part
24
 * of a multiprecision number.
25
 * This is exactly the same code as for operations on the least significant
26
 * part of a multiprecision number except for the starting point in the
27
 * array representing it.
28
 * Done in *constant time*.
29
 *
30
 * Operations producing an output are in place.
31
 */
32
33
/*
34
 * Compare all the bits of in2 with the same number of bits in in1 starting at
35
 * 'shift' position in in1. in1 must be long enough for that purpose, i.e.
36
 * in1->wlen >= (in2->wlen + shift). The comparison value is provided in
37
 * 'cmp' parameter. The function returns 0 on success, -1 on error.
38
 *
39
 * The function is an internal helper; it expects initialized nn in1 and
40
 * in2: it does not verify that.
41
 */
42
ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp)
43
2.95M
{
44
2.95M
  int ret, mask, tmp;
45
2.95M
  u8 i;
46
47
2.95M
  MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err);
48
2.95M
  MUST_HAVE((cmp != NULL), ret, err);
49
50
2.95M
  tmp = 0;
51
17.4M
  for (i = in2->wlen; i > 0; i--) {
52
14.5M
    mask = (!(tmp & 0x1));
53
14.5M
    tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask);
54
14.5M
    tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask);
55
14.5M
  }
56
2.95M
  (*cmp) = tmp;
57
2.95M
  ret = 0;
58
59
2.95M
err:
60
2.95M
  return ret;
61
2.95M
}
62
63
/*
64
 * Conditionally subtract a shifted version of in from out, i.e.:
65
 *   - if cnd == 1, out <- out - (in << shift)
66
 *   - if cnd == 0, out <- out
67
 * The function returns 0 on success, -1 on error. On success, 'borrow'
68
 * provides the possible borrow resulting from the subtraction. 'borrow'
69
 * is not meaningful on error.
70
 *
71
 * The function is an internal helper; it expects initialized nn out and
72
 * in: it does not verify that.
73
 */
74
ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in,
75
          u8 shift, word_t *borrow)
76
1.34M
{
77
1.34M
  word_t tmp, borrow1, borrow2, _borrow = WORD(0);
78
1.34M
  word_t mask = WORD_MASK_IFNOTZERO(cnd);
79
1.34M
  int ret;
80
1.34M
  u8 i;
81
82
1.34M
  MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
83
1.34M
  MUST_HAVE((borrow != NULL), ret, err);
84
85
  /*
86
   *  Perform subtraction one word at a time,
87
   *  propagating the borrow.
88
   */
89
8.04M
  for (i = 0; i < in->wlen; i++) {
90
6.69M
    tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask));
91
6.69M
    borrow1 = (word_t)(tmp > out->val[shift + i]);
92
6.69M
    out->val[shift + i] = (word_t)(tmp - _borrow);
93
6.69M
    borrow2 = (word_t)(out->val[shift + i] > tmp);
94
    /* There is at most one borrow going out. */
95
6.69M
    _borrow = (word_t)(borrow1 | borrow2);
96
6.69M
  }
97
98
1.34M
  (*borrow) = _borrow;
99
1.34M
  ret = 0;
100
101
1.34M
err:
102
1.34M
  return ret;
103
1.34M
}
104
105
/*
106
 * Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return
107
 * borrow. The function returns 0 on success, -1 on error. 'borrow' is
108
 * meaningful only on success.
109
 *
110
 * The function is an internal helper; it expects initialized nn out and
111
 * in: it does not verify that.
112
 */
113
ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift,
114
        word_t *borrow)
115
1.08M
{
116
1.08M
  word_t _borrow = WORD(0), prod_high, prod_low, tmp;
117
1.08M
  int ret;
118
1.08M
  u8 i;
119
120
1.08M
  MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
121
1.08M
  MUST_HAVE((borrow != NULL), ret, err);
122
123
6.66M
  for (i = 0; i < in->wlen; i++) {
124
    /*
125
     * Compute the result of the multiplication of
126
     * two words.
127
     */
128
5.58M
    WORD_MUL(prod_high, prod_low, in->val[i], w);
129
130
    /*
131
     * And add previous borrow.
132
     */
133
5.58M
    prod_low = (word_t)(prod_low + _borrow);
134
5.58M
    prod_high = (word_t)(prod_high + (prod_low < _borrow));
135
136
    /*
137
     * Subtract computed word at current position in result.
138
     */
139
5.58M
    tmp = (word_t)(out->val[shift + i] - prod_low);
140
5.58M
    _borrow = (word_t)(prod_high + (tmp > out->val[shift + i]));
141
5.58M
    out->val[shift + i] = tmp;
142
5.58M
  }
143
144
1.08M
  (*borrow) = _borrow;
145
1.08M
  ret = 0;
146
147
1.08M
err:
148
1.08M
  return ret;
149
1.08M
}
150
151
/*
152
 * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
153
 * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on
154
 * return. * Computation are performed in *constant time*, only depending on
155
 * the lengths of 'a' and 'b', but not on the values of 'a' and 'b'.
156
 *
157
 * This uses the above function to perform arithmetic on arbitrary parts
158
 * of multiprecision numbers.
159
 *
160
 * The algorithm used is schoolbook division:
161
 * + the quotient is computed word by word,
162
 * + a small division of the MSW is performed to obtain an
163
 *   approximation of the MSW of the quotient,
164
 * + the approximation is corrected to obtain the correct
165
 *   multiprecision MSW of the quotient,
166
 * + the corresponding product is subtracted from the dividend,
167
 * + the same procedure is used for the following word of the quotient.
168
 *
169
 * It is assumed that:
170
 * + b is normalized: the MSB of its MSW is 1,
171
 * + the most significant part of a is smaller than b,
172
 * + a precomputed reciprocal
173
 *     v = floor(B^3/(d+1)) - B
174
 *   where d is the MSW of the (normalized) divisor
175
 *   is given to perform the small 3-by-2 division.
176
 * + using this reciprocal, the approximated quotient is always
177
 *   too small and at most one multiprecision correction is needed.
178
 *
179
 * It returns 0 on sucess, -1 on error.
180
 *
181
 * CAUTION:
182
 *
183
 * - The function is expected to be used ONLY by the generic version
184
 *   nn_divrem_normalized() defined later in the file.
185
 * - All parameters must have been initialized. Unlike exported/public
186
 *   functions, this internal helper does not verify that nn parameters
187
 *   have been initialized. Again, this is expected from the caller
188
 *   (nn_divrem_normalized()).
189
 * - The function does not support aliasing of 'b' or 'q'. See
190
 *   _nn_divrem_normalized_aliased() for such a wrapper.
191
 *
192
 */
193
ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r,
194
         nn_src_t a, nn_src_t b, word_t v)
195
261k
{
196
261k
  word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */
197
261k
  int small, cmp, ret;
198
261k
  u8 i;
199
200
261k
  MUST_HAVE(!(b->wlen <= 0), ret, err);
201
261k
  MUST_HAVE(!(a->wlen <= b->wlen), ret, err);
202
261k
  MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err);
203
261k
  MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err);
204
205
  /* Handle trivial aliasing for a and r */
206
261k
  if (r != a) {
207
261k
    ret = nn_set_wlen(r, a->wlen); EG(ret, err);
208
261k
    ret = nn_copy(r, a); EG(ret, err);
209
261k
  }
210
211
261k
  ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err);
212
213
  /*
214
   * Compute subsequent words of the quotient one by one.
215
   * Perform approximate 3-by-2 division using the precomputed
216
   * reciprocal and correct afterward.
217
   */
218
1.34M
  for (i = r->wlen; i > b->wlen; i--) {
219
1.08M
    u8 shift = (u8)(i - b->wlen - 1);
220
221
    /*
222
     * Perform 3-by-2 approximate division:
223
     * <qstar, qh, ql> = <rh, rl> * (v + B)
224
     * We are only interested in qstar.
225
     */
226
1.08M
    rh = r->val[i - 1];
227
1.08M
    rl = r->val[i - 2];
228
    /* Perform 2-by-1 multiplication. */
229
1.08M
    WORD_MUL(qh, ql, rl, v);
230
1.08M
    WORD_MUL(qstar, ql, rh, v);
231
    /* And propagate carries. */
232
1.08M
    qh = (word_t)(qh + ql);
233
1.08M
    qstar = (word_t)(qstar + (qh < ql));
234
1.08M
    qh = (word_t)(qh + rl);
235
1.08M
    rh = (word_t)(rh + (qh < rl));
236
1.08M
    qstar = (word_t)(qstar + rh);
237
238
    /*
239
     * Compute approximate quotient times divisor
240
     * and subtract it from remainder:
241
     * r = r - (b*qstar << B^shift)
242
     */
243
1.08M
    ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err);
244
245
    /* Check the approximate quotient was indeed not too large. */
246
1.08M
    MUST_HAVE(!(r->val[i - 1] < borrow), ret, err);
247
1.08M
    r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
248
249
    /*
250
     * Check whether the approximate quotient was too small or not.
251
     * At most one multiprecision correction is needed.
252
     */
253
1.08M
    ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
254
1.08M
    small = ((!!(r->val[i - 1])) | (cmp >= 0));
255
    /* Perform conditional multiprecision correction. */
256
1.08M
    ret = _nn_cnd_sub_shift(small, r, b, shift, &borrow); EG(ret, err);
257
1.08M
    MUST_HAVE(!(r->val[i - 1] != borrow), ret, err);
258
1.08M
    r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
259
    /*
260
     * Adjust the quotient if it was too small and set it in the
261
     * multiprecision array.
262
     */
263
1.08M
    qstar = (word_t)(qstar + (word_t)small);
264
1.08M
    q->val[shift] = qstar;
265
    /*
266
     * Check that the MSW of remainder was cancelled out and that
267
     * we could not increase the quotient anymore.
268
     */
269
1.08M
    MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err);
270
271
1.08M
    ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
272
1.08M
    MUST_HAVE(!(cmp >= 0), ret, err);
273
274
1.08M
    ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err);
275
1.08M
  }
276
277
261k
err:
278
261k
  return ret;
279
261k
}
280
281
/*
282
 * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
283
 * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized.
284
 * Compared to _nn_divrem_normalized(), this internal version
285
 * explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r'
286
 * result is stored in 'b' on success), hence the removal of 'r' parameter from
287
 * function prototype compared to _nn_divrem_normalized().
288
 *
289
 * The computation is performed in *constant time*, see documentation of
290
 * _nn_divrem_normalized().
291
 *
292
 * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
293
 * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
294
 * 'b'.
295
 *
296
 * The function returns 0 on success, -1 on error.
297
 *
298
 * CAUTION:
299
 *
300
 * - The function is expected to be used ONLY by the generic version
301
 *   nn_divrem_normalized() defined later in the file.
302
 * - All parameters must have been initialized. Unlike exported/public
303
 *   functions, this internal helper does not verify that nn parameters
304
 *   have been initialized. Again, this is expected from the caller
305
 *   (nn_divrem_normalized()).
306
 * - The function does not support aliasing of 'a' or 'q'.
307
 *
308
 */
309
ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v)
310
0
{
311
0
  int ret;
312
0
  nn r;
313
0
  r.magic = WORD(0);
314
315
0
  ret = nn_init(&r, 0); EG(ret, err);
316
0
  ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err);
317
0
  ret = nn_copy(b, &r); EG(ret, err);
318
319
0
err:
320
0
  nn_uninit(&r);
321
0
  return ret;
322
0
}
323
324
/*
325
 * Compute quotient and remainder of Euclidean division, and do not normalize
326
 * them. Done in *constant time*, see documentation of _nn_divrem_normalized().
327
 *
328
 * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
329
 * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
330
 * 'b'.
331
 *
332
 * Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point
333
 * to the same nn.
334
 *
335
 * The function returns 0 on success, -1 on error.
336
 */
337
int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v)
338
261k
{
339
261k
  int ret;
340
341
261k
  ret = nn_check_initialized(a); EG(ret, err);
342
261k
  ret = nn_check_initialized(q); EG(ret, err);
343
261k
  ret = nn_check_initialized(r); EG(ret, err);
344
345
261k
  if (r == b) {
346
0
    ret = _nn_divrem_normalized_aliased(q, a, r, v);
347
261k
  } else {
348
261k
    ret = nn_check_initialized(b); EG(ret, err);
349
261k
    ret = _nn_divrem_normalized(q, r, a, b, v);
350
261k
  }
351
352
261k
err:
353
261k
  return ret;
354
261k
}
355
356
/*
357
 * Compute remainder only and do not normalize it.
358
 * Constant time, see documentation of _nn_divrem_normalized.
359
 *
360
 * Support aliasing of inputs and outputs.
361
 *
362
 * The function returns 0 on success, -1 on error.
363
 */
364
int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v)
365
0
{
366
0
  int ret;
367
0
  nn q;
368
0
  q.magic = WORD(0);
369
370
0
  ret = nn_init(&q, 0); EG(ret, err);
371
0
  ret = nn_divrem_normalized(&q, r, a, b, v);
372
373
0
err:
374
0
  nn_uninit(&q);
375
0
  return ret;
376
0
}
377
378
/*
379
 * Compute quotient and remainder of Euclidean division,
380
 * and do not normalize them.
381
 * Done in *constant time*,
382
 * only depending on the lengths of 'a' and 'b' and the value of 'cnt',
383
 * but not on the values of 'a' and 'b'.
384
 *
385
 * Assume that b has been normalized by a 'cnt' bit shift,
386
 * that v is the reciprocal of the MSW of 'b',
387
 * but a is not shifted yet.
388
 * Useful when multiple multiplication by the same b are performed,
389
 * e.g. at the fp level.
390
 *
391
 * All outputs MUST have been initialized. The function does not support
392
 * aliasing of 'b' or 'q'. It returns 0 on success, -1 on error.
393
 *
394
 * CAUTION:
395
 *
396
 * - The function is expected to be used ONLY by the generic version
397
 *   nn_divrem_normalized() defined later in the file.
398
 * - All parameters must have been initialized. Unlike exported/public
399
 *   functions, this internal helper does not verify that
400
 *   have been initialized. Again, this is expected from the caller
401
 *   (nn_divrem_unshifted()).
402
 * - The function does not support aliasing. See
403
 *   _nn_divrem_unshifted_aliased() for such a wrapper.
404
 */
405
ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm,
406
        word_t v, bitcnt_t cnt)
407
281k
{
408
281k
  nn a_shift;
409
281k
  u8 new_wlen, b_wlen;
410
281k
  int larger, ret, cmp;
411
281k
  word_t borrow;
412
281k
  a_shift.magic = WORD(0);
413
414
  /* Avoid overflow */
415
281k
  MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err);
416
417
  /* We now know that new_wlen will fit in an u8 */
418
281k
  new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt));
419
420
281k
  b_wlen = b_norm->wlen;
421
281k
  if (new_wlen < b_wlen) { /* trivial case */
422
15.0k
    ret = nn_copy(r, a); EG(ret, err);
423
15.0k
    ret = nn_zero(q);
424
15.0k
    goto err;
425
15.0k
  }
426
427
  /* Shift a. */
428
266k
  ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err);
429
266k
  ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err);
430
266k
  ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err);
431
266k
  ret = nn_set_wlen(r, new_wlen); EG(ret, err);
432
433
266k
  if (new_wlen == b_wlen) {
434
    /* Ensure that a is smaller than b. */
435
4.67k
    ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err);
436
4.67k
    larger = (cmp >= 0);
437
4.67k
    ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err);
438
4.67k
    MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err);
439
440
    /* Set MSW of quotient. */
441
4.67k
    ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
442
4.67k
    q->val[new_wlen - b_wlen] = (word_t) larger;
443
    /* And we are done as the quotient is 0 or 1. */
444
261k
  } else if (new_wlen > b_wlen) {
445
    /* Ensure that most significant part of a is smaller than b. */
446
261k
    ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err);
447
261k
    larger = (cmp >= 0);
448
261k
    ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err);
449
261k
    MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err);
450
451
    /*
452
     * Perform division with MSP of a smaller than b. This ensures
453
     * that the quotient is of length a_len - b_len.
454
     */
455
261k
    ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err);
456
457
    /* Set MSW of quotient. */
458
261k
    ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
459
261k
    q->val[new_wlen - b_wlen] = (word_t) larger;
460
261k
  } /* else a is smaller than b... treated above. */
461
462
266k
  ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err);
463
266k
  ret = nn_set_wlen(r, b_wlen);
464
465
281k
err:
466
281k
  nn_uninit(&a_shift);
467
468
281k
  return ret;
469
266k
}
470
471
/*
472
 * Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success,
473
 * result 'r' is passed through 'b_norm'.
474
 *
475
 * CAUTION:
476
 *
477
 * - The function is expected to be used ONLY by the generic version
478
 *   nn_divrem_normalized() defined later in the file.
479
 * - All parameter must have been initialized. Unlike exported/public
480
 *   functions, this internal helper does not verify that nn parameters
481
 *   have been initialized. Again, this is expected from the caller
482
 *   (nn_divrem_unshifted()).
483
 */
484
ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm,
485
          word_t v, bitcnt_t cnt)
486
0
{
487
0
  int ret;
488
0
  nn r;
489
0
  r.magic = WORD(0);
490
491
0
  ret = nn_init(&r, 0); EG(ret, err);
492
0
  ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err);
493
0
  ret = nn_copy(b_norm, &r); EG(ret, err);
494
495
0
err:
496
0
  nn_uninit(&r);
497
0
  return ret;
498
0
}
499
500
/*
501
 * Compute quotient and remainder and do not normalize them.
502
 * Constant time, see documentation of _nn_divrem_unshifted().
503
 *
504
 * Alias-supporting version of _nn_divrem_unshifted for 'r' only.
505
 *
506
 * The function returns 0 on success, -1 on error.
507
 */
508
int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b,
509
      word_t v, bitcnt_t cnt)
510
180k
{
511
180k
  int ret;
512
513
180k
  ret = nn_check_initialized(a); EG(ret, err);
514
180k
  ret = nn_check_initialized(q); EG(ret, err);
515
180k
  ret = nn_check_initialized(r); EG(ret, err);
516
517
180k
  if (r == b) {
518
0
    ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt);
519
180k
  } else {
520
180k
    ret = nn_check_initialized(b); EG(ret, err);
521
180k
    ret = _nn_divrem_unshifted(q, r, a, b, v, cnt);
522
180k
  }
523
524
180k
err:
525
180k
  return ret;
526
180k
}
527
528
/*
529
 * Compute remainder only and do not normalize it.
530
 * Constant time, see documentation of _nn_divrem_unshifted.
531
 *
532
 * Aliasing of inputs and outputs is possible.
533
 *
534
 * The function returns 0 on success, -1 on error.
535
 */
536
int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt)
537
180k
{
538
180k
  nn q;
539
180k
  int ret;
540
180k
  q.magic = WORD(0);
541
542
180k
  ret = nn_init(&q, 0); EG(ret, err);
543
180k
  ret = nn_divrem_unshifted(&q, r, a, b, v, cnt);
544
545
180k
err:
546
180k
  nn_uninit(&q);
547
548
180k
  return ret;
549
180k
}
550
551
/*
552
 * Helper functions for arithmetic in 2-by-1 division
553
 * used in the reciprocal computation.
554
 *
555
 * These are variations of the nn multiprecision functions
556
 * acting on arrays of fixed length, in place,
557
 * and returning carry/borrow.
558
 *
559
 * Done in constant time.
560
 */
561
562
/*
563
 * Comparison of two limbs numbers. Internal helper.
564
 * Checks left to the caller
565
 */
566
ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2])
567
514k
{
568
514k
  int mask, ret = 0;
569
514k
  ret += (a[1] > b[1]);
570
514k
  ret -= (a[1] < b[1]);
571
514k
  mask = !(ret & 0x1);
572
514k
  ret += ((a[0] > b[0]) & mask);
573
514k
  ret -= ((a[0] < b[0]) & mask);
574
514k
  return ret;
575
514k
}
576
577
/*
578
 * Addition of two limbs numbers with carry returned. Internal helper.
579
 * Checks left to the caller.
580
 */
581
ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2])
582
71.7k
{
583
71.7k
  word_t carry;
584
71.7k
  a[0]  = (word_t)(a[0] + b[0]);
585
71.7k
  carry = (word_t)(a[0] < b[0]);
586
71.7k
  a[1]  = (word_t)(a[1] + carry);
587
71.7k
  carry = (word_t)(a[1] < carry);
588
71.7k
  a[1]  = (word_t)(a[1] + b[1]);
589
71.7k
  carry = (word_t)(carry | (a[1] < b[1]));
590
71.7k
  return carry;
591
71.7k
}
592
593
/*
594
 * Subtraction of two limbs numbers with borrow returned. Internal helper.
595
 * Checks left to the caller.
596
 */
597
ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2])
598
175k
{
599
175k
  word_t borrow, tmp;
600
175k
  tmp    = (word_t)(a[0] - b[0]);
601
175k
  borrow = (word_t)(tmp > a[0]);
602
175k
  a[0]   = tmp;
603
175k
  tmp    = (word_t)(a[1] - borrow);
604
175k
  borrow = (word_t)(tmp > a[1]);
605
175k
  a[1]   = (word_t)(tmp - b[1]);
606
175k
  borrow = (word_t)(borrow | (a[1] > tmp));
607
175k
  return borrow;
608
175k
}
609
610
/*
611
 * Helper macros for conditional subtraction in 2-by-1 division
612
 * used in the reciprocal computation.
613
 *
614
 * Done in constant time.
615
 */
616
617
/* Conditional subtraction of a one limb number from a two limbs number. */
618
138k
#define WORD_CND_SUB_21(cnd, ah, al, b) do {       \
619
138k
    word_t tmp, mask;         \
620
138k
    mask = WORD_MASK_IFNOTZERO((cnd));      \
621
138k
    tmp  = (word_t)((al) - ((b) & mask));     \
622
138k
    (ah) = (word_t)((ah) - (tmp > (al)));     \
623
138k
    (al) = tmp;           \
624
138k
  } while (0)
625
/* Conditional subtraction of a two limbs number from a two limbs number. */
626
138k
#define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do {     \
627
138k
    word_t tmp, mask;         \
628
138k
    mask = WORD_MASK_IFNOTZERO((cnd));      \
629
138k
    tmp  = (word_t)((al) - ((bl) & mask));      \
630
138k
    (ah) = (word_t)((ah) - (tmp > (al)));     \
631
138k
    (al) = tmp;           \
632
138k
    (ah) = (word_t)((ah) - ((bh) & mask));      \
633
138k
  } while (0)
634
635
/*
636
 * divide two words by a normalized word using schoolbook division on half
637
 * words. This is only used below in the reciprocal computation. No checks
638
 * are performed on inputs. This is expected to be done by the caller.
639
 *
640
 * Returns 0 on success, -1 on error.
641
 */
642
ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b)
643
69.3k
{
644
69.3k
  word_t bh, bl, qh, ql, rm, rhl[2], phl[2];
645
69.3k
  int larger, ret;
646
69.3k
  u8 j;
647
648
69.3k
  MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err);
649
69.3k
  bh = WRSHIFT((b), HWORD_BITS);
650
69.3k
  bl = WLSHIFT((b), HWORD_BITS);
651
69.3k
  rhl[1] = ah;
652
69.3k
  rhl[0] = al;
653
654
  /*
655
   * Compute high part of the quotient. We know from
656
   * MUST_HAVE() check above that bh (a word_t) is not 0
657
   */
658
659
69.3k
  KNOWN_FACT(bh != 0, ret, err);
660
69.3k
  qh = (rhl[1] / bh);
661
69.3k
  qh = WORD_MIN(qh, HWORD_MASK);
662
69.3k
  WORD_MUL(phl[1], phl[0], qh, (b));
663
69.3k
  phl[1] = (WLSHIFT(phl[1], HWORD_BITS) |
664
69.3k
      WRSHIFT(phl[0], HWORD_BITS));
665
69.3k
  phl[0] = WLSHIFT(phl[0], HWORD_BITS);
666
667
207k
  for (j = 0; j < 2; j++) {
668
138k
    larger = (_wcmp_22(phl, rhl) > 0);
669
138k
    qh = (word_t)(qh - (word_t)larger);
670
138k
    WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl);
671
138k
  }
672
673
69.3k
  ret = (_wcmp_22(phl, rhl) > 0);
674
69.3k
  MUST_HAVE(!(ret), ret, err);
675
69.3k
  IGNORE_RET_VAL(_wsub_22(rhl, phl));
676
69.3k
  MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err);
677
678
  /* Compute low part of the quotient. */
679
69.3k
  rm = (WLSHIFT(rhl[1], HWORD_BITS) |
680
69.3k
        WRSHIFT(rhl[0], HWORD_BITS));
681
69.3k
  ql = (rm / bh);
682
69.3k
  ql = WORD_MIN(ql, HWORD_MASK);
683
69.3k
  WORD_MUL(phl[1], phl[0], ql, (b));
684
685
207k
  for (j = 0; j < 2; j++) {
686
138k
    larger = (_wcmp_22(phl, rhl) > 0);
687
138k
    ql = (word_t) (ql - (word_t)larger);
688
138k
    WORD_CND_SUB_21(larger, phl[1], phl[0], (b));
689
138k
  }
690
691
69.3k
  ret = _wcmp_22(phl, rhl) > 0;
692
69.3k
  MUST_HAVE(!(ret), ret, err);
693
69.3k
  IGNORE_RET_VAL(_wsub_22(rhl, phl));
694
  /* Set outputs. */
695
69.3k
  MUST_HAVE((rhl[1] == WORD(0)), ret, err);
696
69.3k
  MUST_HAVE(!(rhl[0] >= (b)), ret, err);
697
69.3k
  (*q) = (WLSHIFT(qh, HWORD_BITS) | ql);
698
69.3k
  (*r) = rhl[0];
699
69.3k
  MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err);
700
69.3k
  ret = 0;
701
702
69.3k
err:
703
69.3k
  return ret;
704
69.3k
}
705
706
/*
707
 * Compute the reciprocal of d as
708
 *  floor(B^3/(d+1)) - B
709
 * which is used to perform approximate small division using a multiplication.
710
 *
711
 * No attempt was made to make it constant time. Indeed, such values are usually
712
 * precomputed in contexts where constant time is wanted, e.g. in the fp layer.
713
 *
714
 * Returns 0 on success, -1 on error.
715
 */
716
int wreciprocal(word_t dh, word_t dl, word_t *reciprocal)
717
101k
{
718
101k
  word_t q, carry, r[2], t[2];
719
101k
  int ret;
720
721
101k
  MUST_HAVE((reciprocal != NULL), ret, err);
722
723
101k
  if (((word_t)(dh + WORD(1)) == WORD(0)) &&
724
101k
      ((word_t)(dl + WORD(1)) == WORD(0))) {
725
27.2k
    (*reciprocal) = WORD(0);
726
27.2k
    ret = 0;
727
27.2k
    goto err;
728
27.2k
  }
729
730
73.7k
  if ((word_t)(dh + WORD(1)) == WORD(0)) {
731
4.42k
    q = (word_t)(~dh);
732
4.42k
    r[1] = (word_t)(~dl);
733
69.3k
  } else {
734
69.3k
    t[1] = (word_t)(~dh);
735
69.3k
    t[0] = (word_t)(~dl);
736
69.3k
    ret = _word_divrem(&q, r+1, t[1], t[0],
737
69.3k
           (word_t)(dh + WORD(1))); EG(ret, err);
738
69.3k
  }
739
740
73.7k
  if ((word_t)(dl + WORD(1)) == WORD(0)) {
741
2.02k
    (*reciprocal) = q;
742
2.02k
    ret = 0;
743
2.02k
    goto err;
744
2.02k
  }
745
746
71.7k
  r[0] = WORD(0);
747
748
71.7k
  WORD_MUL(t[1], t[0], q, (word_t)~dl);
749
71.7k
  carry = _wadd_22(r, t);
750
751
71.7k
  t[0] = (word_t)(dl + WORD(1));
752
71.7k
  t[1] = dh;
753
108k
  while (carry || (_wcmp_22(r, t) >= 0)) {
754
36.6k
    q++;
755
36.6k
    carry = (word_t)(carry - _wsub_22(r, t));
756
36.6k
  }
757
758
71.7k
  (*reciprocal) = q;
759
71.7k
  ret = 0;
760
761
101k
err:
762
101k
  return ret;
763
71.7k
}
764
765
/*
766
 * Given an odd number p, compute division coefficients p_normalized,
767
 * p_shift and p_reciprocal so that:
768
 *  - p_shift = p_rounded_bitlen - bitsizeof(p), where
769
 *          o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
770
 *            minimum number of words required to store p) and
771
 *          o p_bitlen is the real bit size of p
772
 *  - p_normalized = p << p_shift
773
 *  - p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B
774
 *    with B = 2^WORDSIZE
775
 *
776
 * These coefficients are useful for the optimized shifted variants of NN
777
 * division and modular functions. Because we have two word_t outputs
778
 * (p_shift and p_reciprocal), these are passed through word_t pointers.
779
 * Aliasing of outputs with the input is possible since p_in is copied in
780
 * local p at the beginning of the function.
781
 *
782
 * The function returns 0 on success, -1 on error.
783
 */
784
int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift,
785
        word_t *p_reciprocal, nn_src_t p_in)
786
0
{
787
0
  bitcnt_t p_rounded_bitlen, p_bitlen;
788
0
  nn p, tmp_nn;
789
0
  int ret;
790
0
  p.magic = tmp_nn.magic = WORD(0);
791
792
0
  ret = nn_check_initialized(p_in); EG(ret, err);
793
794
0
  MUST_HAVE((p_shift != NULL), ret, err);
795
0
  MUST_HAVE((p_reciprocal != NULL), ret, err);
796
797
0
  ret = nn_init(&p, 0); EG(ret, err);
798
0
  ret = nn_copy(&p, p_in); EG(ret, err);
799
800
  /*
801
   * In order for our reciprocal division routines to work, it is expected
802
   * that the bit length (including leading zeroes) of input prime
803
   * p is >= 2 * wlen where wlen is the number of bits of a word size.
804
   */
805
0
  if (p.wlen < 2) {
806
0
    ret = nn_set_wlen(&p, 2); EG(ret, err);
807
0
  }
808
809
0
  ret = nn_init(p_normalized, 0); EG(ret, err);
810
0
  ret = nn_init(&tmp_nn, 0); EG(ret, err);
811
812
  /* p_rounded_bitlen = bitlen of p rounded to word size */
813
0
  p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
814
815
  /* p_shift */
816
0
  ret = nn_bitlen(&p, &p_bitlen); EG(ret, err);
817
0
  (*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen);
818
819
  /* p_normalized = p << pshift */
820
0
  ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err);
821
822
  /* Sanity check to protect the p_reciprocal computation */
823
0
  MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err);
824
825
  /*
826
   * p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B
827
   * where B = 2^wlen where wlen = word size in bits. We use our NN
828
   * helper to compute it.
829
   */
830
0
  ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err);
831
0
  ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal);
832
833
0
err:
834
0
  nn_uninit(&p);
835
0
  nn_uninit(&tmp_nn);
836
837
0
  return ret;
838
0
}
839
840
/*
841
 * Compute quotient remainder of Euclidean division.
842
 *
843
 * This function is a wrapper to normalize the divisor, i.e. shift it so that
844
 * the MSB of its MSW is set, and precompute the reciprocal of this MSW to be
845
 * used to perform small divisions using multiplications during the long
846
 * schoolbook division. It uses the helper functions/macros above.
847
 *
848
 * This is NOT constant time with regards to the word length of a and b,
849
 * but also the actual bitlength of b as we need to normalize b at the
850
 * bit level.
851
 * Moreover the precomputation of the reciprocal is not constant time at all.
852
 *
853
 * r need not be initialized, the function does it for the the caller.
854
 *
855
 * This function does not support aliasing. This is an internal helper, which
856
 * expects caller to check parameters.
857
 *
858
 * It returns 0 on sucess, -1 on error.
859
 */
860
ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
861
101k
{
862
101k
  nn b_large, b_normalized;
863
101k
  bitcnt_t cnt;
864
101k
  word_t v;
865
101k
  nn_src_t ptr = b;
866
101k
  int ret, iszero;
867
101k
  b_large.magic = b_normalized.magic = WORD(0);
868
869
101k
  ret = nn_init(r, 0); EG(ret, err);
870
101k
  ret = nn_init(q, 0); EG(ret, err);
871
101k
  ret = nn_init(&b_large, 0); EG(ret, err);
872
873
101k
  MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err);
874
875
101k
  if (b->wlen == 1) {
876
29.3k
    ret = nn_copy(&b_large, b); EG(ret, err);
877
878
    /* Expand our big number with zeroes */
879
29.3k
    ret = nn_set_wlen(&b_large, 2); EG(ret, err);
880
881
    /*
882
     * This cast could seem inappropriate, but we are
883
     * sure here that we won't touch ptr since it is only
884
     * given as a const parameter to sub functions.
885
     */
886
29.3k
    ptr = (nn_src_t) &b_large;
887
29.3k
  }
888
889
  /* After this, we only handle >= 2 words big numbers */
890
101k
  MUST_HAVE(!(ptr->wlen < 2), ret, err);
891
892
101k
  ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err);
893
101k
  ret = nn_clz(ptr, &cnt); EG(ret, err);
894
101k
  ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err);
895
101k
  ret = wreciprocal(b_normalized.val[ptr->wlen - 1],
896
101k
        b_normalized.val[ptr->wlen - 2],
897
101k
        &v); /* Not constant time. */ EG(ret, err);
898
899
101k
  ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt);
900
901
101k
err:
902
101k
  nn_uninit(&b_normalized);
903
101k
  nn_uninit(&b_large);
904
905
101k
  return ret;
906
101k
}
907
908
/*
909
 * Returns 0 on succes, -1 on error. Internal helper. Checks on params
910
 * expected from the caller.
911
 */
912
ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
913
20.2k
{
914
20.2k
  nn a_cpy, b_cpy;
915
20.2k
  int ret;
916
20.2k
  a_cpy.magic = b_cpy.magic = WORD(0);
917
918
20.2k
  ret = nn_init(&a_cpy, 0); EG(ret, err);
919
20.2k
  ret = nn_init(&b_cpy, 0); EG(ret, err);
920
20.2k
  ret = nn_copy(&a_cpy, a); EG(ret, err);
921
20.2k
  ret = nn_copy(&b_cpy, b); EG(ret, err);
922
20.2k
  ret = _nn_divrem(q, r, &a_cpy, &b_cpy);
923
924
20.2k
err:
925
20.2k
  nn_uninit(&b_cpy);
926
20.2k
  nn_uninit(&a_cpy);
927
928
20.2k
  return ret;
929
20.2k
}
930
931
932
933
/*
934
 * Compute quotient and remainder and normalize them.
935
 * Not constant time, see documentation of _nn_divrem.
936
 *
937
 * Aliased version of _nn_divrem. Returns 0 on success,
938
 * -1 on error.
939
 */
940
int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
941
101k
{
942
101k
  int ret;
943
944
  /* _nn_divrem initializes q and r */
945
101k
  ret = nn_check_initialized(a); EG(ret, err);
946
101k
  ret = nn_check_initialized(b); EG(ret, err);
947
101k
  MUST_HAVE(((q != NULL) && (r != NULL)), ret, err);
948
949
  /*
950
   * Handle aliasing whenever any of the inputs is
951
   * used as an output.
952
   */
953
101k
  if ((a == q) || (a == r) || (b == q) || (b == r)) {
954
20.2k
    ret = __nn_divrem_notrim_alias(q, r, a, b);
955
80.8k
  } else {
956
80.8k
    ret = _nn_divrem(q, r, a, b);
957
80.8k
  }
958
959
101k
err:
960
101k
  return ret;
961
101k
}
962
963
/*
964
 * Compute quotient and remainder and normalize them.
965
 * Not constant time, see documentation of _nn_divrem().
966
 * Returns 0 on success, -1 on error.
967
 */
968
int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
969
94.0k
{
970
94.0k
  int ret;
971
972
94.0k
  ret = nn_divrem_notrim(q, r, a, b); EG(ret, err);
973
974
  /* Normalize (trim) the quotient and rest to avoid size overflow */
975
94.0k
  ret = nn_normalize(q); EG(ret, err);
976
94.0k
  ret = nn_normalize(r);
977
978
94.0k
err:
979
94.0k
  return ret;
980
94.0k
}
981
982
/*
983
 * Compute remainder only and do not normalize it. Not constant time, see
984
 * documentation of _nn_divrem(). Returns 0 on success, -1 on error.
985
 */
986
int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b)
987
6.96k
{
988
6.96k
  int ret;
989
6.96k
  nn q;
990
6.96k
  q.magic = WORD(0);
991
992
  /* nn_divrem() will init q. */
993
6.96k
  ret = nn_divrem_notrim(&q, r, a, b);
994
995
6.96k
  nn_uninit(&q);
996
997
6.96k
  return ret;
998
6.96k
}
999
1000
/*
1001
 * Compute remainder only and normalize it. Not constant time, see
1002
 * documentation of _nn_divrem(). r is initialized by the function.
1003
 * Returns 0 on success, -1 on error.
1004
 */
1005
int nn_mod(nn_t r, nn_src_t a, nn_src_t b)
1006
35.1k
{
1007
35.1k
  int ret;
1008
35.1k
  nn q;
1009
35.1k
  q.magic = WORD(0);
1010
1011
  /* nn_divrem will init q. */
1012
35.1k
  ret = nn_divrem(&q, r, a, b);
1013
1014
35.1k
  nn_uninit(&q);
1015
1016
35.1k
  return ret;
1017
35.1k
}
1018
1019
/*
1020
 * Below follow gcd and xgcd non constant time functions for the user ease.
1021
 */
1022
1023
/*
1024
 * Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant
1025
 * time per the algorithm used. The function returns 0 on success, -1 on
1026
 * error. internal helper: expect caller to check parameters.
1027
 */
1028
ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b,
1029
        int *sign)
1030
1.03k
{
1031
1.03k
  nn_t c, d, q, r, u1, v1, u2, v2;
1032
1.03k
  nn scratch[8];
1033
1.03k
  int ret, swap, iszero;
1034
1.03k
  u8 i;
1035
1036
9.35k
  for (i = 0; i < 8; i++){
1037
8.31k
    scratch[i].magic = WORD(0);
1038
8.31k
  }
1039
1040
  /*
1041
   * Maintain:
1042
   * |u1 v1| |c| = |a|
1043
   * |u2 v2| |d|   |b|
1044
   * u1, v1, u2, v2 >= 0
1045
   * c >= d
1046
   *
1047
   * Initially:
1048
   * |1  0 | |a| = |a|
1049
   * |0  1 | |b|   |b|
1050
   *
1051
   * At each iteration:
1052
   * c >= d
1053
   * c = q*d + r
1054
   * |u1 v1| = |q*u1+v1 u1|
1055
   * |u2 v2|   |q*u2+v2 u2|
1056
   *
1057
   * Finally, after i steps:
1058
   * |u1 v1| |g| = |a|
1059
   * |u2 v2| |0| = |b|
1060
   *
1061
   * Inverting the matrix:
1062
   * |g| = (-1)^i | v2 -v1| |a|
1063
   * |0|          |-u2  u1| |b|
1064
   */
1065
1066
  /*
1067
   * Initialization.
1068
   */
1069
1.03k
  ret = nn_init(g, 0); EG(ret, err);
1070
1.03k
  ret = nn_init(u, 0); EG(ret, err);
1071
1.03k
  ret = nn_init(v, 0); EG(ret, err);
1072
1.03k
  ret = nn_iszero(b, &iszero); EG(ret, err);
1073
1.03k
  if (iszero) {
1074
    /* gcd(0, a) = a, and 1*a + 0*b = a */
1075
32
    ret = nn_copy(g, a); EG(ret, err);
1076
32
    ret = nn_one(u); EG(ret, err);
1077
32
    ret = nn_zero(v); EG(ret, err);
1078
32
    (*sign) = 1;
1079
32
    goto err;
1080
32
  }
1081
1082
9.06k
  for (i = 0; i < 8; i++){
1083
8.05k
    ret = nn_init(&scratch[i], 0); EG(ret, err);
1084
8.05k
  }
1085
1086
1.00k
  u1 = &(scratch[0]);
1087
1.00k
  v1 = &(scratch[1]);
1088
1.00k
  u2 = &(scratch[2]);
1089
1.00k
  v2 = &(scratch[3]);
1090
1.00k
  ret = nn_one(u1); EG(ret, err);
1091
1.00k
  ret = nn_zero(v1); EG(ret, err);
1092
1.00k
  ret = nn_zero(u2); EG(ret, err);
1093
1.00k
  ret = nn_one(v2); EG(ret, err);
1094
1.00k
  c = &(scratch[4]);
1095
1.00k
  d = &(scratch[5]);
1096
1.00k
  ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */
1097
1.00k
  ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */
1098
1.00k
  q = &(scratch[6]);
1099
1.00k
  r = &(scratch[7]);
1100
1.00k
  swap = 0;
1101
1102
  /*
1103
   * Loop.
1104
   */
1105
1.00k
  ret = nn_iszero(d, &iszero); EG(ret, err);
1106
29.8k
  while (!iszero) {
1107
29.3k
    ret = nn_divrem(q, r, c, d); EG(ret, err);
1108
29.3k
    ret = nn_normalize(q); EG(ret, err);
1109
29.3k
    ret = nn_normalize(r); EG(ret, err);
1110
29.3k
    ret = nn_copy(c, r); EG(ret, err);
1111
29.3k
    ret = nn_mul(r, q, u1); EG(ret, err);
1112
29.3k
    ret = nn_normalize(r); EG(ret, err);
1113
29.3k
    ret = nn_add(v1, v1, r); EG(ret, err);
1114
29.3k
    ret = nn_mul(r, q, u2); EG(ret, err);
1115
29.3k
    ret = nn_normalize(r); EG(ret, err);
1116
29.3k
    ret = nn_add(v2, v2, r); EG(ret, err);
1117
29.3k
    ret = nn_normalize(v1); EG(ret, err);
1118
29.3k
    ret = nn_normalize(v2); EG(ret, err);
1119
29.3k
    swap = 1;
1120
29.3k
    ret = nn_iszero(c, &iszero); EG(ret, err);
1121
29.3k
    if (iszero) {
1122
523
      break;
1123
523
    }
1124
28.8k
    ret = nn_divrem(q, r, d, c); EG(ret, err);
1125
28.8k
    ret = nn_normalize(q); EG(ret, err);
1126
28.8k
    ret = nn_normalize(r); EG(ret, err);
1127
28.8k
    ret = nn_copy(d, r); EG(ret, err);
1128
28.8k
    ret = nn_mul(r, q, v1); EG(ret, err);
1129
28.8k
    ret = nn_normalize(r); EG(ret, err);
1130
28.8k
    ret = nn_add(u1, u1, r); EG(ret, err);
1131
28.8k
    ret = nn_mul(r, q, v2); EG(ret, err);
1132
28.8k
    ret = nn_normalize(r); EG(ret, err);
1133
28.8k
    ret = nn_add(u2, u2, r); EG(ret, err);
1134
28.8k
    ret = nn_normalize(u1); EG(ret, err);
1135
28.8k
    ret = nn_normalize(u2); EG(ret, err);
1136
28.8k
    swap = 0;
1137
1138
    /* refresh loop condition */
1139
28.8k
    ret = nn_iszero(d, &iszero); EG(ret, err);
1140
28.8k
  }
1141
1142
  /* Copies could be skipped. */
1143
1.00k
  if (swap) {
1144
523
    ret = nn_copy(g, d); EG(ret, err);
1145
523
    ret = nn_copy(u, u2); EG(ret, err);
1146
523
    ret = nn_copy(v, u1); EG(ret, err);
1147
523
  } else {
1148
484
    ret = nn_copy(g, c); EG(ret, err);
1149
484
    ret = nn_copy(u, v2); EG(ret, err);
1150
484
    ret = nn_copy(v, v1); EG(ret, err);
1151
484
  }
1152
1153
  /* swap = -1 means u <= 0; = 1 means v <= 0 */
1154
1.00k
  (*sign) = swap ? -1 : 1;
1155
1.00k
  ret = 0;
1156
1157
1.03k
err:
1158
  /*
1159
   * We uninit scratch elements in all cases, i.e. whether or not
1160
   * we return an error or not.
1161
   */
1162
9.35k
  for (i = 0; i < 8; i++){
1163
8.31k
    nn_uninit(&scratch[i]);
1164
8.31k
  }
1165
  /* Unitialize output in case of error */
1166
1.03k
  if (ret){
1167
0
    nn_uninit(v);
1168
0
    nn_uninit(u);
1169
0
    nn_uninit(g);
1170
0
  }
1171
1172
1.03k
  return ret;
1173
1.00k
}
1174
1175
/*
1176
 * Aliased version of xgcd, and no assumption on a and b. Not constant time at
1177
 * all. returns 0 on success, -1 on error. XXX document 'sign'
1178
 */
1179
int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign)
1180
1.03k
{
1181
  /* Handle aliasing
1182
   * Note: in order to properly handle aliasing, we accept to lose
1183
   * some "space" on the stack with copies.
1184
   */
1185
1.03k
  nn a_cpy, b_cpy;
1186
1.03k
  nn_src_t a_, b_;
1187
1.03k
  int ret, cmp, _sign;
1188
1.03k
  a_cpy.magic = b_cpy.magic = WORD(0);
1189
1190
  /* The internal _nn_xgcd function initializes g, u and v */
1191
1.03k
  ret = nn_check_initialized(a); EG(ret, err);
1192
1.03k
  ret = nn_check_initialized(b); EG(ret, err);
1193
1.03k
  MUST_HAVE((sign != NULL), ret, err);
1194
1195
1.03k
  ret = nn_init(&b_cpy, 0); EG(ret, err);
1196
1197
  /* Aliasing of a */
1198
1.03k
  if ((g == a) || (u == a) || (v == a)){
1199
0
    ret = nn_copy(&a_cpy, a); EG(ret, err);
1200
0
    a_ = &a_cpy;
1201
1.03k
  } else {
1202
1.03k
    a_ = a;
1203
1.03k
  }
1204
  /* Aliasing of b */
1205
1.03k
  if ((g == b) || (u == b) || (v == b)) {
1206
0
    ret = nn_copy(&b_cpy, b); EG(ret, err);
1207
0
    b_ = &b_cpy;
1208
1.03k
  } else {
1209
1.03k
    b_ = b;
1210
1.03k
  }
1211
1212
1.03k
  ret = nn_cmp(a_, b_, &cmp); EG(ret, err);
1213
1.03k
  if (cmp < 0) {
1214
    /* If a < b, swap the inputs */
1215
753
    ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err);
1216
753
    (*sign) = -(_sign);
1217
753
  } else {
1218
286
    ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err);
1219
286
    (*sign) = _sign;
1220
286
  }
1221
1222
1.03k
err:
1223
1.03k
  nn_uninit(&b_cpy);
1224
1.03k
  nn_uninit(&a_cpy);
1225
1226
1.03k
  return ret;
1227
1.03k
}
1228
1229
/*
1230
 * Compute g = gcd(a, b). Internally use the xgcd and drop u and v.
1231
 * Not constant time at all. Returns 0 on success, -1 on error.
1232
 * XXX document 'sign'.
1233
 */
1234
int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign)
1235
564
{
1236
564
  nn u, v;
1237
564
  int ret;
1238
564
  u.magic = v.magic = WORD(0);
1239
1240
  /* nn_xgcd will initialize g, u and v and
1241
   * check if a and b are indeed initialized.
1242
   */
1243
564
  ret = nn_xgcd(g, &u, &v, a, b, sign);
1244
1245
564
  nn_uninit(&u);
1246
564
  nn_uninit(&v);
1247
1248
564
  return ret;
1249
564
}