Coverage Report

Created: 2025-07-04 06:19

/src/nss/lib/freebl/mpi/mpi.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  mpi.c
3
 *
4
 *  Arbitrary precision integer arithmetic library
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9
10
#include "mpi-priv.h"
11
#include "mplogic.h"
12
13
#include <assert.h>
14
15
#if defined(__arm__) && \
16
    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
17
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
18
#undef MP_ASSEMBLY_MULTIPLY
19
#undef MP_ASSEMBLY_SQUARE
20
#endif
21
22
#if MP_LOGTAB
23
/*
24
  A table of the logs of 2 for various bases (the 0 and 1 entries of
25
  this table are meaningless and should not be referenced).
26
27
  This table is used to compute output lengths for the mp_toradix()
28
  function.  Since a number n in radix r takes up about log_r(n)
29
  digits, we estimate the output size by taking the least integer
30
  greater than log_r(n), where:
31
32
  log_r(n) = log_2(n) * log_r(2)
33
34
  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
35
  which are the output bases supported.
36
 */
37
#include "logtab.h"
38
#endif
39
40
#ifdef CT_VERIF
41
#include <valgrind/memcheck.h>
42
#endif
43
44
/* {{{ Constant strings */
45
46
/* Constant strings returned by mp_strerror() */
47
static const char *mp_err_string[] = {
48
    "unknown result code",     /* say what?            */
49
    "boolean true",            /* MP_OKAY, MP_YES      */
50
    "boolean false",           /* MP_NO                */
51
    "out of memory",           /* MP_MEM               */
52
    "argument out of range",   /* MP_RANGE             */
53
    "invalid input parameter", /* MP_BADARG            */
54
    "result is undefined"      /* MP_UNDEF             */
55
};
56
57
/* Value to digit maps for radix conversion   */
58
59
/* s_dmap_1 - standard digits and letters */
60
static const char *s_dmap_1 =
61
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
62
63
/* }}} */
64
65
/* {{{ Default precision manipulation */
66
67
/* Default precision for newly created mp_int's      */
68
static mp_size s_mp_defprec = MP_DEFPREC;
69
70
mp_size
71
mp_get_prec(void)
72
0
{
73
0
    return s_mp_defprec;
74
75
0
} /* end mp_get_prec() */
76
77
void
78
mp_set_prec(mp_size prec)
79
0
{
80
0
    if (prec == 0)
81
0
        s_mp_defprec = MP_DEFPREC;
82
0
    else
83
0
        s_mp_defprec = prec;
84
85
0
} /* end mp_set_prec() */
86
87
/* }}} */
88
89
#ifdef CT_VERIF
90
void
91
mp_taint(mp_int *mp)
92
{
93
    size_t i;
94
    for (i = 0; i < mp->used; ++i) {
95
        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
96
    }
97
}
98
99
void
100
mp_untaint(mp_int *mp)
101
{
102
    size_t i;
103
    for (i = 0; i < mp->used; ++i) {
104
        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
105
    }
106
}
107
#endif
108
109
/*------------------------------------------------------------------------*/
110
/* {{{ mp_init(mp) */
111
112
/*
113
  mp_init(mp)
114
115
  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
116
  MP_MEM if memory could not be allocated for the structure.
117
 */
118
119
mp_err
120
mp_init(mp_int *mp)
121
708k
{
122
708k
    return mp_init_size(mp, s_mp_defprec);
123
124
708k
} /* end mp_init() */
125
126
/* }}} */
127
128
/* {{{ mp_init_size(mp, prec) */
129
130
/*
131
  mp_init_size(mp, prec)
132
133
  Initialize a new zero-valued mp_int with at least the given
134
  precision; returns MP_OKAY if successful, or MP_MEM if memory could
135
  not be allocated for the structure.
136
 */
137
138
mp_err
139
mp_init_size(mp_int *mp, mp_size prec)
140
2.86M
{
141
2.86M
    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
142
143
2.86M
    prec = MP_ROUNDUP(prec, s_mp_defprec);
144
2.86M
    if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
145
0
        return MP_MEM;
146
147
2.86M
    SIGN(mp) = ZPOS;
148
2.86M
    USED(mp) = 1;
149
2.86M
    ALLOC(mp) = prec;
150
151
2.86M
    return MP_OKAY;
152
153
2.86M
} /* end mp_init_size() */
154
155
/* }}} */
156
157
/* {{{ mp_init_copy(mp, from) */
158
159
/*
160
  mp_init_copy(mp, from)
161
162
  Initialize mp as an exact copy of from.  Returns MP_OKAY if
163
  successful, MP_MEM if memory could not be allocated for the new
164
  structure.
165
 */
166
167
mp_err
168
mp_init_copy(mp_int *mp, const mp_int *from)
169
5.58M
{
170
5.58M
    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
171
172
5.58M
    if (mp == from)
173
0
        return MP_OKAY;
174
175
5.58M
    if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
176
0
        return MP_MEM;
177
178
5.58M
    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
179
5.58M
    USED(mp) = USED(from);
180
5.58M
    ALLOC(mp) = ALLOC(from);
181
5.58M
    SIGN(mp) = SIGN(from);
182
183
5.58M
    return MP_OKAY;
184
185
5.58M
} /* end mp_init_copy() */
186
187
/* }}} */
188
189
/* {{{ mp_copy(from, to) */
190
191
/*
192
  mp_copy(from, to)
193
194
  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
195
  'to' has already been initialized (if not, use mp_init_copy()
196
  instead). If 'from' and 'to' are identical, nothing happens.
197
 */
198
199
mp_err
200
mp_copy(const mp_int *from, mp_int *to)
201
3.99M
{
202
3.99M
    ARGCHK(from != NULL && to != NULL, MP_BADARG);
203
204
3.99M
    if (from == to)
205
55.8k
        return MP_OKAY;
206
207
3.94M
    { /* copy */
208
3.94M
        mp_digit *tmp;
209
210
        /*
211
          If the allocated buffer in 'to' already has enough space to hold
212
          all the used digits of 'from', we'll re-use it to avoid hitting
213
          the memory allocater more than necessary; otherwise, we'd have
214
          to grow anyway, so we just allocate a hunk and make the copy as
215
          usual
216
         */
217
3.94M
        if (ALLOC(to) >= USED(from)) {
218
3.94M
            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
219
3.94M
            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
220
221
3.94M
        } else {
222
1
            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
223
0
                return MP_MEM;
224
225
1
            s_mp_copy(DIGITS(from), tmp, USED(from));
226
227
1
            if (DIGITS(to) != NULL) {
228
1
                s_mp_setz(DIGITS(to), ALLOC(to));
229
1
                s_mp_free(DIGITS(to));
230
1
            }
231
232
1
            DIGITS(to) = tmp;
233
1
            ALLOC(to) = ALLOC(from);
234
1
        }
235
236
        /* Copy the precision and sign from the original */
237
3.94M
        USED(to) = USED(from);
238
3.94M
        SIGN(to) = SIGN(from);
239
3.94M
    } /* end copy */
240
241
3.94M
    return MP_OKAY;
242
243
3.94M
} /* end mp_copy() */
244
245
/* }}} */
246
247
/* {{{ mp_exch(mp1, mp2) */
248
249
/*
250
  mp_exch(mp1, mp2)
251
252
  Exchange mp1 and mp2 without allocating any intermediate memory
253
  (well, unless you count the stack space needed for this call and the
254
  locals it creates...).  This cannot fail.
255
 */
256
257
void
258
mp_exch(mp_int *mp1, mp_int *mp2)
259
88.4k
{
260
88.4k
#if MP_ARGCHK == 2
261
88.4k
    assert(mp1 != NULL && mp2 != NULL);
262
#else
263
    if (mp1 == NULL || mp2 == NULL)
264
        return;
265
#endif
266
267
88.4k
    s_mp_exch(mp1, mp2);
268
269
88.4k
} /* end mp_exch() */
270
271
/* }}} */
272
273
/* {{{ mp_clear(mp) */
274
275
/*
276
  mp_clear(mp)
277
278
  Release the storage used by an mp_int, and void its fields so that
279
  if someone calls mp_clear() again for the same int later, we won't
280
  get tollchocked.
281
 */
282
283
void
284
mp_clear(mp_int *mp)
285
61.2M
{
286
61.2M
    if (mp == NULL)
287
0
        return;
288
289
61.2M
    if (DIGITS(mp) != NULL) {
290
8.44M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
291
8.44M
        s_mp_free(DIGITS(mp));
292
8.44M
        DIGITS(mp) = NULL;
293
8.44M
    }
294
295
61.2M
    USED(mp) = 0;
296
61.2M
    ALLOC(mp) = 0;
297
298
61.2M
} /* end mp_clear() */
299
300
/* }}} */
301
302
/* {{{ mp_zero(mp) */
303
304
/*
305
  mp_zero(mp)
306
307
  Set mp to zero.  Does not change the allocated size of the structure,
308
  and therefore cannot fail (except on a bad argument, which we ignore)
309
 */
310
void
311
mp_zero(mp_int *mp)
312
573k
{
313
573k
    if (mp == NULL)
314
0
        return;
315
316
573k
    s_mp_setz(DIGITS(mp), ALLOC(mp));
317
573k
    USED(mp) = 1;
318
573k
    SIGN(mp) = ZPOS;
319
320
573k
} /* end mp_zero() */
321
322
/* }}} */
323
324
/* {{{ mp_set(mp, d) */
325
326
void
327
mp_set(mp_int *mp, mp_digit d)
328
141k
{
329
141k
    if (mp == NULL)
330
0
        return;
331
332
141k
    mp_zero(mp);
333
141k
    DIGIT(mp, 0) = d;
334
335
141k
} /* end mp_set() */
336
337
/* }}} */
338
339
/* {{{ mp_set_int(mp, z) */
340
341
mp_err
342
mp_set_int(mp_int *mp, long z)
343
39.3k
{
344
39.3k
    unsigned long v = labs(z);
345
39.3k
    mp_err res;
346
347
39.3k
    ARGCHK(mp != NULL, MP_BADARG);
348
349
    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
350
39.3k
    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
351
0
        return res;
352
0
    }
353
354
39.3k
    if (z < 0) {
355
0
        SIGN(mp) = NEG;
356
0
    }
357
358
39.3k
    return MP_OKAY;
359
39.3k
} /* end mp_set_int() */
360
361
/* }}} */
362
363
/* {{{ mp_set_ulong(mp, z) */
364
365
mp_err
366
mp_set_ulong(mp_int *mp, unsigned long z)
367
39.3k
{
368
39.3k
    int ix;
369
39.3k
    mp_err res;
370
371
39.3k
    ARGCHK(mp != NULL, MP_BADARG);
372
373
39.3k
    mp_zero(mp);
374
39.3k
    if (z == 0)
375
0
        return MP_OKAY; /* shortcut for zero */
376
377
39.3k
    if (sizeof z <= sizeof(mp_digit)) {
378
39.3k
        DIGIT(mp, 0) = z;
379
39.3k
    } else {
380
0
        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
381
0
            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
382
0
                return res;
383
384
0
            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
385
0
            if (res != MP_OKAY)
386
0
                return res;
387
0
        }
388
0
    }
389
39.3k
    return MP_OKAY;
390
39.3k
} /* end mp_set_ulong() */
391
392
/* }}} */
393
394
/*------------------------------------------------------------------------*/
395
/* {{{ Digit arithmetic */
396
397
/* {{{ mp_add_d(a, d, b) */
398
399
/*
400
  mp_add_d(a, d, b)
401
402
  Compute the sum b = a + d, for a single digit d.  Respects the sign of
403
  its primary addend (single digits are unsigned anyway).
404
 */
405
406
mp_err
407
mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
408
0
{
409
0
    mp_int tmp;
410
0
    mp_err res;
411
412
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
413
414
0
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
415
0
        return res;
416
417
0
    if (SIGN(&tmp) == ZPOS) {
418
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
419
0
            goto CLEANUP;
420
0
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
421
0
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
422
0
            goto CLEANUP;
423
0
    } else {
424
0
        mp_neg(&tmp, &tmp);
425
426
0
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
427
0
    }
428
429
0
    if (s_mp_cmp_d(&tmp, 0) == 0)
430
0
        SIGN(&tmp) = ZPOS;
431
432
0
    s_mp_exch(&tmp, b);
433
434
0
CLEANUP:
435
0
    mp_clear(&tmp);
436
0
    return res;
437
438
0
} /* end mp_add_d() */
439
440
/* }}} */
441
442
/* {{{ mp_sub_d(a, d, b) */
443
444
/*
445
  mp_sub_d(a, d, b)
446
447
  Compute the difference b = a - d, for a single digit d.  Respects the
448
  sign of its subtrahend (single digits are unsigned anyway).
449
 */
450
451
mp_err
452
mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
453
16.8k
{
454
16.8k
    mp_int tmp;
455
16.8k
    mp_err res;
456
457
16.8k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
458
459
16.8k
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
460
0
        return res;
461
462
16.8k
    if (SIGN(&tmp) == NEG) {
463
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
464
0
            goto CLEANUP;
465
16.8k
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
466
13.9k
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
467
0
            goto CLEANUP;
468
13.9k
    } else {
469
2.93k
        mp_neg(&tmp, &tmp);
470
471
2.93k
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
472
2.93k
        SIGN(&tmp) = NEG;
473
2.93k
    }
474
475
16.8k
    if (s_mp_cmp_d(&tmp, 0) == 0)
476
11
        SIGN(&tmp) = ZPOS;
477
478
16.8k
    s_mp_exch(&tmp, b);
479
480
16.8k
CLEANUP:
481
16.8k
    mp_clear(&tmp);
482
16.8k
    return res;
483
484
16.8k
} /* end mp_sub_d() */
485
486
/* }}} */
487
488
/* {{{ mp_mul_d(a, d, b) */
489
490
/*
491
  mp_mul_d(a, d, b)
492
493
  Compute the product b = a * d, for a single digit d.  Respects the sign
494
  of its multiplicand (single digits are unsigned anyway)
495
 */
496
497
mp_err
498
mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
499
0
{
500
0
    mp_err res;
501
502
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
503
504
0
    if (d == 0) {
505
0
        mp_zero(b);
506
0
        return MP_OKAY;
507
0
    }
508
509
0
    if ((res = mp_copy(a, b)) != MP_OKAY)
510
0
        return res;
511
512
0
    res = s_mp_mul_d(b, d);
513
514
0
    return res;
515
516
0
} /* end mp_mul_d() */
517
518
/* }}} */
519
520
/* {{{ mp_mul_2(a, c) */
521
522
mp_err
523
mp_mul_2(const mp_int *a, mp_int *c)
524
0
{
525
0
    mp_err res;
526
527
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
528
529
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
530
0
        return res;
531
532
0
    return s_mp_mul_2(c);
533
534
0
} /* end mp_mul_2() */
535
536
/* }}} */
537
538
/* {{{ mp_div_d(a, d, q, r) */
539
540
/*
541
  mp_div_d(a, d, q, r)
542
543
  Compute the quotient q = a / d and remainder r = a mod d, for a
544
  single digit d.  Respects the sign of its divisor (single digits are
545
  unsigned anyway).
546
 */
547
548
mp_err
549
mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
550
0
{
551
0
    mp_err res;
552
0
    mp_int qp;
553
0
    mp_digit rem = 0;
554
0
    int pow;
555
556
0
    ARGCHK(a != NULL, MP_BADARG);
557
558
0
    if (d == 0)
559
0
        return MP_RANGE;
560
561
    /* Shortcut for powers of two ... */
562
0
    if ((pow = s_mp_ispow2d(d)) >= 0) {
563
0
        mp_digit mask;
564
565
0
        mask = ((mp_digit)1 << pow) - 1;
566
0
        rem = DIGIT(a, 0) & mask;
567
568
0
        if (q) {
569
0
            if ((res = mp_copy(a, q)) != MP_OKAY) {
570
0
                return res;
571
0
            }
572
0
            s_mp_div_2d(q, pow);
573
0
        }
574
575
0
        if (r)
576
0
            *r = rem;
577
578
0
        return MP_OKAY;
579
0
    }
580
581
0
    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
582
0
        return res;
583
584
0
    res = s_mp_div_d(&qp, d, &rem);
585
586
0
    if (s_mp_cmp_d(&qp, 0) == 0)
587
0
        SIGN(q) = ZPOS;
588
589
0
    if (r) {
590
0
        *r = rem;
591
0
    }
592
593
0
    if (q)
594
0
        s_mp_exch(&qp, q);
595
596
0
    mp_clear(&qp);
597
0
    return res;
598
599
0
} /* end mp_div_d() */
600
601
/* }}} */
602
603
/* {{{ mp_div_2(a, c) */
604
605
/*
606
  mp_div_2(a, c)
607
608
  Compute c = a / 2, disregarding the remainder.
609
 */
610
611
mp_err
612
mp_div_2(const mp_int *a, mp_int *c)
613
0
{
614
0
    mp_err res;
615
616
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
617
618
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
619
0
        return res;
620
621
0
    s_mp_div_2(c);
622
623
0
    return MP_OKAY;
624
625
0
} /* end mp_div_2() */
626
627
/* }}} */
628
629
/* {{{ mp_expt_d(a, d, b) */
630
631
mp_err
632
mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
633
0
{
634
0
    mp_int s, x;
635
0
    mp_err res;
636
637
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
638
639
0
    if ((res = mp_init(&s)) != MP_OKAY)
640
0
        return res;
641
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
642
0
        goto X;
643
644
0
    DIGIT(&s, 0) = 1;
645
646
0
    while (d != 0) {
647
0
        if (d & 1) {
648
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
649
0
                goto CLEANUP;
650
0
        }
651
652
0
        d /= 2;
653
654
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
655
0
            goto CLEANUP;
656
0
    }
657
658
0
    s_mp_exch(&s, c);
659
660
0
CLEANUP:
661
0
    mp_clear(&x);
662
0
X:
663
0
    mp_clear(&s);
664
665
0
    return res;
666
667
0
} /* end mp_expt_d() */
668
669
/* }}} */
670
671
/* }}} */
672
673
/*------------------------------------------------------------------------*/
674
/* {{{ Full arithmetic */
675
676
/* {{{ mp_abs(a, b) */
677
678
/*
679
  mp_abs(a, b)
680
681
  Compute b = |a|.  'a' and 'b' may be identical.
682
 */
683
684
mp_err
685
mp_abs(const mp_int *a, mp_int *b)
686
0
{
687
0
    mp_err res;
688
689
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
690
691
0
    if ((res = mp_copy(a, b)) != MP_OKAY)
692
0
        return res;
693
694
0
    SIGN(b) = ZPOS;
695
696
0
    return MP_OKAY;
697
698
0
} /* end mp_abs() */
699
700
/* }}} */
701
702
/* {{{ mp_neg(a, b) */
703
704
/*
705
  mp_neg(a, b)
706
707
  Compute b = -a.  'a' and 'b' may be identical.
708
 */
709
710
mp_err
711
mp_neg(const mp_int *a, mp_int *b)
712
2.93k
{
713
2.93k
    mp_err res;
714
715
2.93k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
716
717
2.93k
    if ((res = mp_copy(a, b)) != MP_OKAY)
718
0
        return res;
719
720
2.93k
    if (s_mp_cmp_d(b, 0) == MP_EQ)
721
2.93k
        SIGN(b) = ZPOS;
722
0
    else
723
0
        SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
724
725
2.93k
    return MP_OKAY;
726
727
2.93k
} /* end mp_neg() */
728
729
/* }}} */
730
731
/* {{{ mp_add(a, b, c) */
732
733
/*
734
  mp_add(a, b, c)
735
736
  Compute c = a + b.  All parameters may be identical.
737
 */
738
739
mp_err
740
mp_add(const mp_int *a, const mp_int *b, mp_int *c)
741
16.6M
{
742
16.6M
    mp_err res;
743
744
16.6M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
745
746
16.6M
    if (SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
747
10.1M
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
748
10.1M
    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b|   */
749
2.59M
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
750
3.89M
    } else { /* different sign: |a|  < |b|   */
751
3.89M
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
752
3.89M
    }
753
754
16.6M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
755
13.5k
        SIGN(c) = ZPOS;
756
757
16.6M
CLEANUP:
758
16.6M
    return res;
759
760
16.6M
} /* end mp_add() */
761
762
/* }}} */
763
764
/* {{{ mp_sub(a, b, c) */
765
766
/*
767
  mp_sub(a, b, c)
768
769
  Compute c = a - b.  All parameters may be identical.
770
 */
771
772
mp_err
773
mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
774
1.54M
{
775
1.54M
    mp_err res;
776
1.54M
    int magDiff;
777
778
1.54M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
779
780
1.54M
    if (a == b) {
781
0
        mp_zero(c);
782
0
        return MP_OKAY;
783
0
    }
784
785
1.54M
    if (MP_SIGN(a) != MP_SIGN(b)) {
786
0
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
787
1.54M
    } else if (!(magDiff = s_mp_cmp(a, b))) {
788
27.4k
        mp_zero(c);
789
27.4k
        res = MP_OKAY;
790
1.52M
    } else if (magDiff > 0) {
791
1.45M
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
792
1.45M
    } else {
793
66.0k
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
794
66.0k
        MP_SIGN(c) = !MP_SIGN(a);
795
66.0k
    }
796
797
1.54M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
798
27.4k
        MP_SIGN(c) = MP_ZPOS;
799
800
1.54M
CLEANUP:
801
1.54M
    return res;
802
803
1.54M
} /* end mp_sub() */
804
805
/* }}} */
806
807
/* {{{ s_mp_mulg(a, b, c) */
808
809
/*
810
  s_mp_mulg(a, b, c)
811
812
  Compute c = a * b.  All parameters may be identical. if constantTime is set,
813
  then the operations are done in constant time. The original is mostly
814
  constant time as long as s_mpv_mul_d_add() is constant time. This is true
815
  of the x86 assembler, as well as the current c code.
816
 */
817
mp_err
818
s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
819
3.60M
{
820
3.60M
    mp_digit *pb;
821
3.60M
    mp_int tmp;
822
3.60M
    mp_err res;
823
3.60M
    mp_size ib;
824
3.60M
    mp_size useda, usedb;
825
826
3.60M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
827
828
3.60M
    if (a == c) {
829
3.55M
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
830
0
            return res;
831
3.55M
        if (a == b)
832
0
            b = &tmp;
833
3.55M
        a = &tmp;
834
3.55M
    } else if (b == c) {
835
0
        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
836
0
            return res;
837
0
        b = &tmp;
838
50.4k
    } else {
839
50.4k
        MP_DIGITS(&tmp) = 0;
840
50.4k
    }
841
842
3.60M
    if (MP_USED(a) < MP_USED(b)) {
843
489k
        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
844
489k
        b = a;
845
489k
        a = xch;
846
489k
    }
847
848
3.60M
    MP_USED(c) = 1;
849
3.60M
    MP_DIGIT(c, 0) = 0;
850
3.60M
    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
851
0
        goto CLEANUP;
852
853
3.60M
#ifdef NSS_USE_COMBA
854
    /* comba isn't constant time because it clamps! If we cared
855
     * (we needed a constant time version of multiply that was 'faster'
856
     * we could easily pass constantTime down to the comba code and
857
     * get it to skip the clamp... but here are assembler versions
858
     * which add comba to platforms that can't compile the normal
859
     * comba's imbedded assembler which would also need to change, so
860
     * for now we just skip comba when we are running constant time. */
861
3.60M
    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
862
1.73M
        if (MP_USED(a) == 4) {
863
7.39k
            s_mp_mul_comba_4(a, b, c);
864
7.39k
            goto CLEANUP;
865
7.39k
        }
866
1.72M
        if (MP_USED(a) == 8) {
867
597
            s_mp_mul_comba_8(a, b, c);
868
597
            goto CLEANUP;
869
597
        }
870
1.72M
        if (MP_USED(a) == 16) {
871
476k
            s_mp_mul_comba_16(a, b, c);
872
476k
            goto CLEANUP;
873
476k
        }
874
1.25M
        if (MP_USED(a) == 32) {
875
1.22M
            s_mp_mul_comba_32(a, b, c);
876
1.22M
            goto CLEANUP;
877
1.22M
        }
878
1.25M
    }
879
1.89M
#endif
880
881
1.89M
    pb = MP_DIGITS(b);
882
1.89M
    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
883
884
    /* Outer loop:  Digits of b */
885
1.89M
    useda = MP_USED(a);
886
1.89M
    usedb = MP_USED(b);
887
43.2M
    for (ib = 1; ib < usedb; ib++) {
888
41.3M
        mp_digit b_i = *pb++;
889
890
        /* Inner product:  Digits of a */
891
41.3M
        if (constantTime || b_i)
892
39.4M
            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
893
1.88M
        else
894
1.88M
            MP_DIGIT(c, ib + useda) = b_i;
895
41.3M
    }
896
897
1.89M
    if (!constantTime) {
898
1.87M
        s_mp_clamp(c);
899
1.87M
    }
900
901
1.89M
    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
902
1.87M
        SIGN(c) = ZPOS;
903
14.3k
    else
904
14.3k
        SIGN(c) = NEG;
905
906
3.60M
CLEANUP:
907
3.60M
    mp_clear(&tmp);
908
3.60M
    return res;
909
1.89M
} /* end smp_mulg() */
910
911
/* }}} */
912
913
/* {{{ mp_mul(a, b, c) */
914
915
/*
916
  mp_mul(a, b, c)
917
918
  Compute c = a * b.  All parameters may be identical.
919
 */
920
921
mp_err
922
mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
923
3.58M
{
924
3.58M
    return s_mp_mulg(a, b, c, 0);
925
3.58M
} /* end mp_mul() */
926
927
/* }}} */
928
929
/* {{{ mp_mulCT(a, b, c) */
930
931
/*
932
  mp_mulCT(a, b, c)
933
934
  Compute c = a * b. In constant time. Parameters may not be identical.
935
  NOTE: a and b may be modified.
936
 */
937
938
mp_err
939
mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
940
19.6k
{
941
19.6k
    mp_err res;
942
943
    /* make the multiply values fixed length so multiply
944
     * doesn't leak the length. at this point all the
945
     * values are blinded, but once we finish we want the
946
     * output size to be hidden (so no clamping the out put) */
947
19.6k
    MP_CHECKOK(s_mp_pad(a, setSize));
948
19.6k
    MP_CHECKOK(s_mp_pad(b, setSize));
949
19.6k
    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
950
19.6k
    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
951
19.6k
CLEANUP:
952
19.6k
    return res;
953
19.6k
} /* end mp_mulCT() */
954
955
/* }}} */
956
957
/* {{{ mp_sqr(a, sqr) */
958
959
#if MP_SQUARE
960
/*
961
  Computes the square of a.  This can be done more
962
  efficiently than a general multiplication, because many of the
963
  computation steps are redundant when squaring.  The inner product
964
  step is a bit more complicated, but we save a fair number of
965
  iterations of the multiplication loop.
966
 */
967
968
/* sqr = a^2;   Caller provides both a and tmp; */
969
mp_err
970
mp_sqr(const mp_int *a, mp_int *sqr)
971
52.5M
{
972
52.5M
    mp_digit *pa;
973
52.5M
    mp_digit d;
974
52.5M
    mp_err res;
975
52.5M
    mp_size ix;
976
52.5M
    mp_int tmp;
977
52.5M
    int count;
978
979
52.5M
    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
980
981
52.5M
    if (a == sqr) {
982
0
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
983
0
            return res;
984
0
        a = &tmp;
985
52.5M
    } else {
986
52.5M
        DIGITS(&tmp) = 0;
987
52.5M
        res = MP_OKAY;
988
52.5M
    }
989
990
52.5M
    ix = 2 * MP_USED(a);
991
52.5M
    if (ix > MP_ALLOC(sqr)) {
992
0
        MP_USED(sqr) = 1;
993
0
        MP_CHECKOK(s_mp_grow(sqr, ix));
994
0
    }
995
52.5M
    MP_USED(sqr) = ix;
996
52.5M
    MP_DIGIT(sqr, 0) = 0;
997
998
52.5M
#ifdef NSS_USE_COMBA
999
52.5M
    if (IS_POWER_OF_2(MP_USED(a))) {
1000
51.3M
        if (MP_USED(a) == 4) {
1001
2.40k
            s_mp_sqr_comba_4(a, sqr);
1002
2.40k
            goto CLEANUP;
1003
2.40k
        }
1004
51.3M
        if (MP_USED(a) == 8) {
1005
2.45k
            s_mp_sqr_comba_8(a, sqr);
1006
2.45k
            goto CLEANUP;
1007
2.45k
        }
1008
51.3M
        if (MP_USED(a) == 16) {
1009
43.2M
            s_mp_sqr_comba_16(a, sqr);
1010
43.2M
            goto CLEANUP;
1011
43.2M
        }
1012
8.13M
        if (MP_USED(a) == 32) {
1013
4.25M
            s_mp_sqr_comba_32(a, sqr);
1014
4.25M
            goto CLEANUP;
1015
4.25M
        }
1016
8.13M
    }
1017
5.03M
#endif
1018
1019
5.03M
    pa = MP_DIGITS(a);
1020
5.03M
    count = MP_USED(a) - 1;
1021
5.03M
    if (count > 0) {
1022
1.54M
        d = *pa++;
1023
1.54M
        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
1024
100M
        for (ix = 3; --count > 0; ix += 2) {
1025
98.6M
            d = *pa++;
1026
98.6M
            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
1027
98.6M
        }                                    /* for(ix ...) */
1028
1.54M
        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
1029
1030
        /* now sqr *= 2 */
1031
1.54M
        s_mp_mul_2(sqr);
1032
3.49M
    } else {
1033
3.49M
        MP_DIGIT(sqr, 1) = 0;
1034
3.49M
    }
1035
1036
    /* now add the squares of the digits of a to sqr. */
1037
5.03M
    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
1038
1039
5.03M
    SIGN(sqr) = ZPOS;
1040
5.03M
    s_mp_clamp(sqr);
1041
1042
52.5M
CLEANUP:
1043
52.5M
    mp_clear(&tmp);
1044
52.5M
    return res;
1045
1046
5.03M
} /* end mp_sqr() */
1047
#endif
1048
1049
/* }}} */
1050
1051
/* {{{ mp_div(a, b, q, r) */
1052
1053
/*
1054
  mp_div(a, b, q, r)
1055
1056
  Compute q = a / b and r = a mod b.  Input parameters may be re-used
1057
  as output parameters.  If q or r is NULL, that portion of the
1058
  computation will be discarded (although it will still be computed)
1059
 */
1060
mp_err
1061
mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1062
264k
{
1063
264k
    mp_err res;
1064
264k
    mp_int *pQ, *pR;
1065
264k
    mp_int qtmp, rtmp, btmp;
1066
264k
    int cmp;
1067
264k
    mp_sign signA;
1068
264k
    mp_sign signB;
1069
1070
264k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
1071
1072
264k
    signA = MP_SIGN(a);
1073
264k
    signB = MP_SIGN(b);
1074
1075
264k
    if (mp_cmp_z(b) == MP_EQ)
1076
1
        return MP_RANGE;
1077
1078
264k
    DIGITS(&qtmp) = 0;
1079
264k
    DIGITS(&rtmp) = 0;
1080
264k
    DIGITS(&btmp) = 0;
1081
1082
    /* Set up some temporaries... */
1083
264k
    if (!r || r == a || r == b) {
1084
225k
        MP_CHECKOK(mp_init_copy(&rtmp, a));
1085
225k
        pR = &rtmp;
1086
225k
    } else {
1087
38.6k
        MP_CHECKOK(mp_copy(a, r));
1088
38.6k
        pR = r;
1089
38.6k
    }
1090
1091
264k
    if (!q || q == a || q == b) {
1092
264k
        MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
1093
264k
        pQ = &qtmp;
1094
264k
    } else {
1095
0
        MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
1096
0
        pQ = q;
1097
0
        mp_zero(pQ);
1098
0
    }
1099
1100
    /*
1101
      If |a| <= |b|, we can compute the solution without division;
1102
      otherwise, we actually do the work required.
1103
     */
1104
264k
    if ((cmp = s_mp_cmp(a, b)) <= 0) {
1105
3.00k
        if (cmp) {
1106
            /* r was set to a above. */
1107
3.00k
            mp_zero(pQ);
1108
3.00k
        } else {
1109
0
            mp_set(pQ, 1);
1110
0
            mp_zero(pR);
1111
0
        }
1112
261k
    } else {
1113
261k
        MP_CHECKOK(mp_init_copy(&btmp, b));
1114
261k
        MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
1115
261k
    }
1116
1117
    /* Compute the signs for the output  */
1118
264k
    MP_SIGN(pR) = signA;        /* Sr = Sa              */
1119
    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1120
264k
    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1121
1122
264k
    if (s_mp_cmp_d(pQ, 0) == MP_EQ)
1123
3.00k
        SIGN(pQ) = ZPOS;
1124
264k
    if (s_mp_cmp_d(pR, 0) == MP_EQ)
1125
3.74k
        SIGN(pR) = ZPOS;
1126
1127
    /* Copy output, if it is needed      */
1128
264k
    if (q && q != pQ)
1129
21.2k
        s_mp_exch(pQ, q);
1130
1131
264k
    if (r && r != pR)
1132
204k
        s_mp_exch(pR, r);
1133
1134
264k
CLEANUP:
1135
264k
    mp_clear(&btmp);
1136
264k
    mp_clear(&rtmp);
1137
264k
    mp_clear(&qtmp);
1138
1139
264k
    return res;
1140
1141
264k
} /* end mp_div() */
1142
1143
/* }}} */
1144
1145
/* {{{ mp_div_2d(a, d, q, r) */
1146
1147
mp_err
1148
mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1149
0
{
1150
0
    mp_err res;
1151
1152
0
    ARGCHK(a != NULL, MP_BADARG);
1153
1154
0
    if (q) {
1155
0
        if ((res = mp_copy(a, q)) != MP_OKAY)
1156
0
            return res;
1157
0
    }
1158
0
    if (r) {
1159
0
        if ((res = mp_copy(a, r)) != MP_OKAY)
1160
0
            return res;
1161
0
    }
1162
0
    if (q) {
1163
0
        s_mp_div_2d(q, d);
1164
0
    }
1165
0
    if (r) {
1166
0
        s_mp_mod_2d(r, d);
1167
0
    }
1168
1169
0
    return MP_OKAY;
1170
1171
0
} /* end mp_div_2d() */
1172
1173
/* }}} */
1174
1175
/* {{{ mp_expt(a, b, c) */
1176
1177
/*
1178
  mp_expt(a, b, c)
1179
1180
  Compute c = a ** b, that is, raise a to the b power.  Uses a
1181
  standard iterative square-and-multiply technique.
1182
 */
1183
1184
mp_err
1185
mp_expt(mp_int *a, mp_int *b, mp_int *c)
1186
0
{
1187
0
    mp_int s, x;
1188
0
    mp_err res;
1189
0
    mp_digit d;
1190
0
    unsigned int dig, bit;
1191
1192
0
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1193
1194
0
    if (mp_cmp_z(b) < 0)
1195
0
        return MP_RANGE;
1196
1197
0
    if ((res = mp_init(&s)) != MP_OKAY)
1198
0
        return res;
1199
1200
0
    mp_set(&s, 1);
1201
1202
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1203
0
        goto X;
1204
1205
    /* Loop over low-order digits in ascending order */
1206
0
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1207
0
        d = DIGIT(b, dig);
1208
1209
        /* Loop over bits of each non-maximal digit */
1210
0
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1211
0
            if (d & 1) {
1212
0
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1213
0
                    goto CLEANUP;
1214
0
            }
1215
1216
0
            d >>= 1;
1217
1218
0
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1219
0
                goto CLEANUP;
1220
0
        }
1221
0
    }
1222
1223
    /* Consider now the last digit... */
1224
0
    d = DIGIT(b, dig);
1225
1226
0
    while (d) {
1227
0
        if (d & 1) {
1228
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1229
0
                goto CLEANUP;
1230
0
        }
1231
1232
0
        d >>= 1;
1233
1234
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1235
0
            goto CLEANUP;
1236
0
    }
1237
1238
0
    if (mp_iseven(b))
1239
0
        SIGN(&s) = SIGN(a);
1240
1241
0
    res = mp_copy(&s, c);
1242
1243
0
CLEANUP:
1244
0
    mp_clear(&x);
1245
0
X:
1246
0
    mp_clear(&s);
1247
1248
0
    return res;
1249
1250
0
} /* end mp_expt() */
1251
1252
/* }}} */
1253
1254
/* {{{ mp_2expt(a, k) */
1255
1256
/* Compute a = 2^k */
1257
1258
mp_err
1259
mp_2expt(mp_int *a, mp_digit k)
1260
0
{
1261
0
    ARGCHK(a != NULL, MP_BADARG);
1262
1263
0
    return s_mp_2expt(a, k);
1264
1265
0
} /* end mp_2expt() */
1266
1267
/* }}} */
1268
1269
/* {{{ mp_mod(a, m, c) */
1270
1271
/*
1272
  mp_mod(a, m, c)
1273
1274
  Compute c = a (mod m).  Result will always be 0 <= c < m.
1275
 */
1276
1277
mp_err
1278
mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1279
148k
{
1280
148k
    mp_err res;
1281
148k
    int mag;
1282
1283
148k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1284
1285
148k
    if (SIGN(m) == NEG)
1286
102
        return MP_RANGE;
1287
1288
    /*
1289
     If |a| > m, we need to divide to get the remainder and take the
1290
     absolute value.
1291
1292
     If |a| < m, we don't need to do any division, just copy and adjust
1293
     the sign (if a is negative).
1294
1295
     If |a| == m, we can simply set the result to zero.
1296
1297
     This order is intended to minimize the average path length of the
1298
     comparison chain on common workloads -- the most frequent cases are
1299
     that |a| != m, so we do those first.
1300
     */
1301
148k
    if ((mag = s_mp_cmp(a, m)) > 0) {
1302
90.1k
        if ((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1303
1
            return res;
1304
1305
90.1k
        if (SIGN(c) == NEG) {
1306
473
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1307
0
                return res;
1308
473
        }
1309
1310
90.1k
    } else if (mag < 0) {
1311
58.1k
        if ((res = mp_copy(a, c)) != MP_OKAY)
1312
0
            return res;
1313
1314
58.1k
        if (mp_cmp_z(a) < 0) {
1315
8.46k
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1316
0
                return res;
1317
8.46k
        }
1318
1319
58.1k
    } else {
1320
2
        mp_zero(c);
1321
2
    }
1322
1323
148k
    return MP_OKAY;
1324
1325
148k
} /* end mp_mod() */
1326
1327
/* }}} */
1328
1329
/* {{{ s_mp_subCT_d(a, b, borrow, c) */
1330
1331
/*
1332
  s_mp_subCT_d(a, b, borrow, c)
1333
1334
  Compute c = (a -b) - subtract in constant time. returns borrow
1335
 */
1336
mp_digit
1337
s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
1338
628k
{
1339
628k
    *ret = a - b - borrow;
1340
628k
    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
1341
628k
} /*  s_mp_subCT_d() */
1342
1343
/* }}} */
1344
1345
/* {{{ mp_subCT(a, b, ret, borrow) */
1346
1347
/* return ret= a - b and borrow in borrow. done in constant time.
1348
 * b could be modified.
1349
 */
1350
mp_err
1351
mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
1352
19.6k
{
1353
19.6k
    mp_size used_a = MP_USED(a);
1354
19.6k
    mp_size i;
1355
19.6k
    mp_err res;
1356
1357
19.6k
    MP_CHECKOK(s_mp_pad(b, used_a));
1358
19.6k
    MP_CHECKOK(s_mp_pad(ret, used_a));
1359
19.6k
    *borrow = 0;
1360
648k
    for (i = 0; i < used_a; i++) {
1361
628k
        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
1362
628k
                               &MP_DIGIT(ret, i));
1363
628k
    }
1364
1365
19.6k
    res = MP_OKAY;
1366
19.6k
CLEANUP:
1367
19.6k
    return res;
1368
19.6k
} /*  end mp_subCT() */
1369
1370
/* }}} */
1371
1372
/* {{{ mp_selectCT(cond, a, b, ret) */
1373
1374
/*
1375
 * return ret= cond ? a : b; cond should be either 0 or 1
1376
 */
1377
mp_err
1378
mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
1379
19.6k
{
1380
19.6k
    mp_size used_a = MP_USED(a);
1381
19.6k
    mp_err res;
1382
19.6k
    mp_size i;
1383
1384
19.6k
    cond *= MP_DIGIT_MAX;
1385
1386
    /* we currently require these to be equal on input,
1387
     * we could use pad to extend one of them, but that might
1388
     * leak data as it wouldn't be constant time */
1389
19.6k
    if (used_a != MP_USED(b)) {
1390
0
        return MP_BADARG;
1391
0
    }
1392
1393
19.6k
    MP_CHECKOK(s_mp_pad(ret, used_a));
1394
648k
    for (i = 0; i < used_a; i++) {
1395
628k
        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
1396
628k
    }
1397
19.6k
    res = MP_OKAY;
1398
19.6k
CLEANUP:
1399
19.6k
    return res;
1400
19.6k
} /* end mp_selectCT() */
1401
1402
/* {{{ mp_reduceCT(a, m, c) */
1403
1404
/*
1405
  mp_reduceCT(a, m, c)
1406
1407
  Compute c = aR^-1 (mod m) in constant time.
1408
   input should be in montgomery form. If input is the
1409
   result of a montgomery multiply then out put will be
1410
   in mongomery form.
1411
   Result will be reduced to MP_USED(m), but not be
1412
   clamped.
1413
 */
1414
1415
mp_err
1416
mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
1417
19.6k
{
1418
19.6k
    mp_size used_m = MP_USED(m);
1419
19.6k
    mp_size used_c = used_m * 2 + 1;
1420
19.6k
    mp_digit *m_digits, *c_digits;
1421
19.6k
    mp_size i;
1422
19.6k
    mp_digit borrow, carry;
1423
19.6k
    mp_err res;
1424
19.6k
    mp_int sub;
1425
1426
19.6k
    MP_DIGITS(&sub) = 0;
1427
19.6k
    MP_CHECKOK(mp_init_size(&sub, used_m));
1428
1429
19.6k
    if (a != c) {
1430
0
        MP_CHECKOK(mp_copy(a, c));
1431
0
    }
1432
19.6k
    MP_CHECKOK(s_mp_pad(c, used_c));
1433
19.6k
    m_digits = MP_DIGITS(m);
1434
19.6k
    c_digits = MP_DIGITS(c);
1435
648k
    for (i = 0; i < used_m; i++) {
1436
628k
        mp_digit m_i = MP_DIGIT(c, i) * n0i;
1437
628k
        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
1438
628k
    }
1439
19.6k
    s_mp_rshd(c, used_m);
1440
    /* MP_USED(c) should be used_m+1 with the high word being any carry
1441
     * from the previous multiply, save that carry and drop the high
1442
     * word for the substraction below */
1443
19.6k
    carry = MP_DIGIT(c, used_m);
1444
19.6k
    MP_DIGIT(c, used_m) = 0;
1445
19.6k
    MP_USED(c) = used_m;
1446
    /* mp_subCT wants c and m to be the same size, we've already
1447
     * guarrenteed that in the previous statement, so mp_subCT won't actually
1448
     * modify m, so it's safe to recast */
1449
19.6k
    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
1450
1451
    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
1452
19.6k
    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
1453
19.6k
    res = MP_OKAY;
1454
19.6k
CLEANUP:
1455
19.6k
    mp_clear(&sub);
1456
19.6k
    return res;
1457
19.6k
} /* end mp_reduceCT() */
1458
1459
/* }}} */
1460
1461
/* {{{ mp_mod_d(a, d, c) */
1462
1463
/*
1464
  mp_mod_d(a, d, c)
1465
1466
  Compute c = a (mod d).  Result will always be 0 <= c < d
1467
 */
1468
mp_err
1469
mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1470
0
{
1471
0
    mp_err res;
1472
0
    mp_digit rem;
1473
1474
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
1475
1476
0
    if (s_mp_cmp_d(a, d) > 0) {
1477
0
        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1478
0
            return res;
1479
1480
0
    } else {
1481
0
        if (SIGN(a) == NEG)
1482
0
            rem = d - DIGIT(a, 0);
1483
0
        else
1484
0
            rem = DIGIT(a, 0);
1485
0
    }
1486
1487
0
    if (c)
1488
0
        *c = rem;
1489
1490
0
    return MP_OKAY;
1491
1492
0
} /* end mp_mod_d() */
1493
1494
/* }}} */
1495
1496
/* }}} */
1497
1498
/*------------------------------------------------------------------------*/
1499
/* {{{ Modular arithmetic */
1500
1501
#if MP_MODARITH
1502
/* {{{ mp_addmod(a, b, m, c) */
1503
1504
/*
1505
  mp_addmod(a, b, m, c)
1506
1507
  Compute c = (a + b) mod m
1508
 */
1509
1510
mp_err
1511
mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1512
0
{
1513
0
    mp_err res;
1514
1515
0
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1516
1517
0
    if ((res = mp_add(a, b, c)) != MP_OKAY)
1518
0
        return res;
1519
0
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1520
0
        return res;
1521
1522
0
    return MP_OKAY;
1523
0
}
1524
1525
/* }}} */
1526
1527
/* {{{ mp_submod(a, b, m, c) */
1528
1529
/*
1530
  mp_submod(a, b, m, c)
1531
1532
  Compute c = (a - b) mod m
1533
 */
1534
1535
mp_err
1536
mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1537
19.6k
{
1538
19.6k
    mp_err res;
1539
1540
19.6k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1541
1542
19.6k
    if ((res = mp_sub(a, b, c)) != MP_OKAY)
1543
0
        return res;
1544
19.6k
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1545
0
        return res;
1546
1547
19.6k
    return MP_OKAY;
1548
19.6k
}
1549
1550
/* }}} */
1551
1552
/* {{{ mp_mulmod(a, b, m, c) */
1553
1554
/*
1555
  mp_mulmod(a, b, m, c)
1556
1557
  Compute c = (a * b) mod m
1558
 */
1559
1560
mp_err
1561
mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1562
51.4k
{
1563
51.4k
    mp_err res;
1564
1565
51.4k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1566
1567
51.4k
    if ((res = mp_mul(a, b, c)) != MP_OKAY)
1568
0
        return res;
1569
51.4k
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1570
103
        return res;
1571
1572
51.3k
    return MP_OKAY;
1573
51.4k
}
1574
1575
/* }}} */
1576
1577
/* {{{ mp_mulmontmodCT(a, b, m, c) */
1578
1579
/*
1580
  mp_mulmontmodCT(a, b, m, c)
1581
1582
  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
1583
  should be in montgomery form and the output is native. If both a and b
1584
  are in montgomery form, then the output will also be in montgomery form
1585
  and can be recovered with an mp_reduceCT call.
1586
  NOTE: a and b may be modified.
1587
 */
1588
1589
mp_err
1590
mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
1591
                mp_int *c)
1592
19.6k
{
1593
19.6k
    mp_err res;
1594
1595
19.6k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1596
1597
19.6k
    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
1598
0
        return res;
1599
1600
19.6k
    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
1601
0
        return res;
1602
1603
19.6k
    return MP_OKAY;
1604
19.6k
}
1605
1606
/* }}} */
1607
1608
/* {{{ mp_sqrmod(a, m, c) */
1609
1610
#if MP_SQUARE
1611
mp_err
1612
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1613
0
{
1614
0
    mp_err res;
1615
1616
0
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1617
1618
0
    if ((res = mp_sqr(a, c)) != MP_OKAY)
1619
0
        return res;
1620
0
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1621
0
        return res;
1622
1623
0
    return MP_OKAY;
1624
1625
0
} /* end mp_sqrmod() */
1626
#endif
1627
1628
/* }}} */
1629
1630
/* {{{ s_mp_exptmod(a, b, m, c) */
1631
1632
/*
1633
  s_mp_exptmod(a, b, m, c)
1634
1635
  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1636
  method with modular reductions at each step. (This is basically the
1637
  same code as mp_expt(), except for the addition of the reductions)
1638
1639
  The modular reductions are done using Barrett's algorithm (see
1640
  s_mp_reduce() below for details)
1641
 */
1642
1643
mp_err
1644
s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1645
21.2k
{
1646
21.2k
    mp_int s, x, mu;
1647
21.2k
    mp_err res;
1648
21.2k
    mp_digit d;
1649
21.2k
    unsigned int dig, bit;
1650
1651
21.2k
    ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
1652
1653
21.2k
    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1654
0
        return MP_RANGE;
1655
1656
21.2k
    if ((res = mp_init(&s)) != MP_OKAY)
1657
0
        return res;
1658
21.2k
    if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
1659
21.2k
        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1660
0
        goto X;
1661
21.2k
    if ((res = mp_init(&mu)) != MP_OKAY)
1662
0
        goto MU;
1663
1664
21.2k
    mp_set(&s, 1);
1665
1666
    /* mu = b^2k / m */
1667
21.2k
    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
1668
0
        goto CLEANUP;
1669
21.2k
    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
1670
0
        goto CLEANUP;
1671
21.2k
    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1672
0
        goto CLEANUP;
1673
1674
    /* Loop over digits of b in ascending order, except highest order */
1675
29.2k
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1676
8.07k
        d = DIGIT(b, dig);
1677
1678
        /* Loop over the bits of the lower-order digits */
1679
525k
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1680
517k
            if (d & 1) {
1681
263k
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1682
0
                    goto CLEANUP;
1683
263k
                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1684
0
                    goto CLEANUP;
1685
263k
            }
1686
1687
517k
            d >>= 1;
1688
1689
517k
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1690
0
                goto CLEANUP;
1691
517k
            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1692
0
                goto CLEANUP;
1693
517k
        }
1694
8.07k
    }
1695
1696
    /* Now do the last digit... */
1697
21.2k
    d = DIGIT(b, dig);
1698
1699
486k
    while (d) {
1700
465k
        if (d & 1) {
1701
228k
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1702
0
                goto CLEANUP;
1703
228k
            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1704
0
                goto CLEANUP;
1705
228k
        }
1706
1707
465k
        d >>= 1;
1708
1709
465k
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1710
0
            goto CLEANUP;
1711
465k
        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1712
0
            goto CLEANUP;
1713
465k
    }
1714
1715
21.2k
    s_mp_exch(&s, c);
1716
1717
21.2k
CLEANUP:
1718
21.2k
    mp_clear(&mu);
1719
21.2k
MU:
1720
21.2k
    mp_clear(&x);
1721
21.2k
X:
1722
21.2k
    mp_clear(&s);
1723
1724
21.2k
    return res;
1725
1726
21.2k
} /* end s_mp_exptmod() */
1727
1728
/* }}} */
1729
1730
/* {{{ mp_exptmod_d(a, d, m, c) */
1731
1732
mp_err
1733
mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1734
0
{
1735
0
    mp_int s, x;
1736
0
    mp_err res;
1737
1738
0
    ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
1739
1740
0
    if ((res = mp_init(&s)) != MP_OKAY)
1741
0
        return res;
1742
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1743
0
        goto X;
1744
1745
0
    mp_set(&s, 1);
1746
1747
0
    while (d != 0) {
1748
0
        if (d & 1) {
1749
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1750
0
                (res = mp_mod(&s, m, &s)) != MP_OKAY)
1751
0
                goto CLEANUP;
1752
0
        }
1753
1754
0
        d /= 2;
1755
1756
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY ||
1757
0
            (res = mp_mod(&x, m, &x)) != MP_OKAY)
1758
0
            goto CLEANUP;
1759
0
    }
1760
1761
0
    s_mp_exch(&s, c);
1762
1763
0
CLEANUP:
1764
0
    mp_clear(&x);
1765
0
X:
1766
0
    mp_clear(&s);
1767
1768
0
    return res;
1769
1770
0
} /* end mp_exptmod_d() */
1771
1772
/* }}} */
1773
#endif /* if MP_MODARITH */
1774
1775
/* }}} */
1776
1777
/*------------------------------------------------------------------------*/
1778
/* {{{ Comparison functions */
1779
1780
/* {{{ mp_cmp_z(a) */
1781
1782
/*
1783
  mp_cmp_z(a)
1784
1785
  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1786
 */
1787
1788
int
1789
mp_cmp_z(const mp_int *a)
1790
7.76M
{
1791
7.76M
    ARGMPCHK(a != NULL);
1792
1793
7.76M
    if (SIGN(a) == NEG)
1794
55.0k
        return MP_LT;
1795
7.71M
    else if (USED(a) == 1 && DIGIT(a, 0) == 0)
1796
416k
        return MP_EQ;
1797
7.29M
    else
1798
7.29M
        return MP_GT;
1799
1800
7.76M
} /* end mp_cmp_z() */
1801
1802
/* }}} */
1803
1804
/* {{{ mp_cmp_d(a, d) */
1805
1806
/*
1807
  mp_cmp_d(a, d)
1808
1809
  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1810
 */
1811
1812
int
1813
mp_cmp_d(const mp_int *a, mp_digit d)
1814
47.7k
{
1815
47.7k
    ARGCHK(a != NULL, MP_EQ);
1816
1817
47.7k
    if (SIGN(a) == NEG)
1818
0
        return MP_LT;
1819
1820
47.7k
    return s_mp_cmp_d(a, d);
1821
1822
47.7k
} /* end mp_cmp_d() */
1823
1824
/* }}} */
1825
1826
/* {{{ mp_cmp(a, b) */
1827
1828
int
1829
mp_cmp(const mp_int *a, const mp_int *b)
1830
15.6M
{
1831
15.6M
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1832
1833
15.6M
    if (SIGN(a) == SIGN(b)) {
1834
15.6M
        int mag;
1835
1836
15.6M
        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
1837
22.8k
            return MP_EQ;
1838
1839
15.5M
        if (SIGN(a) == ZPOS)
1840
15.5M
            return mag;
1841
0
        else
1842
0
            return -mag;
1843
1844
15.5M
    } else if (SIGN(a) == ZPOS) {
1845
0
        return MP_GT;
1846
0
    } else {
1847
0
        return MP_LT;
1848
0
    }
1849
1850
15.6M
} /* end mp_cmp() */
1851
1852
/* }}} */
1853
1854
/* {{{ mp_cmp_mag(a, b) */
1855
1856
/*
1857
  mp_cmp_mag(a, b)
1858
1859
  Compares |a| <=> |b|, and returns an appropriate comparison result
1860
 */
1861
1862
int
1863
mp_cmp_mag(const mp_int *a, const mp_int *b)
1864
0
{
1865
0
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1866
1867
0
    return s_mp_cmp(a, b);
1868
1869
0
} /* end mp_cmp_mag() */
1870
1871
/* }}} */
1872
1873
/* {{{ mp_isodd(a) */
1874
1875
/*
1876
  mp_isodd(a)
1877
1878
  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1879
 */
1880
int
1881
mp_isodd(const mp_int *a)
1882
121k
{
1883
121k
    ARGMPCHK(a != NULL);
1884
1885
121k
    return (int)(DIGIT(a, 0) & 1);
1886
1887
121k
} /* end mp_isodd() */
1888
1889
/* }}} */
1890
1891
/* {{{ mp_iseven(a) */
1892
1893
int
1894
mp_iseven(const mp_int *a)
1895
11.2k
{
1896
11.2k
    return !mp_isodd(a);
1897
1898
11.2k
} /* end mp_iseven() */
1899
1900
/* }}} */
1901
1902
/* }}} */
1903
1904
/*------------------------------------------------------------------------*/
1905
/* {{{ Number theoretic functions */
1906
1907
/* {{{ mp_gcd(a, b, c) */
1908
1909
/*
1910
  Computes the GCD using the constant-time algorithm
1911
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
1912
  "Fast constant-time gcd computation and modular inversion"
1913
 */
1914
mp_err
1915
mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1916
6.27k
{
1917
6.27k
    mp_err res;
1918
6.27k
    mp_digit cond = 0, mask = 0;
1919
6.27k
    mp_int g, temp, f;
1920
6.27k
    int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
1921
6.27k
    mp_size top, flen, glen;
1922
6.27k
    mp_int *clear[3];
1923
1924
6.27k
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1925
    /*
1926
    Early exit if either of the inputs is zero.
1927
    Caller is responsible for the proper handling of inputs.
1928
    */
1929
6.27k
    if (mp_cmp_z(a) == MP_EQ) {
1930
11
        res = mp_copy(b, c);
1931
11
        SIGN(c) = ZPOS;
1932
11
        return res;
1933
6.26k
    } else if (mp_cmp_z(b) == MP_EQ) {
1934
10
        res = mp_copy(a, c);
1935
10
        SIGN(c) = ZPOS;
1936
10
        return res;
1937
10
    }
1938
1939
6.25k
    MP_CHECKOK(mp_init(&temp));
1940
6.25k
    clear[++last] = &temp;
1941
6.25k
    MP_CHECKOK(mp_init_copy(&g, a));
1942
6.25k
    clear[++last] = &g;
1943
6.25k
    MP_CHECKOK(mp_init_copy(&f, b));
1944
6.25k
    clear[++last] = &f;
1945
1946
    /*
1947
    For even case compute the number of
1948
    shared powers of 2 in f and g.
1949
    */
1950
23.5k
    for (i = 0; i < USED(&f) && i < USED(&g); i++) {
1951
17.3k
        mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
1952
1.12M
        for (j = 0; j < MP_DIGIT_BIT; j++) {
1953
1.10M
            bit &= mask;
1954
1.10M
            shifts += bit;
1955
1.10M
            mask >>= 1;
1956
1.10M
        }
1957
17.3k
    }
1958
    /* Reduce to the odd case by removing the powers of 2. */
1959
6.25k
    s_mp_div_2d(&f, shifts);
1960
6.25k
    s_mp_div_2d(&g, shifts);
1961
1962
    /* Allocate to the size of largest mp_int. */
1963
6.25k
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
1964
6.25k
    MP_CHECKOK(s_mp_grow(&f, top));
1965
6.25k
    MP_CHECKOK(s_mp_grow(&g, top));
1966
6.25k
    MP_CHECKOK(s_mp_grow(&temp, top));
1967
1968
    /* Make sure f contains the odd value. */
1969
6.25k
    MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
1970
1971
    /* Upper bound for the total iterations. */
1972
6.25k
    flen = mpl_significant_bits(&f);
1973
6.25k
    glen = mpl_significant_bits(&g);
1974
6.25k
    m = 4 + 3 * ((flen >= glen) ? flen : glen);
1975
1976
#if defined(_MSC_VER)
1977
#pragma warning(push)
1978
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
1979
#endif
1980
1981
7.41M
    for (i = 0; i < m; i++) {
1982
        /* Step 1: conditional swap. */
1983
        /* Set cond if delta > 0 and g is odd. */
1984
7.41M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
1985
        /* If cond is set replace (delta,f) with (-delta,-f). */
1986
7.41M
        delta = (-cond & -delta) | ((cond - 1) & delta);
1987
7.41M
        SIGN(&f) ^= cond;
1988
        /* If cond is set swap f with g. */
1989
7.41M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
1990
1991
        /* Step 2: elemination. */
1992
        /* Update delta. */
1993
7.41M
        delta++;
1994
        /* If g is odd, right shift (g+f) else right shift g. */
1995
7.41M
        MP_CHECKOK(mp_add(&g, &f, &temp));
1996
7.41M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
1997
7.41M
        s_mp_div_2(&g);
1998
7.41M
    }
1999
2000
#if defined(_MSC_VER)
2001
#pragma warning(pop)
2002
#endif
2003
2004
    /* GCD is in f, take the absolute value. */
2005
6.25k
    SIGN(&f) = ZPOS;
2006
2007
    /* Add back the removed powers of 2. */
2008
6.25k
    MP_CHECKOK(s_mp_mul_2d(&f, shifts));
2009
2010
6.25k
    MP_CHECKOK(mp_copy(&f, c));
2011
2012
6.25k
CLEANUP:
2013
25.0k
    while (last >= 0)
2014
18.7k
        mp_clear(clear[last--]);
2015
6.25k
    return res;
2016
6.25k
} /* end mp_gcd() */
2017
2018
/* }}} */
2019
2020
/* {{{ mp_lcm(a, b, c) */
2021
2022
/* We compute the least common multiple using the rule:
2023
2024
   ab = [a, b](a, b)
2025
2026
   ... by computing the product, and dividing out the gcd.
2027
 */
2028
2029
mp_err
2030
mp_lcm(mp_int *a, mp_int *b, mp_int *c)
2031
0
{
2032
0
    mp_int gcd, prod;
2033
0
    mp_err res;
2034
2035
0
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
2036
2037
    /* Set up temporaries */
2038
0
    if ((res = mp_init(&gcd)) != MP_OKAY)
2039
0
        return res;
2040
0
    if ((res = mp_init(&prod)) != MP_OKAY)
2041
0
        goto GCD;
2042
2043
0
    if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
2044
0
        goto CLEANUP;
2045
0
    if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
2046
0
        goto CLEANUP;
2047
2048
0
    res = mp_div(&prod, &gcd, c, NULL);
2049
2050
0
CLEANUP:
2051
0
    mp_clear(&prod);
2052
0
GCD:
2053
0
    mp_clear(&gcd);
2054
2055
0
    return res;
2056
2057
0
} /* end mp_lcm() */
2058
2059
/* }}} */
2060
2061
/* {{{ mp_xgcd(a, b, g, x, y) */
2062
2063
/*
2064
  mp_xgcd(a, b, g, x, y)
2065
2066
  Compute g = (a, b) and values x and y satisfying Bezout's identity
2067
  (that is, ax + by = g).  This uses the binary extended GCD algorithm
2068
  based on the Stein algorithm used for mp_gcd()
2069
  See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
2070
 */
2071
2072
mp_err
2073
mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
2074
0
{
2075
0
    mp_int gx, xc, yc, u, v, A, B, C, D;
2076
0
    mp_int *clean[9];
2077
0
    mp_err res;
2078
0
    int last = -1;
2079
2080
0
    if (mp_cmp_z(b) == 0)
2081
0
        return MP_RANGE;
2082
2083
    /* Initialize all these variables we need */
2084
0
    MP_CHECKOK(mp_init(&u));
2085
0
    clean[++last] = &u;
2086
0
    MP_CHECKOK(mp_init(&v));
2087
0
    clean[++last] = &v;
2088
0
    MP_CHECKOK(mp_init(&gx));
2089
0
    clean[++last] = &gx;
2090
0
    MP_CHECKOK(mp_init(&A));
2091
0
    clean[++last] = &A;
2092
0
    MP_CHECKOK(mp_init(&B));
2093
0
    clean[++last] = &B;
2094
0
    MP_CHECKOK(mp_init(&C));
2095
0
    clean[++last] = &C;
2096
0
    MP_CHECKOK(mp_init(&D));
2097
0
    clean[++last] = &D;
2098
0
    MP_CHECKOK(mp_init_copy(&xc, a));
2099
0
    clean[++last] = &xc;
2100
0
    mp_abs(&xc, &xc);
2101
0
    MP_CHECKOK(mp_init_copy(&yc, b));
2102
0
    clean[++last] = &yc;
2103
0
    mp_abs(&yc, &yc);
2104
2105
0
    mp_set(&gx, 1);
2106
2107
    /* Divide by two until at least one of them is odd */
2108
0
    while (mp_iseven(&xc) && mp_iseven(&yc)) {
2109
0
        mp_size nx = mp_trailing_zeros(&xc);
2110
0
        mp_size ny = mp_trailing_zeros(&yc);
2111
0
        mp_size n = MP_MIN(nx, ny);
2112
0
        s_mp_div_2d(&xc, n);
2113
0
        s_mp_div_2d(&yc, n);
2114
0
        MP_CHECKOK(s_mp_mul_2d(&gx, n));
2115
0
    }
2116
2117
0
    MP_CHECKOK(mp_copy(&xc, &u));
2118
0
    MP_CHECKOK(mp_copy(&yc, &v));
2119
0
    mp_set(&A, 1);
2120
0
    mp_set(&D, 1);
2121
2122
    /* Loop through binary GCD algorithm */
2123
0
    do {
2124
0
        while (mp_iseven(&u)) {
2125
0
            s_mp_div_2(&u);
2126
2127
0
            if (mp_iseven(&A) && mp_iseven(&B)) {
2128
0
                s_mp_div_2(&A);
2129
0
                s_mp_div_2(&B);
2130
0
            } else {
2131
0
                MP_CHECKOK(mp_add(&A, &yc, &A));
2132
0
                s_mp_div_2(&A);
2133
0
                MP_CHECKOK(mp_sub(&B, &xc, &B));
2134
0
                s_mp_div_2(&B);
2135
0
            }
2136
0
        }
2137
2138
0
        while (mp_iseven(&v)) {
2139
0
            s_mp_div_2(&v);
2140
2141
0
            if (mp_iseven(&C) && mp_iseven(&D)) {
2142
0
                s_mp_div_2(&C);
2143
0
                s_mp_div_2(&D);
2144
0
            } else {
2145
0
                MP_CHECKOK(mp_add(&C, &yc, &C));
2146
0
                s_mp_div_2(&C);
2147
0
                MP_CHECKOK(mp_sub(&D, &xc, &D));
2148
0
                s_mp_div_2(&D);
2149
0
            }
2150
0
        }
2151
2152
0
        if (mp_cmp(&u, &v) >= 0) {
2153
0
            MP_CHECKOK(mp_sub(&u, &v, &u));
2154
0
            MP_CHECKOK(mp_sub(&A, &C, &A));
2155
0
            MP_CHECKOK(mp_sub(&B, &D, &B));
2156
0
        } else {
2157
0
            MP_CHECKOK(mp_sub(&v, &u, &v));
2158
0
            MP_CHECKOK(mp_sub(&C, &A, &C));
2159
0
            MP_CHECKOK(mp_sub(&D, &B, &D));
2160
0
        }
2161
0
    } while (mp_cmp_z(&u) != 0);
2162
2163
    /* copy results to output */
2164
0
    if (x)
2165
0
        MP_CHECKOK(mp_copy(&C, x));
2166
2167
0
    if (y)
2168
0
        MP_CHECKOK(mp_copy(&D, y));
2169
2170
0
    if (g)
2171
0
        MP_CHECKOK(mp_mul(&gx, &v, g));
2172
2173
0
CLEANUP:
2174
0
    while (last >= 0)
2175
0
        mp_clear(clean[last--]);
2176
2177
0
    return res;
2178
2179
0
} /* end mp_xgcd() */
2180
2181
/* }}} */
2182
2183
mp_size
2184
mp_trailing_zeros(const mp_int *mp)
2185
2.73k
{
2186
2.73k
    mp_digit d;
2187
2.73k
    mp_size n = 0;
2188
2.73k
    unsigned int ix;
2189
2190
2.73k
    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2191
0
        return n;
2192
2193
4.70k
    for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix)
2194
1.97k
        n += MP_DIGIT_BIT;
2195
2.73k
    if (!d)
2196
0
        return 0; /* shouldn't happen, but ... */
2197
2.73k
#if !defined(MP_USE_UINT_DIGIT)
2198
2.73k
    if (!(d & 0xffffffffU)) {
2199
917
        d >>= 32;
2200
917
        n += 32;
2201
917
    }
2202
2.73k
#endif
2203
2.73k
    if (!(d & 0xffffU)) {
2204
511
        d >>= 16;
2205
511
        n += 16;
2206
511
    }
2207
2.73k
    if (!(d & 0xffU)) {
2208
878
        d >>= 8;
2209
878
        n += 8;
2210
878
    }
2211
2.73k
    if (!(d & 0xfU)) {
2212
333
        d >>= 4;
2213
333
        n += 4;
2214
333
    }
2215
2.73k
    if (!(d & 0x3U)) {
2216
981
        d >>= 2;
2217
981
        n += 2;
2218
981
    }
2219
2.73k
    if (!(d & 0x1U)) {
2220
1.09k
        d >>= 1;
2221
1.09k
        n += 1;
2222
1.09k
    }
2223
2.73k
#if MP_ARGCHK == 2
2224
2.73k
    assert(0 != (d & 1));
2225
2.73k
#endif
2226
2.73k
    return n;
2227
2.73k
}
2228
2229
/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2230
** Returns k (positive) or error (negative).
2231
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2232
** by Richard Schroeppel (a.k.a. Captain Nemo).
2233
*/
2234
mp_err
2235
s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2236
0
{
2237
0
    mp_err res;
2238
0
    mp_err k = 0;
2239
0
    mp_int d, f, g;
2240
2241
0
    ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
2242
2243
0
    MP_DIGITS(&d) = 0;
2244
0
    MP_DIGITS(&f) = 0;
2245
0
    MP_DIGITS(&g) = 0;
2246
0
    MP_CHECKOK(mp_init(&d));
2247
0
    MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
2248
0
    MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
2249
2250
0
    mp_set(c, 1);
2251
0
    mp_zero(&d);
2252
2253
0
    if (mp_cmp_z(&f) == 0) {
2254
0
        res = MP_UNDEF;
2255
0
    } else
2256
0
        for (;;) {
2257
0
            int diff_sign;
2258
0
            while (mp_iseven(&f)) {
2259
0
                mp_size n = mp_trailing_zeros(&f);
2260
0
                if (!n) {
2261
0
                    res = MP_UNDEF;
2262
0
                    goto CLEANUP;
2263
0
                }
2264
0
                s_mp_div_2d(&f, n);
2265
0
                MP_CHECKOK(s_mp_mul_2d(&d, n));
2266
0
                k += n;
2267
0
            }
2268
0
            if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
2269
0
                res = k;
2270
0
                break;
2271
0
            }
2272
0
            diff_sign = mp_cmp(&f, &g);
2273
0
            if (diff_sign < 0) { /* f < g */
2274
0
                s_mp_exch(&f, &g);
2275
0
                s_mp_exch(c, &d);
2276
0
            } else if (diff_sign == 0) { /* f == g */
2277
0
                res = MP_UNDEF;          /* a and p are not relatively prime */
2278
0
                break;
2279
0
            }
2280
0
            if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
2281
0
                MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
2282
0
                MP_CHECKOK(mp_sub(c, &d, c));   /* c = c - d */
2283
0
            } else {
2284
0
                MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
2285
0
                MP_CHECKOK(mp_add(c, &d, c));   /* c = c + d */
2286
0
            }
2287
0
        }
2288
0
    if (res >= 0) {
2289
0
        if (mp_cmp_mag(c, p) >= 0) {
2290
0
            MP_CHECKOK(mp_div(c, p, NULL, c));
2291
0
        }
2292
0
        if (MP_SIGN(c) != MP_ZPOS) {
2293
0
            MP_CHECKOK(mp_add(c, p, c));
2294
0
        }
2295
0
        res = k;
2296
0
    }
2297
2298
0
CLEANUP:
2299
0
    mp_clear(&d);
2300
0
    mp_clear(&f);
2301
0
    mp_clear(&g);
2302
0
    return res;
2303
0
}
2304
2305
/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2306
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2307
** by Richard Schroeppel (a.k.a. Captain Nemo).
2308
*/
2309
mp_digit
2310
s_mp_invmod_radix(mp_digit P)
2311
88.2k
{
2312
88.2k
    mp_digit T = P;
2313
88.2k
    T *= 2 - (P * T);
2314
88.2k
    T *= 2 - (P * T);
2315
88.2k
    T *= 2 - (P * T);
2316
88.2k
    T *= 2 - (P * T);
2317
88.2k
#if !defined(MP_USE_UINT_DIGIT)
2318
88.2k
    T *= 2 - (P * T);
2319
88.2k
    T *= 2 - (P * T);
2320
88.2k
#endif
2321
88.2k
    return T;
2322
88.2k
}
2323
2324
/* Given c, k, and prime p, where a*c == 2**k (mod p),
2325
** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2326
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2327
** by Richard Schroeppel (a.k.a. Captain Nemo).
2328
*/
2329
mp_err
2330
s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2331
0
{
2332
0
    int k_orig = k;
2333
0
    mp_digit r;
2334
0
    mp_size ix;
2335
0
    mp_err res;
2336
2337
0
    if (mp_cmp_z(c) < 0) {           /* c < 0 */
2338
0
        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
2339
0
    } else {
2340
0
        MP_CHECKOK(mp_copy(c, x)); /* x = c */
2341
0
    }
2342
2343
    /* make sure x is large enough */
2344
0
    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2345
0
    ix = MP_MAX(ix, MP_USED(x));
2346
0
    MP_CHECKOK(s_mp_pad(x, ix));
2347
2348
0
    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
2349
2350
0
    for (ix = 0; k > 0; ix++) {
2351
0
        int j = MP_MIN(k, MP_DIGIT_BIT);
2352
0
        mp_digit v = r * MP_DIGIT(x, ix);
2353
0
        if (j < MP_DIGIT_BIT) {
2354
0
            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
2355
0
        }
2356
0
        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2357
0
        k -= j;
2358
0
    }
2359
0
    s_mp_clamp(x);
2360
0
    s_mp_div_2d(x, k_orig);
2361
0
    res = MP_OKAY;
2362
2363
0
CLEANUP:
2364
0
    return res;
2365
0
}
2366
2367
/*
2368
  Computes the modular inverse using the constant-time algorithm
2369
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
2370
  "Fast constant-time gcd computation and modular inversion"
2371
 */
2372
mp_err
2373
s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2374
3.25k
{
2375
3.25k
    mp_err res;
2376
3.25k
    mp_digit cond = 0;
2377
3.25k
    mp_int g, f, v, r, temp;
2378
3.25k
    int i, its, delta = 1, last = -1;
2379
3.25k
    mp_size top, flen, glen;
2380
3.25k
    mp_int *clear[6];
2381
2382
3.25k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2383
    /* Check for invalid inputs. */
2384
3.25k
    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
2385
0
        return MP_RANGE;
2386
2387
3.25k
    if (a == m || mp_iseven(m))
2388
0
        return MP_UNDEF;
2389
2390
3.25k
    MP_CHECKOK(mp_init(&temp));
2391
3.25k
    clear[++last] = &temp;
2392
3.25k
    MP_CHECKOK(mp_init(&v));
2393
3.25k
    clear[++last] = &v;
2394
3.25k
    MP_CHECKOK(mp_init(&r));
2395
3.25k
    clear[++last] = &r;
2396
3.25k
    MP_CHECKOK(mp_init_copy(&g, a));
2397
3.25k
    clear[++last] = &g;
2398
3.25k
    MP_CHECKOK(mp_init_copy(&f, m));
2399
3.25k
    clear[++last] = &f;
2400
2401
3.25k
    mp_set(&v, 0);
2402
3.25k
    mp_set(&r, 1);
2403
2404
    /* Allocate to the size of largest mp_int. */
2405
3.25k
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
2406
3.25k
    MP_CHECKOK(s_mp_grow(&f, top));
2407
3.25k
    MP_CHECKOK(s_mp_grow(&g, top));
2408
3.25k
    MP_CHECKOK(s_mp_grow(&temp, top));
2409
3.25k
    MP_CHECKOK(s_mp_grow(&v, top));
2410
3.25k
    MP_CHECKOK(s_mp_grow(&r, top));
2411
2412
    /* Upper bound for the total iterations. */
2413
3.25k
    flen = mpl_significant_bits(&f);
2414
3.25k
    glen = mpl_significant_bits(&g);
2415
3.25k
    its = 4 + 3 * ((flen >= glen) ? flen : glen);
2416
2417
#if defined(_MSC_VER)
2418
#pragma warning(push)
2419
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2420
#endif
2421
2422
3.02M
    for (i = 0; i < its; i++) {
2423
        /* Step 1: conditional swap. */
2424
        /* Set cond if delta > 0 and g is odd. */
2425
3.02M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
2426
        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
2427
3.02M
        delta = (-cond & -delta) | ((cond - 1) & delta);
2428
3.02M
        SIGN(&f) ^= cond;
2429
3.02M
        SIGN(&v) ^= cond;
2430
        /* If cond is set swap (f,v) with (g,r). */
2431
3.02M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
2432
3.02M
        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
2433
2434
        /* Step 2: elemination. */
2435
        /* Update delta */
2436
3.02M
        delta++;
2437
        /* If g is odd replace r with (r+v). */
2438
3.02M
        MP_CHECKOK(mp_add(&r, &v, &temp));
2439
3.02M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
2440
        /* If g is odd, right shift (g+f) else right shift g. */
2441
3.02M
        MP_CHECKOK(mp_add(&g, &f, &temp));
2442
3.02M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
2443
3.02M
        s_mp_div_2(&g);
2444
        /*
2445
        If r is even, right shift it.
2446
        If r is odd, right shift (r+m) which is even because m is odd.
2447
        We want the result modulo m so adding in multiples of m here vanish.
2448
        */
2449
3.02M
        MP_CHECKOK(mp_add(&r, m, &temp));
2450
3.02M
        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
2451
3.02M
        s_mp_div_2(&r);
2452
3.02M
    }
2453
2454
#if defined(_MSC_VER)
2455
#pragma warning(pop)
2456
#endif
2457
2458
    /* We have the inverse in v, propagate sign from f. */
2459
3.25k
    SIGN(&v) ^= SIGN(&f);
2460
    /* GCD is in f, take the absolute value. */
2461
3.25k
    SIGN(&f) = ZPOS;
2462
2463
    /* If gcd != 1, not invertible. */
2464
3.25k
    if (mp_cmp_d(&f, 1) != MP_EQ) {
2465
174
        res = MP_UNDEF;
2466
174
        goto CLEANUP;
2467
174
    }
2468
2469
    /* Return inverse modulo m. */
2470
3.07k
    MP_CHECKOK(mp_mod(&v, m, c));
2471
2472
3.25k
CLEANUP:
2473
19.5k
    while (last >= 0)
2474
16.2k
        mp_clear(clear[last--]);
2475
3.25k
    return res;
2476
3.07k
}
2477
2478
/* Known good algorithm for computing modular inverse.  But slow. */
2479
mp_err
2480
mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2481
0
{
2482
0
    mp_int g, x;
2483
0
    mp_err res;
2484
2485
0
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2486
2487
0
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2488
0
        return MP_RANGE;
2489
2490
0
    MP_DIGITS(&g) = 0;
2491
0
    MP_DIGITS(&x) = 0;
2492
0
    MP_CHECKOK(mp_init(&x));
2493
0
    MP_CHECKOK(mp_init(&g));
2494
2495
0
    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
2496
2497
0
    if (mp_cmp_d(&g, 1) != MP_EQ) {
2498
0
        res = MP_UNDEF;
2499
0
        goto CLEANUP;
2500
0
    }
2501
2502
0
    res = mp_mod(&x, m, c);
2503
0
    SIGN(c) = SIGN(a);
2504
2505
0
CLEANUP:
2506
0
    mp_clear(&x);
2507
0
    mp_clear(&g);
2508
2509
0
    return res;
2510
0
}
2511
2512
/* modular inverse where modulus is 2**k. */
2513
/* c = a**-1 mod 2**k */
2514
mp_err
2515
s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2516
5.24k
{
2517
5.24k
    mp_err res;
2518
5.24k
    mp_size ix = k + 4;
2519
5.24k
    mp_int t0, t1, val, tmp, two2k;
2520
2521
5.24k
    static const mp_digit d2 = 2;
2522
5.24k
    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2523
2524
5.24k
    if (mp_iseven(a))
2525
0
        return MP_UNDEF;
2526
2527
#if defined(_MSC_VER)
2528
#pragma warning(push)
2529
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2530
#endif
2531
5.24k
    if (k <= MP_DIGIT_BIT) {
2532
2.65k
        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
2533
        /* propagate the sign from mp_int */
2534
2.65k
        i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
2535
2.65k
        if (k < MP_DIGIT_BIT)
2536
2.58k
            i &= ((mp_digit)1 << k) - (mp_digit)1;
2537
2.65k
        mp_set(c, i);
2538
2.65k
        return MP_OKAY;
2539
2.65k
    }
2540
#if defined(_MSC_VER)
2541
#pragma warning(pop)
2542
#endif
2543
2544
2.58k
    MP_DIGITS(&t0) = 0;
2545
2.58k
    MP_DIGITS(&t1) = 0;
2546
2.58k
    MP_DIGITS(&val) = 0;
2547
2.58k
    MP_DIGITS(&tmp) = 0;
2548
2.58k
    MP_DIGITS(&two2k) = 0;
2549
2.58k
    MP_CHECKOK(mp_init_copy(&val, a));
2550
2.58k
    s_mp_mod_2d(&val, k);
2551
2.58k
    MP_CHECKOK(mp_init_copy(&t0, &val));
2552
2.58k
    MP_CHECKOK(mp_init_copy(&t1, &t0));
2553
2.58k
    MP_CHECKOK(mp_init(&tmp));
2554
2.58k
    MP_CHECKOK(mp_init(&two2k));
2555
2.58k
    MP_CHECKOK(s_mp_2expt(&two2k, k));
2556
12.8k
    do {
2557
12.8k
        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
2558
12.8k
        MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
2559
12.8k
        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
2560
12.8k
        s_mp_mod_2d(&t1, k);
2561
25.6k
        while (MP_SIGN(&t1) != MP_ZPOS) {
2562
12.8k
            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
2563
12.8k
        }
2564
12.8k
        if (mp_cmp(&t1, &t0) == MP_EQ)
2565
2.58k
            break;
2566
10.2k
        MP_CHECKOK(mp_copy(&t1, &t0));
2567
10.2k
    } while (--ix > 0);
2568
2.58k
    if (!ix) {
2569
0
        res = MP_UNDEF;
2570
2.58k
    } else {
2571
2.58k
        mp_exch(c, &t1);
2572
2.58k
    }
2573
2574
2.58k
CLEANUP:
2575
2.58k
    mp_clear(&t0);
2576
2.58k
    mp_clear(&t1);
2577
2.58k
    mp_clear(&val);
2578
2.58k
    mp_clear(&tmp);
2579
2.58k
    mp_clear(&two2k);
2580
2.58k
    return res;
2581
2.58k
}
2582
2583
mp_err
2584
s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2585
2.78k
{
2586
2.78k
    mp_err res;
2587
2.78k
    mp_size k;
2588
2.78k
    mp_int oddFactor, evenFactor; /* factors of the modulus */
2589
2.78k
    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2590
2.78k
    mp_int C2, tmp1, tmp2;
2591
2592
2.78k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2593
2594
    /*static const mp_digit d1 = 1; */
2595
    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2596
2597
2.78k
    if ((res = s_mp_ispow2(m)) >= 0) {
2598
54
        k = res;
2599
54
        return s_mp_invmod_2d(a, k, c);
2600
54
    }
2601
2.73k
    MP_DIGITS(&oddFactor) = 0;
2602
2.73k
    MP_DIGITS(&evenFactor) = 0;
2603
2.73k
    MP_DIGITS(&oddPart) = 0;
2604
2.73k
    MP_DIGITS(&evenPart) = 0;
2605
2.73k
    MP_DIGITS(&C2) = 0;
2606
2.73k
    MP_DIGITS(&tmp1) = 0;
2607
2.73k
    MP_DIGITS(&tmp2) = 0;
2608
2609
2.73k
    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
2610
2.73k
    MP_CHECKOK(mp_init(&evenFactor));
2611
2.73k
    MP_CHECKOK(mp_init(&oddPart));
2612
2.73k
    MP_CHECKOK(mp_init(&evenPart));
2613
2.73k
    MP_CHECKOK(mp_init(&C2));
2614
2.73k
    MP_CHECKOK(mp_init(&tmp1));
2615
2.73k
    MP_CHECKOK(mp_init(&tmp2));
2616
2617
2.73k
    k = mp_trailing_zeros(m);
2618
2.73k
    s_mp_div_2d(&oddFactor, k);
2619
2.73k
    MP_CHECKOK(s_mp_2expt(&evenFactor, k));
2620
2621
    /* compute a**-1 mod oddFactor. */
2622
2.73k
    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
2623
    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2624
2.59k
    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
2625
2626
    /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2627
    /* let m1 = oddFactor,  v1 = oddPart,
2628
     * let m2 = evenFactor, v2 = evenPart.
2629
     */
2630
2631
    /* Compute C2 = m1**-1 mod m2. */
2632
2.59k
    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
2633
2634
    /* compute u = (v2 - v1)*C2 mod m2 */
2635
2.59k
    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
2636
2.59k
    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
2637
2.59k
    s_mp_mod_2d(&tmp2, k);
2638
4.46k
    while (MP_SIGN(&tmp2) != MP_ZPOS) {
2639
1.87k
        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
2640
1.87k
    }
2641
2642
    /* compute answer = v1 + u*m1 */
2643
2.59k
    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
2644
2.59k
    MP_CHECKOK(mp_add(&oddPart, c, c));
2645
    /* not sure this is necessary, but it's low cost if not. */
2646
2.59k
    MP_CHECKOK(mp_mod(c, m, c));
2647
2648
2.73k
CLEANUP:
2649
2.73k
    mp_clear(&oddFactor);
2650
2.73k
    mp_clear(&evenFactor);
2651
2.73k
    mp_clear(&oddPart);
2652
2.73k
    mp_clear(&evenPart);
2653
2.73k
    mp_clear(&C2);
2654
2.73k
    mp_clear(&tmp1);
2655
2.73k
    mp_clear(&tmp2);
2656
2.73k
    return res;
2657
2.59k
}
2658
2659
/* {{{ mp_invmod(a, m, c) */
2660
2661
/*
2662
  mp_invmod(a, m, c)
2663
2664
  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2665
  This is equivalent to the question of whether (a, m) = 1.  If not,
2666
  MP_UNDEF is returned, and there is no inverse.
2667
 */
2668
2669
mp_err
2670
mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2671
3.30k
{
2672
3.30k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2673
2674
3.30k
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2675
0
        return MP_RANGE;
2676
2677
3.30k
    if (mp_isodd(m)) {
2678
519
        return s_mp_invmod_odd_m(a, m, c);
2679
519
    }
2680
2.79k
    if (mp_iseven(a))
2681
4
        return MP_UNDEF; /* not invertable */
2682
2683
2.78k
    return s_mp_invmod_even_m(a, m, c);
2684
2685
2.79k
} /* end mp_invmod() */
2686
2687
/* }}} */
2688
2689
/* }}} */
2690
2691
/*------------------------------------------------------------------------*/
2692
/* {{{ mp_print(mp, ofp) */
2693
2694
#if MP_IOFUNC
2695
/*
2696
  mp_print(mp, ofp)
2697
2698
  Print a textual representation of the given mp_int on the output
2699
  stream 'ofp'.  Output is generated using the internal radix.
2700
 */
2701
2702
void
2703
mp_print(mp_int *mp, FILE *ofp)
2704
{
2705
    int ix;
2706
2707
    if (mp == NULL || ofp == NULL)
2708
        return;
2709
2710
    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2711
2712
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2713
        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2714
    }
2715
2716
} /* end mp_print() */
2717
2718
#endif /* if MP_IOFUNC */
2719
2720
/* }}} */
2721
2722
/*------------------------------------------------------------------------*/
2723
/* {{{ More I/O Functions */
2724
2725
/* {{{ mp_read_raw(mp, str, len) */
2726
2727
/*
2728
   mp_read_raw(mp, str, len)
2729
2730
   Read in a raw value (base 256) into the given mp_int
2731
 */
2732
2733
mp_err
2734
mp_read_raw(mp_int *mp, char *str, int len)
2735
0
{
2736
0
    int ix;
2737
0
    mp_err res;
2738
0
    unsigned char *ustr = (unsigned char *)str;
2739
2740
0
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2741
2742
0
    mp_zero(mp);
2743
2744
    /* Read the rest of the digits */
2745
0
    for (ix = 1; ix < len; ix++) {
2746
0
        if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2747
0
            return res;
2748
0
        if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2749
0
            return res;
2750
0
    }
2751
2752
    /* Get sign from first byte */
2753
0
    if (ustr[0])
2754
0
        SIGN(mp) = NEG;
2755
0
    else
2756
0
        SIGN(mp) = ZPOS;
2757
2758
0
    return MP_OKAY;
2759
2760
0
} /* end mp_read_raw() */
2761
2762
/* }}} */
2763
2764
/* {{{ mp_raw_size(mp) */
2765
2766
int
2767
mp_raw_size(mp_int *mp)
2768
0
{
2769
0
    ARGCHK(mp != NULL, 0);
2770
2771
0
    return (USED(mp) * sizeof(mp_digit)) + 1;
2772
2773
0
} /* end mp_raw_size() */
2774
2775
/* }}} */
2776
2777
/* {{{ mp_toraw(mp, str) */
2778
2779
mp_err
2780
mp_toraw(mp_int *mp, char *str)
2781
0
{
2782
0
    int ix, jx, pos = 1;
2783
2784
0
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2785
2786
0
    str[0] = (char)SIGN(mp);
2787
2788
    /* Iterate over each digit... */
2789
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2790
0
        mp_digit d = DIGIT(mp, ix);
2791
2792
        /* Unpack digit bytes, high order first */
2793
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2794
0
            str[pos++] = (char)(d >> (jx * CHAR_BIT));
2795
0
        }
2796
0
    }
2797
2798
0
    return MP_OKAY;
2799
2800
0
} /* end mp_toraw() */
2801
2802
/* }}} */
2803
2804
/* {{{ mp_read_radix(mp, str, radix) */
2805
2806
/*
2807
  mp_read_radix(mp, str, radix)
2808
2809
  Read an integer from the given string, and set mp to the resulting
2810
  value.  The input is presumed to be in base 10.  Leading non-digit
2811
  characters are ignored, and the function reads until a non-digit
2812
  character or the end of the string.
2813
 */
2814
2815
mp_err
2816
mp_read_radix(mp_int *mp, const char *str, int radix)
2817
0
{
2818
0
    int ix = 0, val = 0;
2819
0
    mp_err res;
2820
0
    mp_sign sig = ZPOS;
2821
2822
0
    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2823
0
           MP_BADARG);
2824
2825
0
    mp_zero(mp);
2826
2827
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2828
0
    while (str[ix] &&
2829
0
           (s_mp_tovalue(str[ix], radix) < 0) &&
2830
0
           str[ix] != '-' &&
2831
0
           str[ix] != '+') {
2832
0
        ++ix;
2833
0
    }
2834
2835
0
    if (str[ix] == '-') {
2836
0
        sig = NEG;
2837
0
        ++ix;
2838
0
    } else if (str[ix] == '+') {
2839
0
        sig = ZPOS; /* this is the default anyway... */
2840
0
        ++ix;
2841
0
    }
2842
2843
0
    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2844
0
        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2845
0
            return res;
2846
0
        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
2847
0
            return res;
2848
0
        ++ix;
2849
0
    }
2850
2851
0
    if (s_mp_cmp_d(mp, 0) == MP_EQ)
2852
0
        SIGN(mp) = ZPOS;
2853
0
    else
2854
0
        SIGN(mp) = sig;
2855
2856
0
    return MP_OKAY;
2857
2858
0
} /* end mp_read_radix() */
2859
2860
mp_err
2861
mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
2862
0
{
2863
0
    int radix = default_radix;
2864
0
    int cx;
2865
0
    mp_sign sig = ZPOS;
2866
0
    mp_err res;
2867
2868
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2869
0
    while ((cx = *str) != 0 &&
2870
0
           (s_mp_tovalue(cx, radix) < 0) &&
2871
0
           cx != '-' &&
2872
0
           cx != '+') {
2873
0
        ++str;
2874
0
    }
2875
2876
0
    if (cx == '-') {
2877
0
        sig = NEG;
2878
0
        ++str;
2879
0
    } else if (cx == '+') {
2880
0
        sig = ZPOS; /* this is the default anyway... */
2881
0
        ++str;
2882
0
    }
2883
2884
0
    if (str[0] == '0') {
2885
0
        if ((str[1] | 0x20) == 'x') {
2886
0
            radix = 16;
2887
0
            str += 2;
2888
0
        } else {
2889
0
            radix = 8;
2890
0
            str++;
2891
0
        }
2892
0
    }
2893
0
    res = mp_read_radix(a, str, radix);
2894
0
    if (res == MP_OKAY) {
2895
0
        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2896
0
    }
2897
0
    return res;
2898
0
}
2899
2900
/* }}} */
2901
2902
/* {{{ mp_radix_size(mp, radix) */
2903
2904
int
2905
mp_radix_size(mp_int *mp, int radix)
2906
0
{
2907
0
    int bits;
2908
2909
0
    if (!mp || radix < 2 || radix > MAX_RADIX)
2910
0
        return 0;
2911
2912
0
    bits = USED(mp) * DIGIT_BIT - 1;
2913
2914
0
    return SIGN(mp) + s_mp_outlen(bits, radix);
2915
2916
0
} /* end mp_radix_size() */
2917
2918
/* }}} */
2919
2920
/* {{{ mp_toradix(mp, str, radix) */
2921
2922
mp_err
2923
mp_toradix(mp_int *mp, char *str, int radix)
2924
0
{
2925
0
    int ix, pos = 0;
2926
2927
0
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2928
0
    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2929
2930
0
    if (mp_cmp_z(mp) == MP_EQ) {
2931
0
        str[0] = '0';
2932
0
        str[1] = '\0';
2933
0
    } else {
2934
0
        mp_err res;
2935
0
        mp_int tmp;
2936
0
        mp_sign sgn;
2937
0
        mp_digit rem, rdx = (mp_digit)radix;
2938
0
        char ch;
2939
2940
0
        if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2941
0
            return res;
2942
2943
        /* Save sign for later, and take absolute value */
2944
0
        sgn = SIGN(&tmp);
2945
0
        SIGN(&tmp) = ZPOS;
2946
2947
        /* Generate output digits in reverse order      */
2948
0
        while (mp_cmp_z(&tmp) != 0) {
2949
0
            if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2950
0
                mp_clear(&tmp);
2951
0
                return res;
2952
0
            }
2953
2954
            /* Generate digits, use capital letters */
2955
0
            ch = s_mp_todigit(rem, radix, 0);
2956
2957
0
            str[pos++] = ch;
2958
0
        }
2959
2960
        /* Add - sign if original value was negative */
2961
0
        if (sgn == NEG)
2962
0
            str[pos++] = '-';
2963
2964
        /* Add trailing NUL to end the string        */
2965
0
        str[pos--] = '\0';
2966
2967
        /* Reverse the digits and sign indicator     */
2968
0
        ix = 0;
2969
0
        while (ix < pos) {
2970
0
            char tmpc = str[ix];
2971
2972
0
            str[ix] = str[pos];
2973
0
            str[pos] = tmpc;
2974
0
            ++ix;
2975
0
            --pos;
2976
0
        }
2977
2978
0
        mp_clear(&tmp);
2979
0
    }
2980
2981
0
    return MP_OKAY;
2982
2983
0
} /* end mp_toradix() */
2984
2985
/* }}} */
2986
2987
/* {{{ mp_tovalue(ch, r) */
2988
2989
int
2990
mp_tovalue(char ch, int r)
2991
0
{
2992
0
    return s_mp_tovalue(ch, r);
2993
2994
0
} /* end mp_tovalue() */
2995
2996
/* }}} */
2997
2998
/* }}} */
2999
3000
/* {{{ mp_strerror(ec) */
3001
3002
/*
3003
  mp_strerror(ec)
3004
3005
  Return a string describing the meaning of error code 'ec'.  The
3006
  string returned is allocated in static memory, so the caller should
3007
  not attempt to modify or free the memory associated with this
3008
  string.
3009
 */
3010
const char *
3011
mp_strerror(mp_err ec)
3012
0
{
3013
0
    int aec = (ec < 0) ? -ec : ec;
3014
3015
    /* Code values are negative, so the senses of these comparisons
3016
     are accurate */
3017
0
    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
3018
0
        return mp_err_string[0]; /* unknown error code */
3019
0
    } else {
3020
0
        return mp_err_string[aec + 1];
3021
0
    }
3022
3023
0
} /* end mp_strerror() */
3024
3025
/* }}} */
3026
3027
/*========================================================================*/
3028
/*------------------------------------------------------------------------*/
3029
/* Static function definitions (internal use only)                        */
3030
3031
/* {{{ Memory management */
3032
3033
/* {{{ s_mp_grow(mp, min) */
3034
3035
/* Make sure there are at least 'min' digits allocated to mp              */
3036
mp_err
3037
s_mp_grow(mp_int *mp, mp_size min)
3038
969k
{
3039
969k
    ARGCHK(mp != NULL, MP_BADARG);
3040
3041
969k
    if (min > ALLOC(mp)) {
3042
934k
        mp_digit *tmp;
3043
3044
        /* Set min to next nearest default precision block size */
3045
934k
        min = MP_ROUNDUP(min, s_mp_defprec);
3046
3047
934k
        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
3048
0
            return MP_MEM;
3049
3050
934k
        s_mp_copy(DIGITS(mp), tmp, USED(mp));
3051
3052
934k
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3053
934k
        s_mp_free(DIGITS(mp));
3054
934k
        DIGITS(mp) = tmp;
3055
934k
        ALLOC(mp) = min;
3056
934k
    }
3057
3058
969k
    return MP_OKAY;
3059
3060
969k
} /* end s_mp_grow() */
3061
3062
/* }}} */
3063
3064
/* {{{ s_mp_pad(mp, min) */
3065
3066
/* Make sure the used size of mp is at least 'min', growing if needed     */
3067
mp_err
3068
s_mp_pad(mp_int *mp, mp_size min)
3069
93.4M
{
3070
93.4M
    ARGCHK(mp != NULL, MP_BADARG);
3071
3072
93.4M
    if (min > USED(mp)) {
3073
77.9M
        mp_err res;
3074
3075
        /* Make sure there is room to increase precision  */
3076
77.9M
        if (min > ALLOC(mp)) {
3077
934k
            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
3078
0
                return res;
3079
76.9M
        } else {
3080
76.9M
            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
3081
76.9M
        }
3082
3083
        /* Increase precision; should already be 0-filled */
3084
77.9M
        USED(mp) = min;
3085
77.9M
    }
3086
3087
93.4M
    return MP_OKAY;
3088
3089
93.4M
} /* end s_mp_pad() */
3090
3091
/* }}} */
3092
3093
/* {{{ s_mp_setz(dp, count) */
3094
3095
/* Set 'count' digits pointed to by dp to be zeroes                       */
3096
void
3097
s_mp_setz(mp_digit *dp, mp_size count)
3098
105M
{
3099
105M
    memset(dp, 0, count * sizeof(mp_digit));
3100
105M
} /* end s_mp_setz() */
3101
3102
/* }}} */
3103
3104
/* {{{ s_mp_copy(sp, dp, count) */
3105
3106
/* Copy 'count' digits from sp to dp                                      */
3107
void
3108
s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
3109
10.4M
{
3110
10.4M
    memcpy(dp, sp, count * sizeof(mp_digit));
3111
10.4M
} /* end s_mp_copy() */
3112
3113
/* }}} */
3114
3115
/* {{{ s_mp_alloc(nb, ni) */
3116
3117
/* Allocate ni records of nb bytes each, and return a pointer to that     */
3118
void *
3119
s_mp_alloc(size_t nb, size_t ni)
3120
9.38M
{
3121
9.38M
    return calloc(nb, ni);
3122
3123
9.38M
} /* end s_mp_alloc() */
3124
3125
/* }}} */
3126
3127
/* {{{ s_mp_free(ptr) */
3128
3129
/* Free the memory pointed to by ptr                                      */
3130
void
3131
s_mp_free(void *ptr)
3132
9.38M
{
3133
9.38M
    if (ptr) {
3134
9.38M
        free(ptr);
3135
9.38M
    }
3136
9.38M
} /* end s_mp_free() */
3137
3138
/* }}} */
3139
3140
/* {{{ s_mp_clamp(mp) */
3141
3142
/* Remove leading zeroes from the given value                             */
3143
void
3144
s_mp_clamp(mp_int *mp)
3145
130M
{
3146
130M
    mp_size used = MP_USED(mp);
3147
433M
    while (used > 1 && DIGIT(mp, used - 1) == 0)
3148
303M
        --used;
3149
130M
    MP_USED(mp) = used;
3150
130M
    if (used == 1 && DIGIT(mp, 0) == 0)
3151
12.2M
        MP_SIGN(mp) = ZPOS;
3152
130M
} /* end s_mp_clamp() */
3153
3154
/* }}} */
3155
3156
/* {{{ s_mp_exch(a, b) */
3157
3158
/* Exchange the data for a and b; (b, a) = (a, b)                         */
3159
void
3160
s_mp_exch(mp_int *a, mp_int *b)
3161
1.33M
{
3162
1.33M
    mp_int tmp;
3163
1.33M
    if (!a || !b) {
3164
0
        return;
3165
0
    }
3166
3167
1.33M
    tmp = *a;
3168
1.33M
    *a = *b;
3169
1.33M
    *b = tmp;
3170
3171
1.33M
} /* end s_mp_exch() */
3172
3173
/* }}} */
3174
3175
/* }}} */
3176
3177
/* {{{ Arithmetic helpers */
3178
3179
/* {{{ s_mp_lshd(mp, p) */
3180
3181
/*
3182
   Shift mp leftward by p digits, growing if needed, and zero-filling
3183
   the in-shifted digits at the right end.  This is a convenient
3184
   alternative to multiplication by powers of the radix
3185
 */
3186
3187
mp_err
3188
s_mp_lshd(mp_int *mp, mp_size p)
3189
5.38M
{
3190
5.38M
    mp_err res;
3191
5.38M
    unsigned int ix;
3192
3193
5.38M
    ARGCHK(mp != NULL, MP_BADARG);
3194
3195
5.38M
    if (p == 0)
3196
0
        return MP_OKAY;
3197
3198
5.38M
    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
3199
3.00k
        return MP_OKAY;
3200
3201
5.38M
    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
3202
0
        return res;
3203
3204
    /* Shift all the significant figures over as needed */
3205
78.2M
    for (ix = USED(mp) - p; ix-- > 0;) {
3206
72.8M
        DIGIT(mp, ix + p) = DIGIT(mp, ix);
3207
72.8M
    }
3208
3209
    /* Fill the bottom digits with zeroes */
3210
16.3M
    for (ix = 0; (mp_size)ix < p; ix++)
3211
10.9M
        DIGIT(mp, ix) = 0;
3212
3213
5.38M
    return MP_OKAY;
3214
3215
5.38M
} /* end s_mp_lshd() */
3216
3217
/* }}} */
3218
3219
/* {{{ s_mp_mul_2d(mp, d) */
3220
3221
/*
3222
  Multiply the integer by 2^d, where d is a number of bits.  This
3223
  amounts to a bitwise shift of the value.
3224
 */
3225
mp_err
3226
s_mp_mul_2d(mp_int *mp, mp_digit d)
3227
80.0k
{
3228
80.0k
    mp_err res;
3229
80.0k
    mp_digit dshift, rshift, mask, x, prev = 0;
3230
80.0k
    mp_digit *pa = NULL;
3231
80.0k
    int i;
3232
3233
80.0k
    ARGCHK(mp != NULL, MP_BADARG);
3234
3235
80.0k
    dshift = d / MP_DIGIT_BIT;
3236
80.0k
    d %= MP_DIGIT_BIT;
3237
    /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
3238
    /* mod and corresponding mask logic avoid that when d = 0 */
3239
80.0k
    rshift = MP_DIGIT_BIT - d;
3240
80.0k
    rshift %= MP_DIGIT_BIT;
3241
    /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
3242
80.0k
    mask = (DIGIT_MAX << rshift) + 1;
3243
80.0k
    mask &= DIGIT_MAX - 1;
3244
    /* bits to be shifted out of the top word */
3245
80.0k
    x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
3246
3247
80.0k
    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0))))
3248
0
        return res;
3249
3250
80.0k
    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3251
0
        return res;
3252
3253
80.0k
    pa = MP_DIGITS(mp) + dshift;
3254
3255
1.73M
    for (i = MP_USED(mp) - dshift; i > 0; i--) {
3256
1.65M
        x = *pa;
3257
1.65M
        *pa++ = (x << d) | prev;
3258
1.65M
        prev = (x & mask) >> rshift;
3259
1.65M
    }
3260
3261
80.0k
    s_mp_clamp(mp);
3262
80.0k
    return MP_OKAY;
3263
80.0k
} /* end s_mp_mul_2d() */
3264
3265
/* {{{ s_mp_rshd(mp, p) */
3266
3267
/*
3268
   Shift mp rightward by p digits.  Maintains the invariant that
3269
   digits above the precision are all zero.  Digits shifted off the
3270
   end are lost.  Cannot fail.
3271
 */
3272
3273
void
3274
s_mp_rshd(mp_int *mp, mp_size p)
3275
78.2M
{
3276
78.2M
    mp_size ix;
3277
78.2M
    mp_digit *src, *dst;
3278
3279
78.2M
    if (p == 0)
3280
13.4M
        return;
3281
3282
    /* Shortcut when all digits are to be shifted off */
3283
64.7M
    if (p >= USED(mp)) {
3284
4.24M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3285
4.24M
        USED(mp) = 1;
3286
4.24M
        SIGN(mp) = ZPOS;
3287
4.24M
        return;
3288
4.24M
    }
3289
3290
    /* Shift all the significant figures over as needed */
3291
60.5M
    dst = MP_DIGITS(mp);
3292
60.5M
    src = dst + p;
3293
1.24G
    for (ix = USED(mp) - p; ix > 0; ix--)
3294
1.18G
        *dst++ = *src++;
3295
3296
60.5M
    MP_USED(mp) -= p;
3297
    /* Fill the top digits with zeroes */
3298
1.22G
    while (p-- > 0)
3299
1.16G
        *dst++ = 0;
3300
3301
60.5M
} /* end s_mp_rshd() */
3302
3303
/* }}} */
3304
3305
/* {{{ s_mp_div_2(mp) */
3306
3307
/* Divide by two -- take advantage of radix properties to do it fast      */
3308
void
3309
s_mp_div_2(mp_int *mp)
3310
13.4M
{
3311
13.4M
    s_mp_div_2d(mp, 1);
3312
3313
13.4M
} /* end s_mp_div_2() */
3314
3315
/* }}} */
3316
3317
/* {{{ s_mp_mul_2(mp) */
3318
3319
mp_err
3320
s_mp_mul_2(mp_int *mp)
3321
1.54M
{
3322
1.54M
    mp_digit *pd;
3323
1.54M
    unsigned int ix, used;
3324
1.54M
    mp_digit kin = 0;
3325
3326
1.54M
    ARGCHK(mp != NULL, MP_BADARG);
3327
3328
    /* Shift digits leftward by 1 bit */
3329
1.54M
    used = MP_USED(mp);
3330
1.54M
    pd = MP_DIGITS(mp);
3331
205M
    for (ix = 0; ix < used; ix++) {
3332
203M
        mp_digit d = *pd;
3333
203M
        *pd++ = (d << 1) | kin;
3334
203M
        kin = (d >> (DIGIT_BIT - 1));
3335
203M
    }
3336
3337
    /* Deal with rollover from last digit */
3338
1.54M
    if (kin) {
3339
0
        if (ix >= ALLOC(mp)) {
3340
0
            mp_err res;
3341
0
            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3342
0
                return res;
3343
0
        }
3344
3345
0
        DIGIT(mp, ix) = kin;
3346
0
        USED(mp) += 1;
3347
0
    }
3348
3349
1.54M
    return MP_OKAY;
3350
3351
1.54M
} /* end s_mp_mul_2() */
3352
3353
/* }}} */
3354
3355
/* {{{ s_mp_mod_2d(mp, d) */
3356
3357
/*
3358
  Remainder the integer by 2^d, where d is a number of bits.  This
3359
  amounts to a bitwise AND of the value, and does not require the full
3360
  division code
3361
 */
3362
void
3363
s_mp_mod_2d(mp_int *mp, mp_digit d)
3364
2.96M
{
3365
2.96M
    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3366
2.96M
    mp_size ix;
3367
2.96M
    mp_digit dmask;
3368
3369
2.96M
    if (ndig >= USED(mp))
3370
126k
        return;
3371
3372
    /* Flush all the bits above 2^d in its digit */
3373
2.84M
    dmask = ((mp_digit)1 << nbit) - 1;
3374
2.84M
    DIGIT(mp, ndig) &= dmask;
3375
3376
    /* Flush all digits above the one with 2^d in it */
3377
71.5M
    for (ix = ndig + 1; ix < USED(mp); ix++)
3378
68.6M
        DIGIT(mp, ix) = 0;
3379
3380
2.84M
    s_mp_clamp(mp);
3381
3382
2.84M
} /* end s_mp_mod_2d() */
3383
3384
/* }}} */
3385
3386
/* {{{ s_mp_div_2d(mp, d) */
3387
3388
/*
3389
  Divide the integer by 2^d, where d is a number of bits.  This
3390
  amounts to a bitwise shift of the value, and does not require the
3391
  full division code (used in Barrett reduction, see below)
3392
 */
3393
void
3394
s_mp_div_2d(mp_int *mp, mp_digit d)
3395
13.5M
{
3396
13.5M
    int ix;
3397
13.5M
    mp_digit save, next, mask, lshift;
3398
3399
13.5M
    s_mp_rshd(mp, d / DIGIT_BIT);
3400
13.5M
    d %= DIGIT_BIT;
3401
    /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
3402
    /* mod and corresponding mask logic avoid that when d = 0 */
3403
13.5M
    lshift = DIGIT_BIT - d;
3404
13.5M
    lshift %= DIGIT_BIT;
3405
13.5M
    mask = ((mp_digit)1 << d) - 1;
3406
13.5M
    save = 0;
3407
145M
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3408
132M
        next = DIGIT(mp, ix) & mask;
3409
132M
        DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d);
3410
132M
        save = next;
3411
132M
    }
3412
13.5M
    s_mp_clamp(mp);
3413
3414
13.5M
} /* end s_mp_div_2d() */
3415
3416
/* }}} */
3417
3418
/* {{{ s_mp_norm(a, b, *d) */
3419
3420
/*
3421
  s_mp_norm(a, b, *d)
3422
3423
  Normalize a and b for division, where b is the divisor.  In order
3424
  that we might make good guesses for quotient digits, we want the
3425
  leading digit of b to be at least half the radix, which we
3426
  accomplish by multiplying a and b by a power of 2.  The exponent
3427
  (shift count) is placed in *pd, so that the remainder can be shifted
3428
  back at the end of the division process.
3429
 */
3430
3431
mp_err
3432
s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3433
259k
{
3434
259k
    mp_digit d;
3435
259k
    mp_digit mask;
3436
259k
    mp_digit b_msd;
3437
259k
    mp_err res = MP_OKAY;
3438
3439
259k
    ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
3440
3441
259k
    d = 0;
3442
259k
    mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
3443
259k
    b_msd = DIGIT(b, USED(b) - 1);
3444
1.43M
    while (!(b_msd & mask)) {
3445
1.17M
        b_msd <<= 1;
3446
1.17M
        ++d;
3447
1.17M
    }
3448
3449
259k
    if (d) {
3450
24.4k
        MP_CHECKOK(s_mp_mul_2d(a, d));
3451
24.4k
        MP_CHECKOK(s_mp_mul_2d(b, d));
3452
24.4k
    }
3453
3454
259k
    *pd = d;
3455
259k
CLEANUP:
3456
259k
    return res;
3457
3458
259k
} /* end s_mp_norm() */
3459
3460
/* }}} */
3461
3462
/* }}} */
3463
3464
/* {{{ Primitive digit arithmetic */
3465
3466
/* {{{ s_mp_add_d(mp, d) */
3467
3468
/* Add d to |mp| in place                                                 */
3469
mp_err
3470
s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
3471
21.2k
{
3472
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3473
    mp_word w, k = 0;
3474
    mp_size ix = 1;
3475
3476
    w = (mp_word)DIGIT(mp, 0) + d;
3477
    DIGIT(mp, 0) = ACCUM(w);
3478
    k = CARRYOUT(w);
3479
3480
    while (ix < USED(mp) && k) {
3481
        w = (mp_word)DIGIT(mp, ix) + k;
3482
        DIGIT(mp, ix) = ACCUM(w);
3483
        k = CARRYOUT(w);
3484
        ++ix;
3485
    }
3486
3487
    if (k != 0) {
3488
        mp_err res;
3489
3490
        if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3491
            return res;
3492
3493
        DIGIT(mp, ix) = (mp_digit)k;
3494
    }
3495
3496
    return MP_OKAY;
3497
#else
3498
21.2k
    mp_digit *pmp = MP_DIGITS(mp);
3499
21.2k
    mp_digit sum, mp_i, carry = 0;
3500
21.2k
    mp_err res = MP_OKAY;
3501
21.2k
    int used = (int)MP_USED(mp);
3502
3503
21.2k
    mp_i = *pmp;
3504
21.2k
    *pmp++ = sum = d + mp_i;
3505
21.2k
    carry = (sum < d);
3506
21.2k
    while (carry && --used > 0) {
3507
0
        mp_i = *pmp;
3508
0
        *pmp++ = sum = carry + mp_i;
3509
0
        carry = !sum;
3510
0
    }
3511
21.2k
    if (carry && !used) {
3512
        /* mp is growing */
3513
0
        used = MP_USED(mp);
3514
0
        MP_CHECKOK(s_mp_pad(mp, used + 1));
3515
0
        MP_DIGIT(mp, used) = carry;
3516
0
    }
3517
21.2k
CLEANUP:
3518
21.2k
    return res;
3519
21.2k
#endif
3520
21.2k
} /* end s_mp_add_d() */
3521
3522
/* }}} */
3523
3524
/* {{{ s_mp_sub_d(mp, d) */
3525
3526
/* Subtract d from |mp| in place, assumes |mp| > d                        */
3527
mp_err
3528
s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
3529
13.9k
{
3530
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3531
    mp_word w, b = 0;
3532
    mp_size ix = 1;
3533
3534
    /* Compute initial subtraction    */
3535
    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3536
    b = CARRYOUT(w) ? 0 : 1;
3537
    DIGIT(mp, 0) = ACCUM(w);
3538
3539
    /* Propagate borrows leftward     */
3540
    while (b && ix < USED(mp)) {
3541
        w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3542
        b = CARRYOUT(w) ? 0 : 1;
3543
        DIGIT(mp, ix) = ACCUM(w);
3544
        ++ix;
3545
    }
3546
3547
    /* Remove leading zeroes          */
3548
    s_mp_clamp(mp);
3549
3550
    /* If we have a borrow out, it's a violation of the input invariant */
3551
    if (b)
3552
        return MP_RANGE;
3553
    else
3554
        return MP_OKAY;
3555
#else
3556
13.9k
    mp_digit *pmp = MP_DIGITS(mp);
3557
13.9k
    mp_digit mp_i, diff, borrow;
3558
13.9k
    mp_size used = MP_USED(mp);
3559
3560
13.9k
    mp_i = *pmp;
3561
13.9k
    *pmp++ = diff = mp_i - d;
3562
13.9k
    borrow = (diff > mp_i);
3563
14.6k
    while (borrow && --used) {
3564
650
        mp_i = *pmp;
3565
650
        *pmp++ = diff = mp_i - borrow;
3566
650
        borrow = (diff > mp_i);
3567
650
    }
3568
13.9k
    s_mp_clamp(mp);
3569
13.9k
    return (borrow && !used) ? MP_RANGE : MP_OKAY;
3570
13.9k
#endif
3571
13.9k
} /* end s_mp_sub_d() */
3572
3573
/* }}} */
3574
3575
/* {{{ s_mp_mul_d(a, d) */
3576
3577
/* Compute a = a * d, single digit multiplication                         */
3578
mp_err
3579
s_mp_mul_d(mp_int *a, mp_digit d)
3580
3.53M
{
3581
3.53M
    mp_err res;
3582
3.53M
    mp_size used;
3583
3.53M
    int pow;
3584
3585
3.53M
    if (!d) {
3586
0
        mp_zero(a);
3587
0
        return MP_OKAY;
3588
0
    }
3589
3.53M
    if (d == 1)
3590
82.3k
        return MP_OKAY;
3591
3.45M
    if (0 <= (pow = s_mp_ispow2d(d))) {
3592
24.8k
        return s_mp_mul_2d(a, (mp_digit)pow);
3593
24.8k
    }
3594
3595
3.42M
    used = MP_USED(a);
3596
3.42M
    MP_CHECKOK(s_mp_pad(a, used + 1));
3597
3598
3.42M
    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3599
3600
3.42M
    s_mp_clamp(a);
3601
3602
3.42M
CLEANUP:
3603
3.42M
    return res;
3604
3605
3.42M
} /* end s_mp_mul_d() */
3606
3607
/* }}} */
3608
3609
/* {{{ s_mp_div_d(mp, d, r) */
3610
3611
/*
3612
  s_mp_div_d(mp, d, r)
3613
3614
  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3615
  single digit d.  If r is null, the remainder will be discarded.
3616
 */
3617
3618
mp_err
3619
s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3620
0
{
3621
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3622
    mp_word w = 0, q;
3623
#else
3624
0
    mp_digit w = 0, q;
3625
0
#endif
3626
0
    int ix;
3627
0
    mp_err res;
3628
0
    mp_int quot;
3629
0
    mp_int rem;
3630
3631
0
    if (d == 0)
3632
0
        return MP_RANGE;
3633
0
    if (d == 1) {
3634
0
        if (r)
3635
0
            *r = 0;
3636
0
        return MP_OKAY;
3637
0
    }
3638
    /* could check for power of 2 here, but mp_div_d does that. */
3639
0
    if (MP_USED(mp) == 1) {
3640
0
        mp_digit n = MP_DIGIT(mp, 0);
3641
0
        mp_digit remdig;
3642
3643
0
        q = n / d;
3644
0
        remdig = n % d;
3645
0
        MP_DIGIT(mp, 0) = q;
3646
0
        if (r) {
3647
0
            *r = remdig;
3648
0
        }
3649
0
        return MP_OKAY;
3650
0
    }
3651
3652
0
    MP_DIGITS(&rem) = 0;
3653
0
    MP_DIGITS(&quot) = 0;
3654
    /* Make room for the quotient */
3655
0
    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
3656
3657
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3658
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3659
        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3660
3661
        if (w >= d) {
3662
            q = w / d;
3663
            w = w % d;
3664
        } else {
3665
            q = 0;
3666
        }
3667
3668
        s_mp_lshd(&quot, 1);
3669
        DIGIT(&quot, 0) = (mp_digit)q;
3670
    }
3671
#else
3672
0
    {
3673
0
        mp_digit p;
3674
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3675
0
        mp_digit norm;
3676
0
#endif
3677
3678
0
        MP_CHECKOK(mp_init_copy(&rem, mp));
3679
3680
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3681
0
        MP_DIGIT(&quot, 0) = d;
3682
0
        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
3683
0
        if (norm)
3684
0
            d <<= norm;
3685
0
        MP_DIGIT(&quot, 0) = 0;
3686
0
#endif
3687
3688
0
        p = 0;
3689
0
        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3690
0
            w = DIGIT(&rem, ix);
3691
3692
0
            if (p) {
3693
0
                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
3694
0
            } else if (w >= d) {
3695
0
                q = w / d;
3696
0
                w = w % d;
3697
0
            } else {
3698
0
                q = 0;
3699
0
            }
3700
3701
0
            MP_CHECKOK(s_mp_lshd(&quot, 1));
3702
0
            DIGIT(&quot, 0) = q;
3703
0
            p = w;
3704
0
        }
3705
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3706
0
        if (norm)
3707
0
            w >>= norm;
3708
0
#endif
3709
0
    }
3710
0
#endif
3711
3712
    /* Deliver the remainder, if desired */
3713
0
    if (r) {
3714
0
        *r = (mp_digit)w;
3715
0
    }
3716
3717
0
    s_mp_clamp(&quot);
3718
0
    mp_exch(&quot, mp);
3719
0
CLEANUP:
3720
0
    mp_clear(&quot);
3721
0
    mp_clear(&rem);
3722
3723
0
    return res;
3724
0
} /* end s_mp_div_d() */
3725
3726
/* }}} */
3727
3728
/* }}} */
3729
3730
/* {{{ Primitive full arithmetic */
3731
3732
/* {{{ s_mp_add(a, b) */
3733
3734
/* Compute a = |a| + |b|                                                  */
3735
mp_err
3736
s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition      */
3737
0
{
3738
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3739
    mp_word w = 0;
3740
#else
3741
0
    mp_digit d, sum, carry = 0;
3742
0
#endif
3743
0
    mp_digit *pa, *pb;
3744
0
    mp_size ix;
3745
0
    mp_size used;
3746
0
    mp_err res;
3747
3748
    /* Make sure a has enough precision for the output value */
3749
0
    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3750
0
        return res;
3751
3752
    /*
3753
      Add up all digits up to the precision of b.  If b had initially
3754
      the same precision as a, or greater, we took care of it by the
3755
      padding step above, so there is no problem.  If b had initially
3756
      less precision, we'll have to make sure the carry out is duly
3757
      propagated upward among the higher-order digits of the sum.
3758
     */
3759
0
    pa = MP_DIGITS(a);
3760
0
    pb = MP_DIGITS(b);
3761
0
    used = MP_USED(b);
3762
0
    for (ix = 0; ix < used; ix++) {
3763
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3764
        w = w + *pa + *pb++;
3765
        *pa++ = ACCUM(w);
3766
        w = CARRYOUT(w);
3767
#else
3768
0
        d = *pa;
3769
0
        sum = d + *pb++;
3770
0
        d = (sum < d); /* detect overflow */
3771
0
        *pa++ = sum += carry;
3772
0
        carry = d + (sum < carry); /* detect overflow */
3773
0
#endif
3774
0
    }
3775
3776
    /* If we run out of 'b' digits before we're actually done, make
3777
       sure the carries get propagated upward...
3778
     */
3779
0
    used = MP_USED(a);
3780
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3781
    while (w && ix < used) {
3782
        w = w + *pa;
3783
        *pa++ = ACCUM(w);
3784
        w = CARRYOUT(w);
3785
        ++ix;
3786
    }
3787
#else
3788
0
    while (carry && ix < used) {
3789
0
        sum = carry + *pa;
3790
0
        *pa++ = sum;
3791
0
        carry = !sum;
3792
0
        ++ix;
3793
0
    }
3794
0
#endif
3795
3796
/* If there's an overall carry out, increase precision and include
3797
     it.  We could have done this initially, but why touch the memory
3798
     allocator unless we're sure we have to?
3799
   */
3800
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3801
    if (w) {
3802
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3803
            return res;
3804
3805
        DIGIT(a, ix) = (mp_digit)w;
3806
    }
3807
#else
3808
0
    if (carry) {
3809
0
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3810
0
            return res;
3811
3812
0
        DIGIT(a, used) = carry;
3813
0
    }
3814
0
#endif
3815
3816
0
    return MP_OKAY;
3817
0
} /* end s_mp_add() */
3818
3819
/* }}} */
3820
3821
/* Compute c = |a| + |b|         */ /* magnitude addition      */
3822
mp_err
3823
s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3824
10.1M
{
3825
10.1M
    mp_digit *pa, *pb, *pc;
3826
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3827
    mp_word w = 0;
3828
#else
3829
10.1M
    mp_digit sum, carry = 0, d;
3830
10.1M
#endif
3831
10.1M
    mp_size ix;
3832
10.1M
    mp_size used;
3833
10.1M
    mp_err res;
3834
3835
10.1M
    MP_SIGN(c) = MP_SIGN(a);
3836
10.1M
    if (MP_USED(a) < MP_USED(b)) {
3837
1.28M
        const mp_int *xch = a;
3838
1.28M
        a = b;
3839
1.28M
        b = xch;
3840
1.28M
    }
3841
3842
    /* Make sure a has enough precision for the output value */
3843
10.1M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3844
0
        return res;
3845
3846
    /*
3847
     Add up all digits up to the precision of b.  If b had initially
3848
     the same precision as a, or greater, we took care of it by the
3849
     exchange step above, so there is no problem.  If b had initially
3850
     less precision, we'll have to make sure the carry out is duly
3851
     propagated upward among the higher-order digits of the sum.
3852
    */
3853
10.1M
    pa = MP_DIGITS(a);
3854
10.1M
    pb = MP_DIGITS(b);
3855
10.1M
    pc = MP_DIGITS(c);
3856
10.1M
    used = MP_USED(b);
3857
107M
    for (ix = 0; ix < used; ix++) {
3858
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3859
        w = w + *pa++ + *pb++;
3860
        *pc++ = ACCUM(w);
3861
        w = CARRYOUT(w);
3862
#else
3863
96.9M
        d = *pa++;
3864
96.9M
        sum = d + *pb++;
3865
96.9M
        d = (sum < d); /* detect overflow */
3866
96.9M
        *pc++ = sum += carry;
3867
96.9M
        carry = d + (sum < carry); /* detect overflow */
3868
96.9M
#endif
3869
96.9M
    }
3870
3871
    /* If we run out of 'b' digits before we're actually done, make
3872
     sure the carries get propagated upward...
3873
   */
3874
24.3M
    for (used = MP_USED(a); ix < used; ++ix) {
3875
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3876
        w = w + *pa++;
3877
        *pc++ = ACCUM(w);
3878
        w = CARRYOUT(w);
3879
#else
3880
14.2M
        *pc++ = sum = carry + *pa++;
3881
14.2M
        carry = (sum < carry);
3882
14.2M
#endif
3883
14.2M
    }
3884
3885
/* If there's an overall carry out, increase precision and include
3886
     it.  We could have done this initially, but why touch the memory
3887
     allocator unless we're sure we have to?
3888
   */
3889
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3890
    if (w) {
3891
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3892
            return res;
3893
3894
        DIGIT(c, used) = (mp_digit)w;
3895
        ++used;
3896
    }
3897
#else
3898
10.1M
    if (carry) {
3899
843k
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3900
0
            return res;
3901
3902
843k
        DIGIT(c, used) = carry;
3903
843k
        ++used;
3904
843k
    }
3905
10.1M
#endif
3906
10.1M
    MP_USED(c) = used;
3907
10.1M
    return MP_OKAY;
3908
10.1M
}
3909
/* {{{ s_mp_add_offset(a, b, offset) */
3910
3911
/* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3912
mp_err
3913
s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3914
0
{
3915
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3916
    mp_word w, k = 0;
3917
#else
3918
0
    mp_digit d, sum, carry = 0;
3919
0
#endif
3920
0
    mp_size ib;
3921
0
    mp_size ia;
3922
0
    mp_size lim;
3923
0
    mp_err res;
3924
3925
    /* Make sure a has enough precision for the output value */
3926
0
    lim = MP_USED(b) + offset;
3927
0
    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3928
0
        return res;
3929
3930
    /*
3931
    Add up all digits up to the precision of b.  If b had initially
3932
    the same precision as a, or greater, we took care of it by the
3933
    padding step above, so there is no problem.  If b had initially
3934
    less precision, we'll have to make sure the carry out is duly
3935
    propagated upward among the higher-order digits of the sum.
3936
   */
3937
0
    lim = USED(b);
3938
0
    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
3939
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3940
        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3941
        DIGIT(a, ia) = ACCUM(w);
3942
        k = CARRYOUT(w);
3943
#else
3944
0
        d = MP_DIGIT(a, ia);
3945
0
        sum = d + MP_DIGIT(b, ib);
3946
0
        d = (sum < d);
3947
0
        MP_DIGIT(a, ia) = sum += carry;
3948
0
        carry = d + (sum < carry);
3949
0
#endif
3950
0
    }
3951
3952
/* If we run out of 'b' digits before we're actually done, make
3953
     sure the carries get propagated upward...
3954
   */
3955
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3956
    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3957
        w = (mp_word)DIGIT(a, ia) + k;
3958
        DIGIT(a, ia) = ACCUM(w);
3959
        k = CARRYOUT(w);
3960
    }
3961
#else
3962
0
    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3963
0
        d = MP_DIGIT(a, ia);
3964
0
        MP_DIGIT(a, ia) = sum = d + carry;
3965
0
        carry = (sum < d);
3966
0
    }
3967
0
#endif
3968
3969
/* If there's an overall carry out, increase precision and include
3970
     it.  We could have done this initially, but why touch the memory
3971
     allocator unless we're sure we have to?
3972
   */
3973
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3974
    if (k) {
3975
        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3976
            return res;
3977
3978
        DIGIT(a, ia) = (mp_digit)k;
3979
    }
3980
#else
3981
0
    if (carry) {
3982
0
        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3983
0
            return res;
3984
3985
0
        DIGIT(a, lim) = carry;
3986
0
    }
3987
0
#endif
3988
0
    s_mp_clamp(a);
3989
3990
0
    return MP_OKAY;
3991
3992
0
} /* end s_mp_add_offset() */
3993
3994
/* }}} */
3995
3996
/* {{{ s_mp_sub(a, b) */
3997
3998
/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3999
mp_err
4000
s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract      */
4001
20.0M
{
4002
20.0M
    mp_digit *pa, *pb, *limit;
4003
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4004
    mp_sword w = 0;
4005
#else
4006
20.0M
    mp_digit d, diff, borrow = 0;
4007
20.0M
#endif
4008
4009
    /*
4010
    Subtract and propagate borrow.  Up to the precision of b, this
4011
    accounts for the digits of b; after that, we just make sure the
4012
    carries get to the right place.  This saves having to pad b out to
4013
    the precision of a just to make the loops work right...
4014
   */
4015
20.0M
    pa = MP_DIGITS(a);
4016
20.0M
    pb = MP_DIGITS(b);
4017
20.0M
    limit = pb + MP_USED(b);
4018
422M
    while (pb < limit) {
4019
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4020
        w = w + *pa - *pb++;
4021
        *pa++ = ACCUM(w);
4022
        w >>= MP_DIGIT_BIT;
4023
#else
4024
401M
        d = *pa;
4025
401M
        diff = d - *pb++;
4026
401M
        d = (diff > d); /* detect borrow */
4027
401M
        if (borrow && --diff == MP_DIGIT_MAX)
4028
287k
            ++d;
4029
401M
        *pa++ = diff;
4030
401M
        borrow = d;
4031
401M
#endif
4032
401M
    }
4033
20.0M
    limit = MP_DIGITS(a) + MP_USED(a);
4034
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4035
    while (w && pa < limit) {
4036
        w = w + *pa;
4037
        *pa++ = ACCUM(w);
4038
        w >>= MP_DIGIT_BIT;
4039
    }
4040
#else
4041
30.5M
    while (borrow && pa < limit) {
4042
10.4M
        d = *pa;
4043
10.4M
        *pa++ = diff = d - borrow;
4044
10.4M
        borrow = (diff > d);
4045
10.4M
    }
4046
20.0M
#endif
4047
4048
    /* Clobber any leading zeroes we created    */
4049
20.0M
    s_mp_clamp(a);
4050
4051
/*
4052
     If there was a borrow out, then |b| > |a| in violation
4053
     of our input invariant.  We've already done the work,
4054
     but we'll at least complain about it...
4055
   */
4056
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4057
    return w ? MP_RANGE : MP_OKAY;
4058
#else
4059
20.0M
    return borrow ? MP_RANGE : MP_OKAY;
4060
20.0M
#endif
4061
20.0M
} /* end s_mp_sub() */
4062
4063
/* }}} */
4064
4065
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
4066
mp_err
4067
s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
4068
8.01M
{
4069
8.01M
    mp_digit *pa, *pb, *pc;
4070
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4071
    mp_sword w = 0;
4072
#else
4073
8.01M
    mp_digit d, diff, borrow = 0;
4074
8.01M
#endif
4075
8.01M
    int ix, limit;
4076
8.01M
    mp_err res;
4077
4078
8.01M
    MP_SIGN(c) = MP_SIGN(a);
4079
4080
    /* Make sure a has enough precision for the output value */
4081
8.01M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
4082
0
        return res;
4083
4084
    /*
4085
    Subtract and propagate borrow.  Up to the precision of b, this
4086
    accounts for the digits of b; after that, we just make sure the
4087
    carries get to the right place.  This saves having to pad b out to
4088
    the precision of a just to make the loops work right...
4089
   */
4090
8.01M
    pa = MP_DIGITS(a);
4091
8.01M
    pb = MP_DIGITS(b);
4092
8.01M
    pc = MP_DIGITS(c);
4093
8.01M
    limit = MP_USED(b);
4094
113M
    for (ix = 0; ix < limit; ++ix) {
4095
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4096
        w = w + *pa++ - *pb++;
4097
        *pc++ = ACCUM(w);
4098
        w >>= MP_DIGIT_BIT;
4099
#else
4100
104M
        d = *pa++;
4101
104M
        diff = d - *pb++;
4102
104M
        d = (diff > d);
4103
104M
        if (borrow && --diff == MP_DIGIT_MAX)
4104
112k
            ++d;
4105
104M
        *pc++ = diff;
4106
104M
        borrow = d;
4107
104M
#endif
4108
104M
    }
4109
10.6M
    for (limit = MP_USED(a); ix < limit; ++ix) {
4110
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4111
        w = w + *pa++;
4112
        *pc++ = ACCUM(w);
4113
        w >>= MP_DIGIT_BIT;
4114
#else
4115
2.63M
        d = *pa++;
4116
2.63M
        *pc++ = diff = d - borrow;
4117
2.63M
        borrow = (diff > d);
4118
2.63M
#endif
4119
2.63M
    }
4120
4121
    /* Clobber any leading zeroes we created    */
4122
8.01M
    MP_USED(c) = ix;
4123
8.01M
    s_mp_clamp(c);
4124
4125
/*
4126
     If there was a borrow out, then |b| > |a| in violation
4127
     of our input invariant.  We've already done the work,
4128
     but we'll at least complain about it...
4129
   */
4130
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4131
    return w ? MP_RANGE : MP_OKAY;
4132
#else
4133
8.01M
    return borrow ? MP_RANGE : MP_OKAY;
4134
8.01M
#endif
4135
8.01M
}
4136
/* {{{ s_mp_mul(a, b) */
4137
4138
/* Compute a = |a| * |b|                                                  */
4139
mp_err
4140
s_mp_mul(mp_int *a, const mp_int *b)
4141
3.44M
{
4142
3.44M
    return mp_mul(a, b, a);
4143
3.44M
} /* end s_mp_mul() */
4144
4145
/* }}} */
4146
4147
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4148
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4149
#define MP_MUL_DxD(a, b, Phi, Plo)                              \
4150
    {                                                           \
4151
        unsigned long long product = (unsigned long long)a * b; \
4152
        Plo = (mp_digit)product;                                \
4153
        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
4154
    }
4155
#else
4156
#define MP_MUL_DxD(a, b, Phi, Plo)                                 \
4157
20.1M
    {                                                              \
4158
20.1M
        mp_digit a0b1, a1b0;                                       \
4159
20.1M
        Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX);   \
4160
20.1M
        Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
4161
20.1M
        a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
4162
20.1M
        a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
4163
20.1M
        a1b0 += a0b1;                                              \
4164
20.1M
        Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
4165
20.1M
        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
4166
20.1M
        a1b0 <<= MP_HALF_DIGIT_BIT;                                \
4167
20.1M
        Plo += a1b0;                                               \
4168
20.1M
        Phi += MP_CT_LTU(Plo, a1b0);                               \
4169
20.1M
    }
4170
#endif
4171
4172
/* Constant time version of s_mpv_mul_d_add_prop.
4173
 * Presently, this is only used by the Constant time Montgomery arithmetic code. */
4174
/* c += a * b */
4175
void
4176
s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
4177
                       mp_digit *c, mp_size c_len)
4178
628k
{
4179
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4180
    mp_digit d = 0;
4181
4182
    c_len -= a_len;
4183
    /* Inner product:  Digits of a */
4184
    while (a_len--) {
4185
        mp_word w = ((mp_word)b * *a++) + *c + d;
4186
        *c++ = ACCUM(w);
4187
        d = CARRYOUT(w);
4188
    }
4189
4190
    /* propagate the carry to the end, even if carry is zero */
4191
    while (c_len--) {
4192
        mp_word w = (mp_word)*c + d;
4193
        *c++ = ACCUM(w);
4194
        d = CARRYOUT(w);
4195
    }
4196
#else
4197
628k
    mp_digit carry = 0;
4198
628k
    c_len -= a_len;
4199
20.7M
    while (a_len--) {
4200
20.1M
        mp_digit a_i = *a++;
4201
20.1M
        mp_digit a0b0, a1b1;
4202
20.1M
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4203
4204
20.1M
        a0b0 += carry;
4205
20.1M
        a1b1 += MP_CT_LTU(a0b0, carry);
4206
20.1M
        a0b0 += a_i = *c;
4207
20.1M
        a1b1 += MP_CT_LTU(a0b0, a_i);
4208
4209
20.1M
        *c++ = a0b0;
4210
20.1M
        carry = a1b1;
4211
20.1M
    }
4212
    /* propagate the carry to the end, even if carry is zero */
4213
11.6M
    while (c_len--) {
4214
11.0M
        mp_digit c_i = *c;
4215
11.0M
        carry += c_i;
4216
11.0M
        *c++ = carry;
4217
11.0M
        carry = MP_CT_LTU(carry, c_i);
4218
11.0M
    }
4219
628k
#endif
4220
628k
}
4221
4222
#if !defined(MP_ASSEMBLY_MULTIPLY)
4223
/* c = a * b */
4224
void
4225
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4226
{
4227
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4228
    mp_digit d = 0;
4229
4230
    /* Inner product:  Digits of a */
4231
    while (a_len--) {
4232
        mp_word w = ((mp_word)b * *a++) + d;
4233
        *c++ = ACCUM(w);
4234
        d = CARRYOUT(w);
4235
    }
4236
    *c = d;
4237
#else
4238
    mp_digit carry = 0;
4239
    while (a_len--) {
4240
        mp_digit a_i = *a++;
4241
        mp_digit a0b0, a1b1;
4242
4243
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4244
4245
        a0b0 += carry;
4246
        a1b1 += MP_CT_LTU(a0b0, carry);
4247
        *c++ = a0b0;
4248
        carry = a1b1;
4249
    }
4250
    *c = carry;
4251
#endif
4252
}
4253
4254
/* c += a * b */
4255
void
4256
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
4257
                mp_digit *c)
4258
{
4259
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4260
    mp_digit d = 0;
4261
4262
    /* Inner product:  Digits of a */
4263
    while (a_len--) {
4264
        mp_word w = ((mp_word)b * *a++) + *c + d;
4265
        *c++ = ACCUM(w);
4266
        d = CARRYOUT(w);
4267
    }
4268
    *c = d;
4269
#else
4270
    mp_digit carry = 0;
4271
    while (a_len--) {
4272
        mp_digit a_i = *a++;
4273
        mp_digit a0b0, a1b1;
4274
4275
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4276
4277
        a0b0 += carry;
4278
        a1b1 += MP_CT_LTU(a0b0, carry);
4279
        a0b0 += a_i = *c;
4280
        a1b1 += MP_CT_LTU(a0b0, a_i);
4281
        *c++ = a0b0;
4282
        carry = a1b1;
4283
    }
4284
    *c = carry;
4285
#endif
4286
}
4287
4288
/* Presently, this is only used by the Montgomery arithmetic code. */
4289
/* c += a * b */
4290
void
4291
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4292
{
4293
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4294
    mp_digit d = 0;
4295
4296
    /* Inner product:  Digits of a */
4297
    while (a_len--) {
4298
        mp_word w = ((mp_word)b * *a++) + *c + d;
4299
        *c++ = ACCUM(w);
4300
        d = CARRYOUT(w);
4301
    }
4302
4303
    while (d) {
4304
        mp_word w = (mp_word)*c + d;
4305
        *c++ = ACCUM(w);
4306
        d = CARRYOUT(w);
4307
    }
4308
#else
4309
    mp_digit carry = 0;
4310
    while (a_len--) {
4311
        mp_digit a_i = *a++;
4312
        mp_digit a0b0, a1b1;
4313
4314
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4315
4316
        a0b0 += carry;
4317
        if (a0b0 < carry)
4318
            ++a1b1;
4319
4320
        a0b0 += a_i = *c;
4321
        if (a0b0 < a_i)
4322
            ++a1b1;
4323
4324
        *c++ = a0b0;
4325
        carry = a1b1;
4326
    }
4327
    while (carry) {
4328
        mp_digit c_i = *c;
4329
        carry += c_i;
4330
        *c++ = carry;
4331
        carry = carry < c_i;
4332
    }
4333
#endif
4334
}
4335
#endif
4336
4337
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4338
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4339
#define MP_SQR_D(a, Phi, Plo)                                  \
4340
    {                                                          \
4341
        unsigned long long square = (unsigned long long)a * a; \
4342
        Plo = (mp_digit)square;                                \
4343
        Phi = (mp_digit)(square >> MP_DIGIT_BIT);              \
4344
    }
4345
#else
4346
#define MP_SQR_D(a, Phi, Plo)                                      \
4347
105M
    {                                                              \
4348
105M
        mp_digit Pmid;                                             \
4349
105M
        Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX);   \
4350
105M
        Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4351
105M
        Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4352
105M
        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);                    \
4353
105M
        Pmid <<= (MP_HALF_DIGIT_BIT + 1);                          \
4354
105M
        Plo += Pmid;                                               \
4355
105M
        if (Plo < Pmid)                                            \
4356
105M
            ++Phi;                                                 \
4357
105M
    }
4358
#endif
4359
4360
#if !defined(MP_ASSEMBLY_SQUARE)
4361
/* Add the squares of the digits of a to the digits of b. */
4362
void
4363
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4364
5.03M
{
4365
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4366
    mp_word w;
4367
    mp_digit d;
4368
    mp_size ix;
4369
4370
    w = 0;
4371
#define ADD_SQUARE(n)                     \
4372
    d = pa[n];                            \
4373
    w += (d * (mp_word)d) + ps[2 * n];    \
4374
    ps[2 * n] = ACCUM(w);                 \
4375
    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
4376
    ps[2 * n + 1] = ACCUM(w);             \
4377
    w = (w >> DIGIT_BIT)
4378
4379
    for (ix = a_len; ix >= 4; ix -= 4) {
4380
        ADD_SQUARE(0);
4381
        ADD_SQUARE(1);
4382
        ADD_SQUARE(2);
4383
        ADD_SQUARE(3);
4384
        pa += 4;
4385
        ps += 8;
4386
    }
4387
    if (ix) {
4388
        ps += 2 * ix;
4389
        pa += ix;
4390
        switch (ix) {
4391
            case 3:
4392
                ADD_SQUARE(-3); /* FALLTHRU */
4393
            case 2:
4394
                ADD_SQUARE(-2); /* FALLTHRU */
4395
            case 1:
4396
                ADD_SQUARE(-1); /* FALLTHRU */
4397
            case 0:
4398
                break;
4399
        }
4400
    }
4401
    while (w) {
4402
        w += *ps;
4403
        *ps++ = ACCUM(w);
4404
        w = (w >> DIGIT_BIT);
4405
    }
4406
#else
4407
5.03M
    mp_digit carry = 0;
4408
110M
    while (a_len--) {
4409
105M
        mp_digit a_i = *pa++;
4410
105M
        mp_digit a0a0, a1a1;
4411
4412
105M
        MP_SQR_D(a_i, a1a1, a0a0);
4413
4414
        /* here a1a1 and a0a0 constitute a_i ** 2 */
4415
105M
        a0a0 += carry;
4416
105M
        if (a0a0 < carry)
4417
0
            ++a1a1;
4418
4419
        /* now add to ps */
4420
105M
        a0a0 += a_i = *ps;
4421
105M
        if (a0a0 < a_i)
4422
49.7M
            ++a1a1;
4423
105M
        *ps++ = a0a0;
4424
105M
        a1a1 += a_i = *ps;
4425
105M
        carry = (a1a1 < a_i);
4426
105M
        *ps++ = a1a1;
4427
105M
    }
4428
5.03M
    while (carry) {
4429
0
        mp_digit s_i = *ps;
4430
0
        carry += s_i;
4431
0
        *ps++ = carry;
4432
0
        carry = carry < s_i;
4433
0
    }
4434
5.03M
#endif
4435
5.03M
}
4436
#endif
4437
4438
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
4439
/*
4440
** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4441
** so its high bit is 1.   This code is from NSPR.
4442
*/
4443
mp_err
4444
s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4445
                mp_digit *qp, mp_digit *rp)
4446
3.51M
{
4447
3.51M
    mp_digit d1, d0, q1, q0;
4448
3.51M
    mp_digit r1, r0, m;
4449
4450
3.51M
    d1 = divisor >> MP_HALF_DIGIT_BIT;
4451
3.51M
    d0 = divisor & MP_HALF_DIGIT_MAX;
4452
3.51M
    r1 = Nhi % d1;
4453
3.51M
    q1 = Nhi / d1;
4454
3.51M
    m = q1 * d0;
4455
3.51M
    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4456
3.51M
    if (r1 < m) {
4457
1.06M
        q1--, r1 += divisor;
4458
1.06M
        if (r1 >= divisor && r1 < m) {
4459
15.8k
            q1--, r1 += divisor;
4460
15.8k
        }
4461
1.06M
    }
4462
3.51M
    r1 -= m;
4463
3.51M
    r0 = r1 % d1;
4464
3.51M
    q0 = r1 / d1;
4465
3.51M
    m = q0 * d0;
4466
3.51M
    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4467
3.51M
    if (r0 < m) {
4468
1.03M
        q0--, r0 += divisor;
4469
1.03M
        if (r0 >= divisor && r0 < m) {
4470
15.8k
            q0--, r0 += divisor;
4471
15.8k
        }
4472
1.03M
    }
4473
3.51M
    if (qp)
4474
3.51M
        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4475
3.51M
    if (rp)
4476
3.51M
        *rp = r0 - m;
4477
3.51M
    return MP_OKAY;
4478
3.51M
}
4479
#endif
4480
4481
#if MP_SQUARE
4482
/* {{{ s_mp_sqr(a) */
4483
4484
mp_err
4485
s_mp_sqr(mp_int *a)
4486
982k
{
4487
982k
    mp_err res;
4488
982k
    mp_int tmp;
4489
4490
982k
    if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
4491
0
        return res;
4492
982k
    res = mp_sqr(a, &tmp);
4493
982k
    if (res == MP_OKAY) {
4494
982k
        s_mp_exch(&tmp, a);
4495
982k
    }
4496
982k
    mp_clear(&tmp);
4497
982k
    return res;
4498
982k
}
4499
4500
/* }}} */
4501
#endif
4502
4503
/* {{{ s_mp_div(a, b) */
4504
4505
/*
4506
  s_mp_div(a, b)
4507
4508
  Compute a = a / b and b = a mod b.  Assumes b > a.
4509
 */
4510
4511
mp_err
4512
s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4513
         mp_int *div,  /* i: divisor                */
4514
         mp_int *quot) /* i: 0;        o: quotient  */
4515
261k
{
4516
261k
    mp_int part, t;
4517
261k
    mp_digit q_msd;
4518
261k
    mp_err res;
4519
261k
    mp_digit d;
4520
261k
    mp_digit div_msd;
4521
261k
    int ix;
4522
4523
261k
    if (mp_cmp_z(div) == 0)
4524
0
        return MP_RANGE;
4525
4526
261k
    DIGITS(&t) = 0;
4527
    /* Shortcut if divisor is power of two */
4528
261k
    if ((ix = s_mp_ispow2(div)) >= 0) {
4529
1.30k
        MP_CHECKOK(mp_copy(rem, quot));
4530
1.30k
        s_mp_div_2d(quot, (mp_digit)ix);
4531
1.30k
        s_mp_mod_2d(rem, (mp_digit)ix);
4532
4533
1.30k
        return MP_OKAY;
4534
1.30k
    }
4535
4536
259k
    MP_SIGN(rem) = ZPOS;
4537
259k
    MP_SIGN(div) = ZPOS;
4538
259k
    MP_SIGN(&part) = ZPOS;
4539
4540
    /* A working temporary for division     */
4541
259k
    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
4542
4543
    /* Normalize to optimize guessing       */
4544
259k
    MP_CHECKOK(s_mp_norm(rem, div, &d));
4545
4546
    /* Perform the division itself...woo!   */
4547
259k
    MP_USED(quot) = MP_ALLOC(quot);
4548
4549
    /* Find a partial substring of rem which is at least div */
4550
    /* If we didn't find one, we're finished dividing    */
4551
3.79M
    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4552
3.53M
        int i;
4553
3.53M
        int unusedRem;
4554
3.53M
        int partExtended = 0; /* set to true if we need to extend part */
4555
4556
3.53M
        unusedRem = MP_USED(rem) - MP_USED(div);
4557
3.53M
        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4558
3.53M
        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
4559
3.53M
        MP_USED(&part) = MP_USED(div);
4560
4561
        /* We have now truncated the part of the remainder to the same length as
4562
         * the divisor. If part is smaller than div, extend part by one digit. */
4563
3.53M
        if (s_mp_cmp(&part, div) < 0) {
4564
3.53M
            --unusedRem;
4565
3.53M
#if MP_ARGCHK == 2
4566
3.53M
            assert(unusedRem >= 0);
4567
3.53M
#endif
4568
3.53M
            --MP_DIGITS(&part);
4569
3.53M
            ++MP_USED(&part);
4570
3.53M
            ++MP_ALLOC(&part);
4571
3.53M
            partExtended = 1;
4572
3.53M
        }
4573
4574
        /* Compute a guess for the next quotient digit       */
4575
3.53M
        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4576
3.53M
        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4577
3.53M
        if (!partExtended) {
4578
            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
4579
             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
4580
             * we didn't extend part, q_msd >= div_msd. Therefore we know that
4581
             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
4582
             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
4583
4.98k
            q_msd = 1;
4584
3.53M
        } else {
4585
3.53M
            if (q_msd == div_msd) {
4586
19.7k
                q_msd = MP_DIGIT_MAX;
4587
3.51M
            } else {
4588
3.51M
                mp_digit r;
4589
3.51M
                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4590
3.51M
                                           div_msd, &q_msd, &r));
4591
3.51M
            }
4592
3.53M
        }
4593
3.53M
#if MP_ARGCHK == 2
4594
3.53M
        assert(q_msd > 0); /* This case should never occur any more. */
4595
3.53M
#endif
4596
3.53M
        if (q_msd <= 0)
4597
0
            break;
4598
4599
        /* See what that multiplies out to                   */
4600
3.53M
        mp_copy(div, &t);
4601
3.53M
        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
4602
4603
        /*
4604
           If it's too big, back it off.  We should not have to do this
4605
           more than once, or, in rare cases, twice.  Knuth describes a
4606
           method by which this could be reduced to a maximum of once, but
4607
           I didn't implement that here.
4608
           When using s_mpv_div_2dx1d, we may have to do this 3 times.
4609
         */
4610
4.23M
        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4611
696k
            --q_msd;
4612
696k
            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
4613
696k
        }
4614
3.53M
        if (i < 0) {
4615
0
            res = MP_RANGE;
4616
0
            goto CLEANUP;
4617
0
        }
4618
4619
        /* At this point, q_msd should be the right next digit   */
4620
3.53M
        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
4621
3.53M
        s_mp_clamp(rem);
4622
4623
        /*
4624
          Include the digit in the quotient.  We allocated enough memory
4625
          for any quotient we could ever possibly get, so we should not
4626
          have to check for failures here
4627
         */
4628
3.53M
        MP_DIGIT(quot, unusedRem) = q_msd;
4629
3.53M
    }
4630
4631
    /* Denormalize remainder                */
4632
259k
    if (d) {
4633
24.4k
        s_mp_div_2d(rem, d);
4634
24.4k
    }
4635
4636
259k
    s_mp_clamp(quot);
4637
4638
259k
CLEANUP:
4639
259k
    mp_clear(&t);
4640
4641
259k
    return res;
4642
4643
259k
} /* end s_mp_div() */
4644
4645
/* }}} */
4646
4647
/* {{{ s_mp_2expt(a, k) */
4648
4649
mp_err
4650
s_mp_2expt(mp_int *a, mp_digit k)
4651
5.31k
{
4652
5.31k
    mp_err res;
4653
5.31k
    mp_size dig, bit;
4654
4655
5.31k
    dig = k / DIGIT_BIT;
4656
5.31k
    bit = k % DIGIT_BIT;
4657
4658
5.31k
    mp_zero(a);
4659
5.31k
    if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4660
0
        return res;
4661
4662
5.31k
    DIGIT(a, dig) |= ((mp_digit)1 << bit);
4663
4664
5.31k
    return MP_OKAY;
4665
4666
5.31k
} /* end s_mp_2expt() */
4667
4668
/* }}} */
4669
4670
/* {{{ s_mp_reduce(x, m, mu) */
4671
4672
/*
4673
  Compute Barrett reduction, x (mod m), given a precomputed value for
4674
  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4675
  faster than straight division, when many reductions by the same
4676
  value of m are required (such as in modular exponentiation).  This
4677
  can nearly halve the time required to do modular exponentiation,
4678
  as compared to using the full integer divide to reduce.
4679
4680
  This algorithm was derived from the _Handbook of Applied
4681
  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4682
  pp. 603-604.
4683
 */
4684
4685
mp_err
4686
s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4687
1.47M
{
4688
1.47M
    mp_int q;
4689
1.47M
    mp_err res;
4690
4691
1.47M
    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
4692
0
        return res;
4693
4694
1.47M
    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
4695
1.47M
    s_mp_mul(&q, mu);           /* q2 = q1 * mu      */
4696
1.47M
    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
4697
4698
    /* x = x mod b^(k+1), quick (no division) */
4699
1.47M
    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4700
4701
    /* q = q * m mod b^(k+1), quick (no division) */
4702
1.47M
    s_mp_mul(&q, m);
4703
1.47M
    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4704
4705
    /* x = x - q */
4706
1.47M
    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
4707
0
        goto CLEANUP;
4708
4709
    /* If x < 0, add b^(k+1) to it */
4710
1.47M
    if (mp_cmp_z(x) < 0) {
4711
43.6k
        mp_set(&q, 1);
4712
43.6k
        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4713
0
            goto CLEANUP;
4714
43.6k
        if ((res = mp_add(x, &q, x)) != MP_OKAY)
4715
0
            goto CLEANUP;
4716
43.6k
    }
4717
4718
    /* Back off if it's too big */
4719
1.61M
    while (mp_cmp(x, m) >= 0) {
4720
145k
        if ((res = s_mp_sub(x, m)) != MP_OKAY)
4721
0
            break;
4722
145k
    }
4723
4724
1.47M
CLEANUP:
4725
1.47M
    mp_clear(&q);
4726
4727
1.47M
    return res;
4728
4729
1.47M
} /* end s_mp_reduce() */
4730
4731
/* }}} */
4732
4733
/* }}} */
4734
4735
/* {{{ Primitive comparisons */
4736
4737
/* {{{ s_mp_cmp(a, b) */
4738
4739
/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4740
int
4741
s_mp_cmp(const mp_int *a, const mp_int *b)
4742
93.8M
{
4743
93.8M
    ARGMPCHK(a != NULL && b != NULL);
4744
4745
93.8M
    mp_size used_a = MP_USED(a);
4746
93.8M
    {
4747
93.8M
        mp_size used_b = MP_USED(b);
4748
4749
93.8M
        if (used_a > used_b)
4750
10.5M
            goto IS_GT;
4751
83.3M
        if (used_a < used_b)
4752
4.87M
            goto IS_LT;
4753
83.3M
    }
4754
78.4M
    {
4755
78.4M
        mp_digit *pa, *pb;
4756
78.4M
        mp_digit da = 0, db = 0;
4757
4758
78.4M
#define CMP_AB(n)                     \
4759
79.3M
    if ((da = pa[n]) != (db = pb[n])) \
4760
79.3M
    goto done
4761
4762
78.4M
        pa = MP_DIGITS(a) + used_a;
4763
78.4M
        pb = MP_DIGITS(b) + used_a;
4764
78.6M
        while (used_a >= 4) {
4765
75.1M
            pa -= 4;
4766
75.1M
            pb -= 4;
4767
75.1M
            used_a -= 4;
4768
75.1M
            CMP_AB(3);
4769
3.71M
            CMP_AB(2);
4770
246k
            CMP_AB(1);
4771
224k
            CMP_AB(0);
4772
224k
        }
4773
3.55M
        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4774
60.0k
            /* do nothing */;
4775
78.4M
    done:
4776
78.4M
        if (da > db)
4777
10.3M
            goto IS_GT;
4778
68.0M
        if (da < db)
4779
68.0M
            goto IS_LT;
4780
68.0M
    }
4781
62.6k
    return MP_EQ;
4782
72.9M
IS_LT:
4783
72.9M
    return MP_LT;
4784
20.9M
IS_GT:
4785
20.9M
    return MP_GT;
4786
68.0M
} /* end s_mp_cmp() */
4787
4788
/* }}} */
4789
4790
/* {{{ s_mp_cmp_d(a, d) */
4791
4792
/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4793
int
4794
s_mp_cmp_d(const mp_int *a, mp_digit d)
4795
18.7M
{
4796
18.7M
    ARGMPCHK(a != NULL);
4797
4798
18.7M
    if (USED(a) > 1)
4799
13.7M
        return MP_GT;
4800
4801
5.04M
    if (DIGIT(a, 0) < d)
4802
3.14k
        return MP_LT;
4803
5.03M
    else if (DIGIT(a, 0) > d)
4804
4.97M
        return MP_GT;
4805
60.2k
    else
4806
60.2k
        return MP_EQ;
4807
4808
5.04M
} /* end s_mp_cmp_d() */
4809
4810
/* }}} */
4811
4812
/* {{{ s_mp_ispow2(v) */
4813
4814
/*
4815
  Returns -1 if the value is not a power of two; otherwise, it returns
4816
  k such that v = 2^k, i.e. lg(v).
4817
 */
4818
int
4819
s_mp_ispow2(const mp_int *v)
4820
264k
{
4821
264k
    mp_digit d;
4822
264k
    int extra = 0, ix;
4823
4824
264k
    ARGMPCHK(v != NULL);
4825
4826
264k
    ix = MP_USED(v) - 1;
4827
264k
    d = MP_DIGIT(v, ix); /* most significant digit of v */
4828
4829
264k
    extra = s_mp_ispow2d(d);
4830
264k
    if (extra < 0 || ix == 0)
4831
244k
        return extra;
4832
4833
94.3k
    while (--ix >= 0) {
4834
93.3k
        if (DIGIT(v, ix) != 0)
4835
18.4k
            return -1; /* not a power of two */
4836
74.9k
        extra += MP_DIGIT_BIT;
4837
74.9k
    }
4838
4839
1.07k
    return extra;
4840
4841
19.4k
} /* end s_mp_ispow2() */
4842
4843
/* }}} */
4844
4845
/* {{{ s_mp_ispow2d(d) */
4846
4847
int
4848
s_mp_ispow2d(mp_digit d)
4849
3.71M
{
4850
3.71M
    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
4851
44.6k
        int pow = 0;
4852
#if defined(MP_USE_UINT_DIGIT)
4853
        if (d & 0xffff0000U)
4854
            pow += 16;
4855
        if (d & 0xff00ff00U)
4856
            pow += 8;
4857
        if (d & 0xf0f0f0f0U)
4858
            pow += 4;
4859
        if (d & 0xccccccccU)
4860
            pow += 2;
4861
        if (d & 0xaaaaaaaaU)
4862
            pow += 1;
4863
#elif defined(MP_USE_LONG_LONG_DIGIT)
4864
        if (d & 0xffffffff00000000ULL)
4865
            pow += 32;
4866
        if (d & 0xffff0000ffff0000ULL)
4867
            pow += 16;
4868
        if (d & 0xff00ff00ff00ff00ULL)
4869
            pow += 8;
4870
        if (d & 0xf0f0f0f0f0f0f0f0ULL)
4871
            pow += 4;
4872
        if (d & 0xccccccccccccccccULL)
4873
            pow += 2;
4874
        if (d & 0xaaaaaaaaaaaaaaaaULL)
4875
            pow += 1;
4876
#elif defined(MP_USE_LONG_DIGIT)
4877
44.6k
        if (d & 0xffffffff00000000UL)
4878
11.7k
            pow += 32;
4879
44.6k
        if (d & 0xffff0000ffff0000UL)
4880
10.5k
            pow += 16;
4881
44.6k
        if (d & 0xff00ff00ff00ff00UL)
4882
10.9k
            pow += 8;
4883
44.6k
        if (d & 0xf0f0f0f0f0f0f0f0UL)
4884
10.0k
            pow += 4;
4885
44.6k
        if (d & 0xccccccccccccccccUL)
4886
13.8k
            pow += 2;
4887
44.6k
        if (d & 0xaaaaaaaaaaaaaaaaUL)
4888
29.2k
            pow += 1;
4889
#else
4890
#error "unknown type for mp_digit"
4891
#endif
4892
44.6k
        return pow;
4893
44.6k
    }
4894
3.67M
    return -1;
4895
4896
3.71M
} /* end s_mp_ispow2d() */
4897
4898
/* }}} */
4899
4900
/* }}} */
4901
4902
/* {{{ Primitive I/O helpers */
4903
4904
/* {{{ s_mp_tovalue(ch, r) */
4905
4906
/*
4907
  Convert the given character to its digit value, in the given radix.
4908
  If the given character is not understood in the given radix, -1 is
4909
  returned.  Otherwise the digit's numeric value is returned.
4910
4911
  The results will be odd if you use a radix < 2 or > 62, you are
4912
  expected to know what you're up to.
4913
 */
4914
int
4915
s_mp_tovalue(char ch, int r)
4916
0
{
4917
0
    int val, xch;
4918
4919
0
    if (r > 36)
4920
0
        xch = ch;
4921
0
    else
4922
0
        xch = toupper((unsigned char)ch);
4923
4924
0
    if (isdigit((unsigned char)xch))
4925
0
        val = xch - '0';
4926
0
    else if (isupper((unsigned char)xch))
4927
0
        val = xch - 'A' + 10;
4928
0
    else if (islower((unsigned char)xch))
4929
0
        val = xch - 'a' + 36;
4930
0
    else if (xch == '+')
4931
0
        val = 62;
4932
0
    else if (xch == '/')
4933
0
        val = 63;
4934
0
    else
4935
0
        return -1;
4936
4937
0
    if (val < 0 || val >= r)
4938
0
        return -1;
4939
4940
0
    return val;
4941
4942
0
} /* end s_mp_tovalue() */
4943
4944
/* }}} */
4945
4946
/* {{{ s_mp_todigit(val, r, low) */
4947
4948
/*
4949
  Convert val to a radix-r digit, if possible.  If val is out of range
4950
  for r, returns zero.  Otherwise, returns an ASCII character denoting
4951
  the value in the given radix.
4952
4953
  The results may be odd if you use a radix < 2 or > 64, you are
4954
  expected to know what you're doing.
4955
 */
4956
4957
char
4958
s_mp_todigit(mp_digit val, int r, int low)
4959
0
{
4960
0
    char ch;
4961
4962
0
    if (val >= r)
4963
0
        return 0;
4964
4965
0
    ch = s_dmap_1[val];
4966
4967
0
    if (r <= 36 && low)
4968
0
        ch = tolower((unsigned char)ch);
4969
4970
0
    return ch;
4971
4972
0
} /* end s_mp_todigit() */
4973
4974
/* }}} */
4975
4976
/* {{{ s_mp_outlen(bits, radix) */
4977
4978
/*
4979
   Return an estimate for how long a string is needed to hold a radix
4980
   r representation of a number with 'bits' significant bits, plus an
4981
   extra for a zero terminator (assuming C style strings here)
4982
 */
4983
int
4984
s_mp_outlen(int bits, int r)
4985
0
{
4986
0
    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4987
4988
0
} /* end s_mp_outlen() */
4989
4990
/* }}} */
4991
4992
/* }}} */
4993
4994
/* {{{ mp_read_unsigned_octets(mp, str, len) */
4995
/* mp_read_unsigned_octets(mp, str, len)
4996
   Read in a raw value (base 256) into the given mp_int
4997
   No sign bit, number is positive.  Leading zeros ignored.
4998
 */
4999
5000
mp_err
5001
mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
5002
357k
{
5003
357k
    int count;
5004
357k
    mp_err res;
5005
357k
    mp_digit d;
5006
5007
357k
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
5008
5009
357k
    mp_zero(mp);
5010
5011
357k
    count = len % sizeof(mp_digit);
5012
357k
    if (count) {
5013
440k
        for (d = 0; count-- > 0; --len) {
5014
314k
            d = (d << 8) | *str++;
5015
314k
        }
5016
126k
        MP_DIGIT(mp, 0) = d;
5017
126k
    }
5018
5019
    /* Read the rest of the digits */
5020
5.90M
    for (; len > 0; len -= sizeof(mp_digit)) {
5021
49.9M
        for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
5022
44.4M
            d = (d << 8) | *str++;
5023
44.4M
        }
5024
5.55M
        if (MP_EQ == mp_cmp_z(mp)) {
5025
380k
            if (!d)
5026
140k
                continue;
5027
5.17M
        } else {
5028
5.17M
            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
5029
0
                return res;
5030
5.17M
        }
5031
5.41M
        MP_DIGIT(mp, 0) = d;
5032
5.41M
    }
5033
357k
    return MP_OKAY;
5034
357k
} /* end mp_read_unsigned_octets() */
5035
/* }}} */
5036
5037
/* {{{ mp_unsigned_octet_size(mp) */
5038
unsigned int
5039
mp_unsigned_octet_size(const mp_int *mp)
5040
34.2k
{
5041
34.2k
    unsigned int bytes;
5042
34.2k
    int ix;
5043
34.2k
    mp_digit d = 0;
5044
5045
34.2k
    ARGCHK(mp != NULL, MP_BADARG);
5046
34.2k
    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
5047
5048
34.2k
    bytes = (USED(mp) * sizeof(mp_digit));
5049
5050
    /* subtract leading zeros. */
5051
    /* Iterate over each digit... */
5052
35.1k
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5053
34.2k
        d = DIGIT(mp, ix);
5054
34.2k
        if (d)
5055
33.3k
            break;
5056
904
        bytes -= sizeof(d);
5057
904
    }
5058
34.2k
    if (!bytes)
5059
904
        return 1;
5060
5061
    /* Have MSD, check digit bytes, high order first */
5062
33.5k
    for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
5063
33.5k
        unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
5064
33.5k
        if (x)
5065
33.3k
            break;
5066
138
        --bytes;
5067
138
    }
5068
33.3k
    return bytes;
5069
34.2k
} /* end mp_unsigned_octet_size() */
5070
/* }}} */
5071
5072
/* {{{ mp_to_unsigned_octets(mp, str) */
5073
/* output a buffer of big endian octets no longer than specified. */
5074
mp_err
5075
mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5076
17.1k
{
5077
17.1k
    int ix, pos = 0;
5078
17.1k
    unsigned int bytes;
5079
5080
17.1k
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5081
5082
17.1k
    bytes = mp_unsigned_octet_size(mp);
5083
17.1k
    ARGCHK(bytes <= maxlen, MP_BADARG);
5084
5085
    /* Iterate over each digit... */
5086
720k
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5087
702k
        mp_digit d = DIGIT(mp, ix);
5088
702k
        int jx;
5089
5090
        /* Unpack digit bytes, high order first */
5091
6.32M
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5092
5.62M
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5093
5.62M
            if (!pos && !x) /* suppress leading zeros */
5094
3.68k
                continue;
5095
5.61M
            str[pos++] = x;
5096
5.61M
        }
5097
702k
    }
5098
17.1k
    if (!pos)
5099
452
        str[pos++] = 0;
5100
17.1k
    return pos;
5101
17.1k
} /* end mp_to_unsigned_octets() */
5102
/* }}} */
5103
5104
/* {{{ mp_to_signed_octets(mp, str) */
5105
/* output a buffer of big endian octets no longer than specified. */
5106
mp_err
5107
mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5108
0
{
5109
0
    int ix, pos = 0;
5110
0
    unsigned int bytes;
5111
5112
0
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5113
5114
0
    bytes = mp_unsigned_octet_size(mp);
5115
0
    ARGCHK(bytes <= maxlen, MP_BADARG);
5116
5117
    /* Iterate over each digit... */
5118
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5119
0
        mp_digit d = DIGIT(mp, ix);
5120
0
        int jx;
5121
5122
        /* Unpack digit bytes, high order first */
5123
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5124
0
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5125
0
            if (!pos) {
5126
0
                if (!x) /* suppress leading zeros */
5127
0
                    continue;
5128
0
                if (x & 0x80) { /* add one leading zero to make output positive.  */
5129
0
                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
5130
0
                    if (bytes + 1 > maxlen)
5131
0
                        return MP_BADARG;
5132
0
                    str[pos++] = 0;
5133
0
                }
5134
0
            }
5135
0
            str[pos++] = x;
5136
0
        }
5137
0
    }
5138
0
    if (!pos)
5139
0
        str[pos++] = 0;
5140
0
    return pos;
5141
0
} /* end mp_to_signed_octets() */
5142
/* }}} */
5143
5144
/* {{{ mp_to_fixlen_octets(mp, str) */
5145
/* output a buffer of big endian octets exactly as long as requested.
5146
   constant time on the value of mp. */
5147
mp_err
5148
mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
5149
47.4k
{
5150
47.4k
    int ix, jx;
5151
47.4k
    unsigned int bytes;
5152
5153
47.4k
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
5154
5155
    /* Constant time on the value of mp.  Don't use mp_unsigned_octet_size. */
5156
47.4k
    bytes = USED(mp) * MP_DIGIT_SIZE;
5157
5158
    /* If the output is shorter than the native size of mp, then check that any
5159
     * bytes not written have zero values.  This check isn't constant time on
5160
     * the assumption that timing-sensitive callers can guarantee that mp fits
5161
     * in the allocated space. */
5162
47.4k
    ix = USED(mp) - 1;
5163
47.4k
    if (bytes > length) {
5164
4.00k
        unsigned int zeros = bytes - length;
5165
5166
4.00k
        while (zeros >= MP_DIGIT_SIZE) {
5167
0
            ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
5168
0
            zeros -= MP_DIGIT_SIZE;
5169
0
            ix--;
5170
0
        }
5171
5172
4.00k
        if (zeros > 0) {
5173
4.00k
            mp_digit d = DIGIT(mp, ix);
5174
4.00k
            mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
5175
4.00k
            ARGCHK((d & m) == 0, MP_BADARG);
5176
8.21k
            for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
5177
4.20k
                *str++ = d >> (jx * CHAR_BIT);
5178
4.20k
            }
5179
4.00k
            ix--;
5180
4.00k
        }
5181
43.4k
    } else if (bytes < length) {
5182
        /* Place any needed leading zeros. */
5183
5.68k
        unsigned int zeros = length - bytes;
5184
5.68k
        memset(str, 0, zeros);
5185
5.68k
        str += zeros;
5186
5.68k
    }
5187
5188
    /* Iterate over each whole digit... */
5189
1.13M
    for (; ix >= 0; ix--) {
5190
1.08M
        mp_digit d = DIGIT(mp, ix);
5191
5192
        /* Unpack digit bytes, high order first */
5193
9.77M
        for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
5194
8.68M
            *str++ = d >> (jx * CHAR_BIT);
5195
8.68M
        }
5196
1.08M
    }
5197
47.4k
    return MP_OKAY;
5198
47.4k
} /* end mp_to_fixlen_octets() */
5199
/* }}} */
5200
5201
/* {{{ mp_cswap(condition, a, b, numdigits) */
5202
/* performs a conditional swap between mp_int. */
5203
mp_err
5204
mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
5205
29.9M
{
5206
29.9M
    mp_digit x;
5207
29.9M
    unsigned int i;
5208
29.9M
    mp_err res = 0;
5209
5210
    /* if pointers are equal return */
5211
29.9M
    if (a == b)
5212
0
        return res;
5213
5214
29.9M
    if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
5215
0
        MP_CHECKOK(s_mp_grow(a, numdigits));
5216
0
        MP_CHECKOK(s_mp_grow(b, numdigits));
5217
0
    }
5218
5219
29.9M
    condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
5220
5221
29.9M
    x = (USED(a) ^ USED(b)) & condition;
5222
29.9M
    USED(a) ^= x;
5223
29.9M
    USED(b) ^= x;
5224
5225
29.9M
    x = (SIGN(a) ^ SIGN(b)) & condition;
5226
29.9M
    SIGN(a) ^= x;
5227
29.9M
    SIGN(b) ^= x;
5228
5229
735M
    for (i = 0; i < numdigits; i++) {
5230
705M
        x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
5231
705M
        DIGIT(a, i) ^= x;
5232
705M
        DIGIT(b, i) ^= x;
5233
705M
    }
5234
5235
29.9M
CLEANUP:
5236
29.9M
    return res;
5237
29.9M
} /* end mp_cswap() */
5238
/* }}} */
5239
5240
/*------------------------------------------------------------------------*/
5241
/* HERE THERE BE DRAGONS                                                  */