Coverage Report

Created: 2024-11-21 07:03

/src/nss-nspr/nss/lib/freebl/mpi/mpi.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  mpi.c
3
 *
4
 *  Arbitrary precision integer arithmetic library
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9
10
#include "mpi-priv.h"
11
#include "mplogic.h"
12
13
#include <assert.h>
14
15
#if defined(__arm__) && \
16
    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
17
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
18
#undef MP_ASSEMBLY_MULTIPLY
19
#undef MP_ASSEMBLY_SQUARE
20
#endif
21
22
#if MP_LOGTAB
23
/*
24
  A table of the logs of 2 for various bases (the 0 and 1 entries of
25
  this table are meaningless and should not be referenced).
26
27
  This table is used to compute output lengths for the mp_toradix()
28
  function.  Since a number n in radix r takes up about log_r(n)
29
  digits, we estimate the output size by taking the least integer
30
  greater than log_r(n), where:
31
32
  log_r(n) = log_2(n) * log_r(2)
33
34
  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
35
  which are the output bases supported.
36
 */
37
#include "logtab.h"
38
#endif
39
40
#ifdef CT_VERIF
41
#include <valgrind/memcheck.h>
42
#endif
43
44
/* {{{ Constant strings */
45
46
/* Constant strings returned by mp_strerror() */
47
static const char *mp_err_string[] = {
48
    "unknown result code",     /* say what?            */
49
    "boolean true",            /* MP_OKAY, MP_YES      */
50
    "boolean false",           /* MP_NO                */
51
    "out of memory",           /* MP_MEM               */
52
    "argument out of range",   /* MP_RANGE             */
53
    "invalid input parameter", /* MP_BADARG            */
54
    "result is undefined"      /* MP_UNDEF             */
55
};
56
57
/* Value to digit maps for radix conversion   */
58
59
/* s_dmap_1 - standard digits and letters */
60
static const char *s_dmap_1 =
61
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
62
63
/* }}} */
64
65
/* {{{ Default precision manipulation */
66
67
/* Default precision for newly created mp_int's      */
68
static mp_size s_mp_defprec = MP_DEFPREC;
69
70
mp_size
71
mp_get_prec(void)
72
0
{
73
0
    return s_mp_defprec;
74
75
0
} /* end mp_get_prec() */
76
77
void
78
mp_set_prec(mp_size prec)
79
0
{
80
0
    if (prec == 0)
81
0
        s_mp_defprec = MP_DEFPREC;
82
0
    else
83
0
        s_mp_defprec = prec;
84
85
0
} /* end mp_set_prec() */
86
87
/* }}} */
88
89
#ifdef CT_VERIF
90
void
91
mp_taint(mp_int *mp)
92
{
93
    size_t i;
94
    for (i = 0; i < mp->used; ++i) {
95
        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
96
    }
97
}
98
99
void
100
mp_untaint(mp_int *mp)
101
{
102
    size_t i;
103
    for (i = 0; i < mp->used; ++i) {
104
        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
105
    }
106
}
107
#endif
108
109
/*------------------------------------------------------------------------*/
110
/* {{{ mp_init(mp) */
111
112
/*
113
  mp_init(mp)
114
115
  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
116
  MP_MEM if memory could not be allocated for the structure.
117
 */
118
119
mp_err
120
mp_init(mp_int *mp)
121
25.7k
{
122
25.7k
    return mp_init_size(mp, s_mp_defprec);
123
124
25.7k
} /* end mp_init() */
125
126
/* }}} */
127
128
/* {{{ mp_init_size(mp, prec) */
129
130
/*
131
  mp_init_size(mp, prec)
132
133
  Initialize a new zero-valued mp_int with at least the given
134
  precision; returns MP_OKAY if successful, or MP_MEM if memory could
135
  not be allocated for the structure.
136
 */
137
138
mp_err
139
mp_init_size(mp_int *mp, mp_size prec)
140
2.29M
{
141
2.29M
    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
142
143
2.29M
    prec = MP_ROUNDUP(prec, s_mp_defprec);
144
2.29M
    if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
145
0
        return MP_MEM;
146
147
2.29M
    SIGN(mp) = ZPOS;
148
2.29M
    USED(mp) = 1;
149
2.29M
    ALLOC(mp) = prec;
150
151
2.29M
    return MP_OKAY;
152
153
2.29M
} /* end mp_init_size() */
154
155
/* }}} */
156
157
/* {{{ mp_init_copy(mp, from) */
158
159
/*
160
  mp_init_copy(mp, from)
161
162
  Initialize mp as an exact copy of from.  Returns MP_OKAY if
163
  successful, MP_MEM if memory could not be allocated for the new
164
  structure.
165
 */
166
167
mp_err
168
mp_init_copy(mp_int *mp, const mp_int *from)
169
6.82M
{
170
6.82M
    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
171
172
6.82M
    if (mp == from)
173
0
        return MP_OKAY;
174
175
6.82M
    if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
176
0
        return MP_MEM;
177
178
6.82M
    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
179
6.82M
    USED(mp) = USED(from);
180
6.82M
    ALLOC(mp) = ALLOC(from);
181
6.82M
    SIGN(mp) = SIGN(from);
182
183
6.82M
    return MP_OKAY;
184
185
6.82M
} /* end mp_init_copy() */
186
187
/* }}} */
188
189
/* {{{ mp_copy(from, to) */
190
191
/*
192
  mp_copy(from, to)
193
194
  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
195
  'to' has already been initialized (if not, use mp_init_copy()
196
  instead). If 'from' and 'to' are identical, nothing happens.
197
 */
198
199
mp_err
200
mp_copy(const mp_int *from, mp_int *to)
201
46.2k
{
202
46.2k
    ARGCHK(from != NULL && to != NULL, MP_BADARG);
203
204
46.2k
    if (from == to)
205
1.26k
        return MP_OKAY;
206
207
44.9k
    { /* copy */
208
44.9k
        mp_digit *tmp;
209
210
        /*
211
          If the allocated buffer in 'to' already has enough space to hold
212
          all the used digits of 'from', we'll re-use it to avoid hitting
213
          the memory allocater more than necessary; otherwise, we'd have
214
          to grow anyway, so we just allocate a hunk and make the copy as
215
          usual
216
         */
217
44.9k
        if (ALLOC(to) >= USED(from)) {
218
44.8k
            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
219
44.8k
            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
220
221
44.8k
        } else {
222
132
            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
223
0
                return MP_MEM;
224
225
132
            s_mp_copy(DIGITS(from), tmp, USED(from));
226
227
132
            if (DIGITS(to) != NULL) {
228
132
                s_mp_setz(DIGITS(to), ALLOC(to));
229
132
                s_mp_free(DIGITS(to));
230
132
            }
231
232
132
            DIGITS(to) = tmp;
233
132
            ALLOC(to) = ALLOC(from);
234
132
        }
235
236
        /* Copy the precision and sign from the original */
237
44.9k
        USED(to) = USED(from);
238
44.9k
        SIGN(to) = SIGN(from);
239
44.9k
    } /* end copy */
240
241
44.9k
    return MP_OKAY;
242
243
44.9k
} /* end mp_copy() */
244
245
/* }}} */
246
247
/* {{{ mp_exch(mp1, mp2) */
248
249
/*
250
  mp_exch(mp1, mp2)
251
252
  Exchange mp1 and mp2 without allocating any intermediate memory
253
  (well, unless you count the stack space needed for this call and the
254
  locals it creates...).  This cannot fail.
255
 */
256
257
void
258
mp_exch(mp_int *mp1, mp_int *mp2)
259
1.50M
{
260
1.50M
#if MP_ARGCHK == 2
261
1.50M
    assert(mp1 != NULL && mp2 != NULL);
262
#else
263
    if (mp1 == NULL || mp2 == NULL)
264
        return;
265
#endif
266
267
0
    s_mp_exch(mp1, mp2);
268
269
1.50M
} /* end mp_exch() */
270
271
/* }}} */
272
273
/* {{{ mp_clear(mp) */
274
275
/*
276
  mp_clear(mp)
277
278
  Release the storage used by an mp_int, and void its fields so that
279
  if someone calls mp_clear() again for the same int later, we won't
280
  get tollchocked.
281
 */
282
283
void
284
mp_clear(mp_int *mp)
285
10.6M
{
286
10.6M
    if (mp == NULL)
287
0
        return;
288
289
10.6M
    if (DIGITS(mp) != NULL) {
290
9.11M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
291
9.11M
        s_mp_free(DIGITS(mp));
292
9.11M
        DIGITS(mp) = NULL;
293
9.11M
    }
294
295
10.6M
    USED(mp) = 0;
296
10.6M
    ALLOC(mp) = 0;
297
298
10.6M
} /* end mp_clear() */
299
300
/* }}} */
301
302
/* {{{ mp_zero(mp) */
303
304
/*
305
  mp_zero(mp)
306
307
  Set mp to zero.  Does not change the allocated size of the structure,
308
  and therefore cannot fail (except on a bad argument, which we ignore)
309
 */
310
void
311
mp_zero(mp_int *mp)
312
58.2k
{
313
58.2k
    if (mp == NULL)
314
0
        return;
315
316
58.2k
    s_mp_setz(DIGITS(mp), ALLOC(mp));
317
58.2k
    USED(mp) = 1;
318
58.2k
    SIGN(mp) = ZPOS;
319
320
58.2k
} /* end mp_zero() */
321
322
/* }}} */
323
324
/* {{{ mp_set(mp, d) */
325
326
void
327
mp_set(mp_int *mp, mp_digit d)
328
2.64k
{
329
2.64k
    if (mp == NULL)
330
0
        return;
331
332
2.64k
    mp_zero(mp);
333
2.64k
    DIGIT(mp, 0) = d;
334
335
2.64k
} /* end mp_set() */
336
337
/* }}} */
338
339
/* {{{ mp_set_int(mp, z) */
340
341
mp_err
342
mp_set_int(mp_int *mp, long z)
343
12
{
344
12
    unsigned long v = labs(z);
345
12
    mp_err res;
346
347
12
    ARGCHK(mp != NULL, MP_BADARG);
348
349
    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
350
12
    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
351
0
        return res;
352
0
    }
353
354
12
    if (z < 0) {
355
0
        SIGN(mp) = NEG;
356
0
    }
357
358
12
    return MP_OKAY;
359
12
} /* end mp_set_int() */
360
361
/* }}} */
362
363
/* {{{ mp_set_ulong(mp, z) */
364
365
mp_err
366
mp_set_ulong(mp_int *mp, unsigned long z)
367
12
{
368
12
    int ix;
369
12
    mp_err res;
370
371
12
    ARGCHK(mp != NULL, MP_BADARG);
372
373
0
    mp_zero(mp);
374
12
    if (z == 0)
375
0
        return MP_OKAY; /* shortcut for zero */
376
377
12
    if (sizeof z <= sizeof(mp_digit)) {
378
12
        DIGIT(mp, 0) = z;
379
12
    } else {
380
0
        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
381
0
            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
382
0
                return res;
383
384
0
            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
385
0
            if (res != MP_OKAY)
386
0
                return res;
387
0
        }
388
0
    }
389
12
    return MP_OKAY;
390
12
} /* end mp_set_ulong() */
391
392
/* }}} */
393
394
/*------------------------------------------------------------------------*/
395
/* {{{ Digit arithmetic */
396
397
/* {{{ mp_add_d(a, d, b) */
398
399
/*
400
  mp_add_d(a, d, b)
401
402
  Compute the sum b = a + d, for a single digit d.  Respects the sign of
403
  its primary addend (single digits are unsigned anyway).
404
 */
405
406
mp_err
407
mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
408
0
{
409
0
    mp_int tmp;
410
0
    mp_err res;
411
412
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
413
414
0
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
415
0
        return res;
416
417
0
    if (SIGN(&tmp) == ZPOS) {
418
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
419
0
            goto CLEANUP;
420
0
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
421
0
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
422
0
            goto CLEANUP;
423
0
    } else {
424
0
        mp_neg(&tmp, &tmp);
425
426
0
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
427
0
    }
428
429
0
    if (s_mp_cmp_d(&tmp, 0) == 0)
430
0
        SIGN(&tmp) = ZPOS;
431
432
0
    s_mp_exch(&tmp, b);
433
434
0
CLEANUP:
435
0
    mp_clear(&tmp);
436
0
    return res;
437
438
0
} /* end mp_add_d() */
439
440
/* }}} */
441
442
/* {{{ mp_sub_d(a, d, b) */
443
444
/*
445
  mp_sub_d(a, d, b)
446
447
  Compute the difference b = a - d, for a single digit d.  Respects the
448
  sign of its subtrahend (single digits are unsigned anyway).
449
 */
450
451
mp_err
452
mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
453
0
{
454
0
    mp_int tmp;
455
0
    mp_err res;
456
457
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
458
459
0
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
460
0
        return res;
461
462
0
    if (SIGN(&tmp) == NEG) {
463
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
464
0
            goto CLEANUP;
465
0
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
466
0
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
467
0
            goto CLEANUP;
468
0
    } else {
469
0
        mp_neg(&tmp, &tmp);
470
471
0
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
472
0
        SIGN(&tmp) = NEG;
473
0
    }
474
475
0
    if (s_mp_cmp_d(&tmp, 0) == 0)
476
0
        SIGN(&tmp) = ZPOS;
477
478
0
    s_mp_exch(&tmp, b);
479
480
0
CLEANUP:
481
0
    mp_clear(&tmp);
482
0
    return res;
483
484
0
} /* end mp_sub_d() */
485
486
/* }}} */
487
488
/* {{{ mp_mul_d(a, d, b) */
489
490
/*
491
  mp_mul_d(a, d, b)
492
493
  Compute the product b = a * d, for a single digit d.  Respects the sign
494
  of its multiplicand (single digits are unsigned anyway)
495
 */
496
497
mp_err
498
mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
499
0
{
500
0
    mp_err res;
501
502
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
503
504
0
    if (d == 0) {
505
0
        mp_zero(b);
506
0
        return MP_OKAY;
507
0
    }
508
509
0
    if ((res = mp_copy(a, b)) != MP_OKAY)
510
0
        return res;
511
512
0
    res = s_mp_mul_d(b, d);
513
514
0
    return res;
515
516
0
} /* end mp_mul_d() */
517
518
/* }}} */
519
520
/* {{{ mp_mul_2(a, c) */
521
522
mp_err
523
mp_mul_2(const mp_int *a, mp_int *c)
524
0
{
525
0
    mp_err res;
526
527
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
528
529
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
530
0
        return res;
531
532
0
    return s_mp_mul_2(c);
533
534
0
} /* end mp_mul_2() */
535
536
/* }}} */
537
538
/* {{{ mp_div_d(a, d, q, r) */
539
540
/*
541
  mp_div_d(a, d, q, r)
542
543
  Compute the quotient q = a / d and remainder r = a mod d, for a
544
  single digit d.  Respects the sign of its divisor (single digits are
545
  unsigned anyway).
546
 */
547
548
mp_err
549
mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
550
1.53M
{
551
1.53M
    mp_err res;
552
1.53M
    mp_int qp;
553
1.53M
    mp_digit rem = 0;
554
1.53M
    int pow;
555
556
1.53M
    ARGCHK(a != NULL, MP_BADARG);
557
558
1.53M
    if (d == 0)
559
0
        return MP_RANGE;
560
561
    /* Shortcut for powers of two ... */
562
1.53M
    if ((pow = s_mp_ispow2d(d)) >= 0) {
563
0
        mp_digit mask;
564
565
0
        mask = ((mp_digit)1 << pow) - 1;
566
0
        rem = DIGIT(a, 0) & mask;
567
568
0
        if (q) {
569
0
            if ((res = mp_copy(a, q)) != MP_OKAY) {
570
0
                return res;
571
0
            }
572
0
            s_mp_div_2d(q, pow);
573
0
        }
574
575
0
        if (r)
576
0
            *r = rem;
577
578
0
        return MP_OKAY;
579
0
    }
580
581
1.53M
    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
582
0
        return res;
583
584
1.53M
    res = s_mp_div_d(&qp, d, &rem);
585
586
1.53M
    if (s_mp_cmp_d(&qp, 0) == 0)
587
2.05k
        SIGN(q) = ZPOS;
588
589
1.53M
    if (r) {
590
1.53M
        *r = rem;
591
1.53M
    }
592
593
1.53M
    if (q)
594
1.53M
        s_mp_exch(&qp, q);
595
596
1.53M
    mp_clear(&qp);
597
1.53M
    return res;
598
599
1.53M
} /* end mp_div_d() */
600
601
/* }}} */
602
603
/* {{{ mp_div_2(a, c) */
604
605
/*
606
  mp_div_2(a, c)
607
608
  Compute c = a / 2, disregarding the remainder.
609
 */
610
611
mp_err
612
mp_div_2(const mp_int *a, mp_int *c)
613
0
{
614
0
    mp_err res;
615
616
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
617
618
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
619
0
        return res;
620
621
0
    s_mp_div_2(c);
622
623
0
    return MP_OKAY;
624
625
0
} /* end mp_div_2() */
626
627
/* }}} */
628
629
/* {{{ mp_expt_d(a, d, b) */
630
631
mp_err
632
mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
633
0
{
634
0
    mp_int s, x;
635
0
    mp_err res;
636
637
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
638
639
0
    if ((res = mp_init(&s)) != MP_OKAY)
640
0
        return res;
641
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
642
0
        goto X;
643
644
0
    DIGIT(&s, 0) = 1;
645
646
0
    while (d != 0) {
647
0
        if (d & 1) {
648
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
649
0
                goto CLEANUP;
650
0
        }
651
652
0
        d /= 2;
653
654
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
655
0
            goto CLEANUP;
656
0
    }
657
658
0
    s_mp_exch(&s, c);
659
660
0
CLEANUP:
661
0
    mp_clear(&x);
662
0
X:
663
0
    mp_clear(&s);
664
665
0
    return res;
666
667
0
} /* end mp_expt_d() */
668
669
/* }}} */
670
671
/* }}} */
672
673
/*------------------------------------------------------------------------*/
674
/* {{{ Full arithmetic */
675
676
/* {{{ mp_abs(a, b) */
677
678
/*
679
  mp_abs(a, b)
680
681
  Compute b = |a|.  'a' and 'b' may be identical.
682
 */
683
684
mp_err
685
mp_abs(const mp_int *a, mp_int *b)
686
465
{
687
465
    mp_err res;
688
689
465
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
690
691
465
    if ((res = mp_copy(a, b)) != MP_OKAY)
692
0
        return res;
693
694
465
    SIGN(b) = ZPOS;
695
696
465
    return MP_OKAY;
697
698
465
} /* end mp_abs() */
699
700
/* }}} */
701
702
/* {{{ mp_neg(a, b) */
703
704
/*
705
  mp_neg(a, b)
706
707
  Compute b = -a.  'a' and 'b' may be identical.
708
 */
709
710
mp_err
711
mp_neg(const mp_int *a, mp_int *b)
712
15
{
713
15
    mp_err res;
714
715
15
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
716
717
15
    if ((res = mp_copy(a, b)) != MP_OKAY)
718
0
        return res;
719
720
15
    if (s_mp_cmp_d(b, 0) == MP_EQ)
721
1
        SIGN(b) = ZPOS;
722
14
    else
723
14
        SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
724
725
15
    return MP_OKAY;
726
727
15
} /* end mp_neg() */
728
729
/* }}} */
730
731
/* {{{ mp_add(a, b, c) */
732
733
/*
734
  mp_add(a, b, c)
735
736
  Compute c = a + b.  All parameters may be identical.
737
 */
738
739
mp_err
740
mp_add(const mp_int *a, const mp_int *b, mp_int *c)
741
7.14M
{
742
7.14M
    mp_err res;
743
744
7.14M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
745
746
7.14M
    if (SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
747
4.54M
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
748
4.54M
    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b|   */
749
983k
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
750
1.62M
    } else { /* different sign: |a|  < |b|   */
751
1.62M
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
752
1.62M
    }
753
754
7.14M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
755
536
        SIGN(c) = ZPOS;
756
757
7.14M
CLEANUP:
758
7.14M
    return res;
759
760
7.14M
} /* end mp_add() */
761
762
/* }}} */
763
764
/* {{{ mp_sub(a, b, c) */
765
766
/*
767
  mp_sub(a, b, c)
768
769
  Compute c = a - b.  All parameters may be identical.
770
 */
771
772
mp_err
773
mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
774
3.20M
{
775
3.20M
    mp_err res;
776
3.20M
    int magDiff;
777
778
3.20M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
779
780
3.20M
    if (a == b) {
781
0
        mp_zero(c);
782
0
        return MP_OKAY;
783
0
    }
784
785
3.20M
    if (MP_SIGN(a) != MP_SIGN(b)) {
786
759k
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
787
2.44M
    } else if (!(magDiff = s_mp_cmp(a, b))) {
788
35.1k
        mp_zero(c);
789
35.1k
        res = MP_OKAY;
790
2.40M
    } else if (magDiff > 0) {
791
2.08M
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
792
2.08M
    } else {
793
329k
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
794
329k
        MP_SIGN(c) = !MP_SIGN(a);
795
329k
    }
796
797
3.20M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
798
35.1k
        MP_SIGN(c) = MP_ZPOS;
799
800
3.20M
CLEANUP:
801
3.20M
    return res;
802
803
3.20M
} /* end mp_sub() */
804
805
/* }}} */
806
807
/* {{{ s_mp_mulg(a, b, c) */
808
809
/*
810
  s_mp_mulg(a, b, c)
811
812
  Compute c = a * b.  All parameters may be identical. if constantTime is set,
813
  then the operations are done in constant time. The original is mostly
814
  constant time as long as s_mpv_mul_d_add() is constant time. This is true
815
  of the x86 assembler, as well as the current c code.
816
 */
817
mp_err
818
s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
819
2.64M
{
820
2.64M
    mp_digit *pb;
821
2.64M
    mp_int tmp;
822
2.64M
    mp_err res;
823
2.64M
    mp_size ib;
824
2.64M
    mp_size useda, usedb;
825
826
2.64M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
827
828
2.64M
    if (a == c) {
829
2.63M
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
830
0
            return res;
831
2.63M
        if (a == b)
832
0
            b = &tmp;
833
2.63M
        a = &tmp;
834
2.63M
    } else if (b == c) {
835
0
        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
836
0
            return res;
837
0
        b = &tmp;
838
1.35k
    } else {
839
1.35k
        MP_DIGITS(&tmp) = 0;
840
1.35k
    }
841
842
2.64M
    if (MP_USED(a) < MP_USED(b)) {
843
546k
        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
844
546k
        b = a;
845
546k
        a = xch;
846
546k
    }
847
848
2.64M
    MP_USED(c) = 1;
849
2.64M
    MP_DIGIT(c, 0) = 0;
850
2.64M
    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
851
0
        goto CLEANUP;
852
853
2.64M
#ifdef NSS_USE_COMBA
854
    /* comba isn't constant time because it clamps! If we cared
855
     * (we needed a constant time version of multiply that was 'faster'
856
     * we could easily pass constantTime down to the comba code and
857
     * get it to skip the clamp... but here are assembler versions
858
     * which add comba to platforms that can't compile the normal
859
     * comba's imbedded assembler which would also need to change, so
860
     * for now we just skip comba when we are running constant time. */
861
2.64M
    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
862
1.09M
        if (MP_USED(a) == 4) {
863
41.9k
            s_mp_mul_comba_4(a, b, c);
864
41.9k
            goto CLEANUP;
865
41.9k
        }
866
1.05M
        if (MP_USED(a) == 8) {
867
3.46k
            s_mp_mul_comba_8(a, b, c);
868
3.46k
            goto CLEANUP;
869
3.46k
        }
870
1.05M
        if (MP_USED(a) == 16) {
871
735
            s_mp_mul_comba_16(a, b, c);
872
735
            goto CLEANUP;
873
735
        }
874
1.05M
        if (MP_USED(a) == 32) {
875
3.65k
            s_mp_mul_comba_32(a, b, c);
876
3.65k
            goto CLEANUP;
877
3.65k
        }
878
1.05M
    }
879
2.59M
#endif
880
881
2.59M
    pb = MP_DIGITS(b);
882
2.59M
    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
883
884
    /* Outer loop:  Digits of b */
885
2.59M
    useda = MP_USED(a);
886
2.59M
    usedb = MP_USED(b);
887
204M
    for (ib = 1; ib < usedb; ib++) {
888
201M
        mp_digit b_i = *pb++;
889
890
        /* Inner product:  Digits of a */
891
201M
        if (constantTime || b_i)
892
200M
            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
893
1.14M
        else
894
1.14M
            MP_DIGIT(c, ib + useda) = b_i;
895
201M
    }
896
897
2.59M
    if (!constantTime) {
898
2.59M
        s_mp_clamp(c);
899
2.59M
    }
900
901
2.59M
    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
902
2.58M
        SIGN(c) = ZPOS;
903
858
    else
904
858
        SIGN(c) = NEG;
905
906
2.64M
CLEANUP:
907
2.64M
    mp_clear(&tmp);
908
2.64M
    return res;
909
2.59M
} /* end smp_mulg() */
910
911
/* }}} */
912
913
/* {{{ mp_mul(a, b, c) */
914
915
/*
916
  mp_mul(a, b, c)
917
918
  Compute c = a * b.  All parameters may be identical.
919
 */
920
921
mp_err
922
mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
923
2.64M
{
924
2.64M
    return s_mp_mulg(a, b, c, 0);
925
2.64M
} /* end mp_mul() */
926
927
/* }}} */
928
929
/* {{{ mp_mulCT(a, b, c) */
930
931
/*
932
  mp_mulCT(a, b, c)
933
934
  Compute c = a * b. In constant time. Parameters may not be identical.
935
  NOTE: a and b may be modified.
936
 */
937
938
mp_err
939
mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
940
6
{
941
6
    mp_err res;
942
943
    /* make the multiply values fixed length so multiply
944
     * doesn't leak the length. at this point all the
945
     * values are blinded, but once we finish we want the
946
     * output size to be hidden (so no clamping the out put) */
947
6
    MP_CHECKOK(s_mp_pad(a, setSize));
948
6
    MP_CHECKOK(s_mp_pad(b, setSize));
949
6
    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
950
6
    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
951
6
CLEANUP:
952
6
    return res;
953
6
} /* end mp_mulCT() */
954
955
/* }}} */
956
957
/* {{{ mp_sqr(a, sqr) */
958
959
#if MP_SQUARE
960
/*
961
  Computes the square of a.  This can be done more
962
  efficiently than a general multiplication, because many of the
963
  computation steps are redundant when squaring.  The inner product
964
  step is a bit more complicated, but we save a fair number of
965
  iterations of the multiplication loop.
966
 */
967
968
/* sqr = a^2;   Caller provides both a and tmp; */
969
mp_err
970
mp_sqr(const mp_int *a, mp_int *sqr)
971
1.50M
{
972
1.50M
    mp_digit *pa;
973
1.50M
    mp_digit d;
974
1.50M
    mp_err res;
975
1.50M
    mp_size ix;
976
1.50M
    mp_int tmp;
977
1.50M
    int count;
978
979
1.50M
    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
980
981
1.50M
    if (a == sqr) {
982
0
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
983
0
            return res;
984
0
        a = &tmp;
985
1.50M
    } else {
986
1.50M
        DIGITS(&tmp) = 0;
987
1.50M
        res = MP_OKAY;
988
1.50M
    }
989
990
1.50M
    ix = 2 * MP_USED(a);
991
1.50M
    if (ix > MP_ALLOC(sqr)) {
992
20
        MP_USED(sqr) = 1;
993
20
        MP_CHECKOK(s_mp_grow(sqr, ix));
994
20
    }
995
1.50M
    MP_USED(sqr) = ix;
996
1.50M
    MP_DIGIT(sqr, 0) = 0;
997
998
1.50M
#ifdef NSS_USE_COMBA
999
1.50M
    if (IS_POWER_OF_2(MP_USED(a))) {
1000
777k
        if (MP_USED(a) == 4) {
1001
41.3k
            s_mp_sqr_comba_4(a, sqr);
1002
41.3k
            goto CLEANUP;
1003
41.3k
        }
1004
736k
        if (MP_USED(a) == 8) {
1005
2.42k
            s_mp_sqr_comba_8(a, sqr);
1006
2.42k
            goto CLEANUP;
1007
2.42k
        }
1008
733k
        if (MP_USED(a) == 16) {
1009
15.4k
            s_mp_sqr_comba_16(a, sqr);
1010
15.4k
            goto CLEANUP;
1011
15.4k
        }
1012
718k
        if (MP_USED(a) == 32) {
1013
28.8k
            s_mp_sqr_comba_32(a, sqr);
1014
28.8k
            goto CLEANUP;
1015
28.8k
        }
1016
718k
    }
1017
1.42M
#endif
1018
1019
1.42M
    pa = MP_DIGITS(a);
1020
1.42M
    count = MP_USED(a) - 1;
1021
1.42M
    if (count > 0) {
1022
1.17M
        d = *pa++;
1023
1.17M
        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
1024
111M
        for (ix = 3; --count > 0; ix += 2) {
1025
110M
            d = *pa++;
1026
110M
            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
1027
110M
        }                                    /* for(ix ...) */
1028
1.17M
        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
1029
1030
        /* now sqr *= 2 */
1031
1.17M
        s_mp_mul_2(sqr);
1032
1.17M
    } else {
1033
248k
        MP_DIGIT(sqr, 1) = 0;
1034
248k
    }
1035
1036
    /* now add the squares of the digits of a to sqr. */
1037
1.42M
    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
1038
1039
1.42M
    SIGN(sqr) = ZPOS;
1040
1.42M
    s_mp_clamp(sqr);
1041
1042
1.50M
CLEANUP:
1043
1.50M
    mp_clear(&tmp);
1044
1.50M
    return res;
1045
1046
1.42M
} /* end mp_sqr() */
1047
#endif
1048
1049
/* }}} */
1050
1051
/* {{{ mp_div(a, b, q, r) */
1052
1053
/*
1054
  mp_div(a, b, q, r)
1055
1056
  Compute q = a / b and r = a mod b.  Input parameters may be re-used
1057
  as output parameters.  If q or r is NULL, that portion of the
1058
  computation will be discarded (although it will still be computed)
1059
 */
1060
mp_err
1061
mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1062
2.36k
{
1063
2.36k
    mp_err res;
1064
2.36k
    mp_int *pQ, *pR;
1065
2.36k
    mp_int qtmp, rtmp, btmp;
1066
2.36k
    int cmp;
1067
2.36k
    mp_sign signA;
1068
2.36k
    mp_sign signB;
1069
1070
2.36k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
1071
1072
2.36k
    signA = MP_SIGN(a);
1073
2.36k
    signB = MP_SIGN(b);
1074
1075
2.36k
    if (mp_cmp_z(b) == MP_EQ)
1076
3
        return MP_RANGE;
1077
1078
2.36k
    DIGITS(&qtmp) = 0;
1079
2.36k
    DIGITS(&rtmp) = 0;
1080
2.36k
    DIGITS(&btmp) = 0;
1081
1082
    /* Set up some temporaries... */
1083
2.36k
    if (!r || r == a || r == b) {
1084
2.23k
        MP_CHECKOK(mp_init_copy(&rtmp, a));
1085
2.23k
        pR = &rtmp;
1086
2.23k
    } else {
1087
126
        MP_CHECKOK(mp_copy(a, r));
1088
126
        pR = r;
1089
126
    }
1090
1091
2.36k
    if (!q || q == a || q == b) {
1092
2.20k
        MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
1093
2.20k
        pQ = &qtmp;
1094
2.20k
    } else {
1095
152
        MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
1096
152
        pQ = q;
1097
152
        mp_zero(pQ);
1098
152
    }
1099
1100
    /*
1101
      If |a| <= |b|, we can compute the solution without division;
1102
      otherwise, we actually do the work required.
1103
     */
1104
2.36k
    if ((cmp = s_mp_cmp(a, b)) <= 0) {
1105
34
        if (cmp) {
1106
            /* r was set to a above. */
1107
34
            mp_zero(pQ);
1108
34
        } else {
1109
0
            mp_set(pQ, 1);
1110
0
            mp_zero(pR);
1111
0
        }
1112
2.32k
    } else {
1113
2.32k
        MP_CHECKOK(mp_init_copy(&btmp, b));
1114
2.32k
        MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
1115
2.32k
    }
1116
1117
    /* Compute the signs for the output  */
1118
2.36k
    MP_SIGN(pR) = signA;        /* Sr = Sa              */
1119
    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1120
2.36k
    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1121
1122
2.36k
    if (s_mp_cmp_d(pQ, 0) == MP_EQ)
1123
34
        SIGN(pQ) = ZPOS;
1124
2.36k
    if (s_mp_cmp_d(pR, 0) == MP_EQ)
1125
127
        SIGN(pR) = ZPOS;
1126
1127
    /* Copy output, if it is needed      */
1128
2.36k
    if (q && q != pQ)
1129
790
        s_mp_exch(pQ, q);
1130
1131
2.36k
    if (r && r != pR)
1132
1.29k
        s_mp_exch(pR, r);
1133
1134
2.36k
CLEANUP:
1135
2.36k
    mp_clear(&btmp);
1136
2.36k
    mp_clear(&rtmp);
1137
2.36k
    mp_clear(&qtmp);
1138
1139
2.36k
    return res;
1140
1141
2.36k
} /* end mp_div() */
1142
1143
/* }}} */
1144
1145
/* {{{ mp_div_2d(a, d, q, r) */
1146
1147
mp_err
1148
mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1149
0
{
1150
0
    mp_err res;
1151
1152
0
    ARGCHK(a != NULL, MP_BADARG);
1153
1154
0
    if (q) {
1155
0
        if ((res = mp_copy(a, q)) != MP_OKAY)
1156
0
            return res;
1157
0
    }
1158
0
    if (r) {
1159
0
        if ((res = mp_copy(a, r)) != MP_OKAY)
1160
0
            return res;
1161
0
    }
1162
0
    if (q) {
1163
0
        s_mp_div_2d(q, d);
1164
0
    }
1165
0
    if (r) {
1166
0
        s_mp_mod_2d(r, d);
1167
0
    }
1168
1169
0
    return MP_OKAY;
1170
1171
0
} /* end mp_div_2d() */
1172
1173
/* }}} */
1174
1175
/* {{{ mp_expt(a, b, c) */
1176
1177
/*
1178
  mp_expt(a, b, c)
1179
1180
  Compute c = a ** b, that is, raise a to the b power.  Uses a
1181
  standard iterative square-and-multiply technique.
1182
 */
1183
1184
mp_err
1185
mp_expt(mp_int *a, mp_int *b, mp_int *c)
1186
1
{
1187
1
    mp_int s, x;
1188
1
    mp_err res;
1189
1
    mp_digit d;
1190
1
    unsigned int dig, bit;
1191
1192
1
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1193
1194
1
    if (mp_cmp_z(b) < 0)
1195
0
        return MP_RANGE;
1196
1197
1
    if ((res = mp_init(&s)) != MP_OKAY)
1198
0
        return res;
1199
1200
1
    mp_set(&s, 1);
1201
1202
1
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1203
0
        goto X;
1204
1205
    /* Loop over low-order digits in ascending order */
1206
1
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1207
0
        d = DIGIT(b, dig);
1208
1209
        /* Loop over bits of each non-maximal digit */
1210
0
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1211
0
            if (d & 1) {
1212
0
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1213
0
                    goto CLEANUP;
1214
0
            }
1215
1216
0
            d >>= 1;
1217
1218
0
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1219
0
                goto CLEANUP;
1220
0
        }
1221
0
    }
1222
1223
    /* Consider now the last digit... */
1224
1
    d = DIGIT(b, dig);
1225
1226
1
    while (d) {
1227
0
        if (d & 1) {
1228
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1229
0
                goto CLEANUP;
1230
0
        }
1231
1232
0
        d >>= 1;
1233
1234
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1235
0
            goto CLEANUP;
1236
0
    }
1237
1238
1
    if (mp_iseven(b))
1239
1
        SIGN(&s) = SIGN(a);
1240
1241
1
    res = mp_copy(&s, c);
1242
1243
1
CLEANUP:
1244
1
    mp_clear(&x);
1245
1
X:
1246
1
    mp_clear(&s);
1247
1248
1
    return res;
1249
1250
1
} /* end mp_expt() */
1251
1252
/* }}} */
1253
1254
/* {{{ mp_2expt(a, k) */
1255
1256
/* Compute a = 2^k */
1257
1258
mp_err
1259
mp_2expt(mp_int *a, mp_digit k)
1260
0
{
1261
0
    ARGCHK(a != NULL, MP_BADARG);
1262
1263
0
    return s_mp_2expt(a, k);
1264
1265
0
} /* end mp_2expt() */
1266
1267
/* }}} */
1268
1269
/* {{{ mp_mod(a, m, c) */
1270
1271
/*
1272
  mp_mod(a, m, c)
1273
1274
  Compute c = a (mod m).  Result will always be 0 <= c < m.
1275
 */
1276
1277
mp_err
1278
mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1279
1.39k
{
1280
1.39k
    mp_err res;
1281
1.39k
    int mag;
1282
1283
1.39k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1284
1285
1.39k
    if (SIGN(m) == NEG)
1286
0
        return MP_RANGE;
1287
1288
    /*
1289
     If |a| > m, we need to divide to get the remainder and take the
1290
     absolute value.
1291
1292
     If |a| < m, we don't need to do any division, just copy and adjust
1293
     the sign (if a is negative).
1294
1295
     If |a| == m, we can simply set the result to zero.
1296
1297
     This order is intended to minimize the average path length of the
1298
     comparison chain on common workloads -- the most frequent cases are
1299
     that |a| != m, so we do those first.
1300
     */
1301
1.39k
    if ((mag = s_mp_cmp(a, m)) > 0) {
1302
308
        if ((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1303
3
            return res;
1304
1305
305
        if (SIGN(c) == NEG) {
1306
16
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1307
0
                return res;
1308
16
        }
1309
1310
1.08k
    } else if (mag < 0) {
1311
1.07k
        if ((res = mp_copy(a, c)) != MP_OKAY)
1312
0
            return res;
1313
1314
1.07k
        if (mp_cmp_z(a) < 0) {
1315
103
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1316
0
                return res;
1317
103
        }
1318
1319
1.07k
    } else {
1320
9
        mp_zero(c);
1321
9
    }
1322
1323
1.39k
    return MP_OKAY;
1324
1325
1.39k
} /* end mp_mod() */
1326
1327
/* }}} */
1328
1329
/* {{{ s_mp_subCT_d(a, b, borrow, c) */
1330
1331
/*
1332
  s_mp_subCT_d(a, b, borrow, c)
1333
1334
  Compute c = (a -b) - subtract in constant time. returns borrow
1335
 */
1336
mp_digit
1337
s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
1338
192
{
1339
192
    *ret = a - b - borrow;
1340
192
    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
1341
192
} /*  s_mp_subCT_d() */
1342
1343
/* }}} */
1344
1345
/* {{{ mp_subCT(a, b, ret, borrow) */
1346
1347
/* return ret= a - b and borrow in borrow. done in constant time.
1348
 * b could be modified.
1349
 */
1350
mp_err
1351
mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
1352
6
{
1353
6
    mp_size used_a = MP_USED(a);
1354
6
    mp_size i;
1355
6
    mp_err res;
1356
1357
6
    MP_CHECKOK(s_mp_pad(b, used_a));
1358
6
    MP_CHECKOK(s_mp_pad(ret, used_a));
1359
6
    *borrow = 0;
1360
198
    for (i = 0; i < used_a; i++) {
1361
192
        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
1362
192
                               &MP_DIGIT(ret, i));
1363
192
    }
1364
1365
6
    res = MP_OKAY;
1366
6
CLEANUP:
1367
6
    return res;
1368
6
} /*  end mp_subCT() */
1369
1370
/* }}} */
1371
1372
/* {{{ mp_selectCT(cond, a, b, ret) */
1373
1374
/*
1375
 * return ret= cond ? a : b; cond should be either 0 or 1
1376
 */
1377
mp_err
1378
mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
1379
6
{
1380
6
    mp_size used_a = MP_USED(a);
1381
6
    mp_err res;
1382
6
    mp_size i;
1383
1384
6
    cond *= MP_DIGIT_MAX;
1385
1386
    /* we currently require these to be equal on input,
1387
     * we could use pad to extend one of them, but that might
1388
     * leak data as it wouldn't be constant time */
1389
6
    if (used_a != MP_USED(b)) {
1390
0
        return MP_BADARG;
1391
0
    }
1392
1393
6
    MP_CHECKOK(s_mp_pad(ret, used_a));
1394
198
    for (i = 0; i < used_a; i++) {
1395
192
        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
1396
192
    }
1397
6
    res = MP_OKAY;
1398
6
CLEANUP:
1399
6
    return res;
1400
6
} /* end mp_selectCT() */
1401
1402
/* {{{ mp_reduceCT(a, m, c) */
1403
1404
/*
1405
  mp_reduceCT(a, m, c)
1406
1407
  Compute c = aR^-1 (mod m) in constant time.
1408
   input should be in montgomery form. If input is the
1409
   result of a montgomery multiply then out put will be
1410
   in mongomery form.
1411
   Result will be reduced to MP_USED(m), but not be
1412
   clamped.
1413
 */
1414
1415
mp_err
1416
mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
1417
6
{
1418
6
    mp_size used_m = MP_USED(m);
1419
6
    mp_size used_c = used_m * 2 + 1;
1420
6
    mp_digit *m_digits, *c_digits;
1421
6
    mp_size i;
1422
6
    mp_digit borrow, carry;
1423
6
    mp_err res;
1424
6
    mp_int sub;
1425
1426
6
    MP_DIGITS(&sub) = 0;
1427
6
    MP_CHECKOK(mp_init_size(&sub, used_m));
1428
1429
6
    if (a != c) {
1430
0
        MP_CHECKOK(mp_copy(a, c));
1431
0
    }
1432
6
    MP_CHECKOK(s_mp_pad(c, used_c));
1433
6
    m_digits = MP_DIGITS(m);
1434
6
    c_digits = MP_DIGITS(c);
1435
198
    for (i = 0; i < used_m; i++) {
1436
192
        mp_digit m_i = MP_DIGIT(c, i) * n0i;
1437
192
        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
1438
192
    }
1439
6
    s_mp_rshd(c, used_m);
1440
    /* MP_USED(c) should be used_m+1 with the high word being any carry
1441
     * from the previous multiply, save that carry and drop the high
1442
     * word for the substraction below */
1443
6
    carry = MP_DIGIT(c, used_m);
1444
6
    MP_DIGIT(c, used_m) = 0;
1445
6
    MP_USED(c) = used_m;
1446
    /* mp_subCT wants c and m to be the same size, we've already
1447
     * guarrenteed that in the previous statement, so mp_subCT won't actually
1448
     * modify m, so it's safe to recast */
1449
6
    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
1450
1451
    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
1452
6
    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
1453
6
    res = MP_OKAY;
1454
6
CLEANUP:
1455
6
    mp_clear(&sub);
1456
6
    return res;
1457
6
} /* end mp_reduceCT() */
1458
1459
/* }}} */
1460
1461
/* {{{ mp_mod_d(a, d, c) */
1462
1463
/*
1464
  mp_mod_d(a, d, c)
1465
1466
  Compute c = a (mod d).  Result will always be 0 <= c < d
1467
 */
1468
mp_err
1469
mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1470
0
{
1471
0
    mp_err res;
1472
0
    mp_digit rem;
1473
1474
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
1475
1476
0
    if (s_mp_cmp_d(a, d) > 0) {
1477
0
        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1478
0
            return res;
1479
1480
0
    } else {
1481
0
        if (SIGN(a) == NEG)
1482
0
            rem = d - DIGIT(a, 0);
1483
0
        else
1484
0
            rem = DIGIT(a, 0);
1485
0
    }
1486
1487
0
    if (c)
1488
0
        *c = rem;
1489
1490
0
    return MP_OKAY;
1491
1492
0
} /* end mp_mod_d() */
1493
1494
/* }}} */
1495
1496
/* }}} */
1497
1498
/*------------------------------------------------------------------------*/
1499
/* {{{ Modular arithmetic */
1500
1501
#if MP_MODARITH
1502
/* {{{ mp_addmod(a, b, m, c) */
1503
1504
/*
1505
  mp_addmod(a, b, m, c)
1506
1507
  Compute c = (a + b) mod m
1508
 */
1509
1510
mp_err
1511
mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1512
17
{
1513
17
    mp_err res;
1514
1515
17
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1516
1517
17
    if ((res = mp_add(a, b, c)) != MP_OKAY)
1518
0
        return res;
1519
17
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1520
1
        return res;
1521
1522
16
    return MP_OKAY;
1523
17
}
1524
1525
/* }}} */
1526
1527
/* {{{ mp_submod(a, b, m, c) */
1528
1529
/*
1530
  mp_submod(a, b, m, c)
1531
1532
  Compute c = (a - b) mod m
1533
 */
1534
1535
mp_err
1536
mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1537
25
{
1538
25
    mp_err res;
1539
1540
25
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1541
1542
25
    if ((res = mp_sub(a, b, c)) != MP_OKAY)
1543
0
        return res;
1544
25
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1545
1
        return res;
1546
1547
24
    return MP_OKAY;
1548
25
}
1549
1550
/* }}} */
1551
1552
/* {{{ mp_mulmod(a, b, m, c) */
1553
1554
/*
1555
  mp_mulmod(a, b, m, c)
1556
1557
  Compute c = (a * b) mod m
1558
 */
1559
1560
mp_err
1561
mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1562
30
{
1563
30
    mp_err res;
1564
1565
30
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1566
1567
30
    if ((res = mp_mul(a, b, c)) != MP_OKAY)
1568
0
        return res;
1569
30
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1570
0
        return res;
1571
1572
30
    return MP_OKAY;
1573
30
}
1574
1575
/* }}} */
1576
1577
/* {{{ mp_mulmontmodCT(a, b, m, c) */
1578
1579
/*
1580
  mp_mulmontmodCT(a, b, m, c)
1581
1582
  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
1583
  should be in montgomery form and the output is native. If both a and b
1584
  are in montgomery form, then the output will also be in montgomery form
1585
  and can be recovered with an mp_reduceCT call.
1586
  NOTE: a and b may be modified.
1587
 */
1588
1589
mp_err
1590
mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
1591
                mp_int *c)
1592
6
{
1593
6
    mp_err res;
1594
1595
6
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1596
1597
6
    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
1598
0
        return res;
1599
1600
6
    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
1601
0
        return res;
1602
1603
6
    return MP_OKAY;
1604
6
}
1605
1606
/* }}} */
1607
1608
/* {{{ mp_sqrmod(a, m, c) */
1609
1610
#if MP_SQUARE
1611
mp_err
1612
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1613
12
{
1614
12
    mp_err res;
1615
1616
12
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1617
1618
12
    if ((res = mp_sqr(a, c)) != MP_OKAY)
1619
0
        return res;
1620
12
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1621
0
        return res;
1622
1623
12
    return MP_OKAY;
1624
1625
12
} /* end mp_sqrmod() */
1626
#endif
1627
1628
/* }}} */
1629
1630
/* {{{ s_mp_exptmod(a, b, m, c) */
1631
1632
/*
1633
  s_mp_exptmod(a, b, m, c)
1634
1635
  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1636
  method with modular reductions at each step. (This is basically the
1637
  same code as mp_expt(), except for the addition of the reductions)
1638
1639
  The modular reductions are done using Barrett's algorithm (see
1640
  s_mp_reduce() below for details)
1641
 */
1642
1643
mp_err
1644
s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1645
792
{
1646
792
    mp_int s, x, mu;
1647
792
    mp_err res;
1648
792
    mp_digit d;
1649
792
    unsigned int dig, bit;
1650
1651
792
    ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
1652
1653
792
    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1654
2
        return MP_RANGE;
1655
1656
790
    if ((res = mp_init(&s)) != MP_OKAY)
1657
0
        return res;
1658
790
    if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
1659
790
        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1660
0
        goto X;
1661
790
    if ((res = mp_init(&mu)) != MP_OKAY)
1662
0
        goto MU;
1663
1664
790
    mp_set(&s, 1);
1665
1666
    /* mu = b^2k / m */
1667
790
    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
1668
0
        goto CLEANUP;
1669
790
    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
1670
0
        goto CLEANUP;
1671
790
    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1672
0
        goto CLEANUP;
1673
1674
    /* Loop over digits of b in ascending order, except highest order */
1675
12.1k
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1676
11.3k
        d = DIGIT(b, dig);
1677
1678
        /* Loop over the bits of the lower-order digits */
1679
740k
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1680
729k
            if (d & 1) {
1681
361k
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1682
0
                    goto CLEANUP;
1683
361k
                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1684
0
                    goto CLEANUP;
1685
361k
            }
1686
1687
729k
            d >>= 1;
1688
1689
729k
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1690
0
                goto CLEANUP;
1691
729k
            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1692
0
                goto CLEANUP;
1693
729k
        }
1694
11.3k
    }
1695
1696
    /* Now do the last digit... */
1697
790
    d = DIGIT(b, dig);
1698
1699
28.2k
    while (d) {
1700
27.4k
        if (d & 1) {
1701
13.3k
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1702
0
                goto CLEANUP;
1703
13.3k
            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1704
0
                goto CLEANUP;
1705
13.3k
        }
1706
1707
27.4k
        d >>= 1;
1708
1709
27.4k
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1710
0
            goto CLEANUP;
1711
27.4k
        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1712
0
            goto CLEANUP;
1713
27.4k
    }
1714
1715
790
    s_mp_exch(&s, c);
1716
1717
790
CLEANUP:
1718
790
    mp_clear(&mu);
1719
790
MU:
1720
790
    mp_clear(&x);
1721
790
X:
1722
790
    mp_clear(&s);
1723
1724
790
    return res;
1725
1726
790
} /* end s_mp_exptmod() */
1727
1728
/* }}} */
1729
1730
/* {{{ mp_exptmod_d(a, d, m, c) */
1731
1732
mp_err
1733
mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1734
0
{
1735
0
    mp_int s, x;
1736
0
    mp_err res;
1737
1738
0
    ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
1739
1740
0
    if ((res = mp_init(&s)) != MP_OKAY)
1741
0
        return res;
1742
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1743
0
        goto X;
1744
1745
0
    mp_set(&s, 1);
1746
1747
0
    while (d != 0) {
1748
0
        if (d & 1) {
1749
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1750
0
                (res = mp_mod(&s, m, &s)) != MP_OKAY)
1751
0
                goto CLEANUP;
1752
0
        }
1753
1754
0
        d /= 2;
1755
1756
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY ||
1757
0
            (res = mp_mod(&x, m, &x)) != MP_OKAY)
1758
0
            goto CLEANUP;
1759
0
    }
1760
1761
0
    s_mp_exch(&s, c);
1762
1763
0
CLEANUP:
1764
0
    mp_clear(&x);
1765
0
X:
1766
0
    mp_clear(&s);
1767
1768
0
    return res;
1769
1770
0
} /* end mp_exptmod_d() */
1771
1772
/* }}} */
1773
#endif /* if MP_MODARITH */
1774
1775
/* }}} */
1776
1777
/*------------------------------------------------------------------------*/
1778
/* {{{ Comparison functions */
1779
1780
/* {{{ mp_cmp_z(a) */
1781
1782
/*
1783
  mp_cmp_z(a)
1784
1785
  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1786
 */
1787
1788
int
1789
mp_cmp_z(const mp_int *a)
1790
3.20M
{
1791
3.20M
    ARGMPCHK(a != NULL);
1792
1793
3.20M
    if (SIGN(a) == NEG)
1794
227
        return MP_LT;
1795
3.20M
    else if (USED(a) == 1 && DIGIT(a, 0) == 0)
1796
15.7k
        return MP_EQ;
1797
3.18M
    else
1798
3.18M
        return MP_GT;
1799
1800
3.20M
} /* end mp_cmp_z() */
1801
1802
/* }}} */
1803
1804
/* {{{ mp_cmp_d(a, d) */
1805
1806
/*
1807
  mp_cmp_d(a, d)
1808
1809
  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1810
 */
1811
1812
int
1813
mp_cmp_d(const mp_int *a, mp_digit d)
1814
547
{
1815
547
    ARGCHK(a != NULL, MP_EQ);
1816
1817
547
    if (SIGN(a) == NEG)
1818
0
        return MP_LT;
1819
1820
547
    return s_mp_cmp_d(a, d);
1821
1822
547
} /* end mp_cmp_d() */
1823
1824
/* }}} */
1825
1826
/* {{{ mp_cmp(a, b) */
1827
1828
int
1829
mp_cmp(const mp_int *a, const mp_int *b)
1830
1.77M
{
1831
1.77M
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1832
1833
1.77M
    if (SIGN(a) == SIGN(b)) {
1834
1.77M
        int mag;
1835
1836
1.77M
        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
1837
346
            return MP_EQ;
1838
1839
1.77M
        if (SIGN(a) == ZPOS)
1840
1.77M
            return mag;
1841
0
        else
1842
0
            return -mag;
1843
1844
1.77M
    } else if (SIGN(a) == ZPOS) {
1845
0
        return MP_GT;
1846
0
    } else {
1847
0
        return MP_LT;
1848
0
    }
1849
1850
1.77M
} /* end mp_cmp() */
1851
1852
/* }}} */
1853
1854
/* {{{ mp_cmp_mag(a, b) */
1855
1856
/*
1857
  mp_cmp_mag(a, b)
1858
1859
  Compares |a| <=> |b|, and returns an appropriate comparison result
1860
 */
1861
1862
int
1863
mp_cmp_mag(const mp_int *a, const mp_int *b)
1864
0
{
1865
0
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1866
1867
0
    return s_mp_cmp(a, b);
1868
1869
0
} /* end mp_cmp_mag() */
1870
1871
/* }}} */
1872
1873
/* {{{ mp_isodd(a) */
1874
1875
/*
1876
  mp_isodd(a)
1877
1878
  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1879
 */
1880
int
1881
mp_isodd(const mp_int *a)
1882
3.95M
{
1883
3.95M
    ARGMPCHK(a != NULL);
1884
1885
3.95M
    return (int)(DIGIT(a, 0) & 1);
1886
1887
3.95M
} /* end mp_isodd() */
1888
1889
/* }}} */
1890
1891
/* {{{ mp_iseven(a) */
1892
1893
int
1894
mp_iseven(const mp_int *a)
1895
3.95M
{
1896
3.95M
    return !mp_isodd(a);
1897
1898
3.95M
} /* end mp_iseven() */
1899
1900
/* }}} */
1901
1902
/* }}} */
1903
1904
/*------------------------------------------------------------------------*/
1905
/* {{{ Number theoretic functions */
1906
1907
/* {{{ mp_gcd(a, b, c) */
1908
1909
/*
1910
  Computes the GCD using the constant-time algorithm
1911
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
1912
  "Fast constant-time gcd computation and modular inversion"
1913
 */
1914
mp_err
1915
mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1916
212
{
1917
212
    mp_err res;
1918
212
    mp_digit cond = 0, mask = 0;
1919
212
    mp_int g, temp, f;
1920
212
    int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
1921
212
    mp_size top, flen, glen;
1922
212
    mp_int *clear[3];
1923
1924
212
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1925
    /*
1926
    Early exit if either of the inputs is zero.
1927
    Caller is responsible for the proper handling of inputs.
1928
    */
1929
212
    if (mp_cmp_z(a) == MP_EQ) {
1930
3
        res = mp_copy(b, c);
1931
3
        SIGN(c) = ZPOS;
1932
3
        return res;
1933
209
    } else if (mp_cmp_z(b) == MP_EQ) {
1934
4
        res = mp_copy(a, c);
1935
4
        SIGN(c) = ZPOS;
1936
4
        return res;
1937
4
    }
1938
1939
205
    MP_CHECKOK(mp_init(&temp));
1940
205
    clear[++last] = &temp;
1941
205
    MP_CHECKOK(mp_init_copy(&g, a));
1942
205
    clear[++last] = &g;
1943
205
    MP_CHECKOK(mp_init_copy(&f, b));
1944
205
    clear[++last] = &f;
1945
1946
    /*
1947
    For even case compute the number of
1948
    shared powers of 2 in f and g.
1949
    */
1950
6.83k
    for (i = 0; i < USED(&f) && i < USED(&g); i++) {
1951
6.63k
        mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
1952
431k
        for (j = 0; j < MP_DIGIT_BIT; j++) {
1953
424k
            bit &= mask;
1954
424k
            shifts += bit;
1955
424k
            mask >>= 1;
1956
424k
        }
1957
6.63k
    }
1958
    /* Reduce to the odd case by removing the powers of 2. */
1959
205
    s_mp_div_2d(&f, shifts);
1960
205
    s_mp_div_2d(&g, shifts);
1961
1962
    /* Allocate to the size of largest mp_int. */
1963
205
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
1964
205
    MP_CHECKOK(s_mp_grow(&f, top));
1965
205
    MP_CHECKOK(s_mp_grow(&g, top));
1966
205
    MP_CHECKOK(s_mp_grow(&temp, top));
1967
1968
    /* Make sure f contains the odd value. */
1969
205
    MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
1970
1971
    /* Upper bound for the total iterations. */
1972
205
    flen = mpl_significant_bits(&f);
1973
205
    glen = mpl_significant_bits(&g);
1974
205
    m = 4 + 3 * ((flen >= glen) ? flen : glen);
1975
1976
#if defined(_MSC_VER)
1977
#pragma warning(push)
1978
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
1979
#endif
1980
1981
2.28M
    for (i = 0; i < m; i++) {
1982
        /* Step 1: conditional swap. */
1983
        /* Set cond if delta > 0 and g is odd. */
1984
2.28M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
1985
        /* If cond is set replace (delta,f) with (-delta,-f). */
1986
2.28M
        delta = (-cond & -delta) | ((cond - 1) & delta);
1987
2.28M
        SIGN(&f) ^= cond;
1988
        /* If cond is set swap f with g. */
1989
2.28M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
1990
1991
        /* Step 2: elemination. */
1992
        /* Update delta. */
1993
2.28M
        delta++;
1994
        /* If g is odd, right shift (g+f) else right shift g. */
1995
2.28M
        MP_CHECKOK(mp_add(&g, &f, &temp));
1996
2.28M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
1997
2.28M
        s_mp_div_2(&g);
1998
2.28M
    }
1999
2000
#if defined(_MSC_VER)
2001
#pragma warning(pop)
2002
#endif
2003
2004
    /* GCD is in f, take the absolute value. */
2005
205
    SIGN(&f) = ZPOS;
2006
2007
    /* Add back the removed powers of 2. */
2008
205
    MP_CHECKOK(s_mp_mul_2d(&f, shifts));
2009
2010
205
    MP_CHECKOK(mp_copy(&f, c));
2011
2012
205
CLEANUP:
2013
820
    while (last >= 0)
2014
615
        mp_clear(clear[last--]);
2015
205
    return res;
2016
205
} /* end mp_gcd() */
2017
2018
/* }}} */
2019
2020
/* {{{ mp_lcm(a, b, c) */
2021
2022
/* We compute the least common multiple using the rule:
2023
2024
   ab = [a, b](a, b)
2025
2026
   ... by computing the product, and dividing out the gcd.
2027
 */
2028
2029
mp_err
2030
mp_lcm(mp_int *a, mp_int *b, mp_int *c)
2031
89
{
2032
89
    mp_int gcd, prod;
2033
89
    mp_err res;
2034
2035
89
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
2036
2037
    /* Set up temporaries */
2038
89
    if ((res = mp_init(&gcd)) != MP_OKAY)
2039
0
        return res;
2040
89
    if ((res = mp_init(&prod)) != MP_OKAY)
2041
0
        goto GCD;
2042
2043
89
    if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
2044
0
        goto CLEANUP;
2045
89
    if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
2046
0
        goto CLEANUP;
2047
2048
89
    res = mp_div(&prod, &gcd, c, NULL);
2049
2050
89
CLEANUP:
2051
89
    mp_clear(&prod);
2052
89
GCD:
2053
89
    mp_clear(&gcd);
2054
2055
89
    return res;
2056
2057
89
} /* end mp_lcm() */
2058
2059
/* }}} */
2060
2061
/* {{{ mp_xgcd(a, b, g, x, y) */
2062
2063
/*
2064
  mp_xgcd(a, b, g, x, y)
2065
2066
  Compute g = (a, b) and values x and y satisfying Bezout's identity
2067
  (that is, ax + by = g).  This uses the binary extended GCD algorithm
2068
  based on the Stein algorithm used for mp_gcd()
2069
  See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
2070
 */
2071
2072
mp_err
2073
mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
2074
228
{
2075
228
    mp_int gx, xc, yc, u, v, A, B, C, D;
2076
228
    mp_int *clean[9];
2077
228
    mp_err res;
2078
228
    int last = -1;
2079
2080
228
    if (mp_cmp_z(b) == 0)
2081
1
        return MP_RANGE;
2082
2083
    /* Initialize all these variables we need */
2084
227
    MP_CHECKOK(mp_init(&u));
2085
227
    clean[++last] = &u;
2086
227
    MP_CHECKOK(mp_init(&v));
2087
227
    clean[++last] = &v;
2088
227
    MP_CHECKOK(mp_init(&gx));
2089
227
    clean[++last] = &gx;
2090
227
    MP_CHECKOK(mp_init(&A));
2091
227
    clean[++last] = &A;
2092
227
    MP_CHECKOK(mp_init(&B));
2093
227
    clean[++last] = &B;
2094
227
    MP_CHECKOK(mp_init(&C));
2095
227
    clean[++last] = &C;
2096
227
    MP_CHECKOK(mp_init(&D));
2097
227
    clean[++last] = &D;
2098
227
    MP_CHECKOK(mp_init_copy(&xc, a));
2099
227
    clean[++last] = &xc;
2100
227
    mp_abs(&xc, &xc);
2101
227
    MP_CHECKOK(mp_init_copy(&yc, b));
2102
227
    clean[++last] = &yc;
2103
227
    mp_abs(&yc, &yc);
2104
2105
227
    mp_set(&gx, 1);
2106
2107
    /* Divide by two until at least one of them is odd */
2108
298
    while (mp_iseven(&xc) && mp_iseven(&yc)) {
2109
71
        mp_size nx = mp_trailing_zeros(&xc);
2110
71
        mp_size ny = mp_trailing_zeros(&yc);
2111
71
        mp_size n = MP_MIN(nx, ny);
2112
71
        s_mp_div_2d(&xc, n);
2113
71
        s_mp_div_2d(&yc, n);
2114
71
        MP_CHECKOK(s_mp_mul_2d(&gx, n));
2115
71
    }
2116
2117
227
    MP_CHECKOK(mp_copy(&xc, &u));
2118
227
    MP_CHECKOK(mp_copy(&yc, &v));
2119
227
    mp_set(&A, 1);
2120
227
    mp_set(&D, 1);
2121
2122
    /* Loop through binary GCD algorithm */
2123
517k
    do {
2124
937k
        while (mp_iseven(&u)) {
2125
419k
            s_mp_div_2(&u);
2126
2127
419k
            if (mp_iseven(&A) && mp_iseven(&B)) {
2128
211k
                s_mp_div_2(&A);
2129
211k
                s_mp_div_2(&B);
2130
211k
            } else {
2131
208k
                MP_CHECKOK(mp_add(&A, &yc, &A));
2132
208k
                s_mp_div_2(&A);
2133
208k
                MP_CHECKOK(mp_sub(&B, &xc, &B));
2134
208k
                s_mp_div_2(&B);
2135
208k
            }
2136
419k
        }
2137
2138
1.14M
        while (mp_iseven(&v)) {
2139
625k
            s_mp_div_2(&v);
2140
2141
625k
            if (mp_iseven(&C) && mp_iseven(&D)) {
2142
315k
                s_mp_div_2(&C);
2143
315k
                s_mp_div_2(&D);
2144
315k
            } else {
2145
309k
                MP_CHECKOK(mp_add(&C, &yc, &C));
2146
309k
                s_mp_div_2(&C);
2147
309k
                MP_CHECKOK(mp_sub(&D, &xc, &D));
2148
309k
                s_mp_div_2(&D);
2149
309k
            }
2150
625k
        }
2151
2152
517k
        if (mp_cmp(&u, &v) >= 0) {
2153
207k
            MP_CHECKOK(mp_sub(&u, &v, &u));
2154
207k
            MP_CHECKOK(mp_sub(&A, &C, &A));
2155
207k
            MP_CHECKOK(mp_sub(&B, &D, &B));
2156
310k
        } else {
2157
310k
            MP_CHECKOK(mp_sub(&v, &u, &v));
2158
310k
            MP_CHECKOK(mp_sub(&C, &A, &C));
2159
310k
            MP_CHECKOK(mp_sub(&D, &B, &D));
2160
310k
        }
2161
517k
    } while (mp_cmp_z(&u) != 0);
2162
2163
    /* copy results to output */
2164
227
    if (x)
2165
227
        MP_CHECKOK(mp_copy(&C, x));
2166
2167
227
    if (y)
2168
102
        MP_CHECKOK(mp_copy(&D, y));
2169
2170
227
    if (g)
2171
227
        MP_CHECKOK(mp_mul(&gx, &v, g));
2172
2173
227
CLEANUP:
2174
2.27k
    while (last >= 0)
2175
2.04k
        mp_clear(clean[last--]);
2176
2177
227
    return res;
2178
2179
227
} /* end mp_xgcd() */
2180
2181
/* }}} */
2182
2183
mp_size
2184
mp_trailing_zeros(const mp_int *mp)
2185
285
{
2186
285
    mp_digit d;
2187
285
    mp_size n = 0;
2188
285
    unsigned int ix;
2189
2190
285
    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2191
0
        return n;
2192
2193
519
    for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix)
2194
234
        n += MP_DIGIT_BIT;
2195
285
    if (!d)
2196
0
        return 0; /* shouldn't happen, but ... */
2197
285
#if !defined(MP_USE_UINT_DIGIT)
2198
285
    if (!(d & 0xffffffffU)) {
2199
39
        d >>= 32;
2200
39
        n += 32;
2201
39
    }
2202
285
#endif
2203
285
    if (!(d & 0xffffU)) {
2204
71
        d >>= 16;
2205
71
        n += 16;
2206
71
    }
2207
285
    if (!(d & 0xffU)) {
2208
71
        d >>= 8;
2209
71
        n += 8;
2210
71
    }
2211
285
    if (!(d & 0xfU)) {
2212
121
        d >>= 4;
2213
121
        n += 4;
2214
121
    }
2215
285
    if (!(d & 0x3U)) {
2216
142
        d >>= 2;
2217
142
        n += 2;
2218
142
    }
2219
285
    if (!(d & 0x1U)) {
2220
171
        d >>= 1;
2221
171
        n += 1;
2222
171
    }
2223
285
#if MP_ARGCHK == 2
2224
285
    assert(0 != (d & 1));
2225
0
#endif
2226
0
    return n;
2227
285
}
2228
2229
/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2230
** Returns k (positive) or error (negative).
2231
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2232
** by Richard Schroeppel (a.k.a. Captain Nemo).
2233
*/
2234
mp_err
2235
s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2236
0
{
2237
0
    mp_err res;
2238
0
    mp_err k = 0;
2239
0
    mp_int d, f, g;
2240
2241
0
    ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
2242
2243
0
    MP_DIGITS(&d) = 0;
2244
0
    MP_DIGITS(&f) = 0;
2245
0
    MP_DIGITS(&g) = 0;
2246
0
    MP_CHECKOK(mp_init(&d));
2247
0
    MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
2248
0
    MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
2249
2250
0
    mp_set(c, 1);
2251
0
    mp_zero(&d);
2252
2253
0
    if (mp_cmp_z(&f) == 0) {
2254
0
        res = MP_UNDEF;
2255
0
    } else
2256
0
        for (;;) {
2257
0
            int diff_sign;
2258
0
            while (mp_iseven(&f)) {
2259
0
                mp_size n = mp_trailing_zeros(&f);
2260
0
                if (!n) {
2261
0
                    res = MP_UNDEF;
2262
0
                    goto CLEANUP;
2263
0
                }
2264
0
                s_mp_div_2d(&f, n);
2265
0
                MP_CHECKOK(s_mp_mul_2d(&d, n));
2266
0
                k += n;
2267
0
            }
2268
0
            if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
2269
0
                res = k;
2270
0
                break;
2271
0
            }
2272
0
            diff_sign = mp_cmp(&f, &g);
2273
0
            if (diff_sign < 0) { /* f < g */
2274
0
                s_mp_exch(&f, &g);
2275
0
                s_mp_exch(c, &d);
2276
0
            } else if (diff_sign == 0) { /* f == g */
2277
0
                res = MP_UNDEF;          /* a and p are not relatively prime */
2278
0
                break;
2279
0
            }
2280
0
            if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
2281
0
                MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
2282
0
                MP_CHECKOK(mp_sub(c, &d, c));   /* c = c - d */
2283
0
            } else {
2284
0
                MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
2285
0
                MP_CHECKOK(mp_add(c, &d, c));   /* c = c + d */
2286
0
            }
2287
0
        }
2288
0
    if (res >= 0) {
2289
0
        if (mp_cmp_mag(c, p) >= 0) {
2290
0
            MP_CHECKOK(mp_div(c, p, NULL, c));
2291
0
        }
2292
0
        if (MP_SIGN(c) != MP_ZPOS) {
2293
0
            MP_CHECKOK(mp_add(c, p, c));
2294
0
        }
2295
0
        res = k;
2296
0
    }
2297
2298
0
CLEANUP:
2299
0
    mp_clear(&d);
2300
0
    mp_clear(&f);
2301
0
    mp_clear(&g);
2302
0
    return res;
2303
0
}
2304
2305
/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2306
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2307
** by Richard Schroeppel (a.k.a. Captain Nemo).
2308
*/
2309
mp_digit
2310
s_mp_invmod_radix(mp_digit P)
2311
734
{
2312
734
    mp_digit T = P;
2313
734
    T *= 2 - (P * T);
2314
734
    T *= 2 - (P * T);
2315
734
    T *= 2 - (P * T);
2316
734
    T *= 2 - (P * T);
2317
734
#if !defined(MP_USE_UINT_DIGIT)
2318
734
    T *= 2 - (P * T);
2319
734
    T *= 2 - (P * T);
2320
734
#endif
2321
734
    return T;
2322
734
}
2323
2324
/* Given c, k, and prime p, where a*c == 2**k (mod p),
2325
** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2326
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2327
** by Richard Schroeppel (a.k.a. Captain Nemo).
2328
*/
2329
mp_err
2330
s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2331
0
{
2332
0
    int k_orig = k;
2333
0
    mp_digit r;
2334
0
    mp_size ix;
2335
0
    mp_err res;
2336
2337
0
    if (mp_cmp_z(c) < 0) {           /* c < 0 */
2338
0
        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
2339
0
    } else {
2340
0
        MP_CHECKOK(mp_copy(c, x)); /* x = c */
2341
0
    }
2342
2343
    /* make sure x is large enough */
2344
0
    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2345
0
    ix = MP_MAX(ix, MP_USED(x));
2346
0
    MP_CHECKOK(s_mp_pad(x, ix));
2347
2348
0
    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
2349
2350
0
    for (ix = 0; k > 0; ix++) {
2351
0
        int j = MP_MIN(k, MP_DIGIT_BIT);
2352
0
        mp_digit v = r * MP_DIGIT(x, ix);
2353
0
        if (j < MP_DIGIT_BIT) {
2354
0
            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
2355
0
        }
2356
0
        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2357
0
        k -= j;
2358
0
    }
2359
0
    s_mp_clamp(x);
2360
0
    s_mp_div_2d(x, k_orig);
2361
0
    res = MP_OKAY;
2362
2363
0
CLEANUP:
2364
0
    return res;
2365
0
}
2366
2367
/*
2368
  Computes the modular inverse using the constant-time algorithm
2369
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
2370
  "Fast constant-time gcd computation and modular inversion"
2371
 */
2372
mp_err
2373
s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2374
213
{
2375
213
    mp_err res;
2376
213
    mp_digit cond = 0;
2377
213
    mp_int g, f, v, r, temp;
2378
213
    int i, its, delta = 1, last = -1;
2379
213
    mp_size top, flen, glen;
2380
213
    mp_int *clear[6];
2381
2382
213
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2383
    /* Check for invalid inputs. */
2384
213
    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
2385
4
        return MP_RANGE;
2386
2387
209
    if (a == m || mp_iseven(m))
2388
0
        return MP_UNDEF;
2389
2390
209
    MP_CHECKOK(mp_init(&temp));
2391
209
    clear[++last] = &temp;
2392
209
    MP_CHECKOK(mp_init(&v));
2393
209
    clear[++last] = &v;
2394
209
    MP_CHECKOK(mp_init(&r));
2395
209
    clear[++last] = &r;
2396
209
    MP_CHECKOK(mp_init_copy(&g, a));
2397
209
    clear[++last] = &g;
2398
209
    MP_CHECKOK(mp_init_copy(&f, m));
2399
209
    clear[++last] = &f;
2400
2401
209
    mp_set(&v, 0);
2402
209
    mp_set(&r, 1);
2403
2404
    /* Allocate to the size of largest mp_int. */
2405
209
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
2406
209
    MP_CHECKOK(s_mp_grow(&f, top));
2407
209
    MP_CHECKOK(s_mp_grow(&g, top));
2408
209
    MP_CHECKOK(s_mp_grow(&temp, top));
2409
209
    MP_CHECKOK(s_mp_grow(&v, top));
2410
209
    MP_CHECKOK(s_mp_grow(&r, top));
2411
2412
    /* Upper bound for the total iterations. */
2413
209
    flen = mpl_significant_bits(&f);
2414
209
    glen = mpl_significant_bits(&g);
2415
209
    its = 4 + 3 * ((flen >= glen) ? flen : glen);
2416
2417
#if defined(_MSC_VER)
2418
#pragma warning(push)
2419
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2420
#endif
2421
2422
1.44M
    for (i = 0; i < its; i++) {
2423
        /* Step 1: conditional swap. */
2424
        /* Set cond if delta > 0 and g is odd. */
2425
1.44M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
2426
        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
2427
1.44M
        delta = (-cond & -delta) | ((cond - 1) & delta);
2428
1.44M
        SIGN(&f) ^= cond;
2429
1.44M
        SIGN(&v) ^= cond;
2430
        /* If cond is set swap (f,v) with (g,r). */
2431
1.44M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
2432
1.44M
        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
2433
2434
        /* Step 2: elemination. */
2435
        /* Update delta */
2436
1.44M
        delta++;
2437
        /* If g is odd replace r with (r+v). */
2438
1.44M
        MP_CHECKOK(mp_add(&r, &v, &temp));
2439
1.44M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
2440
        /* If g is odd, right shift (g+f) else right shift g. */
2441
1.44M
        MP_CHECKOK(mp_add(&g, &f, &temp));
2442
1.44M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
2443
1.44M
        s_mp_div_2(&g);
2444
        /*
2445
        If r is even, right shift it.
2446
        If r is odd, right shift (r+m) which is even because m is odd.
2447
        We want the result modulo m so adding in multiples of m here vanish.
2448
        */
2449
1.44M
        MP_CHECKOK(mp_add(&r, m, &temp));
2450
1.44M
        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
2451
1.44M
        s_mp_div_2(&r);
2452
1.44M
    }
2453
2454
#if defined(_MSC_VER)
2455
#pragma warning(pop)
2456
#endif
2457
2458
    /* We have the inverse in v, propagate sign from f. */
2459
209
    SIGN(&v) ^= SIGN(&f);
2460
    /* GCD is in f, take the absolute value. */
2461
209
    SIGN(&f) = ZPOS;
2462
2463
    /* If gcd != 1, not invertible. */
2464
209
    if (mp_cmp_d(&f, 1) != MP_EQ) {
2465
22
        res = MP_UNDEF;
2466
22
        goto CLEANUP;
2467
22
    }
2468
2469
    /* Return inverse modulo m. */
2470
187
    MP_CHECKOK(mp_mod(&v, m, c));
2471
2472
209
CLEANUP:
2473
1.25k
    while (last >= 0)
2474
1.04k
        mp_clear(clear[last--]);
2475
209
    return res;
2476
187
}
2477
2478
/* Known good algorithm for computing modular inverse.  But slow. */
2479
mp_err
2480
mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2481
130
{
2482
130
    mp_int g, x;
2483
130
    mp_err res;
2484
2485
130
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2486
2487
130
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2488
5
        return MP_RANGE;
2489
2490
125
    MP_DIGITS(&g) = 0;
2491
125
    MP_DIGITS(&x) = 0;
2492
125
    MP_CHECKOK(mp_init(&x));
2493
125
    MP_CHECKOK(mp_init(&g));
2494
2495
125
    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
2496
2497
125
    if (mp_cmp_d(&g, 1) != MP_EQ) {
2498
32
        res = MP_UNDEF;
2499
32
        goto CLEANUP;
2500
32
    }
2501
2502
93
    res = mp_mod(&x, m, c);
2503
93
    SIGN(c) = SIGN(a);
2504
2505
125
CLEANUP:
2506
125
    mp_clear(&x);
2507
125
    mp_clear(&g);
2508
2509
125
    return res;
2510
93
}
2511
2512
/* modular inverse where modulus is 2**k. */
2513
/* c = a**-1 mod 2**k */
2514
mp_err
2515
s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2516
254
{
2517
254
    mp_err res;
2518
254
    mp_size ix = k + 4;
2519
254
    mp_int t0, t1, val, tmp, two2k;
2520
2521
254
    static const mp_digit d2 = 2;
2522
254
    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2523
2524
254
    if (mp_iseven(a))
2525
0
        return MP_UNDEF;
2526
2527
#if defined(_MSC_VER)
2528
#pragma warning(push)
2529
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2530
#endif
2531
254
    if (k <= MP_DIGIT_BIT) {
2532
152
        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
2533
        /* propagate the sign from mp_int */
2534
152
        i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
2535
152
        if (k < MP_DIGIT_BIT)
2536
150
            i &= ((mp_digit)1 << k) - (mp_digit)1;
2537
152
        mp_set(c, i);
2538
152
        return MP_OKAY;
2539
152
    }
2540
#if defined(_MSC_VER)
2541
#pragma warning(pop)
2542
#endif
2543
2544
102
    MP_DIGITS(&t0) = 0;
2545
102
    MP_DIGITS(&t1) = 0;
2546
102
    MP_DIGITS(&val) = 0;
2547
102
    MP_DIGITS(&tmp) = 0;
2548
102
    MP_DIGITS(&two2k) = 0;
2549
102
    MP_CHECKOK(mp_init_copy(&val, a));
2550
102
    s_mp_mod_2d(&val, k);
2551
102
    MP_CHECKOK(mp_init_copy(&t0, &val));
2552
102
    MP_CHECKOK(mp_init_copy(&t1, &t0));
2553
102
    MP_CHECKOK(mp_init(&tmp));
2554
102
    MP_CHECKOK(mp_init(&two2k));
2555
102
    MP_CHECKOK(s_mp_2expt(&two2k, k));
2556
734
    do {
2557
734
        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
2558
734
        MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
2559
734
        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
2560
734
        s_mp_mod_2d(&t1, k);
2561
1.46k
        while (MP_SIGN(&t1) != MP_ZPOS) {
2562
734
            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
2563
734
        }
2564
734
        if (mp_cmp(&t1, &t0) == MP_EQ)
2565
102
            break;
2566
632
        MP_CHECKOK(mp_copy(&t1, &t0));
2567
632
    } while (--ix > 0);
2568
102
    if (!ix) {
2569
0
        res = MP_UNDEF;
2570
102
    } else {
2571
102
        mp_exch(c, &t1);
2572
102
    }
2573
2574
102
CLEANUP:
2575
102
    mp_clear(&t0);
2576
102
    mp_clear(&t1);
2577
102
    mp_clear(&val);
2578
102
    mp_clear(&tmp);
2579
102
    mp_clear(&two2k);
2580
102
    return res;
2581
102
}
2582
2583
mp_err
2584
s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2585
143
{
2586
143
    mp_err res;
2587
143
    mp_size k;
2588
143
    mp_int oddFactor, evenFactor; /* factors of the modulus */
2589
143
    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2590
143
    mp_int C2, tmp1, tmp2;
2591
2592
143
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2593
2594
    /*static const mp_digit d1 = 1; */
2595
    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2596
2597
143
    if ((res = s_mp_ispow2(m)) >= 0) {
2598
0
        k = res;
2599
0
        return s_mp_invmod_2d(a, k, c);
2600
0
    }
2601
143
    MP_DIGITS(&oddFactor) = 0;
2602
143
    MP_DIGITS(&evenFactor) = 0;
2603
143
    MP_DIGITS(&oddPart) = 0;
2604
143
    MP_DIGITS(&evenPart) = 0;
2605
143
    MP_DIGITS(&C2) = 0;
2606
143
    MP_DIGITS(&tmp1) = 0;
2607
143
    MP_DIGITS(&tmp2) = 0;
2608
2609
143
    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
2610
143
    MP_CHECKOK(mp_init(&evenFactor));
2611
143
    MP_CHECKOK(mp_init(&oddPart));
2612
143
    MP_CHECKOK(mp_init(&evenPart));
2613
143
    MP_CHECKOK(mp_init(&C2));
2614
143
    MP_CHECKOK(mp_init(&tmp1));
2615
143
    MP_CHECKOK(mp_init(&tmp2));
2616
2617
143
    k = mp_trailing_zeros(m);
2618
143
    s_mp_div_2d(&oddFactor, k);
2619
143
    MP_CHECKOK(s_mp_2expt(&evenFactor, k));
2620
2621
    /* compute a**-1 mod oddFactor. */
2622
143
    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
2623
    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2624
127
    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
2625
2626
    /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2627
    /* let m1 = oddFactor,  v1 = oddPart,
2628
     * let m2 = evenFactor, v2 = evenPart.
2629
     */
2630
2631
    /* Compute C2 = m1**-1 mod m2. */
2632
127
    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
2633
2634
    /* compute u = (v2 - v1)*C2 mod m2 */
2635
127
    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
2636
127
    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
2637
127
    s_mp_mod_2d(&tmp2, k);
2638
242
    while (MP_SIGN(&tmp2) != MP_ZPOS) {
2639
115
        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
2640
115
    }
2641
2642
    /* compute answer = v1 + u*m1 */
2643
127
    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
2644
127
    MP_CHECKOK(mp_add(&oddPart, c, c));
2645
    /* not sure this is necessary, but it's low cost if not. */
2646
127
    MP_CHECKOK(mp_mod(c, m, c));
2647
2648
143
CLEANUP:
2649
143
    mp_clear(&oddFactor);
2650
143
    mp_clear(&evenFactor);
2651
143
    mp_clear(&oddPart);
2652
143
    mp_clear(&evenPart);
2653
143
    mp_clear(&C2);
2654
143
    mp_clear(&tmp1);
2655
143
    mp_clear(&tmp2);
2656
143
    return res;
2657
127
}
2658
2659
/* {{{ mp_invmod(a, m, c) */
2660
2661
/*
2662
  mp_invmod(a, m, c)
2663
2664
  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2665
  This is equivalent to the question of whether (a, m) = 1.  If not,
2666
  MP_UNDEF is returned, and there is no inverse.
2667
 */
2668
2669
mp_err
2670
mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2671
224
{
2672
224
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2673
2674
224
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2675
3
        return MP_RANGE;
2676
2677
221
    if (mp_isodd(m)) {
2678
70
        return s_mp_invmod_odd_m(a, m, c);
2679
70
    }
2680
151
    if (mp_iseven(a))
2681
8
        return MP_UNDEF; /* not invertable */
2682
2683
143
    return s_mp_invmod_even_m(a, m, c);
2684
2685
151
} /* end mp_invmod() */
2686
2687
/* }}} */
2688
2689
/* }}} */
2690
2691
/*------------------------------------------------------------------------*/
2692
/* {{{ mp_print(mp, ofp) */
2693
2694
#if MP_IOFUNC
2695
/*
2696
  mp_print(mp, ofp)
2697
2698
  Print a textual representation of the given mp_int on the output
2699
  stream 'ofp'.  Output is generated using the internal radix.
2700
 */
2701
2702
void
2703
mp_print(mp_int *mp, FILE *ofp)
2704
{
2705
    int ix;
2706
2707
    if (mp == NULL || ofp == NULL)
2708
        return;
2709
2710
    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2711
2712
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2713
        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2714
    }
2715
2716
} /* end mp_print() */
2717
2718
#endif /* if MP_IOFUNC */
2719
2720
/* }}} */
2721
2722
/*------------------------------------------------------------------------*/
2723
/* {{{ More I/O Functions */
2724
2725
/* {{{ mp_read_raw(mp, str, len) */
2726
2727
/*
2728
   mp_read_raw(mp, str, len)
2729
2730
   Read in a raw value (base 256) into the given mp_int
2731
 */
2732
2733
mp_err
2734
mp_read_raw(mp_int *mp, char *str, int len)
2735
0
{
2736
0
    int ix;
2737
0
    mp_err res;
2738
0
    unsigned char *ustr = (unsigned char *)str;
2739
2740
0
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2741
2742
0
    mp_zero(mp);
2743
2744
    /* Read the rest of the digits */
2745
0
    for (ix = 1; ix < len; ix++) {
2746
0
        if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2747
0
            return res;
2748
0
        if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2749
0
            return res;
2750
0
    }
2751
2752
    /* Get sign from first byte */
2753
0
    if (ustr[0])
2754
0
        SIGN(mp) = NEG;
2755
0
    else
2756
0
        SIGN(mp) = ZPOS;
2757
2758
0
    return MP_OKAY;
2759
2760
0
} /* end mp_read_raw() */
2761
2762
/* }}} */
2763
2764
/* {{{ mp_raw_size(mp) */
2765
2766
int
2767
mp_raw_size(mp_int *mp)
2768
0
{
2769
0
    ARGCHK(mp != NULL, 0);
2770
2771
0
    return (USED(mp) * sizeof(mp_digit)) + 1;
2772
2773
0
} /* end mp_raw_size() */
2774
2775
/* }}} */
2776
2777
/* {{{ mp_toraw(mp, str) */
2778
2779
mp_err
2780
mp_toraw(mp_int *mp, char *str)
2781
0
{
2782
0
    int ix, jx, pos = 1;
2783
2784
0
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2785
2786
0
    str[0] = (char)SIGN(mp);
2787
2788
    /* Iterate over each digit... */
2789
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2790
0
        mp_digit d = DIGIT(mp, ix);
2791
2792
        /* Unpack digit bytes, high order first */
2793
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2794
0
            str[pos++] = (char)(d >> (jx * CHAR_BIT));
2795
0
        }
2796
0
    }
2797
2798
0
    return MP_OKAY;
2799
2800
0
} /* end mp_toraw() */
2801
2802
/* }}} */
2803
2804
/* {{{ mp_read_radix(mp, str, radix) */
2805
2806
/*
2807
  mp_read_radix(mp, str, radix)
2808
2809
  Read an integer from the given string, and set mp to the resulting
2810
  value.  The input is presumed to be in base 10.  Leading non-digit
2811
  characters are ignored, and the function reads until a non-digit
2812
  character or the end of the string.
2813
 */
2814
2815
mp_err
2816
mp_read_radix(mp_int *mp, const char *str, int radix)
2817
18.3k
{
2818
18.3k
    int ix = 0, val = 0;
2819
18.3k
    mp_err res;
2820
18.3k
    mp_sign sig = ZPOS;
2821
2822
18.3k
    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2823
0
           MP_BADARG);
2824
2825
0
    mp_zero(mp);
2826
2827
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2828
18.3k
    while (str[ix] &&
2829
18.3k
           (s_mp_tovalue(str[ix], radix) < 0) &&
2830
18.3k
           str[ix] != '-' &&
2831
18.3k
           str[ix] != '+') {
2832
0
        ++ix;
2833
0
    }
2834
2835
18.3k
    if (str[ix] == '-') {
2836
0
        sig = NEG;
2837
0
        ++ix;
2838
18.3k
    } else if (str[ix] == '+') {
2839
0
        sig = ZPOS; /* this is the default anyway... */
2840
0
        ++ix;
2841
0
    }
2842
2843
4.58M
    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2844
4.56M
        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2845
0
            return res;
2846
4.56M
        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
2847
0
            return res;
2848
4.56M
        ++ix;
2849
4.56M
    }
2850
2851
18.3k
    if (s_mp_cmp_d(mp, 0) == MP_EQ)
2852
8.09k
        SIGN(mp) = ZPOS;
2853
10.2k
    else
2854
10.2k
        SIGN(mp) = sig;
2855
2856
18.3k
    return MP_OKAY;
2857
2858
18.3k
} /* end mp_read_radix() */
2859
2860
mp_err
2861
mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
2862
18.3k
{
2863
18.3k
    int radix = default_radix;
2864
18.3k
    int cx;
2865
18.3k
    mp_sign sig = ZPOS;
2866
18.3k
    mp_err res;
2867
2868
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2869
18.3k
    while ((cx = *str) != 0 &&
2870
18.3k
           (s_mp_tovalue(cx, radix) < 0) &&
2871
18.3k
           cx != '-' &&
2872
18.3k
           cx != '+') {
2873
0
        ++str;
2874
0
    }
2875
2876
18.3k
    if (cx == '-') {
2877
5
        sig = NEG;
2878
5
        ++str;
2879
18.3k
    } else if (cx == '+') {
2880
0
        sig = ZPOS; /* this is the default anyway... */
2881
0
        ++str;
2882
0
    }
2883
2884
18.3k
    if (str[0] == '0') {
2885
8.09k
        if ((str[1] | 0x20) == 'x') {
2886
0
            radix = 16;
2887
0
            str += 2;
2888
8.09k
        } else {
2889
8.09k
            radix = 8;
2890
8.09k
            str++;
2891
8.09k
        }
2892
8.09k
    }
2893
18.3k
    res = mp_read_radix(a, str, radix);
2894
18.3k
    if (res == MP_OKAY) {
2895
18.3k
        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2896
18.3k
    }
2897
18.3k
    return res;
2898
18.3k
}
2899
2900
/* }}} */
2901
2902
/* {{{ mp_radix_size(mp, radix) */
2903
2904
int
2905
mp_radix_size(mp_int *mp, int radix)
2906
2.15k
{
2907
2.15k
    int bits;
2908
2909
2.15k
    if (!mp || radix < 2 || radix > MAX_RADIX)
2910
0
        return 0;
2911
2912
2.15k
    bits = USED(mp) * DIGIT_BIT - 1;
2913
2914
2.15k
    return SIGN(mp) + s_mp_outlen(bits, radix);
2915
2916
2.15k
} /* end mp_radix_size() */
2917
2918
/* }}} */
2919
2920
/* {{{ mp_toradix(mp, str, radix) */
2921
2922
mp_err
2923
mp_toradix(mp_int *mp, char *str, int radix)
2924
2.15k
{
2925
2.15k
    int ix, pos = 0;
2926
2927
2.15k
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2928
2.15k
    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2929
2930
2.15k
    if (mp_cmp_z(mp) == MP_EQ) {
2931
101
        str[0] = '0';
2932
101
        str[1] = '\0';
2933
2.05k
    } else {
2934
2.05k
        mp_err res;
2935
2.05k
        mp_int tmp;
2936
2.05k
        mp_sign sgn;
2937
2.05k
        mp_digit rem, rdx = (mp_digit)radix;
2938
2.05k
        char ch;
2939
2940
2.05k
        if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2941
0
            return res;
2942
2943
        /* Save sign for later, and take absolute value */
2944
2.05k
        sgn = SIGN(&tmp);
2945
2.05k
        SIGN(&tmp) = ZPOS;
2946
2947
        /* Generate output digits in reverse order      */
2948
1.54M
        while (mp_cmp_z(&tmp) != 0) {
2949
1.53M
            if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2950
0
                mp_clear(&tmp);
2951
0
                return res;
2952
0
            }
2953
2954
            /* Generate digits, use capital letters */
2955
1.53M
            ch = s_mp_todigit(rem, radix, 0);
2956
2957
1.53M
            str[pos++] = ch;
2958
1.53M
        }
2959
2960
        /* Add - sign if original value was negative */
2961
2.05k
        if (sgn == NEG)
2962
51
            str[pos++] = '-';
2963
2964
        /* Add trailing NUL to end the string        */
2965
2.05k
        str[pos--] = '\0';
2966
2967
        /* Reverse the digits and sign indicator     */
2968
2.05k
        ix = 0;
2969
770k
        while (ix < pos) {
2970
768k
            char tmpc = str[ix];
2971
2972
768k
            str[ix] = str[pos];
2973
768k
            str[pos] = tmpc;
2974
768k
            ++ix;
2975
768k
            --pos;
2976
768k
        }
2977
2978
2.05k
        mp_clear(&tmp);
2979
2.05k
    }
2980
2981
2.15k
    return MP_OKAY;
2982
2983
2.15k
} /* end mp_toradix() */
2984
2985
/* }}} */
2986
2987
/* {{{ mp_tovalue(ch, r) */
2988
2989
int
2990
mp_tovalue(char ch, int r)
2991
0
{
2992
0
    return s_mp_tovalue(ch, r);
2993
2994
0
} /* end mp_tovalue() */
2995
2996
/* }}} */
2997
2998
/* }}} */
2999
3000
/* {{{ mp_strerror(ec) */
3001
3002
/*
3003
  mp_strerror(ec)
3004
3005
  Return a string describing the meaning of error code 'ec'.  The
3006
  string returned is allocated in static memory, so the caller should
3007
  not attempt to modify or free the memory associated with this
3008
  string.
3009
 */
3010
const char *
3011
mp_strerror(mp_err ec)
3012
0
{
3013
0
    int aec = (ec < 0) ? -ec : ec;
3014
3015
    /* Code values are negative, so the senses of these comparisons
3016
     are accurate */
3017
0
    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
3018
0
        return mp_err_string[0]; /* unknown error code */
3019
0
    } else {
3020
0
        return mp_err_string[aec + 1];
3021
0
    }
3022
3023
0
} /* end mp_strerror() */
3024
3025
/* }}} */
3026
3027
/*========================================================================*/
3028
/*------------------------------------------------------------------------*/
3029
/* Static function definitions (internal use only)                        */
3030
3031
/* {{{ Memory management */
3032
3033
/* {{{ s_mp_grow(mp, min) */
3034
3035
/* Make sure there are at least 'min' digits allocated to mp              */
3036
mp_err
3037
s_mp_grow(mp_int *mp, mp_size min)
3038
545k
{
3039
545k
    ARGCHK(mp != NULL, MP_BADARG);
3040
3041
545k
    if (min > ALLOC(mp)) {
3042
543k
        mp_digit *tmp;
3043
3044
        /* Set min to next nearest default precision block size */
3045
543k
        min = MP_ROUNDUP(min, s_mp_defprec);
3046
3047
543k
        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
3048
0
            return MP_MEM;
3049
3050
543k
        s_mp_copy(DIGITS(mp), tmp, USED(mp));
3051
3052
543k
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3053
543k
        s_mp_free(DIGITS(mp));
3054
543k
        DIGITS(mp) = tmp;
3055
543k
        ALLOC(mp) = min;
3056
543k
    }
3057
3058
545k
    return MP_OKAY;
3059
3060
545k
} /* end s_mp_grow() */
3061
3062
/* }}} */
3063
3064
/* {{{ s_mp_pad(mp, min) */
3065
3066
/* Make sure the used size of mp is at least 'min', growing if needed     */
3067
mp_err
3068
s_mp_pad(mp_int *mp, mp_size min)
3069
101M
{
3070
101M
    ARGCHK(mp != NULL, MP_BADARG);
3071
3072
101M
    if (min > USED(mp)) {
3073
91.2M
        mp_err res;
3074
3075
        /* Make sure there is room to increase precision  */
3076
91.2M
        if (min > ALLOC(mp)) {
3077
543k
            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
3078
0
                return res;
3079
90.6M
        } else {
3080
90.6M
            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
3081
90.6M
        }
3082
3083
        /* Increase precision; should already be 0-filled */
3084
91.2M
        USED(mp) = min;
3085
91.2M
    }
3086
3087
101M
    return MP_OKAY;
3088
3089
101M
} /* end s_mp_pad() */
3090
3091
/* }}} */
3092
3093
/* {{{ s_mp_setz(dp, count) */
3094
3095
/* Set 'count' digits pointed to by dp to be zeroes                       */
3096
void
3097
s_mp_setz(mp_digit *dp, mp_size count)
3098
100M
{
3099
100M
    memset(dp, 0, count * sizeof(mp_digit));
3100
100M
} /* end s_mp_setz() */
3101
3102
/* }}} */
3103
3104
/* {{{ s_mp_copy(sp, dp, count) */
3105
3106
/* Copy 'count' digits from sp to dp                                      */
3107
void
3108
s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
3109
7.40M
{
3110
7.40M
    memcpy(dp, sp, count * sizeof(mp_digit));
3111
7.40M
} /* end s_mp_copy() */
3112
3113
/* }}} */
3114
3115
/* {{{ s_mp_alloc(nb, ni) */
3116
3117
/* Allocate ni records of nb bytes each, and return a pointer to that     */
3118
void *
3119
s_mp_alloc(size_t nb, size_t ni)
3120
9.65M
{
3121
9.65M
    return calloc(nb, ni);
3122
3123
9.65M
} /* end s_mp_alloc() */
3124
3125
/* }}} */
3126
3127
/* {{{ s_mp_free(ptr) */
3128
3129
/* Free the memory pointed to by ptr                                      */
3130
void
3131
s_mp_free(void *ptr)
3132
9.65M
{
3133
9.65M
    if (ptr) {
3134
9.65M
        free(ptr);
3135
9.65M
    }
3136
9.65M
} /* end s_mp_free() */
3137
3138
/* }}} */
3139
3140
/* {{{ s_mp_clamp(mp) */
3141
3142
/* Remove leading zeroes from the given value                             */
3143
void
3144
s_mp_clamp(mp_int *mp)
3145
29.5M
{
3146
29.5M
    mp_size used = MP_USED(mp);
3147
211M
    while (used > 1 && DIGIT(mp, used - 1) == 0)
3148
181M
        --used;
3149
29.5M
    MP_USED(mp) = used;
3150
29.5M
    if (used == 1 && DIGIT(mp, 0) == 0)
3151
1.51M
        MP_SIGN(mp) = ZPOS;
3152
29.5M
} /* end s_mp_clamp() */
3153
3154
/* }}} */
3155
3156
/* {{{ s_mp_exch(a, b) */
3157
3158
/* Exchange the data for a and b; (b, a) = (a, b)                         */
3159
void
3160
s_mp_exch(mp_int *a, mp_int *b)
3161
3.80M
{
3162
3.80M
    mp_int tmp;
3163
3.80M
    if (!a || !b) {
3164
0
        return;
3165
0
    }
3166
3167
3.80M
    tmp = *a;
3168
3.80M
    *a = *b;
3169
3.80M
    *b = tmp;
3170
3171
3.80M
} /* end s_mp_exch() */
3172
3173
/* }}} */
3174
3175
/* }}} */
3176
3177
/* {{{ Arithmetic helpers */
3178
3179
/* {{{ s_mp_lshd(mp, p) */
3180
3181
/*
3182
   Shift mp leftward by p digits, growing if needed, and zero-filling
3183
   the in-shifted digits at the right end.  This is a convenient
3184
   alternative to multiplication by powers of the radix
3185
 */
3186
3187
mp_err
3188
s_mp_lshd(mp_int *mp, mp_size p)
3189
83.0M
{
3190
83.0M
    mp_err res;
3191
83.0M
    unsigned int ix;
3192
3193
83.0M
    ARGCHK(mp != NULL, MP_BADARG);
3194
3195
83.0M
    if (p == 0)
3196
0
        return MP_OKAY;
3197
3198
83.0M
    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
3199
2.98M
        return MP_OKAY;
3200
3201
80.0M
    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
3202
0
        return res;
3203
3204
    /* Shift all the significant figures over as needed */
3205
4.53G
    for (ix = USED(mp) - p; ix-- > 0;) {
3206
4.45G
        DIGIT(mp, ix + p) = DIGIT(mp, ix);
3207
4.45G
    }
3208
3209
    /* Fill the bottom digits with zeroes */
3210
160M
    for (ix = 0; (mp_size)ix < p; ix++)
3211
80.1M
        DIGIT(mp, ix) = 0;
3212
3213
80.0M
    return MP_OKAY;
3214
3215
80.0M
} /* end s_mp_lshd() */
3216
3217
/* }}} */
3218
3219
/* {{{ s_mp_mul_2d(mp, d) */
3220
3221
/*
3222
  Multiply the integer by 2^d, where d is a number of bits.  This
3223
  amounts to a bitwise shift of the value.
3224
 */
3225
mp_err
3226
s_mp_mul_2d(mp_int *mp, mp_digit d)
3227
3.00M
{
3228
3.00M
    mp_err res;
3229
3.00M
    mp_digit dshift, rshift, mask, x, prev = 0;
3230
3.00M
    mp_digit *pa = NULL;
3231
3.00M
    int i;
3232
3233
3.00M
    ARGCHK(mp != NULL, MP_BADARG);
3234
3235
3.00M
    dshift = d / MP_DIGIT_BIT;
3236
3.00M
    d %= MP_DIGIT_BIT;
3237
    /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
3238
    /* mod and corresponding mask logic avoid that when d = 0 */
3239
3.00M
    rshift = MP_DIGIT_BIT - d;
3240
3.00M
    rshift %= MP_DIGIT_BIT;
3241
    /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
3242
3.00M
    mask = (DIGIT_MAX << rshift) + 1;
3243
3.00M
    mask &= DIGIT_MAX - 1;
3244
    /* bits to be shifted out of the top word */
3245
3.00M
    x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
3246
3247
3.00M
    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0))))
3248
0
        return res;
3249
3250
3.00M
    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3251
0
        return res;
3252
3253
3.00M
    pa = MP_DIGITS(mp) + dshift;
3254
3255
87.7M
    for (i = MP_USED(mp) - dshift; i > 0; i--) {
3256
84.7M
        x = *pa;
3257
84.7M
        *pa++ = (x << d) | prev;
3258
84.7M
        prev = (x & mask) >> rshift;
3259
84.7M
    }
3260
3261
3.00M
    s_mp_clamp(mp);
3262
3.00M
    return MP_OKAY;
3263
3.00M
} /* end s_mp_mul_2d() */
3264
3265
/* {{{ s_mp_rshd(mp, p) */
3266
3267
/*
3268
   Shift mp rightward by p digits.  Maintains the invariant that
3269
   digits above the precision are all zero.  Digits shifted off the
3270
   end are lost.  Cannot fail.
3271
 */
3272
3273
void
3274
s_mp_rshd(mp_int *mp, mp_size p)
3275
11.4M
{
3276
11.4M
    mp_size ix;
3277
11.4M
    mp_digit *src, *dst;
3278
3279
11.4M
    if (p == 0)
3280
8.43M
        return;
3281
3282
    /* Shortcut when all digits are to be shifted off */
3283
3.03M
    if (p >= USED(mp)) {
3284
25.9k
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3285
25.9k
        USED(mp) = 1;
3286
25.9k
        SIGN(mp) = ZPOS;
3287
25.9k
        return;
3288
25.9k
    }
3289
3290
    /* Shift all the significant figures over as needed */
3291
3.01M
    dst = MP_DIGITS(mp);
3292
3.01M
    src = dst + p;
3293
244M
    for (ix = USED(mp) - p; ix > 0; ix--)
3294
241M
        *dst++ = *src++;
3295
3296
3.01M
    MP_USED(mp) -= p;
3297
    /* Fill the top digits with zeroes */
3298
243M
    while (p-- > 0)
3299
240M
        *dst++ = 0;
3300
3301
3.01M
} /* end s_mp_rshd() */
3302
3303
/* }}} */
3304
3305
/* {{{ s_mp_div_2(mp) */
3306
3307
/* Divide by two -- take advantage of radix properties to do it fast      */
3308
void
3309
s_mp_div_2(mp_int *mp)
3310
8.31M
{
3311
8.31M
    s_mp_div_2d(mp, 1);
3312
3313
8.31M
} /* end s_mp_div_2() */
3314
3315
/* }}} */
3316
3317
/* {{{ s_mp_mul_2(mp) */
3318
3319
mp_err
3320
s_mp_mul_2(mp_int *mp)
3321
1.17M
{
3322
1.17M
    mp_digit *pd;
3323
1.17M
    unsigned int ix, used;
3324
1.17M
    mp_digit kin = 0;
3325
3326
1.17M
    ARGCHK(mp != NULL, MP_BADARG);
3327
3328
    /* Shift digits leftward by 1 bit */
3329
1.17M
    used = MP_USED(mp);
3330
1.17M
    pd = MP_DIGITS(mp);
3331
226M
    for (ix = 0; ix < used; ix++) {
3332
225M
        mp_digit d = *pd;
3333
225M
        *pd++ = (d << 1) | kin;
3334
225M
        kin = (d >> (DIGIT_BIT - 1));
3335
225M
    }
3336
3337
    /* Deal with rollover from last digit */
3338
1.17M
    if (kin) {
3339
0
        if (ix >= ALLOC(mp)) {
3340
0
            mp_err res;
3341
0
            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3342
0
                return res;
3343
0
        }
3344
3345
0
        DIGIT(mp, ix) = kin;
3346
0
        USED(mp) += 1;
3347
0
    }
3348
3349
1.17M
    return MP_OKAY;
3350
3351
1.17M
} /* end s_mp_mul_2() */
3352
3353
/* }}} */
3354
3355
/* {{{ s_mp_mod_2d(mp, d) */
3356
3357
/*
3358
  Remainder the integer by 2^d, where d is a number of bits.  This
3359
  amounts to a bitwise AND of the value, and does not require the full
3360
  division code
3361
 */
3362
void
3363
s_mp_mod_2d(mp_int *mp, mp_digit d)
3364
2.26M
{
3365
2.26M
    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3366
2.26M
    mp_size ix;
3367
2.26M
    mp_digit dmask;
3368
3369
2.26M
    if (ndig >= USED(mp))
3370
380k
        return;
3371
3372
    /* Flush all the bits above 2^d in its digit */
3373
1.88M
    dmask = ((mp_digit)1 << nbit) - 1;
3374
1.88M
    DIGIT(mp, ndig) &= dmask;
3375
3376
    /* Flush all digits above the one with 2^d in it */
3377
171M
    for (ix = ndig + 1; ix < USED(mp); ix++)
3378
170M
        DIGIT(mp, ix) = 0;
3379
3380
1.88M
    s_mp_clamp(mp);
3381
3382
1.88M
} /* end s_mp_mod_2d() */
3383
3384
/* }}} */
3385
3386
/* {{{ s_mp_div_2d(mp, d) */
3387
3388
/*
3389
  Divide the integer by 2^d, where d is a number of bits.  This
3390
  amounts to a bitwise shift of the value, and does not require the
3391
  full division code (used in Barrett reduction, see below)
3392
 */
3393
void
3394
s_mp_div_2d(mp_int *mp, mp_digit d)
3395
8.31M
{
3396
8.31M
    int ix;
3397
8.31M
    mp_digit save, next, mask, lshift;
3398
3399
8.31M
    s_mp_rshd(mp, d / DIGIT_BIT);
3400
8.31M
    d %= DIGIT_BIT;
3401
    /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
3402
    /* mod and corresponding mask logic avoid that when d = 0 */
3403
8.31M
    lshift = DIGIT_BIT - d;
3404
8.31M
    lshift %= DIGIT_BIT;
3405
8.31M
    mask = ((mp_digit)1 << d) - 1;
3406
8.31M
    save = 0;
3407
468M
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3408
460M
        next = DIGIT(mp, ix) & mask;
3409
460M
        DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d);
3410
460M
        save = next;
3411
460M
    }
3412
8.31M
    s_mp_clamp(mp);
3413
3414
8.31M
} /* end s_mp_div_2d() */
3415
3416
/* }}} */
3417
3418
/* {{{ s_mp_norm(a, b, *d) */
3419
3420
/*
3421
  s_mp_norm(a, b, *d)
3422
3423
  Normalize a and b for division, where b is the divisor.  In order
3424
  that we might make good guesses for quotient digits, we want the
3425
  leading digit of b to be at least half the radix, which we
3426
  accomplish by multiplying a and b by a power of 2.  The exponent
3427
  (shift count) is placed in *pd, so that the remainder can be shifted
3428
  back at the end of the division process.
3429
 */
3430
3431
mp_err
3432
s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3433
1.50M
{
3434
1.50M
    mp_digit d;
3435
1.50M
    mp_digit mask;
3436
1.50M
    mp_digit b_msd;
3437
1.50M
    mp_err res = MP_OKAY;
3438
3439
1.50M
    ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
3440
3441
0
    d = 0;
3442
1.50M
    mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
3443
1.50M
    b_msd = DIGIT(b, USED(b) - 1);
3444
91.7M
    while (!(b_msd & mask)) {
3445
90.2M
        b_msd <<= 1;
3446
90.2M
        ++d;
3447
90.2M
    }
3448
3449
1.50M
    if (d) {
3450
1.50M
        MP_CHECKOK(s_mp_mul_2d(a, d));
3451
1.50M
        MP_CHECKOK(s_mp_mul_2d(b, d));
3452
1.50M
    }
3453
3454
1.50M
    *pd = d;
3455
1.50M
CLEANUP:
3456
1.50M
    return res;
3457
3458
1.50M
} /* end s_mp_norm() */
3459
3460
/* }}} */
3461
3462
/* }}} */
3463
3464
/* {{{ Primitive digit arithmetic */
3465
3466
/* {{{ s_mp_add_d(mp, d) */
3467
3468
/* Add d to |mp| in place                                                 */
3469
mp_err
3470
s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
3471
4.56M
{
3472
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3473
    mp_word w, k = 0;
3474
    mp_size ix = 1;
3475
3476
    w = (mp_word)DIGIT(mp, 0) + d;
3477
    DIGIT(mp, 0) = ACCUM(w);
3478
    k = CARRYOUT(w);
3479
3480
    while (ix < USED(mp) && k) {
3481
        w = (mp_word)DIGIT(mp, ix) + k;
3482
        DIGIT(mp, ix) = ACCUM(w);
3483
        k = CARRYOUT(w);
3484
        ++ix;
3485
    }
3486
3487
    if (k != 0) {
3488
        mp_err res;
3489
3490
        if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3491
            return res;
3492
3493
        DIGIT(mp, ix) = (mp_digit)k;
3494
    }
3495
3496
    return MP_OKAY;
3497
#else
3498
4.56M
    mp_digit *pmp = MP_DIGITS(mp);
3499
4.56M
    mp_digit sum, mp_i, carry = 0;
3500
4.56M
    mp_err res = MP_OKAY;
3501
4.56M
    int used = (int)MP_USED(mp);
3502
3503
4.56M
    mp_i = *pmp;
3504
4.56M
    *pmp++ = sum = d + mp_i;
3505
4.56M
    carry = (sum < d);
3506
4.56M
    while (carry && --used > 0) {
3507
95
        mp_i = *pmp;
3508
95
        *pmp++ = sum = carry + mp_i;
3509
95
        carry = !sum;
3510
95
    }
3511
4.56M
    if (carry && !used) {
3512
        /* mp is growing */
3513
5
        used = MP_USED(mp);
3514
5
        MP_CHECKOK(s_mp_pad(mp, used + 1));
3515
5
        MP_DIGIT(mp, used) = carry;
3516
5
    }
3517
4.56M
CLEANUP:
3518
4.56M
    return res;
3519
4.56M
#endif
3520
4.56M
} /* end s_mp_add_d() */
3521
3522
/* }}} */
3523
3524
/* {{{ s_mp_sub_d(mp, d) */
3525
3526
/* Subtract d from |mp| in place, assumes |mp| > d                        */
3527
mp_err
3528
s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
3529
0
{
3530
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3531
    mp_word w, b = 0;
3532
    mp_size ix = 1;
3533
3534
    /* Compute initial subtraction    */
3535
    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3536
    b = CARRYOUT(w) ? 0 : 1;
3537
    DIGIT(mp, 0) = ACCUM(w);
3538
3539
    /* Propagate borrows leftward     */
3540
    while (b && ix < USED(mp)) {
3541
        w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3542
        b = CARRYOUT(w) ? 0 : 1;
3543
        DIGIT(mp, ix) = ACCUM(w);
3544
        ++ix;
3545
    }
3546
3547
    /* Remove leading zeroes          */
3548
    s_mp_clamp(mp);
3549
3550
    /* If we have a borrow out, it's a violation of the input invariant */
3551
    if (b)
3552
        return MP_RANGE;
3553
    else
3554
        return MP_OKAY;
3555
#else
3556
0
    mp_digit *pmp = MP_DIGITS(mp);
3557
0
    mp_digit mp_i, diff, borrow;
3558
0
    mp_size used = MP_USED(mp);
3559
3560
0
    mp_i = *pmp;
3561
0
    *pmp++ = diff = mp_i - d;
3562
0
    borrow = (diff > mp_i);
3563
0
    while (borrow && --used) {
3564
0
        mp_i = *pmp;
3565
0
        *pmp++ = diff = mp_i - borrow;
3566
0
        borrow = (diff > mp_i);
3567
0
    }
3568
0
    s_mp_clamp(mp);
3569
0
    return (borrow && !used) ? MP_RANGE : MP_OKAY;
3570
0
#endif
3571
0
} /* end s_mp_sub_d() */
3572
3573
/* }}} */
3574
3575
/* {{{ s_mp_mul_d(a, d) */
3576
3577
/* Compute a = a * d, single digit multiplication                         */
3578
mp_err
3579
s_mp_mul_d(mp_int *a, mp_digit d)
3580
4.60M
{
3581
4.60M
    mp_err res;
3582
4.60M
    mp_size used;
3583
4.60M
    int pow;
3584
3585
4.60M
    if (!d) {
3586
0
        mp_zero(a);
3587
0
        return MP_OKAY;
3588
0
    }
3589
4.60M
    if (d == 1)
3590
252
        return MP_OKAY;
3591
4.60M
    if (0 <= (pow = s_mp_ispow2d(d))) {
3592
202
        return s_mp_mul_2d(a, (mp_digit)pow);
3593
202
    }
3594
3595
4.60M
    used = MP_USED(a);
3596
4.60M
    MP_CHECKOK(s_mp_pad(a, used + 1));
3597
3598
4.60M
    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3599
3600
4.60M
    s_mp_clamp(a);
3601
3602
4.60M
CLEANUP:
3603
4.60M
    return res;
3604
3605
4.60M
} /* end s_mp_mul_d() */
3606
3607
/* }}} */
3608
3609
/* {{{ s_mp_div_d(mp, d, r) */
3610
3611
/*
3612
  s_mp_div_d(mp, d, r)
3613
3614
  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3615
  single digit d.  If r is null, the remainder will be discarded.
3616
 */
3617
3618
mp_err
3619
s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3620
1.53M
{
3621
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3622
    mp_word w = 0, q;
3623
#else
3624
1.53M
    mp_digit w = 0, q;
3625
1.53M
#endif
3626
1.53M
    int ix;
3627
1.53M
    mp_err res;
3628
1.53M
    mp_int quot;
3629
1.53M
    mp_int rem;
3630
3631
1.53M
    if (d == 0)
3632
0
        return MP_RANGE;
3633
1.53M
    if (d == 1) {
3634
0
        if (r)
3635
0
            *r = 0;
3636
0
        return MP_OKAY;
3637
0
    }
3638
    /* could check for power of 2 here, but mp_div_d does that. */
3639
1.53M
    if (MP_USED(mp) == 1) {
3640
36.3k
        mp_digit n = MP_DIGIT(mp, 0);
3641
36.3k
        mp_digit remdig;
3642
3643
36.3k
        q = n / d;
3644
36.3k
        remdig = n % d;
3645
36.3k
        MP_DIGIT(mp, 0) = q;
3646
36.3k
        if (r) {
3647
36.3k
            *r = remdig;
3648
36.3k
        }
3649
36.3k
        return MP_OKAY;
3650
36.3k
    }
3651
3652
1.50M
    MP_DIGITS(&rem) = 0;
3653
1.50M
    MP_DIGITS(&quot) = 0;
3654
    /* Make room for the quotient */
3655
1.50M
    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
3656
3657
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3658
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3659
        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3660
3661
        if (w >= d) {
3662
            q = w / d;
3663
            w = w % d;
3664
        } else {
3665
            q = 0;
3666
        }
3667
3668
        s_mp_lshd(&quot, 1);
3669
        DIGIT(&quot, 0) = (mp_digit)q;
3670
    }
3671
#else
3672
1.50M
    {
3673
1.50M
        mp_digit p;
3674
1.50M
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3675
1.50M
        mp_digit norm;
3676
1.50M
#endif
3677
3678
1.50M
        MP_CHECKOK(mp_init_copy(&rem, mp));
3679
3680
1.50M
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3681
1.50M
        MP_DIGIT(&quot, 0) = d;
3682
1.50M
        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
3683
1.50M
        if (norm)
3684
1.50M
            d <<= norm;
3685
1.50M
        MP_DIGIT(&quot, 0) = 0;
3686
1.50M
#endif
3687
3688
1.50M
        p = 0;
3689
84.5M
        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3690
83.0M
            w = DIGIT(&rem, ix);
3691
3692
83.0M
            if (p) {
3693
81.1M
                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
3694
81.1M
            } else if (w >= d) {
3695
16.2k
                q = w / d;
3696
16.2k
                w = w % d;
3697
1.85M
            } else {
3698
1.85M
                q = 0;
3699
1.85M
            }
3700
3701
83.0M
            MP_CHECKOK(s_mp_lshd(&quot, 1));
3702
83.0M
            DIGIT(&quot, 0) = q;
3703
83.0M
            p = w;
3704
83.0M
        }
3705
1.50M
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3706
1.50M
        if (norm)
3707
1.50M
            w >>= norm;
3708
1.50M
#endif
3709
1.50M
    }
3710
0
#endif
3711
3712
    /* Deliver the remainder, if desired */
3713
1.50M
    if (r) {
3714
1.50M
        *r = (mp_digit)w;
3715
1.50M
    }
3716
3717
1.50M
    s_mp_clamp(&quot);
3718
1.50M
    mp_exch(&quot, mp);
3719
1.50M
CLEANUP:
3720
1.50M
    mp_clear(&quot);
3721
1.50M
    mp_clear(&rem);
3722
3723
1.50M
    return res;
3724
1.50M
} /* end s_mp_div_d() */
3725
3726
/* }}} */
3727
3728
/* }}} */
3729
3730
/* {{{ Primitive full arithmetic */
3731
3732
/* {{{ s_mp_add(a, b) */
3733
3734
/* Compute a = |a| + |b|                                                  */
3735
mp_err
3736
s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition      */
3737
0
{
3738
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3739
    mp_word w = 0;
3740
#else
3741
0
    mp_digit d, sum, carry = 0;
3742
0
#endif
3743
0
    mp_digit *pa, *pb;
3744
0
    mp_size ix;
3745
0
    mp_size used;
3746
0
    mp_err res;
3747
3748
    /* Make sure a has enough precision for the output value */
3749
0
    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3750
0
        return res;
3751
3752
    /*
3753
      Add up all digits up to the precision of b.  If b had initially
3754
      the same precision as a, or greater, we took care of it by the
3755
      padding step above, so there is no problem.  If b had initially
3756
      less precision, we'll have to make sure the carry out is duly
3757
      propagated upward among the higher-order digits of the sum.
3758
     */
3759
0
    pa = MP_DIGITS(a);
3760
0
    pb = MP_DIGITS(b);
3761
0
    used = MP_USED(b);
3762
0
    for (ix = 0; ix < used; ix++) {
3763
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3764
        w = w + *pa + *pb++;
3765
        *pa++ = ACCUM(w);
3766
        w = CARRYOUT(w);
3767
#else
3768
0
        d = *pa;
3769
0
        sum = d + *pb++;
3770
0
        d = (sum < d); /* detect overflow */
3771
0
        *pa++ = sum += carry;
3772
0
        carry = d + (sum < carry); /* detect overflow */
3773
0
#endif
3774
0
    }
3775
3776
    /* If we run out of 'b' digits before we're actually done, make
3777
       sure the carries get propagated upward...
3778
     */
3779
0
    used = MP_USED(a);
3780
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3781
    while (w && ix < used) {
3782
        w = w + *pa;
3783
        *pa++ = ACCUM(w);
3784
        w = CARRYOUT(w);
3785
        ++ix;
3786
    }
3787
#else
3788
0
    while (carry && ix < used) {
3789
0
        sum = carry + *pa;
3790
0
        *pa++ = sum;
3791
0
        carry = !sum;
3792
0
        ++ix;
3793
0
    }
3794
0
#endif
3795
3796
/* If there's an overall carry out, increase precision and include
3797
     it.  We could have done this initially, but why touch the memory
3798
     allocator unless we're sure we have to?
3799
   */
3800
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3801
    if (w) {
3802
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3803
            return res;
3804
3805
        DIGIT(a, ix) = (mp_digit)w;
3806
    }
3807
#else
3808
0
    if (carry) {
3809
0
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3810
0
            return res;
3811
3812
0
        DIGIT(a, used) = carry;
3813
0
    }
3814
0
#endif
3815
3816
0
    return MP_OKAY;
3817
0
} /* end s_mp_add() */
3818
3819
/* }}} */
3820
3821
/* Compute c = |a| + |b|         */ /* magnitude addition      */
3822
mp_err
3823
s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3824
5.30M
{
3825
5.30M
    mp_digit *pa, *pb, *pc;
3826
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3827
    mp_word w = 0;
3828
#else
3829
5.30M
    mp_digit sum, carry = 0, d;
3830
5.30M
#endif
3831
5.30M
    mp_size ix;
3832
5.30M
    mp_size used;
3833
5.30M
    mp_err res;
3834
3835
5.30M
    MP_SIGN(c) = MP_SIGN(a);
3836
5.30M
    if (MP_USED(a) < MP_USED(b)) {
3837
766k
        const mp_int *xch = a;
3838
766k
        a = b;
3839
766k
        b = xch;
3840
766k
    }
3841
3842
    /* Make sure a has enough precision for the output value */
3843
5.30M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3844
0
        return res;
3845
3846
    /*
3847
     Add up all digits up to the precision of b.  If b had initially
3848
     the same precision as a, or greater, we took care of it by the
3849
     exchange step above, so there is no problem.  If b had initially
3850
     less precision, we'll have to make sure the carry out is duly
3851
     propagated upward among the higher-order digits of the sum.
3852
    */
3853
5.30M
    pa = MP_DIGITS(a);
3854
5.30M
    pb = MP_DIGITS(b);
3855
5.30M
    pc = MP_DIGITS(c);
3856
5.30M
    used = MP_USED(b);
3857
280M
    for (ix = 0; ix < used; ix++) {
3858
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3859
        w = w + *pa++ + *pb++;
3860
        *pc++ = ACCUM(w);
3861
        w = CARRYOUT(w);
3862
#else
3863
275M
        d = *pa++;
3864
275M
        sum = d + *pb++;
3865
275M
        d = (sum < d); /* detect overflow */
3866
275M
        *pc++ = sum += carry;
3867
275M
        carry = d + (sum < carry); /* detect overflow */
3868
275M
#endif
3869
275M
    }
3870
3871
    /* If we run out of 'b' digits before we're actually done, make
3872
     sure the carries get propagated upward...
3873
   */
3874
56.1M
    for (used = MP_USED(a); ix < used; ++ix) {
3875
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3876
        w = w + *pa++;
3877
        *pc++ = ACCUM(w);
3878
        w = CARRYOUT(w);
3879
#else
3880
50.8M
        *pc++ = sum = carry + *pa++;
3881
50.8M
        carry = (sum < carry);
3882
50.8M
#endif
3883
50.8M
    }
3884
3885
/* If there's an overall carry out, increase precision and include
3886
     it.  We could have done this initially, but why touch the memory
3887
     allocator unless we're sure we have to?
3888
   */
3889
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3890
    if (w) {
3891
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3892
            return res;
3893
3894
        DIGIT(c, used) = (mp_digit)w;
3895
        ++used;
3896
    }
3897
#else
3898
5.30M
    if (carry) {
3899
158k
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3900
0
            return res;
3901
3902
158k
        DIGIT(c, used) = carry;
3903
158k
        ++used;
3904
158k
    }
3905
5.30M
#endif
3906
5.30M
    MP_USED(c) = used;
3907
5.30M
    return MP_OKAY;
3908
5.30M
}
3909
/* {{{ s_mp_add_offset(a, b, offset) */
3910
3911
/* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3912
mp_err
3913
s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3914
0
{
3915
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3916
    mp_word w, k = 0;
3917
#else
3918
0
    mp_digit d, sum, carry = 0;
3919
0
#endif
3920
0
    mp_size ib;
3921
0
    mp_size ia;
3922
0
    mp_size lim;
3923
0
    mp_err res;
3924
3925
    /* Make sure a has enough precision for the output value */
3926
0
    lim = MP_USED(b) + offset;
3927
0
    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3928
0
        return res;
3929
3930
    /*
3931
    Add up all digits up to the precision of b.  If b had initially
3932
    the same precision as a, or greater, we took care of it by the
3933
    padding step above, so there is no problem.  If b had initially
3934
    less precision, we'll have to make sure the carry out is duly
3935
    propagated upward among the higher-order digits of the sum.
3936
   */
3937
0
    lim = USED(b);
3938
0
    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
3939
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3940
        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3941
        DIGIT(a, ia) = ACCUM(w);
3942
        k = CARRYOUT(w);
3943
#else
3944
0
        d = MP_DIGIT(a, ia);
3945
0
        sum = d + MP_DIGIT(b, ib);
3946
0
        d = (sum < d);
3947
0
        MP_DIGIT(a, ia) = sum += carry;
3948
0
        carry = d + (sum < carry);
3949
0
#endif
3950
0
    }
3951
3952
/* If we run out of 'b' digits before we're actually done, make
3953
     sure the carries get propagated upward...
3954
   */
3955
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3956
    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3957
        w = (mp_word)DIGIT(a, ia) + k;
3958
        DIGIT(a, ia) = ACCUM(w);
3959
        k = CARRYOUT(w);
3960
    }
3961
#else
3962
0
    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3963
0
        d = MP_DIGIT(a, ia);
3964
0
        MP_DIGIT(a, ia) = sum = d + carry;
3965
0
        carry = (sum < d);
3966
0
    }
3967
0
#endif
3968
3969
/* If there's an overall carry out, increase precision and include
3970
     it.  We could have done this initially, but why touch the memory
3971
     allocator unless we're sure we have to?
3972
   */
3973
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3974
    if (k) {
3975
        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3976
            return res;
3977
3978
        DIGIT(a, ia) = (mp_digit)k;
3979
    }
3980
#else
3981
0
    if (carry) {
3982
0
        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3983
0
            return res;
3984
3985
0
        DIGIT(a, lim) = carry;
3986
0
    }
3987
0
#endif
3988
0
    s_mp_clamp(a);
3989
3990
0
    return MP_OKAY;
3991
3992
0
} /* end s_mp_add_offset() */
3993
3994
/* }}} */
3995
3996
/* {{{ s_mp_sub(a, b) */
3997
3998
/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3999
mp_err
4000
s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract      */
4001
184k
{
4002
184k
    mp_digit *pa, *pb, *limit;
4003
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4004
    mp_sword w = 0;
4005
#else
4006
184k
    mp_digit d, diff, borrow = 0;
4007
184k
#endif
4008
4009
    /*
4010
    Subtract and propagate borrow.  Up to the precision of b, this
4011
    accounts for the digits of b; after that, we just make sure the
4012
    carries get to the right place.  This saves having to pad b out to
4013
    the precision of a just to make the loops work right...
4014
   */
4015
184k
    pa = MP_DIGITS(a);
4016
184k
    pb = MP_DIGITS(b);
4017
184k
    limit = pb + MP_USED(b);
4018
8.42M
    while (pb < limit) {
4019
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4020
        w = w + *pa - *pb++;
4021
        *pa++ = ACCUM(w);
4022
        w >>= MP_DIGIT_BIT;
4023
#else
4024
8.24M
        d = *pa;
4025
8.24M
        diff = d - *pb++;
4026
8.24M
        d = (diff > d); /* detect borrow */
4027
8.24M
        if (borrow && --diff == MP_DIGIT_MAX)
4028
1.16k
            ++d;
4029
8.24M
        *pa++ = diff;
4030
8.24M
        borrow = d;
4031
8.24M
#endif
4032
8.24M
    }
4033
184k
    limit = MP_DIGITS(a) + MP_USED(a);
4034
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4035
    while (w && pa < limit) {
4036
        w = w + *pa;
4037
        *pa++ = ACCUM(w);
4038
        w >>= MP_DIGIT_BIT;
4039
    }
4040
#else
4041
227k
    while (borrow && pa < limit) {
4042
42.9k
        d = *pa;
4043
42.9k
        *pa++ = diff = d - borrow;
4044
42.9k
        borrow = (diff > d);
4045
42.9k
    }
4046
184k
#endif
4047
4048
    /* Clobber any leading zeroes we created    */
4049
184k
    s_mp_clamp(a);
4050
4051
/*
4052
     If there was a borrow out, then |b| > |a| in violation
4053
     of our input invariant.  We've already done the work,
4054
     but we'll at least complain about it...
4055
   */
4056
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4057
    return w ? MP_RANGE : MP_OKAY;
4058
#else
4059
184k
    return borrow ? MP_RANGE : MP_OKAY;
4060
184k
#endif
4061
184k
} /* end s_mp_sub() */
4062
4063
/* }}} */
4064
4065
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
4066
mp_err
4067
s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
4068
5.01M
{
4069
5.01M
    mp_digit *pa, *pb, *pc;
4070
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4071
    mp_sword w = 0;
4072
#else
4073
5.01M
    mp_digit d, diff, borrow = 0;
4074
5.01M
#endif
4075
5.01M
    int ix, limit;
4076
5.01M
    mp_err res;
4077
4078
5.01M
    MP_SIGN(c) = MP_SIGN(a);
4079
4080
    /* Make sure a has enough precision for the output value */
4081
5.01M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
4082
0
        return res;
4083
4084
    /*
4085
    Subtract and propagate borrow.  Up to the precision of b, this
4086
    accounts for the digits of b; after that, we just make sure the
4087
    carries get to the right place.  This saves having to pad b out to
4088
    the precision of a just to make the loops work right...
4089
   */
4090
5.01M
    pa = MP_DIGITS(a);
4091
5.01M
    pb = MP_DIGITS(b);
4092
5.01M
    pc = MP_DIGITS(c);
4093
5.01M
    limit = MP_USED(b);
4094
281M
    for (ix = 0; ix < limit; ++ix) {
4095
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4096
        w = w + *pa++ - *pb++;
4097
        *pc++ = ACCUM(w);
4098
        w >>= MP_DIGIT_BIT;
4099
#else
4100
276M
        d = *pa++;
4101
276M
        diff = d - *pb++;
4102
276M
        d = (diff > d);
4103
276M
        if (borrow && --diff == MP_DIGIT_MAX)
4104
11.9k
            ++d;
4105
276M
        *pc++ = diff;
4106
276M
        borrow = d;
4107
276M
#endif
4108
276M
    }
4109
42.1M
    for (limit = MP_USED(a); ix < limit; ++ix) {
4110
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4111
        w = w + *pa++;
4112
        *pc++ = ACCUM(w);
4113
        w >>= MP_DIGIT_BIT;
4114
#else
4115
37.1M
        d = *pa++;
4116
37.1M
        *pc++ = diff = d - borrow;
4117
37.1M
        borrow = (diff > d);
4118
37.1M
#endif
4119
37.1M
    }
4120
4121
    /* Clobber any leading zeroes we created    */
4122
5.01M
    MP_USED(c) = ix;
4123
5.01M
    s_mp_clamp(c);
4124
4125
/*
4126
     If there was a borrow out, then |b| > |a| in violation
4127
     of our input invariant.  We've already done the work,
4128
     but we'll at least complain about it...
4129
   */
4130
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4131
    return w ? MP_RANGE : MP_OKAY;
4132
#else
4133
5.01M
    return borrow ? MP_RANGE : MP_OKAY;
4134
5.01M
#endif
4135
5.01M
}
4136
/* {{{ s_mp_mul(a, b) */
4137
4138
/* Compute a = |a| * |b|                                                  */
4139
mp_err
4140
s_mp_mul(mp_int *a, const mp_int *b)
4141
2.63M
{
4142
2.63M
    return mp_mul(a, b, a);
4143
2.63M
} /* end s_mp_mul() */
4144
4145
/* }}} */
4146
4147
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4148
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4149
#define MP_MUL_DxD(a, b, Phi, Plo)                              \
4150
    {                                                           \
4151
        unsigned long long product = (unsigned long long)a * b; \
4152
        Plo = (mp_digit)product;                                \
4153
        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
4154
    }
4155
#else
4156
#define MP_MUL_DxD(a, b, Phi, Plo)                                 \
4157
6.14k
    {                                                              \
4158
6.14k
        mp_digit a0b1, a1b0;                                       \
4159
6.14k
        Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX);   \
4160
6.14k
        Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
4161
6.14k
        a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
4162
6.14k
        a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
4163
6.14k
        a1b0 += a0b1;                                              \
4164
6.14k
        Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
4165
6.14k
        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
4166
6.14k
        a1b0 <<= MP_HALF_DIGIT_BIT;                                \
4167
6.14k
        Plo += a1b0;                                               \
4168
6.14k
        Phi += MP_CT_LTU(Plo, a1b0);                               \
4169
6.14k
    }
4170
#endif
4171
4172
/* Constant time version of s_mpv_mul_d_add_prop.
4173
 * Presently, this is only used by the Constant time Montgomery arithmetic code. */
4174
/* c += a * b */
4175
void
4176
s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
4177
                       mp_digit *c, mp_size c_len)
4178
192
{
4179
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4180
    mp_digit d = 0;
4181
4182
    c_len -= a_len;
4183
    /* Inner product:  Digits of a */
4184
    while (a_len--) {
4185
        mp_word w = ((mp_word)b * *a++) + *c + d;
4186
        *c++ = ACCUM(w);
4187
        d = CARRYOUT(w);
4188
    }
4189
4190
    /* propagate the carry to the end, even if carry is zero */
4191
    while (c_len--) {
4192
        mp_word w = (mp_word)*c + d;
4193
        *c++ = ACCUM(w);
4194
        d = CARRYOUT(w);
4195
    }
4196
#else
4197
192
    mp_digit carry = 0;
4198
192
    c_len -= a_len;
4199
6.33k
    while (a_len--) {
4200
6.14k
        mp_digit a_i = *a++;
4201
6.14k
        mp_digit a0b0, a1b1;
4202
6.14k
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4203
4204
6.14k
        a0b0 += carry;
4205
6.14k
        a1b1 += MP_CT_LTU(a0b0, carry);
4206
6.14k
        a0b0 += a_i = *c;
4207
6.14k
        a1b1 += MP_CT_LTU(a0b0, a_i);
4208
4209
6.14k
        *c++ = a0b0;
4210
6.14k
        carry = a1b1;
4211
6.14k
    }
4212
    /* propagate the carry to the end, even if carry is zero */
4213
3.55k
    while (c_len--) {
4214
3.36k
        mp_digit c_i = *c;
4215
3.36k
        carry += c_i;
4216
3.36k
        *c++ = carry;
4217
3.36k
        carry = MP_CT_LTU(carry, c_i);
4218
3.36k
    }
4219
192
#endif
4220
192
}
4221
4222
#if !defined(MP_ASSEMBLY_MULTIPLY)
4223
/* c = a * b */
4224
void
4225
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4226
{
4227
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4228
    mp_digit d = 0;
4229
4230
    /* Inner product:  Digits of a */
4231
    while (a_len--) {
4232
        mp_word w = ((mp_word)b * *a++) + d;
4233
        *c++ = ACCUM(w);
4234
        d = CARRYOUT(w);
4235
    }
4236
    *c = d;
4237
#else
4238
    mp_digit carry = 0;
4239
    while (a_len--) {
4240
        mp_digit a_i = *a++;
4241
        mp_digit a0b0, a1b1;
4242
4243
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4244
4245
        a0b0 += carry;
4246
        a1b1 += MP_CT_LTU(a0b0, carry);
4247
        *c++ = a0b0;
4248
        carry = a1b1;
4249
    }
4250
    *c = carry;
4251
#endif
4252
}
4253
4254
/* c += a * b */
4255
void
4256
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
4257
                mp_digit *c)
4258
{
4259
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4260
    mp_digit d = 0;
4261
4262
    /* Inner product:  Digits of a */
4263
    while (a_len--) {
4264
        mp_word w = ((mp_word)b * *a++) + *c + d;
4265
        *c++ = ACCUM(w);
4266
        d = CARRYOUT(w);
4267
    }
4268
    *c = d;
4269
#else
4270
    mp_digit carry = 0;
4271
    while (a_len--) {
4272
        mp_digit a_i = *a++;
4273
        mp_digit a0b0, a1b1;
4274
4275
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4276
4277
        a0b0 += carry;
4278
        a1b1 += MP_CT_LTU(a0b0, carry);
4279
        a0b0 += a_i = *c;
4280
        a1b1 += MP_CT_LTU(a0b0, a_i);
4281
        *c++ = a0b0;
4282
        carry = a1b1;
4283
    }
4284
    *c = carry;
4285
#endif
4286
}
4287
4288
/* Presently, this is only used by the Montgomery arithmetic code. */
4289
/* c += a * b */
4290
void
4291
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4292
{
4293
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4294
    mp_digit d = 0;
4295
4296
    /* Inner product:  Digits of a */
4297
    while (a_len--) {
4298
        mp_word w = ((mp_word)b * *a++) + *c + d;
4299
        *c++ = ACCUM(w);
4300
        d = CARRYOUT(w);
4301
    }
4302
4303
    while (d) {
4304
        mp_word w = (mp_word)*c + d;
4305
        *c++ = ACCUM(w);
4306
        d = CARRYOUT(w);
4307
    }
4308
#else
4309
    mp_digit carry = 0;
4310
    while (a_len--) {
4311
        mp_digit a_i = *a++;
4312
        mp_digit a0b0, a1b1;
4313
4314
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4315
4316
        a0b0 += carry;
4317
        if (a0b0 < carry)
4318
            ++a1b1;
4319
4320
        a0b0 += a_i = *c;
4321
        if (a0b0 < a_i)
4322
            ++a1b1;
4323
4324
        *c++ = a0b0;
4325
        carry = a1b1;
4326
    }
4327
    while (carry) {
4328
        mp_digit c_i = *c;
4329
        carry += c_i;
4330
        *c++ = carry;
4331
        carry = carry < c_i;
4332
    }
4333
#endif
4334
}
4335
#endif
4336
4337
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4338
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4339
#define MP_SQR_D(a, Phi, Plo)                                  \
4340
    {                                                          \
4341
        unsigned long long square = (unsigned long long)a * a; \
4342
        Plo = (mp_digit)square;                                \
4343
        Phi = (mp_digit)(square >> MP_DIGIT_BIT);              \
4344
    }
4345
#else
4346
#define MP_SQR_D(a, Phi, Plo)                                      \
4347
112M
    {                                                              \
4348
112M
        mp_digit Pmid;                                             \
4349
112M
        Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX);   \
4350
112M
        Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4351
112M
        Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4352
112M
        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);                    \
4353
112M
        Pmid <<= (MP_HALF_DIGIT_BIT + 1);                          \
4354
112M
        Plo += Pmid;                                               \
4355
112M
        if (Plo < Pmid)                                            \
4356
112M
            ++Phi;                                                 \
4357
112M
    }
4358
#endif
4359
4360
#if !defined(MP_ASSEMBLY_SQUARE)
4361
/* Add the squares of the digits of a to the digits of b. */
4362
void
4363
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4364
1.42M
{
4365
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4366
    mp_word w;
4367
    mp_digit d;
4368
    mp_size ix;
4369
4370
    w = 0;
4371
#define ADD_SQUARE(n)                     \
4372
    d = pa[n];                            \
4373
    w += (d * (mp_word)d) + ps[2 * n];    \
4374
    ps[2 * n] = ACCUM(w);                 \
4375
    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
4376
    ps[2 * n + 1] = ACCUM(w);             \
4377
    w = (w >> DIGIT_BIT)
4378
4379
    for (ix = a_len; ix >= 4; ix -= 4) {
4380
        ADD_SQUARE(0);
4381
        ADD_SQUARE(1);
4382
        ADD_SQUARE(2);
4383
        ADD_SQUARE(3);
4384
        pa += 4;
4385
        ps += 8;
4386
    }
4387
    if (ix) {
4388
        ps += 2 * ix;
4389
        pa += ix;
4390
        switch (ix) {
4391
            case 3:
4392
                ADD_SQUARE(-3); /* FALLTHRU */
4393
            case 2:
4394
                ADD_SQUARE(-2); /* FALLTHRU */
4395
            case 1:
4396
                ADD_SQUARE(-1); /* FALLTHRU */
4397
            case 0:
4398
                break;
4399
        }
4400
    }
4401
    while (w) {
4402
        w += *ps;
4403
        *ps++ = ACCUM(w);
4404
        w = (w >> DIGIT_BIT);
4405
    }
4406
#else
4407
1.42M
    mp_digit carry = 0;
4408
114M
    while (a_len--) {
4409
112M
        mp_digit a_i = *pa++;
4410
112M
        mp_digit a0a0, a1a1;
4411
4412
112M
        MP_SQR_D(a_i, a1a1, a0a0);
4413
4414
        /* here a1a1 and a0a0 constitute a_i ** 2 */
4415
112M
        a0a0 += carry;
4416
112M
        if (a0a0 < carry)
4417
0
            ++a1a1;
4418
4419
        /* now add to ps */
4420
112M
        a0a0 += a_i = *ps;
4421
112M
        if (a0a0 < a_i)
4422
55.2M
            ++a1a1;
4423
112M
        *ps++ = a0a0;
4424
112M
        a1a1 += a_i = *ps;
4425
112M
        carry = (a1a1 < a_i);
4426
112M
        *ps++ = a1a1;
4427
112M
    }
4428
1.42M
    while (carry) {
4429
0
        mp_digit s_i = *ps;
4430
0
        carry += s_i;
4431
0
        *ps++ = carry;
4432
0
        carry = carry < s_i;
4433
0
    }
4434
1.42M
#endif
4435
1.42M
}
4436
#endif
4437
4438
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
4439
/*
4440
** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4441
** so its high bit is 1.   This code is from NSPR.
4442
*/
4443
mp_err
4444
s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4445
                mp_digit *qp, mp_digit *rp)
4446
81.1M
{
4447
81.1M
    mp_digit d1, d0, q1, q0;
4448
81.1M
    mp_digit r1, r0, m;
4449
4450
81.1M
    d1 = divisor >> MP_HALF_DIGIT_BIT;
4451
81.1M
    d0 = divisor & MP_HALF_DIGIT_MAX;
4452
81.1M
    r1 = Nhi % d1;
4453
81.1M
    q1 = Nhi / d1;
4454
81.1M
    m = q1 * d0;
4455
81.1M
    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4456
81.1M
    if (r1 < m) {
4457
15.1k
        q1--, r1 += divisor;
4458
15.1k
        if (r1 >= divisor && r1 < m) {
4459
337
            q1--, r1 += divisor;
4460
337
        }
4461
15.1k
    }
4462
81.1M
    r1 -= m;
4463
81.1M
    r0 = r1 % d1;
4464
81.1M
    q0 = r1 / d1;
4465
81.1M
    m = q0 * d0;
4466
81.1M
    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4467
81.1M
    if (r0 < m) {
4468
15.3k
        q0--, r0 += divisor;
4469
15.3k
        if (r0 >= divisor && r0 < m) {
4470
345
            q0--, r0 += divisor;
4471
345
        }
4472
15.3k
    }
4473
81.1M
    if (qp)
4474
81.1M
        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4475
81.1M
    if (rp)
4476
81.1M
        *rp = r0 - m;
4477
81.1M
    return MP_OKAY;
4478
81.1M
}
4479
#endif
4480
4481
#if MP_SQUARE
4482
/* {{{ s_mp_sqr(a) */
4483
4484
mp_err
4485
s_mp_sqr(mp_int *a)
4486
756k
{
4487
756k
    mp_err res;
4488
756k
    mp_int tmp;
4489
4490
756k
    if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
4491
0
        return res;
4492
756k
    res = mp_sqr(a, &tmp);
4493
756k
    if (res == MP_OKAY) {
4494
756k
        s_mp_exch(&tmp, a);
4495
756k
    }
4496
756k
    mp_clear(&tmp);
4497
756k
    return res;
4498
756k
}
4499
4500
/* }}} */
4501
#endif
4502
4503
/* {{{ s_mp_div(a, b) */
4504
4505
/*
4506
  s_mp_div(a, b)
4507
4508
  Compute a = a / b and b = a mod b.  Assumes b > a.
4509
 */
4510
4511
mp_err
4512
s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4513
         mp_int *div,  /* i: divisor                */
4514
         mp_int *quot) /* i: 0;        o: quotient  */
4515
2.32k
{
4516
2.32k
    mp_int part, t;
4517
2.32k
    mp_digit q_msd;
4518
2.32k
    mp_err res;
4519
2.32k
    mp_digit d;
4520
2.32k
    mp_digit div_msd;
4521
2.32k
    int ix;
4522
4523
2.32k
    if (mp_cmp_z(div) == 0)
4524
0
        return MP_RANGE;
4525
4526
2.32k
    DIGITS(&t) = 0;
4527
    /* Shortcut if divisor is power of two */
4528
2.32k
    if ((ix = s_mp_ispow2(div)) >= 0) {
4529
89
        MP_CHECKOK(mp_copy(rem, quot));
4530
89
        s_mp_div_2d(quot, (mp_digit)ix);
4531
89
        s_mp_mod_2d(rem, (mp_digit)ix);
4532
4533
89
        return MP_OKAY;
4534
89
    }
4535
4536
2.23k
    MP_SIGN(rem) = ZPOS;
4537
2.23k
    MP_SIGN(div) = ZPOS;
4538
2.23k
    MP_SIGN(&part) = ZPOS;
4539
4540
    /* A working temporary for division     */
4541
2.23k
    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
4542
4543
    /* Normalize to optimize guessing       */
4544
2.23k
    MP_CHECKOK(s_mp_norm(rem, div, &d));
4545
4546
    /* Perform the division itself...woo!   */
4547
2.23k
    MP_USED(quot) = MP_ALLOC(quot);
4548
4549
    /* Find a partial substring of rem which is at least div */
4550
    /* If we didn't find one, we're finished dividing    */
4551
42.8k
    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4552
40.5k
        int i;
4553
40.5k
        int unusedRem;
4554
40.5k
        int partExtended = 0; /* set to true if we need to extend part */
4555
4556
40.5k
        unusedRem = MP_USED(rem) - MP_USED(div);
4557
40.5k
        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4558
40.5k
        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
4559
40.5k
        MP_USED(&part) = MP_USED(div);
4560
4561
        /* We have now truncated the part of the remainder to the same length as
4562
         * the divisor. If part is smaller than div, extend part by one digit. */
4563
40.5k
        if (s_mp_cmp(&part, div) < 0) {
4564
40.5k
            --unusedRem;
4565
40.5k
#if MP_ARGCHK == 2
4566
40.5k
            assert(unusedRem >= 0);
4567
0
#endif
4568
40.5k
            --MP_DIGITS(&part);
4569
40.5k
            ++MP_USED(&part);
4570
40.5k
            ++MP_ALLOC(&part);
4571
40.5k
            partExtended = 1;
4572
40.5k
        }
4573
4574
        /* Compute a guess for the next quotient digit       */
4575
40.5k
        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4576
40.5k
        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4577
40.5k
        if (!partExtended) {
4578
            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
4579
             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
4580
             * we didn't extend part, q_msd >= div_msd. Therefore we know that
4581
             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
4582
             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
4583
39
            q_msd = 1;
4584
40.5k
        } else {
4585
40.5k
            if (q_msd == div_msd) {
4586
6
                q_msd = MP_DIGIT_MAX;
4587
40.5k
            } else {
4588
40.5k
                mp_digit r;
4589
40.5k
                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4590
40.5k
                                           div_msd, &q_msd, &r));
4591
40.5k
            }
4592
40.5k
        }
4593
40.5k
#if MP_ARGCHK == 2
4594
40.5k
        assert(q_msd > 0); /* This case should never occur any more. */
4595
0
#endif
4596
40.5k
        if (q_msd <= 0)
4597
0
            break;
4598
4599
        /* See what that multiplies out to                   */
4600
40.5k
        mp_copy(div, &t);
4601
40.5k
        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
4602
4603
        /*
4604
           If it's too big, back it off.  We should not have to do this
4605
           more than once, or, in rare cases, twice.  Knuth describes a
4606
           method by which this could be reduced to a maximum of once, but
4607
           I didn't implement that here.
4608
           When using s_mpv_div_2dx1d, we may have to do this 3 times.
4609
         */
4610
53.1k
        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4611
12.5k
            --q_msd;
4612
12.5k
            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
4613
12.5k
        }
4614
40.5k
        if (i < 0) {
4615
0
            res = MP_RANGE;
4616
0
            goto CLEANUP;
4617
0
        }
4618
4619
        /* At this point, q_msd should be the right next digit   */
4620
40.5k
        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
4621
40.5k
        s_mp_clamp(rem);
4622
4623
        /*
4624
          Include the digit in the quotient.  We allocated enough memory
4625
          for any quotient we could ever possibly get, so we should not
4626
          have to check for failures here
4627
         */
4628
40.5k
        MP_DIGIT(quot, unusedRem) = q_msd;
4629
40.5k
    }
4630
4631
    /* Denormalize remainder                */
4632
2.23k
    if (d) {
4633
1.94k
        s_mp_div_2d(rem, d);
4634
1.94k
    }
4635
4636
2.23k
    s_mp_clamp(quot);
4637
4638
2.23k
CLEANUP:
4639
2.23k
    mp_clear(&t);
4640
4641
2.23k
    return res;
4642
4643
2.23k
} /* end s_mp_div() */
4644
4645
/* }}} */
4646
4647
/* {{{ s_mp_2expt(a, k) */
4648
4649
mp_err
4650
s_mp_2expt(mp_int *a, mp_digit k)
4651
245
{
4652
245
    mp_err res;
4653
245
    mp_size dig, bit;
4654
4655
245
    dig = k / DIGIT_BIT;
4656
245
    bit = k % DIGIT_BIT;
4657
4658
245
    mp_zero(a);
4659
245
    if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4660
0
        return res;
4661
4662
245
    DIGIT(a, dig) |= ((mp_digit)1 << bit);
4663
4664
245
    return MP_OKAY;
4665
4666
245
} /* end s_mp_2expt() */
4667
4668
/* }}} */
4669
4670
/* {{{ s_mp_reduce(x, m, mu) */
4671
4672
/*
4673
  Compute Barrett reduction, x (mod m), given a precomputed value for
4674
  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4675
  faster than straight division, when many reductions by the same
4676
  value of m are required (such as in modular exponentiation).  This
4677
  can nearly halve the time required to do modular exponentiation,
4678
  as compared to using the full integer divide to reduce.
4679
4680
  This algorithm was derived from the _Handbook of Applied
4681
  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4682
  pp. 603-604.
4683
 */
4684
4685
mp_err
4686
s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4687
1.13M
{
4688
1.13M
    mp_int q;
4689
1.13M
    mp_err res;
4690
4691
1.13M
    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
4692
0
        return res;
4693
4694
1.13M
    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
4695
1.13M
    s_mp_mul(&q, mu);           /* q2 = q1 * mu      */
4696
1.13M
    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
4697
4698
    /* x = x mod b^(k+1), quick (no division) */
4699
1.13M
    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4700
4701
    /* q = q * m mod b^(k+1), quick (no division) */
4702
1.13M
    s_mp_mul(&q, m);
4703
1.13M
    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4704
4705
    /* x = x - q */
4706
1.13M
    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
4707
0
        goto CLEANUP;
4708
4709
    /* If x < 0, add b^(k+1) to it */
4710
1.13M
    if (mp_cmp_z(x) < 0) {
4711
73
        mp_set(&q, 1);
4712
73
        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4713
0
            goto CLEANUP;
4714
73
        if ((res = mp_add(x, &q, x)) != MP_OKAY)
4715
0
            goto CLEANUP;
4716
73
    }
4717
4718
    /* Back off if it's too big */
4719
1.21M
    while (mp_cmp(x, m) >= 0) {
4720
84.6k
        if ((res = s_mp_sub(x, m)) != MP_OKAY)
4721
0
            break;
4722
84.6k
    }
4723
4724
1.13M
CLEANUP:
4725
1.13M
    mp_clear(&q);
4726
4727
1.13M
    return res;
4728
4729
1.13M
} /* end s_mp_reduce() */
4730
4731
/* }}} */
4732
4733
/* }}} */
4734
4735
/* {{{ Primitive comparisons */
4736
4737
/* {{{ s_mp_cmp(a, b) */
4738
4739
/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4740
int
4741
s_mp_cmp(const mp_int *a, const mp_int *b)
4742
7.81M
{
4743
7.81M
    ARGMPCHK(a != NULL && b != NULL);
4744
4745
7.81M
    mp_size used_a = MP_USED(a);
4746
7.81M
    {
4747
7.81M
        mp_size used_b = MP_USED(b);
4748
4749
7.81M
        if (used_a > used_b)
4750
683k
            goto IS_GT;
4751
7.13M
        if (used_a < used_b)
4752
533k
            goto IS_LT;
4753
7.13M
    }
4754
6.60M
    {
4755
6.60M
        mp_digit *pa, *pb;
4756
6.60M
        mp_digit da = 0, db = 0;
4757
4758
6.60M
#define CMP_AB(n)                     \
4759
6.64M
    if ((da = pa[n]) != (db = pb[n])) \
4760
6.64M
    goto done
4761
4762
6.60M
        pa = MP_DIGITS(a) + used_a;
4763
6.60M
        pb = MP_DIGITS(b) + used_a;
4764
6.73M
        while (used_a >= 4) {
4765
5.32M
            pa -= 4;
4766
5.32M
            pb -= 4;
4767
5.32M
            used_a -= 4;
4768
5.32M
            CMP_AB(3);
4769
1.03M
            CMP_AB(2);
4770
148k
            CMP_AB(1);
4771
139k
            CMP_AB(0);
4772
139k
        }
4773
1.61M
        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4774
212k
            /* do nothing */;
4775
6.60M
    done:
4776
6.60M
        if (da > db)
4777
2.73M
            goto IS_GT;
4778
3.86M
        if (da < db)
4779
3.83M
            goto IS_LT;
4780
3.86M
    }
4781
36.0k
    return MP_EQ;
4782
4.36M
IS_LT:
4783
4.36M
    return MP_LT;
4784
3.41M
IS_GT:
4785
3.41M
    return MP_GT;
4786
3.86M
} /* end s_mp_cmp() */
4787
4788
/* }}} */
4789
4790
/* {{{ s_mp_cmp_d(a, d) */
4791
4792
/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4793
int
4794
s_mp_cmp_d(const mp_int *a, mp_digit d)
4795
11.9M
{
4796
11.9M
    ARGMPCHK(a != NULL);
4797
4798
11.9M
    if (USED(a) > 1)
4799
10.4M
        return MP_GT;
4800
4801
1.46M
    if (DIGIT(a, 0) < d)
4802
4
        return MP_LT;
4803
1.46M
    else if (DIGIT(a, 0) > d)
4804
1.41M
        return MP_GT;
4805
54.3k
    else
4806
54.3k
        return MP_EQ;
4807
4808
1.46M
} /* end s_mp_cmp_d() */
4809
4810
/* }}} */
4811
4812
/* {{{ s_mp_ispow2(v) */
4813
4814
/*
4815
  Returns -1 if the value is not a power of two; otherwise, it returns
4816
  k such that v = 2^k, i.e. lg(v).
4817
 */
4818
int
4819
s_mp_ispow2(const mp_int *v)
4820
2.47k
{
4821
2.47k
    mp_digit d;
4822
2.47k
    int extra = 0, ix;
4823
4824
2.47k
    ARGMPCHK(v != NULL);
4825
4826
2.47k
    ix = MP_USED(v) - 1;
4827
2.47k
    d = MP_DIGIT(v, ix); /* most significant digit of v */
4828
4829
2.47k
    extra = s_mp_ispow2d(d);
4830
2.47k
    if (extra < 0 || ix == 0)
4831
1.76k
        return extra;
4832
4833
1.02k
    while (--ix >= 0) {
4834
1.02k
        if (DIGIT(v, ix) != 0)
4835
701
            return -1; /* not a power of two */
4836
328
        extra += MP_DIGIT_BIT;
4837
328
    }
4838
4839
0
    return extra;
4840
4841
701
} /* end s_mp_ispow2() */
4842
4843
/* }}} */
4844
4845
/* {{{ s_mp_ispow2d(d) */
4846
4847
int
4848
s_mp_ispow2d(mp_digit d)
4849
6.14M
{
4850
6.14M
    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
4851
992
        int pow = 0;
4852
#if defined(MP_USE_UINT_DIGIT)
4853
        if (d & 0xffff0000U)
4854
            pow += 16;
4855
        if (d & 0xff00ff00U)
4856
            pow += 8;
4857
        if (d & 0xf0f0f0f0U)
4858
            pow += 4;
4859
        if (d & 0xccccccccU)
4860
            pow += 2;
4861
        if (d & 0xaaaaaaaaU)
4862
            pow += 1;
4863
#elif defined(MP_USE_LONG_LONG_DIGIT)
4864
        if (d & 0xffffffff00000000ULL)
4865
            pow += 32;
4866
        if (d & 0xffff0000ffff0000ULL)
4867
            pow += 16;
4868
        if (d & 0xff00ff00ff00ff00ULL)
4869
            pow += 8;
4870
        if (d & 0xf0f0f0f0f0f0f0f0ULL)
4871
            pow += 4;
4872
        if (d & 0xccccccccccccccccULL)
4873
            pow += 2;
4874
        if (d & 0xaaaaaaaaaaaaaaaaULL)
4875
            pow += 1;
4876
#elif defined(MP_USE_LONG_DIGIT)
4877
992
        if (d & 0xffffffff00000000UL)
4878
43
            pow += 32;
4879
992
        if (d & 0xffff0000ffff0000UL)
4880
67
            pow += 16;
4881
992
        if (d & 0xff00ff00ff00ff00UL)
4882
100
            pow += 8;
4883
992
        if (d & 0xf0f0f0f0f0f0f0f0UL)
4884
24
            pow += 4;
4885
992
        if (d & 0xccccccccccccccccUL)
4886
46
            pow += 2;
4887
992
        if (d & 0xaaaaaaaaaaaaaaaaUL)
4888
365
            pow += 1;
4889
#else
4890
#error "unknown type for mp_digit"
4891
#endif
4892
992
        return pow;
4893
992
    }
4894
6.14M
    return -1;
4895
4896
6.14M
} /* end s_mp_ispow2d() */
4897
4898
/* }}} */
4899
4900
/* }}} */
4901
4902
/* {{{ Primitive I/O helpers */
4903
4904
/* {{{ s_mp_tovalue(ch, r) */
4905
4906
/*
4907
  Convert the given character to its digit value, in the given radix.
4908
  If the given character is not understood in the given radix, -1 is
4909
  returned.  Otherwise the digit's numeric value is returned.
4910
4911
  The results will be odd if you use a radix < 2 or > 62, you are
4912
  expected to know what you're up to.
4913
 */
4914
int
4915
s_mp_tovalue(char ch, int r)
4916
4.61M
{
4917
4.61M
    int val, xch;
4918
4919
4.61M
    if (r > 36)
4920
0
        xch = ch;
4921
4.61M
    else
4922
4.61M
        xch = toupper((unsigned char)ch);
4923
4924
4.61M
    if (isdigit((unsigned char)xch))
4925
4.59M
        val = xch - '0';
4926
18.3k
    else if (isupper((unsigned char)xch))
4927
0
        val = xch - 'A' + 10;
4928
18.3k
    else if (islower((unsigned char)xch))
4929
0
        val = xch - 'a' + 36;
4930
18.3k
    else if (xch == '+')
4931
0
        val = 62;
4932
18.3k
    else if (xch == '/')
4933
0
        val = 63;
4934
18.3k
    else
4935
18.3k
        return -1;
4936
4937
4.59M
    if (val < 0 || val >= r)
4938
0
        return -1;
4939
4940
4.59M
    return val;
4941
4942
4.59M
} /* end s_mp_tovalue() */
4943
4944
/* }}} */
4945
4946
/* {{{ s_mp_todigit(val, r, low) */
4947
4948
/*
4949
  Convert val to a radix-r digit, if possible.  If val is out of range
4950
  for r, returns zero.  Otherwise, returns an ASCII character denoting
4951
  the value in the given radix.
4952
4953
  The results may be odd if you use a radix < 2 or > 64, you are
4954
  expected to know what you're doing.
4955
 */
4956
4957
char
4958
s_mp_todigit(mp_digit val, int r, int low)
4959
1.53M
{
4960
1.53M
    char ch;
4961
4962
1.53M
    if (val >= r)
4963
0
        return 0;
4964
4965
1.53M
    ch = s_dmap_1[val];
4966
4967
1.53M
    if (r <= 36 && low)
4968
0
        ch = tolower((unsigned char)ch);
4969
4970
1.53M
    return ch;
4971
4972
1.53M
} /* end s_mp_todigit() */
4973
4974
/* }}} */
4975
4976
/* {{{ s_mp_outlen(bits, radix) */
4977
4978
/*
4979
   Return an estimate for how long a string is needed to hold a radix
4980
   r representation of a number with 'bits' significant bits, plus an
4981
   extra for a zero terminator (assuming C style strings here)
4982
 */
4983
int
4984
s_mp_outlen(int bits, int r)
4985
2.15k
{
4986
2.15k
    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4987
4988
2.15k
} /* end s_mp_outlen() */
4989
4990
/* }}} */
4991
4992
/* }}} */
4993
4994
/* {{{ mp_read_unsigned_octets(mp, str, len) */
4995
/* mp_read_unsigned_octets(mp, str, len)
4996
   Read in a raw value (base 256) into the given mp_int
4997
   No sign bit, number is positive.  Leading zeros ignored.
4998
 */
4999
5000
mp_err
5001
mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
5002
76
{
5003
76
    int count;
5004
76
    mp_err res;
5005
76
    mp_digit d;
5006
5007
76
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
5008
5009
0
    mp_zero(mp);
5010
5011
76
    count = len % sizeof(mp_digit);
5012
76
    if (count) {
5013
56
        for (d = 0; count-- > 0; --len) {
5014
42
            d = (d << 8) | *str++;
5015
42
        }
5016
14
        MP_DIGIT(mp, 0) = d;
5017
14
    }
5018
5019
    /* Read the rest of the digits */
5020
1.58k
    for (; len > 0; len -= sizeof(mp_digit)) {
5021
13.5k
        for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
5022
12.0k
            d = (d << 8) | *str++;
5023
12.0k
        }
5024
1.50k
        if (MP_EQ == mp_cmp_z(mp)) {
5025
62
            if (!d)
5026
0
                continue;
5027
1.44k
        } else {
5028
1.44k
            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
5029
0
                return res;
5030
1.44k
        }
5031
1.50k
        MP_DIGIT(mp, 0) = d;
5032
1.50k
    }
5033
76
    return MP_OKAY;
5034
76
} /* end mp_read_unsigned_octets() */
5035
/* }}} */
5036
5037
/* {{{ mp_unsigned_octet_size(mp) */
5038
unsigned int
5039
mp_unsigned_octet_size(const mp_int *mp)
5040
0
{
5041
0
    unsigned int bytes;
5042
0
    int ix;
5043
0
    mp_digit d = 0;
5044
5045
0
    ARGCHK(mp != NULL, MP_BADARG);
5046
0
    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
5047
5048
0
    bytes = (USED(mp) * sizeof(mp_digit));
5049
5050
    /* subtract leading zeros. */
5051
    /* Iterate over each digit... */
5052
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5053
0
        d = DIGIT(mp, ix);
5054
0
        if (d)
5055
0
            break;
5056
0
        bytes -= sizeof(d);
5057
0
    }
5058
0
    if (!bytes)
5059
0
        return 1;
5060
5061
    /* Have MSD, check digit bytes, high order first */
5062
0
    for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
5063
0
        unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
5064
0
        if (x)
5065
0
            break;
5066
0
        --bytes;
5067
0
    }
5068
0
    return bytes;
5069
0
} /* end mp_unsigned_octet_size() */
5070
/* }}} */
5071
5072
/* {{{ mp_to_unsigned_octets(mp, str) */
5073
/* output a buffer of big endian octets no longer than specified. */
5074
mp_err
5075
mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5076
0
{
5077
0
    int ix, pos = 0;
5078
0
    unsigned int bytes;
5079
5080
0
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5081
5082
0
    bytes = mp_unsigned_octet_size(mp);
5083
0
    ARGCHK(bytes <= maxlen, MP_BADARG);
5084
5085
    /* Iterate over each digit... */
5086
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5087
0
        mp_digit d = DIGIT(mp, ix);
5088
0
        int jx;
5089
5090
        /* Unpack digit bytes, high order first */
5091
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5092
0
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5093
0
            if (!pos && !x) /* suppress leading zeros */
5094
0
                continue;
5095
0
            str[pos++] = x;
5096
0
        }
5097
0
    }
5098
0
    if (!pos)
5099
0
        str[pos++] = 0;
5100
0
    return pos;
5101
0
} /* end mp_to_unsigned_octets() */
5102
/* }}} */
5103
5104
/* {{{ mp_to_signed_octets(mp, str) */
5105
/* output a buffer of big endian octets no longer than specified. */
5106
mp_err
5107
mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5108
0
{
5109
0
    int ix, pos = 0;
5110
0
    unsigned int bytes;
5111
5112
0
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5113
5114
0
    bytes = mp_unsigned_octet_size(mp);
5115
0
    ARGCHK(bytes <= maxlen, MP_BADARG);
5116
5117
    /* Iterate over each digit... */
5118
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5119
0
        mp_digit d = DIGIT(mp, ix);
5120
0
        int jx;
5121
5122
        /* Unpack digit bytes, high order first */
5123
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5124
0
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5125
0
            if (!pos) {
5126
0
                if (!x) /* suppress leading zeros */
5127
0
                    continue;
5128
0
                if (x & 0x80) { /* add one leading zero to make output positive.  */
5129
0
                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
5130
0
                    if (bytes + 1 > maxlen)
5131
0
                        return MP_BADARG;
5132
0
                    str[pos++] = 0;
5133
0
                }
5134
0
            }
5135
0
            str[pos++] = x;
5136
0
        }
5137
0
    }
5138
0
    if (!pos)
5139
0
        str[pos++] = 0;
5140
0
    return pos;
5141
0
} /* end mp_to_signed_octets() */
5142
/* }}} */
5143
5144
/* {{{ mp_to_fixlen_octets(mp, str) */
5145
/* output a buffer of big endian octets exactly as long as requested.
5146
   constant time on the value of mp. */
5147
mp_err
5148
mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
5149
12
{
5150
12
    int ix, jx;
5151
12
    unsigned int bytes;
5152
5153
12
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
5154
5155
    /* Constant time on the value of mp.  Don't use mp_unsigned_octet_size. */
5156
12
    bytes = USED(mp) * MP_DIGIT_SIZE;
5157
5158
    /* If the output is shorter than the native size of mp, then check that any
5159
     * bytes not written have zero values.  This check isn't constant time on
5160
     * the assumption that timing-sensitive callers can guarantee that mp fits
5161
     * in the allocated space. */
5162
12
    ix = USED(mp) - 1;
5163
12
    if (bytes > length) {
5164
0
        unsigned int zeros = bytes - length;
5165
5166
0
        while (zeros >= MP_DIGIT_SIZE) {
5167
0
            ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
5168
0
            zeros -= MP_DIGIT_SIZE;
5169
0
            ix--;
5170
0
        }
5171
5172
0
        if (zeros > 0) {
5173
0
            mp_digit d = DIGIT(mp, ix);
5174
0
            mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
5175
0
            ARGCHK((d & m) == 0, MP_BADARG);
5176
0
            for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
5177
0
                *str++ = d >> (jx * CHAR_BIT);
5178
0
            }
5179
0
            ix--;
5180
0
        }
5181
12
    } else if (bytes < length) {
5182
        /* Place any needed leading zeros. */
5183
0
        unsigned int zeros = length - bytes;
5184
0
        memset(str, 0, zeros);
5185
0
        str += zeros;
5186
0
    }
5187
5188
    /* Iterate over each whole digit... */
5189
396
    for (; ix >= 0; ix--) {
5190
384
        mp_digit d = DIGIT(mp, ix);
5191
5192
        /* Unpack digit bytes, high order first */
5193
3.45k
        for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
5194
3.07k
            *str++ = d >> (jx * CHAR_BIT);
5195
3.07k
        }
5196
384
    }
5197
12
    return MP_OKAY;
5198
12
} /* end mp_to_fixlen_octets() */
5199
/* }}} */
5200
5201
/* {{{ mp_cswap(condition, a, b, numdigits) */
5202
/* performs a conditional swap between mp_int. */
5203
mp_err
5204
mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
5205
11.8M
{
5206
11.8M
    mp_digit x;
5207
11.8M
    unsigned int i;
5208
11.8M
    mp_err res = 0;
5209
5210
    /* if pointers are equal return */
5211
11.8M
    if (a == b)
5212
0
        return res;
5213
5214
11.8M
    if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
5215
0
        MP_CHECKOK(s_mp_grow(a, numdigits));
5216
0
        MP_CHECKOK(s_mp_grow(b, numdigits));
5217
0
    }
5218
5219
11.8M
    condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
5220
5221
11.8M
    x = (USED(a) ^ USED(b)) & condition;
5222
11.8M
    USED(a) ^= x;
5223
11.8M
    USED(b) ^= x;
5224
5225
11.8M
    x = (SIGN(a) ^ SIGN(b)) & condition;
5226
11.8M
    SIGN(a) ^= x;
5227
11.8M
    SIGN(b) ^= x;
5228
5229
1.07G
    for (i = 0; i < numdigits; i++) {
5230
1.06G
        x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
5231
1.06G
        DIGIT(a, i) ^= x;
5232
1.06G
        DIGIT(b, i) ^= x;
5233
1.06G
    }
5234
5235
11.8M
CLEANUP:
5236
11.8M
    return res;
5237
11.8M
} /* end mp_cswap() */
5238
/* }}} */
5239
5240
/*------------------------------------------------------------------------*/
5241
/* HERE THERE BE DRAGONS                                                  */