Coverage Report

Created: 2025-06-24 06:49

/src/nss/lib/freebl/mpi/mpi.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  mpi.c
3
 *
4
 *  Arbitrary precision integer arithmetic library
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9
10
#include "mpi-priv.h"
11
#include "mplogic.h"
12
13
#include <assert.h>
14
15
#if defined(__arm__) && \
16
    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
17
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
18
#undef MP_ASSEMBLY_MULTIPLY
19
#undef MP_ASSEMBLY_SQUARE
20
#endif
21
22
#if MP_LOGTAB
23
/*
24
  A table of the logs of 2 for various bases (the 0 and 1 entries of
25
  this table are meaningless and should not be referenced).
26
27
  This table is used to compute output lengths for the mp_toradix()
28
  function.  Since a number n in radix r takes up about log_r(n)
29
  digits, we estimate the output size by taking the least integer
30
  greater than log_r(n), where:
31
32
  log_r(n) = log_2(n) * log_r(2)
33
34
  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
35
  which are the output bases supported.
36
 */
37
#include "logtab.h"
38
#endif
39
40
#ifdef CT_VERIF
41
#include <valgrind/memcheck.h>
42
#endif
43
44
/* {{{ Constant strings */
45
46
/* Constant strings returned by mp_strerror() */
47
static const char *mp_err_string[] = {
48
    "unknown result code",     /* say what?            */
49
    "boolean true",            /* MP_OKAY, MP_YES      */
50
    "boolean false",           /* MP_NO                */
51
    "out of memory",           /* MP_MEM               */
52
    "argument out of range",   /* MP_RANGE             */
53
    "invalid input parameter", /* MP_BADARG            */
54
    "result is undefined"      /* MP_UNDEF             */
55
};
56
57
/* Value to digit maps for radix conversion   */
58
59
/* s_dmap_1 - standard digits and letters */
60
static const char *s_dmap_1 =
61
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
62
63
/* }}} */
64
65
/* {{{ Default precision manipulation */
66
67
/* Default precision for newly created mp_int's      */
68
static mp_size s_mp_defprec = MP_DEFPREC;
69
70
mp_size
71
mp_get_prec(void)
72
0
{
73
0
    return s_mp_defprec;
74
75
0
} /* end mp_get_prec() */
76
77
void
78
mp_set_prec(mp_size prec)
79
0
{
80
0
    if (prec == 0)
81
0
        s_mp_defprec = MP_DEFPREC;
82
0
    else
83
0
        s_mp_defprec = prec;
84
85
0
} /* end mp_set_prec() */
86
87
/* }}} */
88
89
#ifdef CT_VERIF
90
void
91
mp_taint(mp_int *mp)
92
{
93
    size_t i;
94
    for (i = 0; i < mp->used; ++i) {
95
        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
96
    }
97
}
98
99
void
100
mp_untaint(mp_int *mp)
101
{
102
    size_t i;
103
    for (i = 0; i < mp->used; ++i) {
104
        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
105
    }
106
}
107
#endif
108
109
/*------------------------------------------------------------------------*/
110
/* {{{ mp_init(mp) */
111
112
/*
113
  mp_init(mp)
114
115
  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
116
  MP_MEM if memory could not be allocated for the structure.
117
 */
118
119
mp_err
120
mp_init(mp_int *mp)
121
866k
{
122
866k
    return mp_init_size(mp, s_mp_defprec);
123
124
866k
} /* end mp_init() */
125
126
/* }}} */
127
128
/* {{{ mp_init_size(mp, prec) */
129
130
/*
131
  mp_init_size(mp, prec)
132
133
  Initialize a new zero-valued mp_int with at least the given
134
  precision; returns MP_OKAY if successful, or MP_MEM if memory could
135
  not be allocated for the structure.
136
 */
137
138
mp_err
139
mp_init_size(mp_int *mp, mp_size prec)
140
4.11M
{
141
4.11M
    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
142
143
4.11M
    prec = MP_ROUNDUP(prec, s_mp_defprec);
144
4.11M
    if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
145
0
        return MP_MEM;
146
147
4.11M
    SIGN(mp) = ZPOS;
148
4.11M
    USED(mp) = 1;
149
4.11M
    ALLOC(mp) = prec;
150
151
4.11M
    return MP_OKAY;
152
153
4.11M
} /* end mp_init_size() */
154
155
/* }}} */
156
157
/* {{{ mp_init_copy(mp, from) */
158
159
/*
160
  mp_init_copy(mp, from)
161
162
  Initialize mp as an exact copy of from.  Returns MP_OKAY if
163
  successful, MP_MEM if memory could not be allocated for the new
164
  structure.
165
 */
166
167
mp_err
168
mp_init_copy(mp_int *mp, const mp_int *from)
169
9.76M
{
170
9.76M
    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
171
172
9.76M
    if (mp == from)
173
0
        return MP_OKAY;
174
175
9.76M
    if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
176
0
        return MP_MEM;
177
178
9.76M
    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
179
9.76M
    USED(mp) = USED(from);
180
9.76M
    ALLOC(mp) = ALLOC(from);
181
9.76M
    SIGN(mp) = SIGN(from);
182
183
9.76M
    return MP_OKAY;
184
185
9.76M
} /* end mp_init_copy() */
186
187
/* }}} */
188
189
/* {{{ mp_copy(from, to) */
190
191
/*
192
  mp_copy(from, to)
193
194
  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
195
  'to' has already been initialized (if not, use mp_init_copy()
196
  instead). If 'from' and 'to' are identical, nothing happens.
197
 */
198
199
mp_err
200
mp_copy(const mp_int *from, mp_int *to)
201
5.00M
{
202
5.00M
    ARGCHK(from != NULL && to != NULL, MP_BADARG);
203
204
5.00M
    if (from == to)
205
65.4k
        return MP_OKAY;
206
207
4.93M
    { /* copy */
208
4.93M
        mp_digit *tmp;
209
210
        /*
211
          If the allocated buffer in 'to' already has enough space to hold
212
          all the used digits of 'from', we'll re-use it to avoid hitting
213
          the memory allocater more than necessary; otherwise, we'd have
214
          to grow anyway, so we just allocate a hunk and make the copy as
215
          usual
216
         */
217
4.93M
        if (ALLOC(to) >= USED(from)) {
218
4.93M
            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
219
4.93M
            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
220
221
4.93M
        } else {
222
1
            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
223
0
                return MP_MEM;
224
225
1
            s_mp_copy(DIGITS(from), tmp, USED(from));
226
227
1
            if (DIGITS(to) != NULL) {
228
1
                s_mp_setz(DIGITS(to), ALLOC(to));
229
1
                s_mp_free(DIGITS(to));
230
1
            }
231
232
1
            DIGITS(to) = tmp;
233
1
            ALLOC(to) = ALLOC(from);
234
1
        }
235
236
        /* Copy the precision and sign from the original */
237
4.93M
        USED(to) = USED(from);
238
4.93M
        SIGN(to) = SIGN(from);
239
4.93M
    } /* end copy */
240
241
4.93M
    return MP_OKAY;
242
243
4.93M
} /* end mp_copy() */
244
245
/* }}} */
246
247
/* {{{ mp_exch(mp1, mp2) */
248
249
/*
250
  mp_exch(mp1, mp2)
251
252
  Exchange mp1 and mp2 without allocating any intermediate memory
253
  (well, unless you count the stack space needed for this call and the
254
  locals it creates...).  This cannot fail.
255
 */
256
257
void
258
mp_exch(mp_int *mp1, mp_int *mp2)
259
108k
{
260
108k
#if MP_ARGCHK == 2
261
108k
    assert(mp1 != NULL && mp2 != NULL);
262
#else
263
    if (mp1 == NULL || mp2 == NULL)
264
        return;
265
#endif
266
267
108k
    s_mp_exch(mp1, mp2);
268
269
108k
} /* end mp_exch() */
270
271
/* }}} */
272
273
/* {{{ mp_clear(mp) */
274
275
/*
276
  mp_clear(mp)
277
278
  Release the storage used by an mp_int, and void its fields so that
279
  if someone calls mp_clear() again for the same int later, we won't
280
  get tollchocked.
281
 */
282
283
void
284
mp_clear(mp_int *mp)
285
75.3M
{
286
75.3M
    if (mp == NULL)
287
0
        return;
288
289
75.3M
    if (DIGITS(mp) != NULL) {
290
13.8M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
291
13.8M
        s_mp_free(DIGITS(mp));
292
13.8M
        DIGITS(mp) = NULL;
293
13.8M
    }
294
295
75.3M
    USED(mp) = 0;
296
75.3M
    ALLOC(mp) = 0;
297
298
75.3M
} /* end mp_clear() */
299
300
/* }}} */
301
302
/* {{{ mp_zero(mp) */
303
304
/*
305
  mp_zero(mp)
306
307
  Set mp to zero.  Does not change the allocated size of the structure,
308
  and therefore cannot fail (except on a bad argument, which we ignore)
309
 */
310
void
311
mp_zero(mp_int *mp)
312
823k
{
313
823k
    if (mp == NULL)
314
0
        return;
315
316
823k
    s_mp_setz(DIGITS(mp), ALLOC(mp));
317
823k
    USED(mp) = 1;
318
823k
    SIGN(mp) = ZPOS;
319
320
823k
} /* end mp_zero() */
321
322
/* }}} */
323
324
/* {{{ mp_set(mp, d) */
325
326
void
327
mp_set(mp_int *mp, mp_digit d)
328
258k
{
329
258k
    if (mp == NULL)
330
0
        return;
331
332
258k
    mp_zero(mp);
333
258k
    DIGIT(mp, 0) = d;
334
335
258k
} /* end mp_set() */
336
337
/* }}} */
338
339
/* {{{ mp_set_int(mp, z) */
340
341
mp_err
342
mp_set_int(mp_int *mp, long z)
343
45.1k
{
344
45.1k
    unsigned long v = labs(z);
345
45.1k
    mp_err res;
346
347
45.1k
    ARGCHK(mp != NULL, MP_BADARG);
348
349
    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
350
45.1k
    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
351
0
        return res;
352
0
    }
353
354
45.1k
    if (z < 0) {
355
0
        SIGN(mp) = NEG;
356
0
    }
357
358
45.1k
    return MP_OKAY;
359
45.1k
} /* end mp_set_int() */
360
361
/* }}} */
362
363
/* {{{ mp_set_ulong(mp, z) */
364
365
mp_err
366
mp_set_ulong(mp_int *mp, unsigned long z)
367
45.1k
{
368
45.1k
    int ix;
369
45.1k
    mp_err res;
370
371
45.1k
    ARGCHK(mp != NULL, MP_BADARG);
372
373
45.1k
    mp_zero(mp);
374
45.1k
    if (z == 0)
375
0
        return MP_OKAY; /* shortcut for zero */
376
377
45.1k
    if (sizeof z <= sizeof(mp_digit)) {
378
45.1k
        DIGIT(mp, 0) = z;
379
45.1k
    } else {
380
0
        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
381
0
            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
382
0
                return res;
383
384
0
            res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
385
0
            if (res != MP_OKAY)
386
0
                return res;
387
0
        }
388
0
    }
389
45.1k
    return MP_OKAY;
390
45.1k
} /* end mp_set_ulong() */
391
392
/* }}} */
393
394
/*------------------------------------------------------------------------*/
395
/* {{{ Digit arithmetic */
396
397
/* {{{ mp_add_d(a, d, b) */
398
399
/*
400
  mp_add_d(a, d, b)
401
402
  Compute the sum b = a + d, for a single digit d.  Respects the sign of
403
  its primary addend (single digits are unsigned anyway).
404
 */
405
406
mp_err
407
mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
408
0
{
409
0
    mp_int tmp;
410
0
    mp_err res;
411
412
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
413
414
0
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
415
0
        return res;
416
417
0
    if (SIGN(&tmp) == ZPOS) {
418
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
419
0
            goto CLEANUP;
420
0
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
421
0
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
422
0
            goto CLEANUP;
423
0
    } else {
424
0
        mp_neg(&tmp, &tmp);
425
426
0
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
427
0
    }
428
429
0
    if (s_mp_cmp_d(&tmp, 0) == 0)
430
0
        SIGN(&tmp) = ZPOS;
431
432
0
    s_mp_exch(&tmp, b);
433
434
0
CLEANUP:
435
0
    mp_clear(&tmp);
436
0
    return res;
437
438
0
} /* end mp_add_d() */
439
440
/* }}} */
441
442
/* {{{ mp_sub_d(a, d, b) */
443
444
/*
445
  mp_sub_d(a, d, b)
446
447
  Compute the difference b = a - d, for a single digit d.  Respects the
448
  sign of its subtrahend (single digits are unsigned anyway).
449
 */
450
451
mp_err
452
mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
453
19.0k
{
454
19.0k
    mp_int tmp;
455
19.0k
    mp_err res;
456
457
19.0k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
458
459
19.0k
    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
460
0
        return res;
461
462
19.0k
    if (SIGN(&tmp) == NEG) {
463
0
        if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
464
0
            goto CLEANUP;
465
19.0k
    } else if (s_mp_cmp_d(&tmp, d) >= 0) {
466
16.1k
        if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
467
0
            goto CLEANUP;
468
16.1k
    } else {
469
2.94k
        mp_neg(&tmp, &tmp);
470
471
2.94k
        DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
472
2.94k
        SIGN(&tmp) = NEG;
473
2.94k
    }
474
475
19.0k
    if (s_mp_cmp_d(&tmp, 0) == 0)
476
6
        SIGN(&tmp) = ZPOS;
477
478
19.0k
    s_mp_exch(&tmp, b);
479
480
19.0k
CLEANUP:
481
19.0k
    mp_clear(&tmp);
482
19.0k
    return res;
483
484
19.0k
} /* end mp_sub_d() */
485
486
/* }}} */
487
488
/* {{{ mp_mul_d(a, d, b) */
489
490
/*
491
  mp_mul_d(a, d, b)
492
493
  Compute the product b = a * d, for a single digit d.  Respects the sign
494
  of its multiplicand (single digits are unsigned anyway)
495
 */
496
497
mp_err
498
mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
499
0
{
500
0
    mp_err res;
501
502
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
503
504
0
    if (d == 0) {
505
0
        mp_zero(b);
506
0
        return MP_OKAY;
507
0
    }
508
509
0
    if ((res = mp_copy(a, b)) != MP_OKAY)
510
0
        return res;
511
512
0
    res = s_mp_mul_d(b, d);
513
514
0
    return res;
515
516
0
} /* end mp_mul_d() */
517
518
/* }}} */
519
520
/* {{{ mp_mul_2(a, c) */
521
522
mp_err
523
mp_mul_2(const mp_int *a, mp_int *c)
524
0
{
525
0
    mp_err res;
526
527
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
528
529
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
530
0
        return res;
531
532
0
    return s_mp_mul_2(c);
533
534
0
} /* end mp_mul_2() */
535
536
/* }}} */
537
538
/* {{{ mp_div_d(a, d, q, r) */
539
540
/*
541
  mp_div_d(a, d, q, r)
542
543
  Compute the quotient q = a / d and remainder r = a mod d, for a
544
  single digit d.  Respects the sign of its divisor (single digits are
545
  unsigned anyway).
546
 */
547
548
mp_err
549
mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
550
0
{
551
0
    mp_err res;
552
0
    mp_int qp;
553
0
    mp_digit rem = 0;
554
0
    int pow;
555
556
0
    ARGCHK(a != NULL, MP_BADARG);
557
558
0
    if (d == 0)
559
0
        return MP_RANGE;
560
561
    /* Shortcut for powers of two ... */
562
0
    if ((pow = s_mp_ispow2d(d)) >= 0) {
563
0
        mp_digit mask;
564
565
0
        mask = ((mp_digit)1 << pow) - 1;
566
0
        rem = DIGIT(a, 0) & mask;
567
568
0
        if (q) {
569
0
            if ((res = mp_copy(a, q)) != MP_OKAY) {
570
0
                return res;
571
0
            }
572
0
            s_mp_div_2d(q, pow);
573
0
        }
574
575
0
        if (r)
576
0
            *r = rem;
577
578
0
        return MP_OKAY;
579
0
    }
580
581
0
    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
582
0
        return res;
583
584
0
    res = s_mp_div_d(&qp, d, &rem);
585
586
0
    if (s_mp_cmp_d(&qp, 0) == 0)
587
0
        SIGN(q) = ZPOS;
588
589
0
    if (r) {
590
0
        *r = rem;
591
0
    }
592
593
0
    if (q)
594
0
        s_mp_exch(&qp, q);
595
596
0
    mp_clear(&qp);
597
0
    return res;
598
599
0
} /* end mp_div_d() */
600
601
/* }}} */
602
603
/* {{{ mp_div_2(a, c) */
604
605
/*
606
  mp_div_2(a, c)
607
608
  Compute c = a / 2, disregarding the remainder.
609
 */
610
611
mp_err
612
mp_div_2(const mp_int *a, mp_int *c)
613
0
{
614
0
    mp_err res;
615
616
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
617
618
0
    if ((res = mp_copy(a, c)) != MP_OKAY)
619
0
        return res;
620
621
0
    s_mp_div_2(c);
622
623
0
    return MP_OKAY;
624
625
0
} /* end mp_div_2() */
626
627
/* }}} */
628
629
/* {{{ mp_expt_d(a, d, b) */
630
631
mp_err
632
mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
633
0
{
634
0
    mp_int s, x;
635
0
    mp_err res;
636
637
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
638
639
0
    if ((res = mp_init(&s)) != MP_OKAY)
640
0
        return res;
641
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
642
0
        goto X;
643
644
0
    DIGIT(&s, 0) = 1;
645
646
0
    while (d != 0) {
647
0
        if (d & 1) {
648
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
649
0
                goto CLEANUP;
650
0
        }
651
652
0
        d /= 2;
653
654
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
655
0
            goto CLEANUP;
656
0
    }
657
658
0
    s_mp_exch(&s, c);
659
660
0
CLEANUP:
661
0
    mp_clear(&x);
662
0
X:
663
0
    mp_clear(&s);
664
665
0
    return res;
666
667
0
} /* end mp_expt_d() */
668
669
/* }}} */
670
671
/* }}} */
672
673
/*------------------------------------------------------------------------*/
674
/* {{{ Full arithmetic */
675
676
/* {{{ mp_abs(a, b) */
677
678
/*
679
  mp_abs(a, b)
680
681
  Compute b = |a|.  'a' and 'b' may be identical.
682
 */
683
684
mp_err
685
mp_abs(const mp_int *a, mp_int *b)
686
0
{
687
0
    mp_err res;
688
689
0
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
690
691
0
    if ((res = mp_copy(a, b)) != MP_OKAY)
692
0
        return res;
693
694
0
    SIGN(b) = ZPOS;
695
696
0
    return MP_OKAY;
697
698
0
} /* end mp_abs() */
699
700
/* }}} */
701
702
/* {{{ mp_neg(a, b) */
703
704
/*
705
  mp_neg(a, b)
706
707
  Compute b = -a.  'a' and 'b' may be identical.
708
 */
709
710
mp_err
711
mp_neg(const mp_int *a, mp_int *b)
712
2.94k
{
713
2.94k
    mp_err res;
714
715
2.94k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
716
717
2.94k
    if ((res = mp_copy(a, b)) != MP_OKAY)
718
0
        return res;
719
720
2.94k
    if (s_mp_cmp_d(b, 0) == MP_EQ)
721
2.94k
        SIGN(b) = ZPOS;
722
0
    else
723
0
        SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
724
725
2.94k
    return MP_OKAY;
726
727
2.94k
} /* end mp_neg() */
728
729
/* }}} */
730
731
/* {{{ mp_add(a, b, c) */
732
733
/*
734
  mp_add(a, b, c)
735
736
  Compute c = a + b.  All parameters may be identical.
737
 */
738
739
mp_err
740
mp_add(const mp_int *a, const mp_int *b, mp_int *c)
741
22.7M
{
742
22.7M
    mp_err res;
743
744
22.7M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
745
746
22.7M
    if (SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
747
14.1M
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
748
14.1M
    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b|   */
749
3.31M
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
750
5.22M
    } else { /* different sign: |a|  < |b|   */
751
5.22M
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
752
5.22M
    }
753
754
22.7M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
755
17.8k
        SIGN(c) = ZPOS;
756
757
22.7M
CLEANUP:
758
22.7M
    return res;
759
760
22.7M
} /* end mp_add() */
761
762
/* }}} */
763
764
/* {{{ mp_sub(a, b, c) */
765
766
/*
767
  mp_sub(a, b, c)
768
769
  Compute c = a - b.  All parameters may be identical.
770
 */
771
772
mp_err
773
mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
774
2.78M
{
775
2.78M
    mp_err res;
776
2.78M
    int magDiff;
777
778
2.78M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
779
780
2.78M
    if (a == b) {
781
0
        mp_zero(c);
782
0
        return MP_OKAY;
783
0
    }
784
785
2.78M
    if (MP_SIGN(a) != MP_SIGN(b)) {
786
0
        MP_CHECKOK(s_mp_add_3arg(a, b, c));
787
2.78M
    } else if (!(magDiff = s_mp_cmp(a, b))) {
788
78.9k
        mp_zero(c);
789
78.9k
        res = MP_OKAY;
790
2.70M
    } else if (magDiff > 0) {
791
2.53M
        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
792
2.53M
    } else {
793
176k
        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
794
176k
        MP_SIGN(c) = !MP_SIGN(a);
795
176k
    }
796
797
2.78M
    if (s_mp_cmp_d(c, 0) == MP_EQ)
798
78.9k
        MP_SIGN(c) = MP_ZPOS;
799
800
2.78M
CLEANUP:
801
2.78M
    return res;
802
803
2.78M
} /* end mp_sub() */
804
805
/* }}} */
806
807
/* {{{ s_mp_mulg(a, b, c) */
808
809
/*
810
  s_mp_mulg(a, b, c)
811
812
  Compute c = a * b.  All parameters may be identical. if constantTime is set,
813
  then the operations are done in constant time. The original is mostly
814
  constant time as long as s_mpv_mul_d_add() is constant time. This is true
815
  of the x86 assembler, as well as the current c code.
816
 */
817
mp_err
818
s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
819
6.46M
{
820
6.46M
    mp_digit *pb;
821
6.46M
    mp_int tmp;
822
6.46M
    mp_err res;
823
6.46M
    mp_size ib;
824
6.46M
    mp_size useda, usedb;
825
826
6.46M
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
827
828
6.46M
    if (a == c) {
829
6.38M
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
830
0
            return res;
831
6.38M
        if (a == b)
832
0
            b = &tmp;
833
6.38M
        a = &tmp;
834
6.38M
    } else if (b == c) {
835
0
        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
836
0
            return res;
837
0
        b = &tmp;
838
79.2k
    } else {
839
79.2k
        MP_DIGITS(&tmp) = 0;
840
79.2k
    }
841
842
6.46M
    if (MP_USED(a) < MP_USED(b)) {
843
709k
        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
844
709k
        b = a;
845
709k
        a = xch;
846
709k
    }
847
848
6.46M
    MP_USED(c) = 1;
849
6.46M
    MP_DIGIT(c, 0) = 0;
850
6.46M
    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
851
0
        goto CLEANUP;
852
853
6.46M
#ifdef NSS_USE_COMBA
854
    /* comba isn't constant time because it clamps! If we cared
855
     * (we needed a constant time version of multiply that was 'faster'
856
     * we could easily pass constantTime down to the comba code and
857
     * get it to skip the clamp... but here are assembler versions
858
     * which add comba to platforms that can't compile the normal
859
     * comba's imbedded assembler which would also need to change, so
860
     * for now we just skip comba when we are running constant time. */
861
6.46M
    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
862
3.23M
        if (MP_USED(a) == 4) {
863
19.7k
            s_mp_mul_comba_4(a, b, c);
864
19.7k
            goto CLEANUP;
865
19.7k
        }
866
3.21M
        if (MP_USED(a) == 8) {
867
789
            s_mp_mul_comba_8(a, b, c);
868
789
            goto CLEANUP;
869
789
        }
870
3.21M
        if (MP_USED(a) == 16) {
871
544k
            s_mp_mul_comba_16(a, b, c);
872
544k
            goto CLEANUP;
873
544k
        }
874
2.66M
        if (MP_USED(a) == 32) {
875
2.62M
            s_mp_mul_comba_32(a, b, c);
876
2.62M
            goto CLEANUP;
877
2.62M
        }
878
2.66M
    }
879
3.27M
#endif
880
881
3.27M
    pb = MP_DIGITS(b);
882
3.27M
    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
883
884
    /* Outer loop:  Digits of b */
885
3.27M
    useda = MP_USED(a);
886
3.27M
    usedb = MP_USED(b);
887
81.2M
    for (ib = 1; ib < usedb; ib++) {
888
77.9M
        mp_digit b_i = *pb++;
889
890
        /* Inner product:  Digits of a */
891
77.9M
        if (constantTime || b_i)
892
73.2M
            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
893
4.64M
        else
894
4.64M
            MP_DIGIT(c, ib + useda) = b_i;
895
77.9M
    }
896
897
3.27M
    if (!constantTime) {
898
3.25M
        s_mp_clamp(c);
899
3.25M
    }
900
901
3.27M
    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
902
3.24M
        SIGN(c) = ZPOS;
903
28.7k
    else
904
28.7k
        SIGN(c) = NEG;
905
906
6.46M
CLEANUP:
907
6.46M
    mp_clear(&tmp);
908
6.46M
    return res;
909
3.27M
} /* end smp_mulg() */
910
911
/* }}} */
912
913
/* {{{ mp_mul(a, b, c) */
914
915
/*
916
  mp_mul(a, b, c)
917
918
  Compute c = a * b.  All parameters may be identical.
919
 */
920
921
mp_err
922
mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
923
6.44M
{
924
6.44M
    return s_mp_mulg(a, b, c, 0);
925
6.44M
} /* end mp_mul() */
926
927
/* }}} */
928
929
/* {{{ mp_mulCT(a, b, c) */
930
931
/*
932
  mp_mulCT(a, b, c)
933
934
  Compute c = a * b. In constant time. Parameters may not be identical.
935
  NOTE: a and b may be modified.
936
 */
937
938
mp_err
939
mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
940
22.5k
{
941
22.5k
    mp_err res;
942
943
    /* make the multiply values fixed length so multiply
944
     * doesn't leak the length. at this point all the
945
     * values are blinded, but once we finish we want the
946
     * output size to be hidden (so no clamping the out put) */
947
22.5k
    MP_CHECKOK(s_mp_pad(a, setSize));
948
22.5k
    MP_CHECKOK(s_mp_pad(b, setSize));
949
22.5k
    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
950
22.5k
    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
951
22.5k
CLEANUP:
952
22.5k
    return res;
953
22.5k
} /* end mp_mulCT() */
954
955
/* }}} */
956
957
/* {{{ mp_sqr(a, sqr) */
958
959
#if MP_SQUARE
960
/*
961
  Computes the square of a.  This can be done more
962
  efficiently than a general multiplication, because many of the
963
  computation steps are redundant when squaring.  The inner product
964
  step is a bit more complicated, but we save a fair number of
965
  iterations of the multiplication loop.
966
 */
967
968
/* sqr = a^2;   Caller provides both a and tmp; */
969
mp_err
970
mp_sqr(const mp_int *a, mp_int *sqr)
971
61.0M
{
972
61.0M
    mp_digit *pa;
973
61.0M
    mp_digit d;
974
61.0M
    mp_err res;
975
61.0M
    mp_size ix;
976
61.0M
    mp_int tmp;
977
61.0M
    int count;
978
979
61.0M
    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
980
981
61.0M
    if (a == sqr) {
982
0
        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
983
0
            return res;
984
0
        a = &tmp;
985
61.0M
    } else {
986
61.0M
        DIGITS(&tmp) = 0;
987
61.0M
        res = MP_OKAY;
988
61.0M
    }
989
990
61.0M
    ix = 2 * MP_USED(a);
991
61.0M
    if (ix > MP_ALLOC(sqr)) {
992
0
        MP_USED(sqr) = 1;
993
0
        MP_CHECKOK(s_mp_grow(sqr, ix));
994
0
    }
995
61.0M
    MP_USED(sqr) = ix;
996
61.0M
    MP_DIGIT(sqr, 0) = 0;
997
998
61.0M
#ifdef NSS_USE_COMBA
999
61.0M
    if (IS_POWER_OF_2(MP_USED(a))) {
1000
59.8M
        if (MP_USED(a) == 4) {
1001
3.32k
            s_mp_sqr_comba_4(a, sqr);
1002
3.32k
            goto CLEANUP;
1003
3.32k
        }
1004
59.8M
        if (MP_USED(a) == 8) {
1005
3.69k
            s_mp_sqr_comba_8(a, sqr);
1006
3.69k
            goto CLEANUP;
1007
3.69k
        }
1008
59.8M
        if (MP_USED(a) == 16) {
1009
50.7M
            s_mp_sqr_comba_16(a, sqr);
1010
50.7M
            goto CLEANUP;
1011
50.7M
        }
1012
9.05M
        if (MP_USED(a) == 32) {
1013
5.93M
            s_mp_sqr_comba_32(a, sqr);
1014
5.93M
            goto CLEANUP;
1015
5.93M
        }
1016
9.05M
    }
1017
4.37M
#endif
1018
1019
4.37M
    pa = MP_DIGITS(a);
1020
4.37M
    count = MP_USED(a) - 1;
1021
4.37M
    if (count > 0) {
1022
1.98M
        d = *pa++;
1023
1.98M
        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
1024
148M
        for (ix = 3; --count > 0; ix += 2) {
1025
146M
            d = *pa++;
1026
146M
            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
1027
146M
        }                                    /* for(ix ...) */
1028
1.98M
        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
1029
1030
        /* now sqr *= 2 */
1031
1.98M
        s_mp_mul_2(sqr);
1032
2.38M
    } else {
1033
2.38M
        MP_DIGIT(sqr, 1) = 0;
1034
2.38M
    }
1035
1036
    /* now add the squares of the digits of a to sqr. */
1037
4.37M
    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
1038
1039
4.37M
    SIGN(sqr) = ZPOS;
1040
4.37M
    s_mp_clamp(sqr);
1041
1042
61.0M
CLEANUP:
1043
61.0M
    mp_clear(&tmp);
1044
61.0M
    return res;
1045
1046
4.37M
} /* end mp_sqr() */
1047
#endif
1048
1049
/* }}} */
1050
1051
/* {{{ mp_div(a, b, q, r) */
1052
1053
/*
1054
  mp_div(a, b, q, r)
1055
1056
  Compute q = a / b and r = a mod b.  Input parameters may be re-used
1057
  as output parameters.  If q or r is NULL, that portion of the
1058
  computation will be discarded (although it will still be computed)
1059
 */
1060
mp_err
1061
mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1062
326k
{
1063
326k
    mp_err res;
1064
326k
    mp_int *pQ, *pR;
1065
326k
    mp_int qtmp, rtmp, btmp;
1066
326k
    int cmp;
1067
326k
    mp_sign signA;
1068
326k
    mp_sign signB;
1069
1070
326k
    ARGCHK(a != NULL && b != NULL, MP_BADARG);
1071
1072
326k
    signA = MP_SIGN(a);
1073
326k
    signB = MP_SIGN(b);
1074
1075
326k
    if (mp_cmp_z(b) == MP_EQ)
1076
1
        return MP_RANGE;
1077
1078
326k
    DIGITS(&qtmp) = 0;
1079
326k
    DIGITS(&rtmp) = 0;
1080
326k
    DIGITS(&btmp) = 0;
1081
1082
    /* Set up some temporaries... */
1083
326k
    if (!r || r == a || r == b) {
1084
280k
        MP_CHECKOK(mp_init_copy(&rtmp, a));
1085
280k
        pR = &rtmp;
1086
280k
    } else {
1087
46.5k
        MP_CHECKOK(mp_copy(a, r));
1088
46.5k
        pR = r;
1089
46.5k
    }
1090
1091
326k
    if (!q || q == a || q == b) {
1092
326k
        MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
1093
326k
        pQ = &qtmp;
1094
326k
    } else {
1095
0
        MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
1096
0
        pQ = q;
1097
0
        mp_zero(pQ);
1098
0
    }
1099
1100
    /*
1101
      If |a| <= |b|, we can compute the solution without division;
1102
      otherwise, we actually do the work required.
1103
     */
1104
326k
    if ((cmp = s_mp_cmp(a, b)) <= 0) {
1105
2.06k
        if (cmp) {
1106
            /* r was set to a above. */
1107
2.06k
            mp_zero(pQ);
1108
2.06k
        } else {
1109
0
            mp_set(pQ, 1);
1110
0
            mp_zero(pR);
1111
0
        }
1112
324k
    } else {
1113
324k
        MP_CHECKOK(mp_init_copy(&btmp, b));
1114
324k
        MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
1115
324k
    }
1116
1117
    /* Compute the signs for the output  */
1118
326k
    MP_SIGN(pR) = signA;        /* Sr = Sa              */
1119
    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1120
326k
    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1121
1122
326k
    if (s_mp_cmp_d(pQ, 0) == MP_EQ)
1123
2.06k
        SIGN(pQ) = ZPOS;
1124
326k
    if (s_mp_cmp_d(pR, 0) == MP_EQ)
1125
3.39k
        SIGN(pR) = ZPOS;
1126
1127
    /* Copy output, if it is needed      */
1128
326k
    if (q && q != pQ)
1129
26.5k
        s_mp_exch(pQ, q);
1130
1131
326k
    if (r && r != pR)
1132
253k
        s_mp_exch(pR, r);
1133
1134
326k
CLEANUP:
1135
326k
    mp_clear(&btmp);
1136
326k
    mp_clear(&rtmp);
1137
326k
    mp_clear(&qtmp);
1138
1139
326k
    return res;
1140
1141
326k
} /* end mp_div() */
1142
1143
/* }}} */
1144
1145
/* {{{ mp_div_2d(a, d, q, r) */
1146
1147
mp_err
1148
mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1149
0
{
1150
0
    mp_err res;
1151
1152
0
    ARGCHK(a != NULL, MP_BADARG);
1153
1154
0
    if (q) {
1155
0
        if ((res = mp_copy(a, q)) != MP_OKAY)
1156
0
            return res;
1157
0
    }
1158
0
    if (r) {
1159
0
        if ((res = mp_copy(a, r)) != MP_OKAY)
1160
0
            return res;
1161
0
    }
1162
0
    if (q) {
1163
0
        s_mp_div_2d(q, d);
1164
0
    }
1165
0
    if (r) {
1166
0
        s_mp_mod_2d(r, d);
1167
0
    }
1168
1169
0
    return MP_OKAY;
1170
1171
0
} /* end mp_div_2d() */
1172
1173
/* }}} */
1174
1175
/* {{{ mp_expt(a, b, c) */
1176
1177
/*
1178
  mp_expt(a, b, c)
1179
1180
  Compute c = a ** b, that is, raise a to the b power.  Uses a
1181
  standard iterative square-and-multiply technique.
1182
 */
1183
1184
mp_err
1185
mp_expt(mp_int *a, mp_int *b, mp_int *c)
1186
0
{
1187
0
    mp_int s, x;
1188
0
    mp_err res;
1189
0
    mp_digit d;
1190
0
    unsigned int dig, bit;
1191
1192
0
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1193
1194
0
    if (mp_cmp_z(b) < 0)
1195
0
        return MP_RANGE;
1196
1197
0
    if ((res = mp_init(&s)) != MP_OKAY)
1198
0
        return res;
1199
1200
0
    mp_set(&s, 1);
1201
1202
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1203
0
        goto X;
1204
1205
    /* Loop over low-order digits in ascending order */
1206
0
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1207
0
        d = DIGIT(b, dig);
1208
1209
        /* Loop over bits of each non-maximal digit */
1210
0
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1211
0
            if (d & 1) {
1212
0
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1213
0
                    goto CLEANUP;
1214
0
            }
1215
1216
0
            d >>= 1;
1217
1218
0
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1219
0
                goto CLEANUP;
1220
0
        }
1221
0
    }
1222
1223
    /* Consider now the last digit... */
1224
0
    d = DIGIT(b, dig);
1225
1226
0
    while (d) {
1227
0
        if (d & 1) {
1228
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1229
0
                goto CLEANUP;
1230
0
        }
1231
1232
0
        d >>= 1;
1233
1234
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1235
0
            goto CLEANUP;
1236
0
    }
1237
1238
0
    if (mp_iseven(b))
1239
0
        SIGN(&s) = SIGN(a);
1240
1241
0
    res = mp_copy(&s, c);
1242
1243
0
CLEANUP:
1244
0
    mp_clear(&x);
1245
0
X:
1246
0
    mp_clear(&s);
1247
1248
0
    return res;
1249
1250
0
} /* end mp_expt() */
1251
1252
/* }}} */
1253
1254
/* {{{ mp_2expt(a, k) */
1255
1256
/* Compute a = 2^k */
1257
1258
mp_err
1259
mp_2expt(mp_int *a, mp_digit k)
1260
0
{
1261
0
    ARGCHK(a != NULL, MP_BADARG);
1262
1263
0
    return s_mp_2expt(a, k);
1264
1265
0
} /* end mp_2expt() */
1266
1267
/* }}} */
1268
1269
/* {{{ mp_mod(a, m, c) */
1270
1271
/*
1272
  mp_mod(a, m, c)
1273
1274
  Compute c = a (mod m).  Result will always be 0 <= c < m.
1275
 */
1276
1277
mp_err
1278
mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1279
184k
{
1280
184k
    mp_err res;
1281
184k
    int mag;
1282
1283
184k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1284
1285
184k
    if (SIGN(m) == NEG)
1286
119
        return MP_RANGE;
1287
1288
    /*
1289
     If |a| > m, we need to divide to get the remainder and take the
1290
     absolute value.
1291
1292
     If |a| < m, we don't need to do any division, just copy and adjust
1293
     the sign (if a is negative).
1294
1295
     If |a| == m, we can simply set the result to zero.
1296
1297
     This order is intended to minimize the average path length of the
1298
     comparison chain on common workloads -- the most frequent cases are
1299
     that |a| != m, so we do those first.
1300
     */
1301
184k
    if ((mag = s_mp_cmp(a, m)) > 0) {
1302
116k
        if ((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1303
1
            return res;
1304
1305
116k
        if (SIGN(c) == NEG) {
1306
829
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1307
0
                return res;
1308
829
        }
1309
1310
116k
    } else if (mag < 0) {
1311
68.4k
        if ((res = mp_copy(a, c)) != MP_OKAY)
1312
0
            return res;
1313
1314
68.4k
        if (mp_cmp_z(a) < 0) {
1315
10.8k
            if ((res = mp_add(c, m, c)) != MP_OKAY)
1316
0
                return res;
1317
10.8k
        }
1318
1319
68.4k
    } else {
1320
8
        mp_zero(c);
1321
8
    }
1322
1323
184k
    return MP_OKAY;
1324
1325
184k
} /* end mp_mod() */
1326
1327
/* }}} */
1328
1329
/* {{{ s_mp_subCT_d(a, b, borrow, c) */
1330
1331
/*
1332
  s_mp_subCT_d(a, b, borrow, c)
1333
1334
  Compute c = (a -b) - subtract in constant time. returns borrow
1335
 */
1336
mp_digit
1337
s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
1338
722k
{
1339
722k
    *ret = a - b - borrow;
1340
722k
    return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow);
1341
722k
} /*  s_mp_subCT_d() */
1342
1343
/* }}} */
1344
1345
/* {{{ mp_subCT(a, b, ret, borrow) */
1346
1347
/* return ret= a - b and borrow in borrow. done in constant time.
1348
 * b could be modified.
1349
 */
1350
mp_err
1351
mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
1352
22.5k
{
1353
22.5k
    mp_size used_a = MP_USED(a);
1354
22.5k
    mp_size i;
1355
22.5k
    mp_err res;
1356
1357
22.5k
    MP_CHECKOK(s_mp_pad(b, used_a));
1358
22.5k
    MP_CHECKOK(s_mp_pad(ret, used_a));
1359
22.5k
    *borrow = 0;
1360
745k
    for (i = 0; i < used_a; i++) {
1361
722k
        *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
1362
722k
                               &MP_DIGIT(ret, i));
1363
722k
    }
1364
1365
22.5k
    res = MP_OKAY;
1366
22.5k
CLEANUP:
1367
22.5k
    return res;
1368
22.5k
} /*  end mp_subCT() */
1369
1370
/* }}} */
1371
1372
/* {{{ mp_selectCT(cond, a, b, ret) */
1373
1374
/*
1375
 * return ret= cond ? a : b; cond should be either 0 or 1
1376
 */
1377
mp_err
1378
mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
1379
22.5k
{
1380
22.5k
    mp_size used_a = MP_USED(a);
1381
22.5k
    mp_err res;
1382
22.5k
    mp_size i;
1383
1384
22.5k
    cond *= MP_DIGIT_MAX;
1385
1386
    /* we currently require these to be equal on input,
1387
     * we could use pad to extend one of them, but that might
1388
     * leak data as it wouldn't be constant time */
1389
22.5k
    if (used_a != MP_USED(b)) {
1390
0
        return MP_BADARG;
1391
0
    }
1392
1393
22.5k
    MP_CHECKOK(s_mp_pad(ret, used_a));
1394
745k
    for (i = 0; i < used_a; i++) {
1395
722k
        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
1396
722k
    }
1397
22.5k
    res = MP_OKAY;
1398
22.5k
CLEANUP:
1399
22.5k
    return res;
1400
22.5k
} /* end mp_selectCT() */
1401
1402
/* {{{ mp_reduceCT(a, m, c) */
1403
1404
/*
1405
  mp_reduceCT(a, m, c)
1406
1407
  Compute c = aR^-1 (mod m) in constant time.
1408
   input should be in montgomery form. If input is the
1409
   result of a montgomery multiply then out put will be
1410
   in mongomery form.
1411
   Result will be reduced to MP_USED(m), but not be
1412
   clamped.
1413
 */
1414
1415
mp_err
1416
mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
1417
22.5k
{
1418
22.5k
    mp_size used_m = MP_USED(m);
1419
22.5k
    mp_size used_c = used_m * 2 + 1;
1420
22.5k
    mp_digit *m_digits, *c_digits;
1421
22.5k
    mp_size i;
1422
22.5k
    mp_digit borrow, carry;
1423
22.5k
    mp_err res;
1424
22.5k
    mp_int sub;
1425
1426
22.5k
    MP_DIGITS(&sub) = 0;
1427
22.5k
    MP_CHECKOK(mp_init_size(&sub, used_m));
1428
1429
22.5k
    if (a != c) {
1430
0
        MP_CHECKOK(mp_copy(a, c));
1431
0
    }
1432
22.5k
    MP_CHECKOK(s_mp_pad(c, used_c));
1433
22.5k
    m_digits = MP_DIGITS(m);
1434
22.5k
    c_digits = MP_DIGITS(c);
1435
745k
    for (i = 0; i < used_m; i++) {
1436
722k
        mp_digit m_i = MP_DIGIT(c, i) * n0i;
1437
722k
        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
1438
722k
    }
1439
22.5k
    s_mp_rshd(c, used_m);
1440
    /* MP_USED(c) should be used_m+1 with the high word being any carry
1441
     * from the previous multiply, save that carry and drop the high
1442
     * word for the substraction below */
1443
22.5k
    carry = MP_DIGIT(c, used_m);
1444
22.5k
    MP_DIGIT(c, used_m) = 0;
1445
22.5k
    MP_USED(c) = used_m;
1446
    /* mp_subCT wants c and m to be the same size, we've already
1447
     * guarrenteed that in the previous statement, so mp_subCT won't actually
1448
     * modify m, so it's safe to recast */
1449
22.5k
    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
1450
1451
    /* we return c-m if c >= m no borrow or there was a borrow and a carry */
1452
22.5k
    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
1453
22.5k
    res = MP_OKAY;
1454
22.5k
CLEANUP:
1455
22.5k
    mp_clear(&sub);
1456
22.5k
    return res;
1457
22.5k
} /* end mp_reduceCT() */
1458
1459
/* }}} */
1460
1461
/* {{{ mp_mod_d(a, d, c) */
1462
1463
/*
1464
  mp_mod_d(a, d, c)
1465
1466
  Compute c = a (mod d).  Result will always be 0 <= c < d
1467
 */
1468
mp_err
1469
mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1470
0
{
1471
0
    mp_err res;
1472
0
    mp_digit rem;
1473
1474
0
    ARGCHK(a != NULL && c != NULL, MP_BADARG);
1475
1476
0
    if (s_mp_cmp_d(a, d) > 0) {
1477
0
        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1478
0
            return res;
1479
1480
0
    } else {
1481
0
        if (SIGN(a) == NEG)
1482
0
            rem = d - DIGIT(a, 0);
1483
0
        else
1484
0
            rem = DIGIT(a, 0);
1485
0
    }
1486
1487
0
    if (c)
1488
0
        *c = rem;
1489
1490
0
    return MP_OKAY;
1491
1492
0
} /* end mp_mod_d() */
1493
1494
/* }}} */
1495
1496
/* }}} */
1497
1498
/*------------------------------------------------------------------------*/
1499
/* {{{ Modular arithmetic */
1500
1501
#if MP_MODARITH
1502
/* {{{ mp_addmod(a, b, m, c) */
1503
1504
/*
1505
  mp_addmod(a, b, m, c)
1506
1507
  Compute c = (a + b) mod m
1508
 */
1509
1510
mp_err
1511
mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1512
0
{
1513
0
    mp_err res;
1514
1515
0
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1516
1517
0
    if ((res = mp_add(a, b, c)) != MP_OKAY)
1518
0
        return res;
1519
0
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1520
0
        return res;
1521
1522
0
    return MP_OKAY;
1523
0
}
1524
1525
/* }}} */
1526
1527
/* {{{ mp_submod(a, b, m, c) */
1528
1529
/*
1530
  mp_submod(a, b, m, c)
1531
1532
  Compute c = (a - b) mod m
1533
 */
1534
1535
mp_err
1536
mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1537
22.5k
{
1538
22.5k
    mp_err res;
1539
1540
22.5k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1541
1542
22.5k
    if ((res = mp_sub(a, b, c)) != MP_OKAY)
1543
0
        return res;
1544
22.5k
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1545
0
        return res;
1546
1547
22.5k
    return MP_OKAY;
1548
22.5k
}
1549
1550
/* }}} */
1551
1552
/* {{{ mp_mulmod(a, b, m, c) */
1553
1554
/*
1555
  mp_mulmod(a, b, m, c)
1556
1557
  Compute c = (a * b) mod m
1558
 */
1559
1560
mp_err
1561
mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1562
64.8k
{
1563
64.8k
    mp_err res;
1564
1565
64.8k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1566
1567
64.8k
    if ((res = mp_mul(a, b, c)) != MP_OKAY)
1568
0
        return res;
1569
64.8k
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1570
120
        return res;
1571
1572
64.7k
    return MP_OKAY;
1573
64.8k
}
1574
1575
/* }}} */
1576
1577
/* {{{ mp_mulmontmodCT(a, b, m, c) */
1578
1579
/*
1580
  mp_mulmontmodCT(a, b, m, c)
1581
1582
  Compute c = (a * b) mod m in constant time wrt a and b. either a or b
1583
  should be in montgomery form and the output is native. If both a and b
1584
  are in montgomery form, then the output will also be in montgomery form
1585
  and can be recovered with an mp_reduceCT call.
1586
  NOTE: a and b may be modified.
1587
 */
1588
1589
mp_err
1590
mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
1591
                mp_int *c)
1592
22.5k
{
1593
22.5k
    mp_err res;
1594
1595
22.5k
    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1596
1597
22.5k
    if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY)
1598
0
        return res;
1599
1600
22.5k
    if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY)
1601
0
        return res;
1602
1603
22.5k
    return MP_OKAY;
1604
22.5k
}
1605
1606
/* }}} */
1607
1608
/* {{{ mp_sqrmod(a, m, c) */
1609
1610
#if MP_SQUARE
1611
mp_err
1612
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1613
0
{
1614
0
    mp_err res;
1615
1616
0
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1617
1618
0
    if ((res = mp_sqr(a, c)) != MP_OKAY)
1619
0
        return res;
1620
0
    if ((res = mp_mod(c, m, c)) != MP_OKAY)
1621
0
        return res;
1622
1623
0
    return MP_OKAY;
1624
1625
0
} /* end mp_sqrmod() */
1626
#endif
1627
1628
/* }}} */
1629
1630
/* {{{ s_mp_exptmod(a, b, m, c) */
1631
1632
/*
1633
  s_mp_exptmod(a, b, m, c)
1634
1635
  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1636
  method with modular reductions at each step. (This is basically the
1637
  same code as mp_expt(), except for the addition of the reductions)
1638
1639
  The modular reductions are done using Barrett's algorithm (see
1640
  s_mp_reduce() below for details)
1641
 */
1642
1643
mp_err
1644
s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1645
26.5k
{
1646
26.5k
    mp_int s, x, mu;
1647
26.5k
    mp_err res;
1648
26.5k
    mp_digit d;
1649
26.5k
    unsigned int dig, bit;
1650
1651
26.5k
    ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
1652
1653
26.5k
    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1654
0
        return MP_RANGE;
1655
1656
26.5k
    if ((res = mp_init(&s)) != MP_OKAY)
1657
0
        return res;
1658
26.5k
    if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
1659
26.5k
        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1660
0
        goto X;
1661
26.5k
    if ((res = mp_init(&mu)) != MP_OKAY)
1662
0
        goto MU;
1663
1664
26.5k
    mp_set(&s, 1);
1665
1666
    /* mu = b^2k / m */
1667
26.5k
    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
1668
0
        goto CLEANUP;
1669
26.5k
    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
1670
0
        goto CLEANUP;
1671
26.5k
    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1672
0
        goto CLEANUP;
1673
1674
    /* Loop over digits of b in ascending order, except highest order */
1675
44.8k
    for (dig = 0; dig < (USED(b) - 1); dig++) {
1676
18.3k
        d = DIGIT(b, dig);
1677
1678
        /* Loop over the bits of the lower-order digits */
1679
1.19M
        for (bit = 0; bit < DIGIT_BIT; bit++) {
1680
1.17M
            if (d & 1) {
1681
547k
                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1682
0
                    goto CLEANUP;
1683
547k
                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1684
0
                    goto CLEANUP;
1685
547k
            }
1686
1687
1.17M
            d >>= 1;
1688
1689
1.17M
            if ((res = s_mp_sqr(&x)) != MP_OKAY)
1690
0
                goto CLEANUP;
1691
1.17M
            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1692
0
                goto CLEANUP;
1693
1.17M
        }
1694
18.3k
    }
1695
1696
    /* Now do the last digit... */
1697
26.5k
    d = DIGIT(b, dig);
1698
1699
671k
    while (d) {
1700
645k
        if (d & 1) {
1701
320k
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
1702
0
                goto CLEANUP;
1703
320k
            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1704
0
                goto CLEANUP;
1705
320k
        }
1706
1707
645k
        d >>= 1;
1708
1709
645k
        if ((res = s_mp_sqr(&x)) != MP_OKAY)
1710
0
            goto CLEANUP;
1711
645k
        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1712
0
            goto CLEANUP;
1713
645k
    }
1714
1715
26.5k
    s_mp_exch(&s, c);
1716
1717
26.5k
CLEANUP:
1718
26.5k
    mp_clear(&mu);
1719
26.5k
MU:
1720
26.5k
    mp_clear(&x);
1721
26.5k
X:
1722
26.5k
    mp_clear(&s);
1723
1724
26.5k
    return res;
1725
1726
26.5k
} /* end s_mp_exptmod() */
1727
1728
/* }}} */
1729
1730
/* {{{ mp_exptmod_d(a, d, m, c) */
1731
1732
mp_err
1733
mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1734
0
{
1735
0
    mp_int s, x;
1736
0
    mp_err res;
1737
1738
0
    ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
1739
1740
0
    if ((res = mp_init(&s)) != MP_OKAY)
1741
0
        return res;
1742
0
    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
1743
0
        goto X;
1744
1745
0
    mp_set(&s, 1);
1746
1747
0
    while (d != 0) {
1748
0
        if (d & 1) {
1749
0
            if ((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1750
0
                (res = mp_mod(&s, m, &s)) != MP_OKAY)
1751
0
                goto CLEANUP;
1752
0
        }
1753
1754
0
        d /= 2;
1755
1756
0
        if ((res = s_mp_sqr(&x)) != MP_OKAY ||
1757
0
            (res = mp_mod(&x, m, &x)) != MP_OKAY)
1758
0
            goto CLEANUP;
1759
0
    }
1760
1761
0
    s_mp_exch(&s, c);
1762
1763
0
CLEANUP:
1764
0
    mp_clear(&x);
1765
0
X:
1766
0
    mp_clear(&s);
1767
1768
0
    return res;
1769
1770
0
} /* end mp_exptmod_d() */
1771
1772
/* }}} */
1773
#endif /* if MP_MODARITH */
1774
1775
/* }}} */
1776
1777
/*------------------------------------------------------------------------*/
1778
/* {{{ Comparison functions */
1779
1780
/* {{{ mp_cmp_z(a) */
1781
1782
/*
1783
  mp_cmp_z(a)
1784
1785
  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1786
 */
1787
1788
int
1789
mp_cmp_z(const mp_int *a)
1790
10.3M
{
1791
10.3M
    ARGMPCHK(a != NULL);
1792
1793
10.3M
    if (SIGN(a) == NEG)
1794
148k
        return MP_LT;
1795
10.2M
    else if (USED(a) == 1 && DIGIT(a, 0) == 0)
1796
509k
        return MP_EQ;
1797
9.73M
    else
1798
9.73M
        return MP_GT;
1799
1800
10.3M
} /* end mp_cmp_z() */
1801
1802
/* }}} */
1803
1804
/* {{{ mp_cmp_d(a, d) */
1805
1806
/*
1807
  mp_cmp_d(a, d)
1808
1809
  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1810
 */
1811
1812
int
1813
mp_cmp_d(const mp_int *a, mp_digit d)
1814
59.2k
{
1815
59.2k
    ARGCHK(a != NULL, MP_EQ);
1816
1817
59.2k
    if (SIGN(a) == NEG)
1818
0
        return MP_LT;
1819
1820
59.2k
    return s_mp_cmp_d(a, d);
1821
1822
59.2k
} /* end mp_cmp_d() */
1823
1824
/* }}} */
1825
1826
/* {{{ mp_cmp(a, b) */
1827
1828
int
1829
mp_cmp(const mp_int *a, const mp_int *b)
1830
19.5M
{
1831
19.5M
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1832
1833
19.5M
    if (SIGN(a) == SIGN(b)) {
1834
19.5M
        int mag;
1835
1836
19.5M
        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
1837
29.2k
            return MP_EQ;
1838
1839
19.4M
        if (SIGN(a) == ZPOS)
1840
19.4M
            return mag;
1841
0
        else
1842
0
            return -mag;
1843
1844
19.4M
    } else if (SIGN(a) == ZPOS) {
1845
0
        return MP_GT;
1846
0
    } else {
1847
0
        return MP_LT;
1848
0
    }
1849
1850
19.5M
} /* end mp_cmp() */
1851
1852
/* }}} */
1853
1854
/* {{{ mp_cmp_mag(a, b) */
1855
1856
/*
1857
  mp_cmp_mag(a, b)
1858
1859
  Compares |a| <=> |b|, and returns an appropriate comparison result
1860
 */
1861
1862
int
1863
mp_cmp_mag(const mp_int *a, const mp_int *b)
1864
0
{
1865
0
    ARGCHK(a != NULL && b != NULL, MP_EQ);
1866
1867
0
    return s_mp_cmp(a, b);
1868
1869
0
} /* end mp_cmp_mag() */
1870
1871
/* }}} */
1872
1873
/* {{{ mp_isodd(a) */
1874
1875
/*
1876
  mp_isodd(a)
1877
1878
  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1879
 */
1880
int
1881
mp_isodd(const mp_int *a)
1882
156k
{
1883
156k
    ARGMPCHK(a != NULL);
1884
1885
156k
    return (int)(DIGIT(a, 0) & 1);
1886
1887
156k
} /* end mp_isodd() */
1888
1889
/* }}} */
1890
1891
/* {{{ mp_iseven(a) */
1892
1893
int
1894
mp_iseven(const mp_int *a)
1895
20.8k
{
1896
20.8k
    return !mp_isodd(a);
1897
1898
20.8k
} /* end mp_iseven() */
1899
1900
/* }}} */
1901
1902
/* }}} */
1903
1904
/*------------------------------------------------------------------------*/
1905
/* {{{ Number theoretic functions */
1906
1907
/* {{{ mp_gcd(a, b, c) */
1908
1909
/*
1910
  Computes the GCD using the constant-time algorithm
1911
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
1912
  "Fast constant-time gcd computation and modular inversion"
1913
 */
1914
mp_err
1915
mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1916
6.22k
{
1917
6.22k
    mp_err res;
1918
6.22k
    mp_digit cond = 0, mask = 0;
1919
6.22k
    mp_int g, temp, f;
1920
6.22k
    int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
1921
6.22k
    mp_size top, flen, glen;
1922
6.22k
    mp_int *clear[3];
1923
1924
6.22k
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1925
    /*
1926
    Early exit if either of the inputs is zero.
1927
    Caller is responsible for the proper handling of inputs.
1928
    */
1929
6.22k
    if (mp_cmp_z(a) == MP_EQ) {
1930
15
        res = mp_copy(b, c);
1931
15
        SIGN(c) = ZPOS;
1932
15
        return res;
1933
6.20k
    } else if (mp_cmp_z(b) == MP_EQ) {
1934
4
        res = mp_copy(a, c);
1935
4
        SIGN(c) = ZPOS;
1936
4
        return res;
1937
4
    }
1938
1939
6.20k
    MP_CHECKOK(mp_init(&temp));
1940
6.20k
    clear[++last] = &temp;
1941
6.20k
    MP_CHECKOK(mp_init_copy(&g, a));
1942
6.20k
    clear[++last] = &g;
1943
6.20k
    MP_CHECKOK(mp_init_copy(&f, b));
1944
6.20k
    clear[++last] = &f;
1945
1946
    /*
1947
    For even case compute the number of
1948
    shared powers of 2 in f and g.
1949
    */
1950
23.9k
    for (i = 0; i < USED(&f) && i < USED(&g); i++) {
1951
17.7k
        mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
1952
1.15M
        for (j = 0; j < MP_DIGIT_BIT; j++) {
1953
1.13M
            bit &= mask;
1954
1.13M
            shifts += bit;
1955
1.13M
            mask >>= 1;
1956
1.13M
        }
1957
17.7k
    }
1958
    /* Reduce to the odd case by removing the powers of 2. */
1959
6.20k
    s_mp_div_2d(&f, shifts);
1960
6.20k
    s_mp_div_2d(&g, shifts);
1961
1962
    /* Allocate to the size of largest mp_int. */
1963
6.20k
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
1964
6.20k
    MP_CHECKOK(s_mp_grow(&f, top));
1965
6.20k
    MP_CHECKOK(s_mp_grow(&g, top));
1966
6.20k
    MP_CHECKOK(s_mp_grow(&temp, top));
1967
1968
    /* Make sure f contains the odd value. */
1969
6.20k
    MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
1970
1971
    /* Upper bound for the total iterations. */
1972
6.20k
    flen = mpl_significant_bits(&f);
1973
6.20k
    glen = mpl_significant_bits(&g);
1974
6.20k
    m = 4 + 3 * ((flen >= glen) ? flen : glen);
1975
1976
#if defined(_MSC_VER)
1977
#pragma warning(push)
1978
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
1979
#endif
1980
1981
8.21M
    for (i = 0; i < m; i++) {
1982
        /* Step 1: conditional swap. */
1983
        /* Set cond if delta > 0 and g is odd. */
1984
8.20M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
1985
        /* If cond is set replace (delta,f) with (-delta,-f). */
1986
8.20M
        delta = (-cond & -delta) | ((cond - 1) & delta);
1987
8.20M
        SIGN(&f) ^= cond;
1988
        /* If cond is set swap f with g. */
1989
8.20M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
1990
1991
        /* Step 2: elemination. */
1992
        /* Update delta. */
1993
8.20M
        delta++;
1994
        /* If g is odd, right shift (g+f) else right shift g. */
1995
8.20M
        MP_CHECKOK(mp_add(&g, &f, &temp));
1996
8.20M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
1997
8.20M
        s_mp_div_2(&g);
1998
8.20M
    }
1999
2000
#if defined(_MSC_VER)
2001
#pragma warning(pop)
2002
#endif
2003
2004
    /* GCD is in f, take the absolute value. */
2005
6.20k
    SIGN(&f) = ZPOS;
2006
2007
    /* Add back the removed powers of 2. */
2008
6.20k
    MP_CHECKOK(s_mp_mul_2d(&f, shifts));
2009
2010
6.20k
    MP_CHECKOK(mp_copy(&f, c));
2011
2012
6.20k
CLEANUP:
2013
24.8k
    while (last >= 0)
2014
18.6k
        mp_clear(clear[last--]);
2015
6.20k
    return res;
2016
6.20k
} /* end mp_gcd() */
2017
2018
/* }}} */
2019
2020
/* {{{ mp_lcm(a, b, c) */
2021
2022
/* We compute the least common multiple using the rule:
2023
2024
   ab = [a, b](a, b)
2025
2026
   ... by computing the product, and dividing out the gcd.
2027
 */
2028
2029
mp_err
2030
mp_lcm(mp_int *a, mp_int *b, mp_int *c)
2031
0
{
2032
0
    mp_int gcd, prod;
2033
0
    mp_err res;
2034
2035
0
    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
2036
2037
    /* Set up temporaries */
2038
0
    if ((res = mp_init(&gcd)) != MP_OKAY)
2039
0
        return res;
2040
0
    if ((res = mp_init(&prod)) != MP_OKAY)
2041
0
        goto GCD;
2042
2043
0
    if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
2044
0
        goto CLEANUP;
2045
0
    if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
2046
0
        goto CLEANUP;
2047
2048
0
    res = mp_div(&prod, &gcd, c, NULL);
2049
2050
0
CLEANUP:
2051
0
    mp_clear(&prod);
2052
0
GCD:
2053
0
    mp_clear(&gcd);
2054
2055
0
    return res;
2056
2057
0
} /* end mp_lcm() */
2058
2059
/* }}} */
2060
2061
/* {{{ mp_xgcd(a, b, g, x, y) */
2062
2063
/*
2064
  mp_xgcd(a, b, g, x, y)
2065
2066
  Compute g = (a, b) and values x and y satisfying Bezout's identity
2067
  (that is, ax + by = g).  This uses the binary extended GCD algorithm
2068
  based on the Stein algorithm used for mp_gcd()
2069
  See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
2070
 */
2071
2072
mp_err
2073
mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
2074
0
{
2075
0
    mp_int gx, xc, yc, u, v, A, B, C, D;
2076
0
    mp_int *clean[9];
2077
0
    mp_err res;
2078
0
    int last = -1;
2079
2080
0
    if (mp_cmp_z(b) == 0)
2081
0
        return MP_RANGE;
2082
2083
    /* Initialize all these variables we need */
2084
0
    MP_CHECKOK(mp_init(&u));
2085
0
    clean[++last] = &u;
2086
0
    MP_CHECKOK(mp_init(&v));
2087
0
    clean[++last] = &v;
2088
0
    MP_CHECKOK(mp_init(&gx));
2089
0
    clean[++last] = &gx;
2090
0
    MP_CHECKOK(mp_init(&A));
2091
0
    clean[++last] = &A;
2092
0
    MP_CHECKOK(mp_init(&B));
2093
0
    clean[++last] = &B;
2094
0
    MP_CHECKOK(mp_init(&C));
2095
0
    clean[++last] = &C;
2096
0
    MP_CHECKOK(mp_init(&D));
2097
0
    clean[++last] = &D;
2098
0
    MP_CHECKOK(mp_init_copy(&xc, a));
2099
0
    clean[++last] = &xc;
2100
0
    mp_abs(&xc, &xc);
2101
0
    MP_CHECKOK(mp_init_copy(&yc, b));
2102
0
    clean[++last] = &yc;
2103
0
    mp_abs(&yc, &yc);
2104
2105
0
    mp_set(&gx, 1);
2106
2107
    /* Divide by two until at least one of them is odd */
2108
0
    while (mp_iseven(&xc) && mp_iseven(&yc)) {
2109
0
        mp_size nx = mp_trailing_zeros(&xc);
2110
0
        mp_size ny = mp_trailing_zeros(&yc);
2111
0
        mp_size n = MP_MIN(nx, ny);
2112
0
        s_mp_div_2d(&xc, n);
2113
0
        s_mp_div_2d(&yc, n);
2114
0
        MP_CHECKOK(s_mp_mul_2d(&gx, n));
2115
0
    }
2116
2117
0
    MP_CHECKOK(mp_copy(&xc, &u));
2118
0
    MP_CHECKOK(mp_copy(&yc, &v));
2119
0
    mp_set(&A, 1);
2120
0
    mp_set(&D, 1);
2121
2122
    /* Loop through binary GCD algorithm */
2123
0
    do {
2124
0
        while (mp_iseven(&u)) {
2125
0
            s_mp_div_2(&u);
2126
2127
0
            if (mp_iseven(&A) && mp_iseven(&B)) {
2128
0
                s_mp_div_2(&A);
2129
0
                s_mp_div_2(&B);
2130
0
            } else {
2131
0
                MP_CHECKOK(mp_add(&A, &yc, &A));
2132
0
                s_mp_div_2(&A);
2133
0
                MP_CHECKOK(mp_sub(&B, &xc, &B));
2134
0
                s_mp_div_2(&B);
2135
0
            }
2136
0
        }
2137
2138
0
        while (mp_iseven(&v)) {
2139
0
            s_mp_div_2(&v);
2140
2141
0
            if (mp_iseven(&C) && mp_iseven(&D)) {
2142
0
                s_mp_div_2(&C);
2143
0
                s_mp_div_2(&D);
2144
0
            } else {
2145
0
                MP_CHECKOK(mp_add(&C, &yc, &C));
2146
0
                s_mp_div_2(&C);
2147
0
                MP_CHECKOK(mp_sub(&D, &xc, &D));
2148
0
                s_mp_div_2(&D);
2149
0
            }
2150
0
        }
2151
2152
0
        if (mp_cmp(&u, &v) >= 0) {
2153
0
            MP_CHECKOK(mp_sub(&u, &v, &u));
2154
0
            MP_CHECKOK(mp_sub(&A, &C, &A));
2155
0
            MP_CHECKOK(mp_sub(&B, &D, &B));
2156
0
        } else {
2157
0
            MP_CHECKOK(mp_sub(&v, &u, &v));
2158
0
            MP_CHECKOK(mp_sub(&C, &A, &C));
2159
0
            MP_CHECKOK(mp_sub(&D, &B, &D));
2160
0
        }
2161
0
    } while (mp_cmp_z(&u) != 0);
2162
2163
    /* copy results to output */
2164
0
    if (x)
2165
0
        MP_CHECKOK(mp_copy(&C, x));
2166
2167
0
    if (y)
2168
0
        MP_CHECKOK(mp_copy(&D, y));
2169
2170
0
    if (g)
2171
0
        MP_CHECKOK(mp_mul(&gx, &v, g));
2172
2173
0
CLEANUP:
2174
0
    while (last >= 0)
2175
0
        mp_clear(clean[last--]);
2176
2177
0
    return res;
2178
2179
0
} /* end mp_xgcd() */
2180
2181
/* }}} */
2182
2183
mp_size
2184
mp_trailing_zeros(const mp_int *mp)
2185
5.07k
{
2186
5.07k
    mp_digit d;
2187
5.07k
    mp_size n = 0;
2188
5.07k
    unsigned int ix;
2189
2190
5.07k
    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2191
0
        return n;
2192
2193
9.86k
    for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix)
2194
4.79k
        n += MP_DIGIT_BIT;
2195
5.07k
    if (!d)
2196
0
        return 0; /* shouldn't happen, but ... */
2197
5.07k
#if !defined(MP_USE_UINT_DIGIT)
2198
5.07k
    if (!(d & 0xffffffffU)) {
2199
1.55k
        d >>= 32;
2200
1.55k
        n += 32;
2201
1.55k
    }
2202
5.07k
#endif
2203
5.07k
    if (!(d & 0xffffU)) {
2204
1.32k
        d >>= 16;
2205
1.32k
        n += 16;
2206
1.32k
    }
2207
5.07k
    if (!(d & 0xffU)) {
2208
2.02k
        d >>= 8;
2209
2.02k
        n += 8;
2210
2.02k
    }
2211
5.07k
    if (!(d & 0xfU)) {
2212
597
        d >>= 4;
2213
597
        n += 4;
2214
597
    }
2215
5.07k
    if (!(d & 0x3U)) {
2216
1.77k
        d >>= 2;
2217
1.77k
        n += 2;
2218
1.77k
    }
2219
5.07k
    if (!(d & 0x1U)) {
2220
1.74k
        d >>= 1;
2221
1.74k
        n += 1;
2222
1.74k
    }
2223
5.07k
#if MP_ARGCHK == 2
2224
5.07k
    assert(0 != (d & 1));
2225
5.07k
#endif
2226
5.07k
    return n;
2227
5.07k
}
2228
2229
/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2230
** Returns k (positive) or error (negative).
2231
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2232
** by Richard Schroeppel (a.k.a. Captain Nemo).
2233
*/
2234
mp_err
2235
s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2236
0
{
2237
0
    mp_err res;
2238
0
    mp_err k = 0;
2239
0
    mp_int d, f, g;
2240
2241
0
    ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
2242
2243
0
    MP_DIGITS(&d) = 0;
2244
0
    MP_DIGITS(&f) = 0;
2245
0
    MP_DIGITS(&g) = 0;
2246
0
    MP_CHECKOK(mp_init(&d));
2247
0
    MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
2248
0
    MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
2249
2250
0
    mp_set(c, 1);
2251
0
    mp_zero(&d);
2252
2253
0
    if (mp_cmp_z(&f) == 0) {
2254
0
        res = MP_UNDEF;
2255
0
    } else
2256
0
        for (;;) {
2257
0
            int diff_sign;
2258
0
            while (mp_iseven(&f)) {
2259
0
                mp_size n = mp_trailing_zeros(&f);
2260
0
                if (!n) {
2261
0
                    res = MP_UNDEF;
2262
0
                    goto CLEANUP;
2263
0
                }
2264
0
                s_mp_div_2d(&f, n);
2265
0
                MP_CHECKOK(s_mp_mul_2d(&d, n));
2266
0
                k += n;
2267
0
            }
2268
0
            if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
2269
0
                res = k;
2270
0
                break;
2271
0
            }
2272
0
            diff_sign = mp_cmp(&f, &g);
2273
0
            if (diff_sign < 0) { /* f < g */
2274
0
                s_mp_exch(&f, &g);
2275
0
                s_mp_exch(c, &d);
2276
0
            } else if (diff_sign == 0) { /* f == g */
2277
0
                res = MP_UNDEF;          /* a and p are not relatively prime */
2278
0
                break;
2279
0
            }
2280
0
            if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
2281
0
                MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
2282
0
                MP_CHECKOK(mp_sub(c, &d, c));   /* c = c - d */
2283
0
            } else {
2284
0
                MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
2285
0
                MP_CHECKOK(mp_add(c, &d, c));   /* c = c + d */
2286
0
            }
2287
0
        }
2288
0
    if (res >= 0) {
2289
0
        if (mp_cmp_mag(c, p) >= 0) {
2290
0
            MP_CHECKOK(mp_div(c, p, NULL, c));
2291
0
        }
2292
0
        if (MP_SIGN(c) != MP_ZPOS) {
2293
0
            MP_CHECKOK(mp_add(c, p, c));
2294
0
        }
2295
0
        res = k;
2296
0
    }
2297
2298
0
CLEANUP:
2299
0
    mp_clear(&d);
2300
0
    mp_clear(&f);
2301
0
    mp_clear(&g);
2302
0
    return res;
2303
0
}
2304
2305
/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2306
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2307
** by Richard Schroeppel (a.k.a. Captain Nemo).
2308
*/
2309
mp_digit
2310
s_mp_invmod_radix(mp_digit P)
2311
107k
{
2312
107k
    mp_digit T = P;
2313
107k
    T *= 2 - (P * T);
2314
107k
    T *= 2 - (P * T);
2315
107k
    T *= 2 - (P * T);
2316
107k
    T *= 2 - (P * T);
2317
107k
#if !defined(MP_USE_UINT_DIGIT)
2318
107k
    T *= 2 - (P * T);
2319
107k
    T *= 2 - (P * T);
2320
107k
#endif
2321
107k
    return T;
2322
107k
}
2323
2324
/* Given c, k, and prime p, where a*c == 2**k (mod p),
2325
** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2326
** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2327
** by Richard Schroeppel (a.k.a. Captain Nemo).
2328
*/
2329
mp_err
2330
s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2331
0
{
2332
0
    int k_orig = k;
2333
0
    mp_digit r;
2334
0
    mp_size ix;
2335
0
    mp_err res;
2336
2337
0
    if (mp_cmp_z(c) < 0) {           /* c < 0 */
2338
0
        MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
2339
0
    } else {
2340
0
        MP_CHECKOK(mp_copy(c, x)); /* x = c */
2341
0
    }
2342
2343
    /* make sure x is large enough */
2344
0
    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2345
0
    ix = MP_MAX(ix, MP_USED(x));
2346
0
    MP_CHECKOK(s_mp_pad(x, ix));
2347
2348
0
    r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0));
2349
2350
0
    for (ix = 0; k > 0; ix++) {
2351
0
        int j = MP_MIN(k, MP_DIGIT_BIT);
2352
0
        mp_digit v = r * MP_DIGIT(x, ix);
2353
0
        if (j < MP_DIGIT_BIT) {
2354
0
            v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
2355
0
        }
2356
0
        s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2357
0
        k -= j;
2358
0
    }
2359
0
    s_mp_clamp(x);
2360
0
    s_mp_div_2d(x, k_orig);
2361
0
    res = MP_OKAY;
2362
2363
0
CLEANUP:
2364
0
    return res;
2365
0
}
2366
2367
/*
2368
  Computes the modular inverse using the constant-time algorithm
2369
  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
2370
  "Fast constant-time gcd computation and modular inversion"
2371
 */
2372
mp_err
2373
s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2374
5.70k
{
2375
5.70k
    mp_err res;
2376
5.70k
    mp_digit cond = 0;
2377
5.70k
    mp_int g, f, v, r, temp;
2378
5.70k
    int i, its, delta = 1, last = -1;
2379
5.70k
    mp_size top, flen, glen;
2380
5.70k
    mp_int *clear[6];
2381
2382
5.70k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2383
    /* Check for invalid inputs. */
2384
5.70k
    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
2385
0
        return MP_RANGE;
2386
2387
5.70k
    if (a == m || mp_iseven(m))
2388
0
        return MP_UNDEF;
2389
2390
5.70k
    MP_CHECKOK(mp_init(&temp));
2391
5.70k
    clear[++last] = &temp;
2392
5.70k
    MP_CHECKOK(mp_init(&v));
2393
5.70k
    clear[++last] = &v;
2394
5.70k
    MP_CHECKOK(mp_init(&r));
2395
5.70k
    clear[++last] = &r;
2396
5.70k
    MP_CHECKOK(mp_init_copy(&g, a));
2397
5.70k
    clear[++last] = &g;
2398
5.70k
    MP_CHECKOK(mp_init_copy(&f, m));
2399
5.70k
    clear[++last] = &f;
2400
2401
5.70k
    mp_set(&v, 0);
2402
5.70k
    mp_set(&r, 1);
2403
2404
    /* Allocate to the size of largest mp_int. */
2405
5.70k
    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
2406
5.70k
    MP_CHECKOK(s_mp_grow(&f, top));
2407
5.70k
    MP_CHECKOK(s_mp_grow(&g, top));
2408
5.70k
    MP_CHECKOK(s_mp_grow(&temp, top));
2409
5.70k
    MP_CHECKOK(s_mp_grow(&v, top));
2410
5.70k
    MP_CHECKOK(s_mp_grow(&r, top));
2411
2412
    /* Upper bound for the total iterations. */
2413
5.70k
    flen = mpl_significant_bits(&f);
2414
5.70k
    glen = mpl_significant_bits(&g);
2415
5.70k
    its = 4 + 3 * ((flen >= glen) ? flen : glen);
2416
2417
#if defined(_MSC_VER)
2418
#pragma warning(push)
2419
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2420
#endif
2421
2422
4.76M
    for (i = 0; i < its; i++) {
2423
        /* Step 1: conditional swap. */
2424
        /* Set cond if delta > 0 and g is odd. */
2425
4.76M
        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
2426
        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
2427
4.76M
        delta = (-cond & -delta) | ((cond - 1) & delta);
2428
4.76M
        SIGN(&f) ^= cond;
2429
4.76M
        SIGN(&v) ^= cond;
2430
        /* If cond is set swap (f,v) with (g,r). */
2431
4.76M
        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
2432
4.76M
        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
2433
2434
        /* Step 2: elemination. */
2435
        /* Update delta */
2436
4.76M
        delta++;
2437
        /* If g is odd replace r with (r+v). */
2438
4.76M
        MP_CHECKOK(mp_add(&r, &v, &temp));
2439
4.76M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
2440
        /* If g is odd, right shift (g+f) else right shift g. */
2441
4.76M
        MP_CHECKOK(mp_add(&g, &f, &temp));
2442
4.76M
        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
2443
4.76M
        s_mp_div_2(&g);
2444
        /*
2445
        If r is even, right shift it.
2446
        If r is odd, right shift (r+m) which is even because m is odd.
2447
        We want the result modulo m so adding in multiples of m here vanish.
2448
        */
2449
4.76M
        MP_CHECKOK(mp_add(&r, m, &temp));
2450
4.76M
        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
2451
4.76M
        s_mp_div_2(&r);
2452
4.76M
    }
2453
2454
#if defined(_MSC_VER)
2455
#pragma warning(pop)
2456
#endif
2457
2458
    /* We have the inverse in v, propagate sign from f. */
2459
5.70k
    SIGN(&v) ^= SIGN(&f);
2460
    /* GCD is in f, take the absolute value. */
2461
5.70k
    SIGN(&f) = ZPOS;
2462
2463
    /* If gcd != 1, not invertible. */
2464
5.70k
    if (mp_cmp_d(&f, 1) != MP_EQ) {
2465
240
        res = MP_UNDEF;
2466
240
        goto CLEANUP;
2467
240
    }
2468
2469
    /* Return inverse modulo m. */
2470
5.46k
    MP_CHECKOK(mp_mod(&v, m, c));
2471
2472
5.70k
CLEANUP:
2473
34.2k
    while (last >= 0)
2474
28.5k
        mp_clear(clear[last--]);
2475
5.70k
    return res;
2476
5.46k
}
2477
2478
/* Known good algorithm for computing modular inverse.  But slow. */
2479
mp_err
2480
mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2481
0
{
2482
0
    mp_int g, x;
2483
0
    mp_err res;
2484
2485
0
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2486
2487
0
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2488
0
        return MP_RANGE;
2489
2490
0
    MP_DIGITS(&g) = 0;
2491
0
    MP_DIGITS(&x) = 0;
2492
0
    MP_CHECKOK(mp_init(&x));
2493
0
    MP_CHECKOK(mp_init(&g));
2494
2495
0
    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
2496
2497
0
    if (mp_cmp_d(&g, 1) != MP_EQ) {
2498
0
        res = MP_UNDEF;
2499
0
        goto CLEANUP;
2500
0
    }
2501
2502
0
    res = mp_mod(&x, m, c);
2503
0
    SIGN(c) = SIGN(a);
2504
2505
0
CLEANUP:
2506
0
    mp_clear(&x);
2507
0
    mp_clear(&g);
2508
2509
0
    return res;
2510
0
}
2511
2512
/* modular inverse where modulus is 2**k. */
2513
/* c = a**-1 mod 2**k */
2514
mp_err
2515
s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2516
9.92k
{
2517
9.92k
    mp_err res;
2518
9.92k
    mp_size ix = k + 4;
2519
9.92k
    mp_int t0, t1, val, tmp, two2k;
2520
2521
9.92k
    static const mp_digit d2 = 2;
2522
9.92k
    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2523
2524
9.92k
    if (mp_iseven(a))
2525
0
        return MP_UNDEF;
2526
2527
#if defined(_MSC_VER)
2528
#pragma warning(push)
2529
#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
2530
#endif
2531
9.92k
    if (k <= MP_DIGIT_BIT) {
2532
4.48k
        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
2533
        /* propagate the sign from mp_int */
2534
4.48k
        i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
2535
4.48k
        if (k < MP_DIGIT_BIT)
2536
4.39k
            i &= ((mp_digit)1 << k) - (mp_digit)1;
2537
4.48k
        mp_set(c, i);
2538
4.48k
        return MP_OKAY;
2539
4.48k
    }
2540
#if defined(_MSC_VER)
2541
#pragma warning(pop)
2542
#endif
2543
2544
5.44k
    MP_DIGITS(&t0) = 0;
2545
5.44k
    MP_DIGITS(&t1) = 0;
2546
5.44k
    MP_DIGITS(&val) = 0;
2547
5.44k
    MP_DIGITS(&tmp) = 0;
2548
5.44k
    MP_DIGITS(&two2k) = 0;
2549
5.44k
    MP_CHECKOK(mp_init_copy(&val, a));
2550
5.44k
    s_mp_mod_2d(&val, k);
2551
5.44k
    MP_CHECKOK(mp_init_copy(&t0, &val));
2552
5.44k
    MP_CHECKOK(mp_init_copy(&t1, &t0));
2553
5.44k
    MP_CHECKOK(mp_init(&tmp));
2554
5.44k
    MP_CHECKOK(mp_init(&two2k));
2555
5.44k
    MP_CHECKOK(s_mp_2expt(&two2k, k));
2556
29.2k
    do {
2557
29.2k
        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
2558
29.2k
        MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
2559
29.2k
        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
2560
29.2k
        s_mp_mod_2d(&t1, k);
2561
58.4k
        while (MP_SIGN(&t1) != MP_ZPOS) {
2562
29.2k
            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
2563
29.2k
        }
2564
29.2k
        if (mp_cmp(&t1, &t0) == MP_EQ)
2565
5.44k
            break;
2566
23.8k
        MP_CHECKOK(mp_copy(&t1, &t0));
2567
23.8k
    } while (--ix > 0);
2568
5.44k
    if (!ix) {
2569
0
        res = MP_UNDEF;
2570
5.44k
    } else {
2571
5.44k
        mp_exch(c, &t1);
2572
5.44k
    }
2573
2574
5.44k
CLEANUP:
2575
5.44k
    mp_clear(&t0);
2576
5.44k
    mp_clear(&t1);
2577
5.44k
    mp_clear(&val);
2578
5.44k
    mp_clear(&tmp);
2579
5.44k
    mp_clear(&two2k);
2580
5.44k
    return res;
2581
5.44k
}
2582
2583
mp_err
2584
s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2585
5.25k
{
2586
5.25k
    mp_err res;
2587
5.25k
    mp_size k;
2588
5.25k
    mp_int oddFactor, evenFactor; /* factors of the modulus */
2589
5.25k
    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2590
5.25k
    mp_int C2, tmp1, tmp2;
2591
2592
5.25k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2593
2594
    /*static const mp_digit d1 = 1; */
2595
    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2596
2597
5.25k
    if ((res = s_mp_ispow2(m)) >= 0) {
2598
180
        k = res;
2599
180
        return s_mp_invmod_2d(a, k, c);
2600
180
    }
2601
5.07k
    MP_DIGITS(&oddFactor) = 0;
2602
5.07k
    MP_DIGITS(&evenFactor) = 0;
2603
5.07k
    MP_DIGITS(&oddPart) = 0;
2604
5.07k
    MP_DIGITS(&evenPart) = 0;
2605
5.07k
    MP_DIGITS(&C2) = 0;
2606
5.07k
    MP_DIGITS(&tmp1) = 0;
2607
5.07k
    MP_DIGITS(&tmp2) = 0;
2608
2609
5.07k
    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
2610
5.07k
    MP_CHECKOK(mp_init(&evenFactor));
2611
5.07k
    MP_CHECKOK(mp_init(&oddPart));
2612
5.07k
    MP_CHECKOK(mp_init(&evenPart));
2613
5.07k
    MP_CHECKOK(mp_init(&C2));
2614
5.07k
    MP_CHECKOK(mp_init(&tmp1));
2615
5.07k
    MP_CHECKOK(mp_init(&tmp2));
2616
2617
5.07k
    k = mp_trailing_zeros(m);
2618
5.07k
    s_mp_div_2d(&oddFactor, k);
2619
5.07k
    MP_CHECKOK(s_mp_2expt(&evenFactor, k));
2620
2621
    /* compute a**-1 mod oddFactor. */
2622
5.07k
    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
2623
    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2624
4.87k
    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
2625
2626
    /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2627
    /* let m1 = oddFactor,  v1 = oddPart,
2628
     * let m2 = evenFactor, v2 = evenPart.
2629
     */
2630
2631
    /* Compute C2 = m1**-1 mod m2. */
2632
4.87k
    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
2633
2634
    /* compute u = (v2 - v1)*C2 mod m2 */
2635
4.87k
    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
2636
4.87k
    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
2637
4.87k
    s_mp_mod_2d(&tmp2, k);
2638
8.02k
    while (MP_SIGN(&tmp2) != MP_ZPOS) {
2639
3.14k
        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
2640
3.14k
    }
2641
2642
    /* compute answer = v1 + u*m1 */
2643
4.87k
    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
2644
4.87k
    MP_CHECKOK(mp_add(&oddPart, c, c));
2645
    /* not sure this is necessary, but it's low cost if not. */
2646
4.87k
    MP_CHECKOK(mp_mod(c, m, c));
2647
2648
5.07k
CLEANUP:
2649
5.07k
    mp_clear(&oddFactor);
2650
5.07k
    mp_clear(&evenFactor);
2651
5.07k
    mp_clear(&oddPart);
2652
5.07k
    mp_clear(&evenPart);
2653
5.07k
    mp_clear(&C2);
2654
5.07k
    mp_clear(&tmp1);
2655
5.07k
    mp_clear(&tmp2);
2656
5.07k
    return res;
2657
4.87k
}
2658
2659
/* {{{ mp_invmod(a, m, c) */
2660
2661
/*
2662
  mp_invmod(a, m, c)
2663
2664
  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2665
  This is equivalent to the question of whether (a, m) = 1.  If not,
2666
  MP_UNDEF is returned, and there is no inverse.
2667
 */
2668
2669
mp_err
2670
mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2671
5.89k
{
2672
5.89k
    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
2673
2674
5.89k
    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2675
0
        return MP_RANGE;
2676
2677
5.89k
    if (mp_isodd(m)) {
2678
630
        return s_mp_invmod_odd_m(a, m, c);
2679
630
    }
2680
5.26k
    if (mp_iseven(a))
2681
6
        return MP_UNDEF; /* not invertable */
2682
2683
5.25k
    return s_mp_invmod_even_m(a, m, c);
2684
2685
5.26k
} /* end mp_invmod() */
2686
2687
/* }}} */
2688
2689
/* }}} */
2690
2691
/*------------------------------------------------------------------------*/
2692
/* {{{ mp_print(mp, ofp) */
2693
2694
#if MP_IOFUNC
2695
/*
2696
  mp_print(mp, ofp)
2697
2698
  Print a textual representation of the given mp_int on the output
2699
  stream 'ofp'.  Output is generated using the internal radix.
2700
 */
2701
2702
void
2703
mp_print(mp_int *mp, FILE *ofp)
2704
{
2705
    int ix;
2706
2707
    if (mp == NULL || ofp == NULL)
2708
        return;
2709
2710
    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2711
2712
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2713
        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2714
    }
2715
2716
} /* end mp_print() */
2717
2718
#endif /* if MP_IOFUNC */
2719
2720
/* }}} */
2721
2722
/*------------------------------------------------------------------------*/
2723
/* {{{ More I/O Functions */
2724
2725
/* {{{ mp_read_raw(mp, str, len) */
2726
2727
/*
2728
   mp_read_raw(mp, str, len)
2729
2730
   Read in a raw value (base 256) into the given mp_int
2731
 */
2732
2733
mp_err
2734
mp_read_raw(mp_int *mp, char *str, int len)
2735
0
{
2736
0
    int ix;
2737
0
    mp_err res;
2738
0
    unsigned char *ustr = (unsigned char *)str;
2739
2740
0
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2741
2742
0
    mp_zero(mp);
2743
2744
    /* Read the rest of the digits */
2745
0
    for (ix = 1; ix < len; ix++) {
2746
0
        if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2747
0
            return res;
2748
0
        if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2749
0
            return res;
2750
0
    }
2751
2752
    /* Get sign from first byte */
2753
0
    if (ustr[0])
2754
0
        SIGN(mp) = NEG;
2755
0
    else
2756
0
        SIGN(mp) = ZPOS;
2757
2758
0
    return MP_OKAY;
2759
2760
0
} /* end mp_read_raw() */
2761
2762
/* }}} */
2763
2764
/* {{{ mp_raw_size(mp) */
2765
2766
int
2767
mp_raw_size(mp_int *mp)
2768
0
{
2769
0
    ARGCHK(mp != NULL, 0);
2770
2771
0
    return (USED(mp) * sizeof(mp_digit)) + 1;
2772
2773
0
} /* end mp_raw_size() */
2774
2775
/* }}} */
2776
2777
/* {{{ mp_toraw(mp, str) */
2778
2779
mp_err
2780
mp_toraw(mp_int *mp, char *str)
2781
0
{
2782
0
    int ix, jx, pos = 1;
2783
2784
0
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2785
2786
0
    str[0] = (char)SIGN(mp);
2787
2788
    /* Iterate over each digit... */
2789
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
2790
0
        mp_digit d = DIGIT(mp, ix);
2791
2792
        /* Unpack digit bytes, high order first */
2793
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2794
0
            str[pos++] = (char)(d >> (jx * CHAR_BIT));
2795
0
        }
2796
0
    }
2797
2798
0
    return MP_OKAY;
2799
2800
0
} /* end mp_toraw() */
2801
2802
/* }}} */
2803
2804
/* {{{ mp_read_radix(mp, str, radix) */
2805
2806
/*
2807
  mp_read_radix(mp, str, radix)
2808
2809
  Read an integer from the given string, and set mp to the resulting
2810
  value.  The input is presumed to be in base 10.  Leading non-digit
2811
  characters are ignored, and the function reads until a non-digit
2812
  character or the end of the string.
2813
 */
2814
2815
mp_err
2816
mp_read_radix(mp_int *mp, const char *str, int radix)
2817
0
{
2818
0
    int ix = 0, val = 0;
2819
0
    mp_err res;
2820
0
    mp_sign sig = ZPOS;
2821
2822
0
    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2823
0
           MP_BADARG);
2824
2825
0
    mp_zero(mp);
2826
2827
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2828
0
    while (str[ix] &&
2829
0
           (s_mp_tovalue(str[ix], radix) < 0) &&
2830
0
           str[ix] != '-' &&
2831
0
           str[ix] != '+') {
2832
0
        ++ix;
2833
0
    }
2834
2835
0
    if (str[ix] == '-') {
2836
0
        sig = NEG;
2837
0
        ++ix;
2838
0
    } else if (str[ix] == '+') {
2839
0
        sig = ZPOS; /* this is the default anyway... */
2840
0
        ++ix;
2841
0
    }
2842
2843
0
    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2844
0
        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2845
0
            return res;
2846
0
        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
2847
0
            return res;
2848
0
        ++ix;
2849
0
    }
2850
2851
0
    if (s_mp_cmp_d(mp, 0) == MP_EQ)
2852
0
        SIGN(mp) = ZPOS;
2853
0
    else
2854
0
        SIGN(mp) = sig;
2855
2856
0
    return MP_OKAY;
2857
2858
0
} /* end mp_read_radix() */
2859
2860
mp_err
2861
mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
2862
0
{
2863
0
    int radix = default_radix;
2864
0
    int cx;
2865
0
    mp_sign sig = ZPOS;
2866
0
    mp_err res;
2867
2868
    /* Skip leading non-digit characters until a digit or '-' or '+' */
2869
0
    while ((cx = *str) != 0 &&
2870
0
           (s_mp_tovalue(cx, radix) < 0) &&
2871
0
           cx != '-' &&
2872
0
           cx != '+') {
2873
0
        ++str;
2874
0
    }
2875
2876
0
    if (cx == '-') {
2877
0
        sig = NEG;
2878
0
        ++str;
2879
0
    } else if (cx == '+') {
2880
0
        sig = ZPOS; /* this is the default anyway... */
2881
0
        ++str;
2882
0
    }
2883
2884
0
    if (str[0] == '0') {
2885
0
        if ((str[1] | 0x20) == 'x') {
2886
0
            radix = 16;
2887
0
            str += 2;
2888
0
        } else {
2889
0
            radix = 8;
2890
0
            str++;
2891
0
        }
2892
0
    }
2893
0
    res = mp_read_radix(a, str, radix);
2894
0
    if (res == MP_OKAY) {
2895
0
        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2896
0
    }
2897
0
    return res;
2898
0
}
2899
2900
/* }}} */
2901
2902
/* {{{ mp_radix_size(mp, radix) */
2903
2904
int
2905
mp_radix_size(mp_int *mp, int radix)
2906
0
{
2907
0
    int bits;
2908
2909
0
    if (!mp || radix < 2 || radix > MAX_RADIX)
2910
0
        return 0;
2911
2912
0
    bits = USED(mp) * DIGIT_BIT - 1;
2913
2914
0
    return SIGN(mp) + s_mp_outlen(bits, radix);
2915
2916
0
} /* end mp_radix_size() */
2917
2918
/* }}} */
2919
2920
/* {{{ mp_toradix(mp, str, radix) */
2921
2922
mp_err
2923
mp_toradix(mp_int *mp, char *str, int radix)
2924
0
{
2925
0
    int ix, pos = 0;
2926
2927
0
    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2928
0
    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2929
2930
0
    if (mp_cmp_z(mp) == MP_EQ) {
2931
0
        str[0] = '0';
2932
0
        str[1] = '\0';
2933
0
    } else {
2934
0
        mp_err res;
2935
0
        mp_int tmp;
2936
0
        mp_sign sgn;
2937
0
        mp_digit rem, rdx = (mp_digit)radix;
2938
0
        char ch;
2939
2940
0
        if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2941
0
            return res;
2942
2943
        /* Save sign for later, and take absolute value */
2944
0
        sgn = SIGN(&tmp);
2945
0
        SIGN(&tmp) = ZPOS;
2946
2947
        /* Generate output digits in reverse order      */
2948
0
        while (mp_cmp_z(&tmp) != 0) {
2949
0
            if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2950
0
                mp_clear(&tmp);
2951
0
                return res;
2952
0
            }
2953
2954
            /* Generate digits, use capital letters */
2955
0
            ch = s_mp_todigit(rem, radix, 0);
2956
2957
0
            str[pos++] = ch;
2958
0
        }
2959
2960
        /* Add - sign if original value was negative */
2961
0
        if (sgn == NEG)
2962
0
            str[pos++] = '-';
2963
2964
        /* Add trailing NUL to end the string        */
2965
0
        str[pos--] = '\0';
2966
2967
        /* Reverse the digits and sign indicator     */
2968
0
        ix = 0;
2969
0
        while (ix < pos) {
2970
0
            char tmpc = str[ix];
2971
2972
0
            str[ix] = str[pos];
2973
0
            str[pos] = tmpc;
2974
0
            ++ix;
2975
0
            --pos;
2976
0
        }
2977
2978
0
        mp_clear(&tmp);
2979
0
    }
2980
2981
0
    return MP_OKAY;
2982
2983
0
} /* end mp_toradix() */
2984
2985
/* }}} */
2986
2987
/* {{{ mp_tovalue(ch, r) */
2988
2989
int
2990
mp_tovalue(char ch, int r)
2991
0
{
2992
0
    return s_mp_tovalue(ch, r);
2993
2994
0
} /* end mp_tovalue() */
2995
2996
/* }}} */
2997
2998
/* }}} */
2999
3000
/* {{{ mp_strerror(ec) */
3001
3002
/*
3003
  mp_strerror(ec)
3004
3005
  Return a string describing the meaning of error code 'ec'.  The
3006
  string returned is allocated in static memory, so the caller should
3007
  not attempt to modify or free the memory associated with this
3008
  string.
3009
 */
3010
const char *
3011
mp_strerror(mp_err ec)
3012
0
{
3013
0
    int aec = (ec < 0) ? -ec : ec;
3014
3015
    /* Code values are negative, so the senses of these comparisons
3016
     are accurate */
3017
0
    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
3018
0
        return mp_err_string[0]; /* unknown error code */
3019
0
    } else {
3020
0
        return mp_err_string[aec + 1];
3021
0
    }
3022
3023
0
} /* end mp_strerror() */
3024
3025
/* }}} */
3026
3027
/*========================================================================*/
3028
/*------------------------------------------------------------------------*/
3029
/* Static function definitions (internal use only)                        */
3030
3031
/* {{{ Memory management */
3032
3033
/* {{{ s_mp_grow(mp, min) */
3034
3035
/* Make sure there are at least 'min' digits allocated to mp              */
3036
mp_err
3037
s_mp_grow(mp_int *mp, mp_size min)
3038
2.05M
{
3039
2.05M
    ARGCHK(mp != NULL, MP_BADARG);
3040
3041
2.05M
    if (min > ALLOC(mp)) {
3042
2.01M
        mp_digit *tmp;
3043
3044
        /* Set min to next nearest default precision block size */
3045
2.01M
        min = MP_ROUNDUP(min, s_mp_defprec);
3046
3047
2.01M
        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
3048
0
            return MP_MEM;
3049
3050
2.01M
        s_mp_copy(DIGITS(mp), tmp, USED(mp));
3051
3052
2.01M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3053
2.01M
        s_mp_free(DIGITS(mp));
3054
2.01M
        DIGITS(mp) = tmp;
3055
2.01M
        ALLOC(mp) = min;
3056
2.01M
    }
3057
3058
2.05M
    return MP_OKAY;
3059
3060
2.05M
} /* end s_mp_grow() */
3061
3062
/* }}} */
3063
3064
/* {{{ s_mp_pad(mp, min) */
3065
3066
/* Make sure the used size of mp is at least 'min', growing if needed     */
3067
mp_err
3068
s_mp_pad(mp_int *mp, mp_size min)
3069
116M
{
3070
116M
    ARGCHK(mp != NULL, MP_BADARG);
3071
3072
116M
    if (min > USED(mp)) {
3073
95.1M
        mp_err res;
3074
3075
        /* Make sure there is room to increase precision  */
3076
95.1M
        if (min > ALLOC(mp)) {
3077
2.01M
            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
3078
0
                return res;
3079
93.1M
        } else {
3080
93.1M
            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
3081
93.1M
        }
3082
3083
        /* Increase precision; should already be 0-filled */
3084
95.1M
        USED(mp) = min;
3085
95.1M
    }
3086
3087
116M
    return MP_OKAY;
3088
3089
116M
} /* end s_mp_pad() */
3090
3091
/* }}} */
3092
3093
/* {{{ s_mp_setz(dp, count) */
3094
3095
/* Set 'count' digits pointed to by dp to be zeroes                       */
3096
void
3097
s_mp_setz(mp_digit *dp, mp_size count)
3098
130M
{
3099
130M
    memset(dp, 0, count * sizeof(mp_digit));
3100
130M
} /* end s_mp_setz() */
3101
3102
/* }}} */
3103
3104
/* {{{ s_mp_copy(sp, dp, count) */
3105
3106
/* Copy 'count' digits from sp to dp                                      */
3107
void
3108
s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
3109
16.7M
{
3110
16.7M
    memcpy(dp, sp, count * sizeof(mp_digit));
3111
16.7M
} /* end s_mp_copy() */
3112
3113
/* }}} */
3114
3115
/* {{{ s_mp_alloc(nb, ni) */
3116
3117
/* Allocate ni records of nb bytes each, and return a pointer to that     */
3118
void *
3119
s_mp_alloc(size_t nb, size_t ni)
3120
15.8M
{
3121
15.8M
    return calloc(nb, ni);
3122
3123
15.8M
} /* end s_mp_alloc() */
3124
3125
/* }}} */
3126
3127
/* {{{ s_mp_free(ptr) */
3128
3129
/* Free the memory pointed to by ptr                                      */
3130
void
3131
s_mp_free(void *ptr)
3132
15.8M
{
3133
15.8M
    if (ptr) {
3134
15.8M
        free(ptr);
3135
15.8M
    }
3136
15.8M
} /* end s_mp_free() */
3137
3138
/* }}} */
3139
3140
/* {{{ s_mp_clamp(mp) */
3141
3142
/* Remove leading zeroes from the given value                             */
3143
void
3144
s_mp_clamp(mp_int *mp)
3145
160M
{
3146
160M
    mp_size used = MP_USED(mp);
3147
513M
    while (used > 1 && DIGIT(mp, used - 1) == 0)
3148
353M
        --used;
3149
160M
    MP_USED(mp) = used;
3150
160M
    if (used == 1 && DIGIT(mp, 0) == 0)
3151
10.9M
        MP_SIGN(mp) = ZPOS;
3152
160M
} /* end s_mp_clamp() */
3153
3154
/* }}} */
3155
3156
/* {{{ s_mp_exch(a, b) */
3157
3158
/* Exchange the data for a and b; (b, a) = (a, b)                         */
3159
void
3160
s_mp_exch(mp_int *a, mp_int *b)
3161
2.25M
{
3162
2.25M
    mp_int tmp;
3163
2.25M
    if (!a || !b) {
3164
0
        return;
3165
0
    }
3166
3167
2.25M
    tmp = *a;
3168
2.25M
    *a = *b;
3169
2.25M
    *b = tmp;
3170
3171
2.25M
} /* end s_mp_exch() */
3172
3173
/* }}} */
3174
3175
/* }}} */
3176
3177
/* {{{ Arithmetic helpers */
3178
3179
/* {{{ s_mp_lshd(mp, p) */
3180
3181
/*
3182
   Shift mp leftward by p digits, growing if needed, and zero-filling
3183
   the in-shifted digits at the right end.  This is a convenient
3184
   alternative to multiplication by powers of the radix
3185
 */
3186
3187
mp_err
3188
s_mp_lshd(mp_int *mp, mp_size p)
3189
6.70M
{
3190
6.70M
    mp_err res;
3191
6.70M
    unsigned int ix;
3192
3193
6.70M
    ARGCHK(mp != NULL, MP_BADARG);
3194
3195
6.70M
    if (p == 0)
3196
0
        return MP_OKAY;
3197
3198
6.70M
    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
3199
2.06k
        return MP_OKAY;
3200
3201
6.70M
    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
3202
0
        return res;
3203
3204
    /* Shift all the significant figures over as needed */
3205
101M
    for (ix = USED(mp) - p; ix-- > 0;) {
3206
95.1M
        DIGIT(mp, ix + p) = DIGIT(mp, ix);
3207
95.1M
    }
3208
3209
    /* Fill the bottom digits with zeroes */
3210
22.9M
    for (ix = 0; (mp_size)ix < p; ix++)
3211
16.2M
        DIGIT(mp, ix) = 0;
3212
3213
6.70M
    return MP_OKAY;
3214
3215
6.70M
} /* end s_mp_lshd() */
3216
3217
/* }}} */
3218
3219
/* {{{ s_mp_mul_2d(mp, d) */
3220
3221
/*
3222
  Multiply the integer by 2^d, where d is a number of bits.  This
3223
  amounts to a bitwise shift of the value.
3224
 */
3225
mp_err
3226
s_mp_mul_2d(mp_int *mp, mp_digit d)
3227
108k
{
3228
108k
    mp_err res;
3229
108k
    mp_digit dshift, rshift, mask, x, prev = 0;
3230
108k
    mp_digit *pa = NULL;
3231
108k
    int i;
3232
3233
108k
    ARGCHK(mp != NULL, MP_BADARG);
3234
3235
108k
    dshift = d / MP_DIGIT_BIT;
3236
108k
    d %= MP_DIGIT_BIT;
3237
    /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
3238
    /* mod and corresponding mask logic avoid that when d = 0 */
3239
108k
    rshift = MP_DIGIT_BIT - d;
3240
108k
    rshift %= MP_DIGIT_BIT;
3241
    /* mask = (2**d - 1) * 2**(w-d) mod 2**w */
3242
108k
    mask = (DIGIT_MAX << rshift) + 1;
3243
108k
    mask &= DIGIT_MAX - 1;
3244
    /* bits to be shifted out of the top word */
3245
108k
    x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
3246
3247
108k
    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0))))
3248
0
        return res;
3249
3250
108k
    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3251
0
        return res;
3252
3253
108k
    pa = MP_DIGITS(mp) + dshift;
3254
3255
2.32M
    for (i = MP_USED(mp) - dshift; i > 0; i--) {
3256
2.21M
        x = *pa;
3257
2.21M
        *pa++ = (x << d) | prev;
3258
2.21M
        prev = (x & mask) >> rshift;
3259
2.21M
    }
3260
3261
108k
    s_mp_clamp(mp);
3262
108k
    return MP_OKAY;
3263
108k
} /* end s_mp_mul_2d() */
3264
3265
/* {{{ s_mp_rshd(mp, p) */
3266
3267
/*
3268
   Shift mp rightward by p digits.  Maintains the invariant that
3269
   digits above the precision are all zero.  Digits shifted off the
3270
   end are lost.  Cannot fail.
3271
 */
3272
3273
void
3274
s_mp_rshd(mp_int *mp, mp_size p)
3275
95.3M
{
3276
95.3M
    mp_size ix;
3277
95.3M
    mp_digit *src, *dst;
3278
3279
95.3M
    if (p == 0)
3280
17.7M
        return;
3281
3282
    /* Shortcut when all digits are to be shifted off */
3283
77.6M
    if (p >= USED(mp)) {
3284
3.05M
        s_mp_setz(DIGITS(mp), ALLOC(mp));
3285
3.05M
        USED(mp) = 1;
3286
3.05M
        SIGN(mp) = ZPOS;
3287
3.05M
        return;
3288
3.05M
    }
3289
3290
    /* Shift all the significant figures over as needed */
3291
74.5M
    dst = MP_DIGITS(mp);
3292
74.5M
    src = dst + p;
3293
1.58G
    for (ix = USED(mp) - p; ix > 0; ix--)
3294
1.51G
        *dst++ = *src++;
3295
3296
74.5M
    MP_USED(mp) -= p;
3297
    /* Fill the top digits with zeroes */
3298
1.57G
    while (p-- > 0)
3299
1.49G
        *dst++ = 0;
3300
3301
74.5M
} /* end s_mp_rshd() */
3302
3303
/* }}} */
3304
3305
/* {{{ s_mp_div_2(mp) */
3306
3307
/* Divide by two -- take advantage of radix properties to do it fast      */
3308
void
3309
s_mp_div_2(mp_int *mp)
3310
17.7M
{
3311
17.7M
    s_mp_div_2d(mp, 1);
3312
3313
17.7M
} /* end s_mp_div_2() */
3314
3315
/* }}} */
3316
3317
/* {{{ s_mp_mul_2(mp) */
3318
3319
mp_err
3320
s_mp_mul_2(mp_int *mp)
3321
1.98M
{
3322
1.98M
    mp_digit *pd;
3323
1.98M
    unsigned int ix, used;
3324
1.98M
    mp_digit kin = 0;
3325
3326
1.98M
    ARGCHK(mp != NULL, MP_BADARG);
3327
3328
    /* Shift digits leftward by 1 bit */
3329
1.98M
    used = MP_USED(mp);
3330
1.98M
    pd = MP_DIGITS(mp);
3331
303M
    for (ix = 0; ix < used; ix++) {
3332
301M
        mp_digit d = *pd;
3333
301M
        *pd++ = (d << 1) | kin;
3334
301M
        kin = (d >> (DIGIT_BIT - 1));
3335
301M
    }
3336
3337
    /* Deal with rollover from last digit */
3338
1.98M
    if (kin) {
3339
0
        if (ix >= ALLOC(mp)) {
3340
0
            mp_err res;
3341
0
            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3342
0
                return res;
3343
0
        }
3344
3345
0
        DIGIT(mp, ix) = kin;
3346
0
        USED(mp) += 1;
3347
0
    }
3348
3349
1.98M
    return MP_OKAY;
3350
3351
1.98M
} /* end s_mp_mul_2() */
3352
3353
/* }}} */
3354
3355
/* {{{ s_mp_mod_2d(mp, d) */
3356
3357
/*
3358
  Remainder the integer by 2^d, where d is a number of bits.  This
3359
  amounts to a bitwise AND of the value, and does not require the full
3360
  division code
3361
 */
3362
void
3363
s_mp_mod_2d(mp_int *mp, mp_digit d)
3364
5.41M
{
3365
5.41M
    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3366
5.41M
    mp_size ix;
3367
5.41M
    mp_digit dmask;
3368
3369
5.41M
    if (ndig >= USED(mp))
3370
262k
        return;
3371
3372
    /* Flush all the bits above 2^d in its digit */
3373
5.15M
    dmask = ((mp_digit)1 << nbit) - 1;
3374
5.15M
    DIGIT(mp, ndig) &= dmask;
3375
3376
    /* Flush all digits above the one with 2^d in it */
3377
140M
    for (ix = ndig + 1; ix < USED(mp); ix++)
3378
135M
        DIGIT(mp, ix) = 0;
3379
3380
5.15M
    s_mp_clamp(mp);
3381
3382
5.15M
} /* end s_mp_mod_2d() */
3383
3384
/* }}} */
3385
3386
/* {{{ s_mp_div_2d(mp, d) */
3387
3388
/*
3389
  Divide the integer by 2^d, where d is a number of bits.  This
3390
  amounts to a bitwise shift of the value, and does not require the
3391
  full division code (used in Barrett reduction, see below)
3392
 */
3393
void
3394
s_mp_div_2d(mp_int *mp, mp_digit d)
3395
17.7M
{
3396
17.7M
    int ix;
3397
17.7M
    mp_digit save, next, mask, lshift;
3398
3399
17.7M
    s_mp_rshd(mp, d / DIGIT_BIT);
3400
17.7M
    d %= DIGIT_BIT;
3401
    /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
3402
    /* mod and corresponding mask logic avoid that when d = 0 */
3403
17.7M
    lshift = DIGIT_BIT - d;
3404
17.7M
    lshift %= DIGIT_BIT;
3405
17.7M
    mask = ((mp_digit)1 << d) - 1;
3406
17.7M
    save = 0;
3407
176M
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3408
159M
        next = DIGIT(mp, ix) & mask;
3409
159M
        DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d);
3410
159M
        save = next;
3411
159M
    }
3412
17.7M
    s_mp_clamp(mp);
3413
3414
17.7M
} /* end s_mp_div_2d() */
3415
3416
/* }}} */
3417
3418
/* {{{ s_mp_norm(a, b, *d) */
3419
3420
/*
3421
  s_mp_norm(a, b, *d)
3422
3423
  Normalize a and b for division, where b is the divisor.  In order
3424
  that we might make good guesses for quotient digits, we want the
3425
  leading digit of b to be at least half the radix, which we
3426
  accomplish by multiplying a and b by a power of 2.  The exponent
3427
  (shift count) is placed in *pd, so that the remainder can be shifted
3428
  back at the end of the division process.
3429
 */
3430
3431
mp_err
3432
s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3433
322k
{
3434
322k
    mp_digit d;
3435
322k
    mp_digit mask;
3436
322k
    mp_digit b_msd;
3437
322k
    mp_err res = MP_OKAY;
3438
3439
322k
    ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
3440
3441
322k
    d = 0;
3442
322k
    mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
3443
322k
    b_msd = DIGIT(b, USED(b) - 1);
3444
1.83M
    while (!(b_msd & mask)) {
3445
1.51M
        b_msd <<= 1;
3446
1.51M
        ++d;
3447
1.51M
    }
3448
3449
322k
    if (d) {
3450
34.1k
        MP_CHECKOK(s_mp_mul_2d(a, d));
3451
34.1k
        MP_CHECKOK(s_mp_mul_2d(b, d));
3452
34.1k
    }
3453
3454
322k
    *pd = d;
3455
322k
CLEANUP:
3456
322k
    return res;
3457
3458
322k
} /* end s_mp_norm() */
3459
3460
/* }}} */
3461
3462
/* }}} */
3463
3464
/* {{{ Primitive digit arithmetic */
3465
3466
/* {{{ s_mp_add_d(mp, d) */
3467
3468
/* Add d to |mp| in place                                                 */
3469
mp_err
3470
s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
3471
26.5k
{
3472
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3473
    mp_word w, k = 0;
3474
    mp_size ix = 1;
3475
3476
    w = (mp_word)DIGIT(mp, 0) + d;
3477
    DIGIT(mp, 0) = ACCUM(w);
3478
    k = CARRYOUT(w);
3479
3480
    while (ix < USED(mp) && k) {
3481
        w = (mp_word)DIGIT(mp, ix) + k;
3482
        DIGIT(mp, ix) = ACCUM(w);
3483
        k = CARRYOUT(w);
3484
        ++ix;
3485
    }
3486
3487
    if (k != 0) {
3488
        mp_err res;
3489
3490
        if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3491
            return res;
3492
3493
        DIGIT(mp, ix) = (mp_digit)k;
3494
    }
3495
3496
    return MP_OKAY;
3497
#else
3498
26.5k
    mp_digit *pmp = MP_DIGITS(mp);
3499
26.5k
    mp_digit sum, mp_i, carry = 0;
3500
26.5k
    mp_err res = MP_OKAY;
3501
26.5k
    int used = (int)MP_USED(mp);
3502
3503
26.5k
    mp_i = *pmp;
3504
26.5k
    *pmp++ = sum = d + mp_i;
3505
26.5k
    carry = (sum < d);
3506
26.5k
    while (carry && --used > 0) {
3507
0
        mp_i = *pmp;
3508
0
        *pmp++ = sum = carry + mp_i;
3509
0
        carry = !sum;
3510
0
    }
3511
26.5k
    if (carry && !used) {
3512
        /* mp is growing */
3513
0
        used = MP_USED(mp);
3514
0
        MP_CHECKOK(s_mp_pad(mp, used + 1));
3515
0
        MP_DIGIT(mp, used) = carry;
3516
0
    }
3517
26.5k
CLEANUP:
3518
26.5k
    return res;
3519
26.5k
#endif
3520
26.5k
} /* end s_mp_add_d() */
3521
3522
/* }}} */
3523
3524
/* {{{ s_mp_sub_d(mp, d) */
3525
3526
/* Subtract d from |mp| in place, assumes |mp| > d                        */
3527
mp_err
3528
s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
3529
16.1k
{
3530
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3531
    mp_word w, b = 0;
3532
    mp_size ix = 1;
3533
3534
    /* Compute initial subtraction    */
3535
    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3536
    b = CARRYOUT(w) ? 0 : 1;
3537
    DIGIT(mp, 0) = ACCUM(w);
3538
3539
    /* Propagate borrows leftward     */
3540
    while (b && ix < USED(mp)) {
3541
        w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3542
        b = CARRYOUT(w) ? 0 : 1;
3543
        DIGIT(mp, ix) = ACCUM(w);
3544
        ++ix;
3545
    }
3546
3547
    /* Remove leading zeroes          */
3548
    s_mp_clamp(mp);
3549
3550
    /* If we have a borrow out, it's a violation of the input invariant */
3551
    if (b)
3552
        return MP_RANGE;
3553
    else
3554
        return MP_OKAY;
3555
#else
3556
16.1k
    mp_digit *pmp = MP_DIGITS(mp);
3557
16.1k
    mp_digit mp_i, diff, borrow;
3558
16.1k
    mp_size used = MP_USED(mp);
3559
3560
16.1k
    mp_i = *pmp;
3561
16.1k
    *pmp++ = diff = mp_i - d;
3562
16.1k
    borrow = (diff > mp_i);
3563
17.1k
    while (borrow && --used) {
3564
986
        mp_i = *pmp;
3565
986
        *pmp++ = diff = mp_i - borrow;
3566
986
        borrow = (diff > mp_i);
3567
986
    }
3568
16.1k
    s_mp_clamp(mp);
3569
16.1k
    return (borrow && !used) ? MP_RANGE : MP_OKAY;
3570
16.1k
#endif
3571
16.1k
} /* end s_mp_sub_d() */
3572
3573
/* }}} */
3574
3575
/* {{{ s_mp_mul_d(a, d) */
3576
3577
/* Compute a = a * d, single digit multiplication                         */
3578
mp_err
3579
s_mp_mul_d(mp_int *a, mp_digit d)
3580
4.44M
{
3581
4.44M
    mp_err res;
3582
4.44M
    mp_size used;
3583
4.44M
    int pow;
3584
3585
4.44M
    if (!d) {
3586
0
        mp_zero(a);
3587
0
        return MP_OKAY;
3588
0
    }
3589
4.44M
    if (d == 1)
3590
101k
        return MP_OKAY;
3591
4.34M
    if (0 <= (pow = s_mp_ispow2d(d))) {
3592
33.9k
        return s_mp_mul_2d(a, (mp_digit)pow);
3593
33.9k
    }
3594
3595
4.30M
    used = MP_USED(a);
3596
4.30M
    MP_CHECKOK(s_mp_pad(a, used + 1));
3597
3598
4.30M
    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3599
3600
4.30M
    s_mp_clamp(a);
3601
3602
4.30M
CLEANUP:
3603
4.30M
    return res;
3604
3605
4.30M
} /* end s_mp_mul_d() */
3606
3607
/* }}} */
3608
3609
/* {{{ s_mp_div_d(mp, d, r) */
3610
3611
/*
3612
  s_mp_div_d(mp, d, r)
3613
3614
  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3615
  single digit d.  If r is null, the remainder will be discarded.
3616
 */
3617
3618
mp_err
3619
s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3620
0
{
3621
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3622
    mp_word w = 0, q;
3623
#else
3624
0
    mp_digit w = 0, q;
3625
0
#endif
3626
0
    int ix;
3627
0
    mp_err res;
3628
0
    mp_int quot;
3629
0
    mp_int rem;
3630
3631
0
    if (d == 0)
3632
0
        return MP_RANGE;
3633
0
    if (d == 1) {
3634
0
        if (r)
3635
0
            *r = 0;
3636
0
        return MP_OKAY;
3637
0
    }
3638
    /* could check for power of 2 here, but mp_div_d does that. */
3639
0
    if (MP_USED(mp) == 1) {
3640
0
        mp_digit n = MP_DIGIT(mp, 0);
3641
0
        mp_digit remdig;
3642
3643
0
        q = n / d;
3644
0
        remdig = n % d;
3645
0
        MP_DIGIT(mp, 0) = q;
3646
0
        if (r) {
3647
0
            *r = remdig;
3648
0
        }
3649
0
        return MP_OKAY;
3650
0
    }
3651
3652
0
    MP_DIGITS(&rem) = 0;
3653
0
    MP_DIGITS(&quot) = 0;
3654
    /* Make room for the quotient */
3655
0
    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
3656
3657
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3658
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
3659
        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3660
3661
        if (w >= d) {
3662
            q = w / d;
3663
            w = w % d;
3664
        } else {
3665
            q = 0;
3666
        }
3667
3668
        s_mp_lshd(&quot, 1);
3669
        DIGIT(&quot, 0) = (mp_digit)q;
3670
    }
3671
#else
3672
0
    {
3673
0
        mp_digit p;
3674
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3675
0
        mp_digit norm;
3676
0
#endif
3677
3678
0
        MP_CHECKOK(mp_init_copy(&rem, mp));
3679
3680
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3681
0
        MP_DIGIT(&quot, 0) = d;
3682
0
        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
3683
0
        if (norm)
3684
0
            d <<= norm;
3685
0
        MP_DIGIT(&quot, 0) = 0;
3686
0
#endif
3687
3688
0
        p = 0;
3689
0
        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3690
0
            w = DIGIT(&rem, ix);
3691
3692
0
            if (p) {
3693
0
                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
3694
0
            } else if (w >= d) {
3695
0
                q = w / d;
3696
0
                w = w % d;
3697
0
            } else {
3698
0
                q = 0;
3699
0
            }
3700
3701
0
            MP_CHECKOK(s_mp_lshd(&quot, 1));
3702
0
            DIGIT(&quot, 0) = q;
3703
0
            p = w;
3704
0
        }
3705
0
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3706
0
        if (norm)
3707
0
            w >>= norm;
3708
0
#endif
3709
0
    }
3710
0
#endif
3711
3712
    /* Deliver the remainder, if desired */
3713
0
    if (r) {
3714
0
        *r = (mp_digit)w;
3715
0
    }
3716
3717
0
    s_mp_clamp(&quot);
3718
0
    mp_exch(&quot, mp);
3719
0
CLEANUP:
3720
0
    mp_clear(&quot);
3721
0
    mp_clear(&rem);
3722
3723
0
    return res;
3724
0
} /* end s_mp_div_d() */
3725
3726
/* }}} */
3727
3728
/* }}} */
3729
3730
/* {{{ Primitive full arithmetic */
3731
3732
/* {{{ s_mp_add(a, b) */
3733
3734
/* Compute a = |a| + |b|                                                  */
3735
mp_err
3736
s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition      */
3737
0
{
3738
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3739
    mp_word w = 0;
3740
#else
3741
0
    mp_digit d, sum, carry = 0;
3742
0
#endif
3743
0
    mp_digit *pa, *pb;
3744
0
    mp_size ix;
3745
0
    mp_size used;
3746
0
    mp_err res;
3747
3748
    /* Make sure a has enough precision for the output value */
3749
0
    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3750
0
        return res;
3751
3752
    /*
3753
      Add up all digits up to the precision of b.  If b had initially
3754
      the same precision as a, or greater, we took care of it by the
3755
      padding step above, so there is no problem.  If b had initially
3756
      less precision, we'll have to make sure the carry out is duly
3757
      propagated upward among the higher-order digits of the sum.
3758
     */
3759
0
    pa = MP_DIGITS(a);
3760
0
    pb = MP_DIGITS(b);
3761
0
    used = MP_USED(b);
3762
0
    for (ix = 0; ix < used; ix++) {
3763
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3764
        w = w + *pa + *pb++;
3765
        *pa++ = ACCUM(w);
3766
        w = CARRYOUT(w);
3767
#else
3768
0
        d = *pa;
3769
0
        sum = d + *pb++;
3770
0
        d = (sum < d); /* detect overflow */
3771
0
        *pa++ = sum += carry;
3772
0
        carry = d + (sum < carry); /* detect overflow */
3773
0
#endif
3774
0
    }
3775
3776
    /* If we run out of 'b' digits before we're actually done, make
3777
       sure the carries get propagated upward...
3778
     */
3779
0
    used = MP_USED(a);
3780
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3781
    while (w && ix < used) {
3782
        w = w + *pa;
3783
        *pa++ = ACCUM(w);
3784
        w = CARRYOUT(w);
3785
        ++ix;
3786
    }
3787
#else
3788
0
    while (carry && ix < used) {
3789
0
        sum = carry + *pa;
3790
0
        *pa++ = sum;
3791
0
        carry = !sum;
3792
0
        ++ix;
3793
0
    }
3794
0
#endif
3795
3796
/* If there's an overall carry out, increase precision and include
3797
     it.  We could have done this initially, but why touch the memory
3798
     allocator unless we're sure we have to?
3799
   */
3800
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3801
    if (w) {
3802
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3803
            return res;
3804
3805
        DIGIT(a, ix) = (mp_digit)w;
3806
    }
3807
#else
3808
0
    if (carry) {
3809
0
        if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3810
0
            return res;
3811
3812
0
        DIGIT(a, used) = carry;
3813
0
    }
3814
0
#endif
3815
3816
0
    return MP_OKAY;
3817
0
} /* end s_mp_add() */
3818
3819
/* }}} */
3820
3821
/* Compute c = |a| + |b|         */ /* magnitude addition      */
3822
mp_err
3823
s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3824
14.1M
{
3825
14.1M
    mp_digit *pa, *pb, *pc;
3826
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3827
    mp_word w = 0;
3828
#else
3829
14.1M
    mp_digit sum, carry = 0, d;
3830
14.1M
#endif
3831
14.1M
    mp_size ix;
3832
14.1M
    mp_size used;
3833
14.1M
    mp_err res;
3834
3835
14.1M
    MP_SIGN(c) = MP_SIGN(a);
3836
14.1M
    if (MP_USED(a) < MP_USED(b)) {
3837
1.79M
        const mp_int *xch = a;
3838
1.79M
        a = b;
3839
1.79M
        b = xch;
3840
1.79M
    }
3841
3842
    /* Make sure a has enough precision for the output value */
3843
14.1M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3844
0
        return res;
3845
3846
    /*
3847
     Add up all digits up to the precision of b.  If b had initially
3848
     the same precision as a, or greater, we took care of it by the
3849
     exchange step above, so there is no problem.  If b had initially
3850
     less precision, we'll have to make sure the carry out is duly
3851
     propagated upward among the higher-order digits of the sum.
3852
    */
3853
14.1M
    pa = MP_DIGITS(a);
3854
14.1M
    pb = MP_DIGITS(b);
3855
14.1M
    pc = MP_DIGITS(c);
3856
14.1M
    used = MP_USED(b);
3857
130M
    for (ix = 0; ix < used; ix++) {
3858
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3859
        w = w + *pa++ + *pb++;
3860
        *pc++ = ACCUM(w);
3861
        w = CARRYOUT(w);
3862
#else
3863
116M
        d = *pa++;
3864
116M
        sum = d + *pb++;
3865
116M
        d = (sum < d); /* detect overflow */
3866
116M
        *pc++ = sum += carry;
3867
116M
        carry = d + (sum < carry); /* detect overflow */
3868
116M
#endif
3869
116M
    }
3870
3871
    /* If we run out of 'b' digits before we're actually done, make
3872
     sure the carries get propagated upward...
3873
   */
3874
30.7M
    for (used = MP_USED(a); ix < used; ++ix) {
3875
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3876
        w = w + *pa++;
3877
        *pc++ = ACCUM(w);
3878
        w = CARRYOUT(w);
3879
#else
3880
16.5M
        *pc++ = sum = carry + *pa++;
3881
16.5M
        carry = (sum < carry);
3882
16.5M
#endif
3883
16.5M
    }
3884
3885
/* If there's an overall carry out, increase precision and include
3886
     it.  We could have done this initially, but why touch the memory
3887
     allocator unless we're sure we have to?
3888
   */
3889
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3890
    if (w) {
3891
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3892
            return res;
3893
3894
        DIGIT(c, used) = (mp_digit)w;
3895
        ++used;
3896
    }
3897
#else
3898
14.1M
    if (carry) {
3899
1.03M
        if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3900
0
            return res;
3901
3902
1.03M
        DIGIT(c, used) = carry;
3903
1.03M
        ++used;
3904
1.03M
    }
3905
14.1M
#endif
3906
14.1M
    MP_USED(c) = used;
3907
14.1M
    return MP_OKAY;
3908
14.1M
}
3909
/* {{{ s_mp_add_offset(a, b, offset) */
3910
3911
/* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3912
mp_err
3913
s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3914
0
{
3915
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3916
    mp_word w, k = 0;
3917
#else
3918
0
    mp_digit d, sum, carry = 0;
3919
0
#endif
3920
0
    mp_size ib;
3921
0
    mp_size ia;
3922
0
    mp_size lim;
3923
0
    mp_err res;
3924
3925
    /* Make sure a has enough precision for the output value */
3926
0
    lim = MP_USED(b) + offset;
3927
0
    if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3928
0
        return res;
3929
3930
    /*
3931
    Add up all digits up to the precision of b.  If b had initially
3932
    the same precision as a, or greater, we took care of it by the
3933
    padding step above, so there is no problem.  If b had initially
3934
    less precision, we'll have to make sure the carry out is duly
3935
    propagated upward among the higher-order digits of the sum.
3936
   */
3937
0
    lim = USED(b);
3938
0
    for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
3939
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3940
        w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3941
        DIGIT(a, ia) = ACCUM(w);
3942
        k = CARRYOUT(w);
3943
#else
3944
0
        d = MP_DIGIT(a, ia);
3945
0
        sum = d + MP_DIGIT(b, ib);
3946
0
        d = (sum < d);
3947
0
        MP_DIGIT(a, ia) = sum += carry;
3948
0
        carry = d + (sum < carry);
3949
0
#endif
3950
0
    }
3951
3952
/* If we run out of 'b' digits before we're actually done, make
3953
     sure the carries get propagated upward...
3954
   */
3955
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3956
    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3957
        w = (mp_word)DIGIT(a, ia) + k;
3958
        DIGIT(a, ia) = ACCUM(w);
3959
        k = CARRYOUT(w);
3960
    }
3961
#else
3962
0
    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3963
0
        d = MP_DIGIT(a, ia);
3964
0
        MP_DIGIT(a, ia) = sum = d + carry;
3965
0
        carry = (sum < d);
3966
0
    }
3967
0
#endif
3968
3969
/* If there's an overall carry out, increase precision and include
3970
     it.  We could have done this initially, but why touch the memory
3971
     allocator unless we're sure we have to?
3972
   */
3973
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3974
    if (k) {
3975
        if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3976
            return res;
3977
3978
        DIGIT(a, ia) = (mp_digit)k;
3979
    }
3980
#else
3981
0
    if (carry) {
3982
0
        if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3983
0
            return res;
3984
3985
0
        DIGIT(a, lim) = carry;
3986
0
    }
3987
0
#endif
3988
0
    s_mp_clamp(a);
3989
3990
0
    return MP_OKAY;
3991
3992
0
} /* end s_mp_add_offset() */
3993
3994
/* }}} */
3995
3996
/* {{{ s_mp_sub(a, b) */
3997
3998
/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3999
mp_err
4000
s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract      */
4001
24.3M
{
4002
24.3M
    mp_digit *pa, *pb, *limit;
4003
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4004
    mp_sword w = 0;
4005
#else
4006
24.3M
    mp_digit d, diff, borrow = 0;
4007
24.3M
#endif
4008
4009
    /*
4010
    Subtract and propagate borrow.  Up to the precision of b, this
4011
    accounts for the digits of b; after that, we just make sure the
4012
    carries get to the right place.  This saves having to pad b out to
4013
    the precision of a just to make the loops work right...
4014
   */
4015
24.3M
    pa = MP_DIGITS(a);
4016
24.3M
    pb = MP_DIGITS(b);
4017
24.3M
    limit = pb + MP_USED(b);
4018
525M
    while (pb < limit) {
4019
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4020
        w = w + *pa - *pb++;
4021
        *pa++ = ACCUM(w);
4022
        w >>= MP_DIGIT_BIT;
4023
#else
4024
501M
        d = *pa;
4025
501M
        diff = d - *pb++;
4026
501M
        d = (diff > d); /* detect borrow */
4027
501M
        if (borrow && --diff == MP_DIGIT_MAX)
4028
596k
            ++d;
4029
501M
        *pa++ = diff;
4030
501M
        borrow = d;
4031
501M
#endif
4032
501M
    }
4033
24.3M
    limit = MP_DIGITS(a) + MP_USED(a);
4034
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4035
    while (w && pa < limit) {
4036
        w = w + *pa;
4037
        *pa++ = ACCUM(w);
4038
        w >>= MP_DIGIT_BIT;
4039
    }
4040
#else
4041
36.8M
    while (borrow && pa < limit) {
4042
12.5M
        d = *pa;
4043
12.5M
        *pa++ = diff = d - borrow;
4044
12.5M
        borrow = (diff > d);
4045
12.5M
    }
4046
24.3M
#endif
4047
4048
    /* Clobber any leading zeroes we created    */
4049
24.3M
    s_mp_clamp(a);
4050
4051
/*
4052
     If there was a borrow out, then |b| > |a| in violation
4053
     of our input invariant.  We've already done the work,
4054
     but we'll at least complain about it...
4055
   */
4056
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4057
    return w ? MP_RANGE : MP_OKAY;
4058
#else
4059
24.3M
    return borrow ? MP_RANGE : MP_OKAY;
4060
24.3M
#endif
4061
24.3M
} /* end s_mp_sub() */
4062
4063
/* }}} */
4064
4065
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
4066
mp_err
4067
s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
4068
11.2M
{
4069
11.2M
    mp_digit *pa, *pb, *pc;
4070
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4071
    mp_sword w = 0;
4072
#else
4073
11.2M
    mp_digit d, diff, borrow = 0;
4074
11.2M
#endif
4075
11.2M
    int ix, limit;
4076
11.2M
    mp_err res;
4077
4078
11.2M
    MP_SIGN(c) = MP_SIGN(a);
4079
4080
    /* Make sure a has enough precision for the output value */
4081
11.2M
    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
4082
0
        return res;
4083
4084
    /*
4085
    Subtract and propagate borrow.  Up to the precision of b, this
4086
    accounts for the digits of b; after that, we just make sure the
4087
    carries get to the right place.  This saves having to pad b out to
4088
    the precision of a just to make the loops work right...
4089
   */
4090
11.2M
    pa = MP_DIGITS(a);
4091
11.2M
    pb = MP_DIGITS(b);
4092
11.2M
    pc = MP_DIGITS(c);
4093
11.2M
    limit = MP_USED(b);
4094
169M
    for (ix = 0; ix < limit; ++ix) {
4095
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4096
        w = w + *pa++ - *pb++;
4097
        *pc++ = ACCUM(w);
4098
        w >>= MP_DIGIT_BIT;
4099
#else
4100
157M
        d = *pa++;
4101
157M
        diff = d - *pb++;
4102
157M
        d = (diff > d);
4103
157M
        if (borrow && --diff == MP_DIGIT_MAX)
4104
356k
            ++d;
4105
157M
        *pc++ = diff;
4106
157M
        borrow = d;
4107
157M
#endif
4108
157M
    }
4109
14.8M
    for (limit = MP_USED(a); ix < limit; ++ix) {
4110
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4111
        w = w + *pa++;
4112
        *pc++ = ACCUM(w);
4113
        w >>= MP_DIGIT_BIT;
4114
#else
4115
3.61M
        d = *pa++;
4116
3.61M
        *pc++ = diff = d - borrow;
4117
3.61M
        borrow = (diff > d);
4118
3.61M
#endif
4119
3.61M
    }
4120
4121
    /* Clobber any leading zeroes we created    */
4122
11.2M
    MP_USED(c) = ix;
4123
11.2M
    s_mp_clamp(c);
4124
4125
/*
4126
     If there was a borrow out, then |b| > |a| in violation
4127
     of our input invariant.  We've already done the work,
4128
     but we'll at least complain about it...
4129
   */
4130
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
4131
    return w ? MP_RANGE : MP_OKAY;
4132
#else
4133
11.2M
    return borrow ? MP_RANGE : MP_OKAY;
4134
11.2M
#endif
4135
11.2M
}
4136
/* {{{ s_mp_mul(a, b) */
4137
4138
/* Compute a = |a| * |b|                                                  */
4139
mp_err
4140
s_mp_mul(mp_int *a, const mp_int *b)
4141
6.24M
{
4142
6.24M
    return mp_mul(a, b, a);
4143
6.24M
} /* end s_mp_mul() */
4144
4145
/* }}} */
4146
4147
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4148
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4149
#define MP_MUL_DxD(a, b, Phi, Plo)                              \
4150
    {                                                           \
4151
        unsigned long long product = (unsigned long long)a * b; \
4152
        Plo = (mp_digit)product;                                \
4153
        Phi = (mp_digit)(product >> MP_DIGIT_BIT);              \
4154
    }
4155
#else
4156
#define MP_MUL_DxD(a, b, Phi, Plo)                                 \
4157
23.1M
    {                                                              \
4158
23.1M
        mp_digit a0b1, a1b0;                                       \
4159
23.1M
        Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX);   \
4160
23.1M
        Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
4161
23.1M
        a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
4162
23.1M
        a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
4163
23.1M
        a1b0 += a0b1;                                              \
4164
23.1M
        Phi += a1b0 >> MP_HALF_DIGIT_BIT;                          \
4165
23.1M
        Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT;       \
4166
23.1M
        a1b0 <<= MP_HALF_DIGIT_BIT;                                \
4167
23.1M
        Plo += a1b0;                                               \
4168
23.1M
        Phi += MP_CT_LTU(Plo, a1b0);                               \
4169
23.1M
    }
4170
#endif
4171
4172
/* Constant time version of s_mpv_mul_d_add_prop.
4173
 * Presently, this is only used by the Constant time Montgomery arithmetic code. */
4174
/* c += a * b */
4175
void
4176
s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
4177
                       mp_digit *c, mp_size c_len)
4178
722k
{
4179
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4180
    mp_digit d = 0;
4181
4182
    c_len -= a_len;
4183
    /* Inner product:  Digits of a */
4184
    while (a_len--) {
4185
        mp_word w = ((mp_word)b * *a++) + *c + d;
4186
        *c++ = ACCUM(w);
4187
        d = CARRYOUT(w);
4188
    }
4189
4190
    /* propagate the carry to the end, even if carry is zero */
4191
    while (c_len--) {
4192
        mp_word w = (mp_word)*c + d;
4193
        *c++ = ACCUM(w);
4194
        d = CARRYOUT(w);
4195
    }
4196
#else
4197
722k
    mp_digit carry = 0;
4198
722k
    c_len -= a_len;
4199
23.8M
    while (a_len--) {
4200
23.1M
        mp_digit a_i = *a++;
4201
23.1M
        mp_digit a0b0, a1b1;
4202
23.1M
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4203
4204
23.1M
        a0b0 += carry;
4205
23.1M
        a1b1 += MP_CT_LTU(a0b0, carry);
4206
23.1M
        a0b0 += a_i = *c;
4207
23.1M
        a1b1 += MP_CT_LTU(a0b0, a_i);
4208
4209
23.1M
        *c++ = a0b0;
4210
23.1M
        carry = a1b1;
4211
23.1M
    }
4212
    /* propagate the carry to the end, even if carry is zero */
4213
13.3M
    while (c_len--) {
4214
12.6M
        mp_digit c_i = *c;
4215
12.6M
        carry += c_i;
4216
12.6M
        *c++ = carry;
4217
12.6M
        carry = MP_CT_LTU(carry, c_i);
4218
12.6M
    }
4219
722k
#endif
4220
722k
}
4221
4222
#if !defined(MP_ASSEMBLY_MULTIPLY)
4223
/* c = a * b */
4224
void
4225
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4226
{
4227
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4228
    mp_digit d = 0;
4229
4230
    /* Inner product:  Digits of a */
4231
    while (a_len--) {
4232
        mp_word w = ((mp_word)b * *a++) + d;
4233
        *c++ = ACCUM(w);
4234
        d = CARRYOUT(w);
4235
    }
4236
    *c = d;
4237
#else
4238
    mp_digit carry = 0;
4239
    while (a_len--) {
4240
        mp_digit a_i = *a++;
4241
        mp_digit a0b0, a1b1;
4242
4243
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4244
4245
        a0b0 += carry;
4246
        a1b1 += MP_CT_LTU(a0b0, carry);
4247
        *c++ = a0b0;
4248
        carry = a1b1;
4249
    }
4250
    *c = carry;
4251
#endif
4252
}
4253
4254
/* c += a * b */
4255
void
4256
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
4257
                mp_digit *c)
4258
{
4259
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4260
    mp_digit d = 0;
4261
4262
    /* Inner product:  Digits of a */
4263
    while (a_len--) {
4264
        mp_word w = ((mp_word)b * *a++) + *c + d;
4265
        *c++ = ACCUM(w);
4266
        d = CARRYOUT(w);
4267
    }
4268
    *c = d;
4269
#else
4270
    mp_digit carry = 0;
4271
    while (a_len--) {
4272
        mp_digit a_i = *a++;
4273
        mp_digit a0b0, a1b1;
4274
4275
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4276
4277
        a0b0 += carry;
4278
        a1b1 += MP_CT_LTU(a0b0, carry);
4279
        a0b0 += a_i = *c;
4280
        a1b1 += MP_CT_LTU(a0b0, a_i);
4281
        *c++ = a0b0;
4282
        carry = a1b1;
4283
    }
4284
    *c = carry;
4285
#endif
4286
}
4287
4288
/* Presently, this is only used by the Montgomery arithmetic code. */
4289
/* c += a * b */
4290
void
4291
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4292
{
4293
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4294
    mp_digit d = 0;
4295
4296
    /* Inner product:  Digits of a */
4297
    while (a_len--) {
4298
        mp_word w = ((mp_word)b * *a++) + *c + d;
4299
        *c++ = ACCUM(w);
4300
        d = CARRYOUT(w);
4301
    }
4302
4303
    while (d) {
4304
        mp_word w = (mp_word)*c + d;
4305
        *c++ = ACCUM(w);
4306
        d = CARRYOUT(w);
4307
    }
4308
#else
4309
    mp_digit carry = 0;
4310
    while (a_len--) {
4311
        mp_digit a_i = *a++;
4312
        mp_digit a0b0, a1b1;
4313
4314
        MP_MUL_DxD(a_i, b, a1b1, a0b0);
4315
4316
        a0b0 += carry;
4317
        if (a0b0 < carry)
4318
            ++a1b1;
4319
4320
        a0b0 += a_i = *c;
4321
        if (a0b0 < a_i)
4322
            ++a1b1;
4323
4324
        *c++ = a0b0;
4325
        carry = a1b1;
4326
    }
4327
    while (carry) {
4328
        mp_digit c_i = *c;
4329
        carry += c_i;
4330
        *c++ = carry;
4331
        carry = carry < c_i;
4332
    }
4333
#endif
4334
}
4335
#endif
4336
4337
#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4338
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4339
#define MP_SQR_D(a, Phi, Plo)                                  \
4340
    {                                                          \
4341
        unsigned long long square = (unsigned long long)a * a; \
4342
        Plo = (mp_digit)square;                                \
4343
        Phi = (mp_digit)(square >> MP_DIGIT_BIT);              \
4344
    }
4345
#else
4346
#define MP_SQR_D(a, Phi, Plo)                                      \
4347
153M
    {                                                              \
4348
153M
        mp_digit Pmid;                                             \
4349
153M
        Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX);   \
4350
153M
        Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4351
153M
        Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4352
153M
        Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);                    \
4353
153M
        Pmid <<= (MP_HALF_DIGIT_BIT + 1);                          \
4354
153M
        Plo += Pmid;                                               \
4355
153M
        if (Plo < Pmid)                                            \
4356
153M
            ++Phi;                                                 \
4357
153M
    }
4358
#endif
4359
4360
#if !defined(MP_ASSEMBLY_SQUARE)
4361
/* Add the squares of the digits of a to the digits of b. */
4362
void
4363
s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4364
4.37M
{
4365
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4366
    mp_word w;
4367
    mp_digit d;
4368
    mp_size ix;
4369
4370
    w = 0;
4371
#define ADD_SQUARE(n)                     \
4372
    d = pa[n];                            \
4373
    w += (d * (mp_word)d) + ps[2 * n];    \
4374
    ps[2 * n] = ACCUM(w);                 \
4375
    w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
4376
    ps[2 * n + 1] = ACCUM(w);             \
4377
    w = (w >> DIGIT_BIT)
4378
4379
    for (ix = a_len; ix >= 4; ix -= 4) {
4380
        ADD_SQUARE(0);
4381
        ADD_SQUARE(1);
4382
        ADD_SQUARE(2);
4383
        ADD_SQUARE(3);
4384
        pa += 4;
4385
        ps += 8;
4386
    }
4387
    if (ix) {
4388
        ps += 2 * ix;
4389
        pa += ix;
4390
        switch (ix) {
4391
            case 3:
4392
                ADD_SQUARE(-3); /* FALLTHRU */
4393
            case 2:
4394
                ADD_SQUARE(-2); /* FALLTHRU */
4395
            case 1:
4396
                ADD_SQUARE(-1); /* FALLTHRU */
4397
            case 0:
4398
                break;
4399
        }
4400
    }
4401
    while (w) {
4402
        w += *ps;
4403
        *ps++ = ACCUM(w);
4404
        w = (w >> DIGIT_BIT);
4405
    }
4406
#else
4407
4.37M
    mp_digit carry = 0;
4408
157M
    while (a_len--) {
4409
153M
        mp_digit a_i = *pa++;
4410
153M
        mp_digit a0a0, a1a1;
4411
4412
153M
        MP_SQR_D(a_i, a1a1, a0a0);
4413
4414
        /* here a1a1 and a0a0 constitute a_i ** 2 */
4415
153M
        a0a0 += carry;
4416
153M
        if (a0a0 < carry)
4417
0
            ++a1a1;
4418
4419
        /* now add to ps */
4420
153M
        a0a0 += a_i = *ps;
4421
153M
        if (a0a0 < a_i)
4422
73.9M
            ++a1a1;
4423
153M
        *ps++ = a0a0;
4424
153M
        a1a1 += a_i = *ps;
4425
153M
        carry = (a1a1 < a_i);
4426
153M
        *ps++ = a1a1;
4427
153M
    }
4428
4.37M
    while (carry) {
4429
0
        mp_digit s_i = *ps;
4430
0
        carry += s_i;
4431
0
        *ps++ = carry;
4432
0
        carry = carry < s_i;
4433
0
    }
4434
4.37M
#endif
4435
4.37M
}
4436
#endif
4437
4438
#if !defined(MP_ASSEMBLY_DIV_2DX1D)
4439
/*
4440
** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4441
** so its high bit is 1.   This code is from NSPR.
4442
*/
4443
mp_err
4444
s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4445
                mp_digit *qp, mp_digit *rp)
4446
4.40M
{
4447
4.40M
    mp_digit d1, d0, q1, q0;
4448
4.40M
    mp_digit r1, r0, m;
4449
4450
4.40M
    d1 = divisor >> MP_HALF_DIGIT_BIT;
4451
4.40M
    d0 = divisor & MP_HALF_DIGIT_MAX;
4452
4.40M
    r1 = Nhi % d1;
4453
4.40M
    q1 = Nhi / d1;
4454
4.40M
    m = q1 * d0;
4455
4.40M
    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4456
4.40M
    if (r1 < m) {
4457
1.34M
        q1--, r1 += divisor;
4458
1.34M
        if (r1 >= divisor && r1 < m) {
4459
24.7k
            q1--, r1 += divisor;
4460
24.7k
        }
4461
1.34M
    }
4462
4.40M
    r1 -= m;
4463
4.40M
    r0 = r1 % d1;
4464
4.40M
    q0 = r1 / d1;
4465
4.40M
    m = q0 * d0;
4466
4.40M
    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4467
4.40M
    if (r0 < m) {
4468
1.30M
        q0--, r0 += divisor;
4469
1.30M
        if (r0 >= divisor && r0 < m) {
4470
24.9k
            q0--, r0 += divisor;
4471
24.9k
        }
4472
1.30M
    }
4473
4.40M
    if (qp)
4474
4.40M
        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4475
4.40M
    if (rp)
4476
4.40M
        *rp = r0 - m;
4477
4.40M
    return MP_OKAY;
4478
4.40M
}
4479
#endif
4480
4481
#if MP_SQUARE
4482
/* {{{ s_mp_sqr(a) */
4483
4484
mp_err
4485
s_mp_sqr(mp_int *a)
4486
1.81M
{
4487
1.81M
    mp_err res;
4488
1.81M
    mp_int tmp;
4489
4490
1.81M
    if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
4491
0
        return res;
4492
1.81M
    res = mp_sqr(a, &tmp);
4493
1.81M
    if (res == MP_OKAY) {
4494
1.81M
        s_mp_exch(&tmp, a);
4495
1.81M
    }
4496
1.81M
    mp_clear(&tmp);
4497
1.81M
    return res;
4498
1.81M
}
4499
4500
/* }}} */
4501
#endif
4502
4503
/* {{{ s_mp_div(a, b) */
4504
4505
/*
4506
  s_mp_div(a, b)
4507
4508
  Compute a = a / b and b = a mod b.  Assumes b > a.
4509
 */
4510
4511
mp_err
4512
s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4513
         mp_int *div,  /* i: divisor                */
4514
         mp_int *quot) /* i: 0;        o: quotient  */
4515
324k
{
4516
324k
    mp_int part, t;
4517
324k
    mp_digit q_msd;
4518
324k
    mp_err res;
4519
324k
    mp_digit d;
4520
324k
    mp_digit div_msd;
4521
324k
    int ix;
4522
4523
324k
    if (mp_cmp_z(div) == 0)
4524
0
        return MP_RANGE;
4525
4526
324k
    DIGITS(&t) = 0;
4527
    /* Shortcut if divisor is power of two */
4528
324k
    if ((ix = s_mp_ispow2(div)) >= 0) {
4529
2.50k
        MP_CHECKOK(mp_copy(rem, quot));
4530
2.50k
        s_mp_div_2d(quot, (mp_digit)ix);
4531
2.50k
        s_mp_mod_2d(rem, (mp_digit)ix);
4532
4533
2.50k
        return MP_OKAY;
4534
2.50k
    }
4535
4536
322k
    MP_SIGN(rem) = ZPOS;
4537
322k
    MP_SIGN(div) = ZPOS;
4538
322k
    MP_SIGN(&part) = ZPOS;
4539
4540
    /* A working temporary for division     */
4541
322k
    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
4542
4543
    /* Normalize to optimize guessing       */
4544
322k
    MP_CHECKOK(s_mp_norm(rem, div, &d));
4545
4546
    /* Perform the division itself...woo!   */
4547
322k
    MP_USED(quot) = MP_ALLOC(quot);
4548
4549
    /* Find a partial substring of rem which is at least div */
4550
    /* If we didn't find one, we're finished dividing    */
4551
4.76M
    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4552
4.44M
        int i;
4553
4.44M
        int unusedRem;
4554
4.44M
        int partExtended = 0; /* set to true if we need to extend part */
4555
4556
4.44M
        unusedRem = MP_USED(rem) - MP_USED(div);
4557
4.44M
        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4558
4.44M
        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
4559
4.44M
        MP_USED(&part) = MP_USED(div);
4560
4561
        /* We have now truncated the part of the remainder to the same length as
4562
         * the divisor. If part is smaller than div, extend part by one digit. */
4563
4.44M
        if (s_mp_cmp(&part, div) < 0) {
4564
4.43M
            --unusedRem;
4565
4.43M
#if MP_ARGCHK == 2
4566
4.43M
            assert(unusedRem >= 0);
4567
4.43M
#endif
4568
4.43M
            --MP_DIGITS(&part);
4569
4.43M
            ++MP_USED(&part);
4570
4.43M
            ++MP_ALLOC(&part);
4571
4.43M
            partExtended = 1;
4572
4.43M
        }
4573
4574
        /* Compute a guess for the next quotient digit       */
4575
4.44M
        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4576
4.44M
        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4577
4.44M
        if (!partExtended) {
4578
            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
4579
             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
4580
             * we didn't extend part, q_msd >= div_msd. Therefore we know that
4581
             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
4582
             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
4583
8.13k
            q_msd = 1;
4584
4.43M
        } else {
4585
4.43M
            if (q_msd == div_msd) {
4586
29.1k
                q_msd = MP_DIGIT_MAX;
4587
4.40M
            } else {
4588
4.40M
                mp_digit r;
4589
4.40M
                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4590
4.40M
                                           div_msd, &q_msd, &r));
4591
4.40M
            }
4592
4.43M
        }
4593
4.44M
#if MP_ARGCHK == 2
4594
4.44M
        assert(q_msd > 0); /* This case should never occur any more. */
4595
4.44M
#endif
4596
4.44M
        if (q_msd <= 0)
4597
0
            break;
4598
4599
        /* See what that multiplies out to                   */
4600
4.44M
        mp_copy(div, &t);
4601
4.44M
        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
4602
4603
        /*
4604
           If it's too big, back it off.  We should not have to do this
4605
           more than once, or, in rare cases, twice.  Knuth describes a
4606
           method by which this could be reduced to a maximum of once, but
4607
           I didn't implement that here.
4608
           When using s_mpv_div_2dx1d, we may have to do this 3 times.
4609
         */
4610
5.32M
        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4611
886k
            --q_msd;
4612
886k
            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
4613
886k
        }
4614
4.44M
        if (i < 0) {
4615
0
            res = MP_RANGE;
4616
0
            goto CLEANUP;
4617
0
        }
4618
4619
        /* At this point, q_msd should be the right next digit   */
4620
4.44M
        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
4621
4.44M
        s_mp_clamp(rem);
4622
4623
        /*
4624
          Include the digit in the quotient.  We allocated enough memory
4625
          for any quotient we could ever possibly get, so we should not
4626
          have to check for failures here
4627
         */
4628
4.44M
        MP_DIGIT(quot, unusedRem) = q_msd;
4629
4.44M
    }
4630
4631
    /* Denormalize remainder                */
4632
322k
    if (d) {
4633
34.1k
        s_mp_div_2d(rem, d);
4634
34.1k
    }
4635
4636
322k
    s_mp_clamp(quot);
4637
4638
322k
CLEANUP:
4639
322k
    mp_clear(&t);
4640
4641
322k
    return res;
4642
4643
322k
} /* end s_mp_div() */
4644
4645
/* }}} */
4646
4647
/* {{{ s_mp_2expt(a, k) */
4648
4649
mp_err
4650
s_mp_2expt(mp_int *a, mp_digit k)
4651
10.5k
{
4652
10.5k
    mp_err res;
4653
10.5k
    mp_size dig, bit;
4654
4655
10.5k
    dig = k / DIGIT_BIT;
4656
10.5k
    bit = k % DIGIT_BIT;
4657
4658
10.5k
    mp_zero(a);
4659
10.5k
    if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4660
0
        return res;
4661
4662
10.5k
    DIGIT(a, dig) |= ((mp_digit)1 << bit);
4663
4664
10.5k
    return MP_OKAY;
4665
4666
10.5k
} /* end s_mp_2expt() */
4667
4668
/* }}} */
4669
4670
/* {{{ s_mp_reduce(x, m, mu) */
4671
4672
/*
4673
  Compute Barrett reduction, x (mod m), given a precomputed value for
4674
  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4675
  faster than straight division, when many reductions by the same
4676
  value of m are required (such as in modular exponentiation).  This
4677
  can nearly halve the time required to do modular exponentiation,
4678
  as compared to using the full integer divide to reduce.
4679
4680
  This algorithm was derived from the _Handbook of Applied
4681
  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4682
  pp. 603-604.
4683
 */
4684
4685
mp_err
4686
s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4687
2.68M
{
4688
2.68M
    mp_int q;
4689
2.68M
    mp_err res;
4690
4691
2.68M
    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
4692
0
        return res;
4693
4694
2.68M
    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
4695
2.68M
    s_mp_mul(&q, mu);           /* q2 = q1 * mu      */
4696
2.68M
    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
4697
4698
    /* x = x mod b^(k+1), quick (no division) */
4699
2.68M
    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4700
4701
    /* q = q * m mod b^(k+1), quick (no division) */
4702
2.68M
    s_mp_mul(&q, m);
4703
2.68M
    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4704
4705
    /* x = x - q */
4706
2.68M
    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
4707
0
        goto CLEANUP;
4708
4709
    /* If x < 0, add b^(k+1) to it */
4710
2.68M
    if (mp_cmp_z(x) < 0) {
4711
134k
        mp_set(&q, 1);
4712
134k
        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4713
0
            goto CLEANUP;
4714
134k
        if ((res = mp_add(x, &q, x)) != MP_OKAY)
4715
0
            goto CLEANUP;
4716
134k
    }
4717
4718
    /* Back off if it's too big */
4719
2.92M
    while (mp_cmp(x, m) >= 0) {
4720
243k
        if ((res = s_mp_sub(x, m)) != MP_OKAY)
4721
0
            break;
4722
243k
    }
4723
4724
2.68M
CLEANUP:
4725
2.68M
    mp_clear(&q);
4726
4727
2.68M
    return res;
4728
4729
2.68M
} /* end s_mp_reduce() */
4730
4731
/* }}} */
4732
4733
/* }}} */
4734
4735
/* {{{ Primitive comparisons */
4736
4737
/* {{{ s_mp_cmp(a, b) */
4738
4739
/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4740
int
4741
s_mp_cmp(const mp_int *a, const mp_int *b)
4742
113M
{
4743
113M
    ARGMPCHK(a != NULL && b != NULL);
4744
4745
113M
    mp_size used_a = MP_USED(a);
4746
113M
    {
4747
113M
        mp_size used_b = MP_USED(b);
4748
4749
113M
        if (used_a > used_b)
4750
12.6M
            goto IS_GT;
4751
101M
        if (used_a < used_b)
4752
3.97M
            goto IS_LT;
4753
101M
    }
4754
97.0M
    {
4755
97.0M
        mp_digit *pa, *pb;
4756
97.0M
        mp_digit da = 0, db = 0;
4757
4758
97.0M
#define CMP_AB(n)                     \
4759
98.2M
    if ((da = pa[n]) != (db = pb[n])) \
4760
98.2M
    goto done
4761
4762
97.0M
        pa = MP_DIGITS(a) + used_a;
4763
97.0M
        pb = MP_DIGITS(b) + used_a;
4764
97.3M
        while (used_a >= 4) {
4765
92.4M
            pa -= 4;
4766
92.4M
            pb -= 4;
4767
92.4M
            used_a -= 4;
4768
92.4M
            CMP_AB(3);
4769
5.11M
            CMP_AB(2);
4770
328k
            CMP_AB(1);
4771
295k
            CMP_AB(0);
4772
295k
        }
4773
4.94M
        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4774
127k
            /* do nothing */;
4775
97.0M
    done:
4776
97.0M
        if (da > db)
4777
13.5M
            goto IS_GT;
4778
83.5M
        if (da < db)
4779
83.3M
            goto IS_LT;
4780
83.5M
    }
4781
125k
    return MP_EQ;
4782
87.3M
IS_LT:
4783
87.3M
    return MP_LT;
4784
26.1M
IS_GT:
4785
26.1M
    return MP_GT;
4786
83.5M
} /* end s_mp_cmp() */
4787
4788
/* }}} */
4789
4790
/* {{{ s_mp_cmp_d(a, d) */
4791
4792
/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4793
int
4794
s_mp_cmp_d(const mp_int *a, mp_digit d)
4795
26.3M
{
4796
26.3M
    ARGMPCHK(a != NULL);
4797
4798
26.3M
    if (USED(a) > 1)
4799
19.1M
        return MP_GT;
4800
4801
7.20M
    if (DIGIT(a, 0) < d)
4802
3.22k
        return MP_LT;
4803
7.19M
    else if (DIGIT(a, 0) > d)
4804
7.08M
        return MP_GT;
4805
116k
    else
4806
116k
        return MP_EQ;
4807
4808
7.20M
} /* end s_mp_cmp_d() */
4809
4810
/* }}} */
4811
4812
/* {{{ s_mp_ispow2(v) */
4813
4814
/*
4815
  Returns -1 if the value is not a power of two; otherwise, it returns
4816
  k such that v = 2^k, i.e. lg(v).
4817
 */
4818
int
4819
s_mp_ispow2(const mp_int *v)
4820
330k
{
4821
330k
    mp_digit d;
4822
330k
    int extra = 0, ix;
4823
4824
330k
    ARGMPCHK(v != NULL);
4825
4826
330k
    ix = MP_USED(v) - 1;
4827
330k
    d = MP_DIGIT(v, ix); /* most significant digit of v */
4828
4829
330k
    extra = s_mp_ispow2d(d);
4830
330k
    if (extra < 0 || ix == 0)
4831
303k
        return extra;
4832
4833
147k
    while (--ix >= 0) {
4834
144k
        if (DIGIT(v, ix) != 0)
4835
24.3k
            return -1; /* not a power of two */
4836
120k
        extra += MP_DIGIT_BIT;
4837
120k
    }
4838
4839
2.44k
    return extra;
4840
4841
26.7k
} /* end s_mp_ispow2() */
4842
4843
/* }}} */
4844
4845
/* {{{ s_mp_ispow2d(d) */
4846
4847
int
4848
s_mp_ispow2d(mp_digit d)
4849
4.67M
{
4850
4.67M
    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
4851
60.9k
        int pow = 0;
4852
#if defined(MP_USE_UINT_DIGIT)
4853
        if (d & 0xffff0000U)
4854
            pow += 16;
4855
        if (d & 0xff00ff00U)
4856
            pow += 8;
4857
        if (d & 0xf0f0f0f0U)
4858
            pow += 4;
4859
        if (d & 0xccccccccU)
4860
            pow += 2;
4861
        if (d & 0xaaaaaaaaU)
4862
            pow += 1;
4863
#elif defined(MP_USE_LONG_LONG_DIGIT)
4864
        if (d & 0xffffffff00000000ULL)
4865
            pow += 32;
4866
        if (d & 0xffff0000ffff0000ULL)
4867
            pow += 16;
4868
        if (d & 0xff00ff00ff00ff00ULL)
4869
            pow += 8;
4870
        if (d & 0xf0f0f0f0f0f0f0f0ULL)
4871
            pow += 4;
4872
        if (d & 0xccccccccccccccccULL)
4873
            pow += 2;
4874
        if (d & 0xaaaaaaaaaaaaaaaaULL)
4875
            pow += 1;
4876
#elif defined(MP_USE_LONG_DIGIT)
4877
60.9k
        if (d & 0xffffffff00000000UL)
4878
19.2k
            pow += 32;
4879
60.9k
        if (d & 0xffff0000ffff0000UL)
4880
16.7k
            pow += 16;
4881
60.9k
        if (d & 0xff00ff00ff00ff00UL)
4882
17.2k
            pow += 8;
4883
60.9k
        if (d & 0xf0f0f0f0f0f0f0f0UL)
4884
16.1k
            pow += 4;
4885
60.9k
        if (d & 0xccccccccccccccccUL)
4886
20.7k
            pow += 2;
4887
60.9k
        if (d & 0xaaaaaaaaaaaaaaaaUL)
4888
41.5k
            pow += 1;
4889
#else
4890
#error "unknown type for mp_digit"
4891
#endif
4892
60.9k
        return pow;
4893
60.9k
    }
4894
4.61M
    return -1;
4895
4896
4.67M
} /* end s_mp_ispow2d() */
4897
4898
/* }}} */
4899
4900
/* }}} */
4901
4902
/* {{{ Primitive I/O helpers */
4903
4904
/* {{{ s_mp_tovalue(ch, r) */
4905
4906
/*
4907
  Convert the given character to its digit value, in the given radix.
4908
  If the given character is not understood in the given radix, -1 is
4909
  returned.  Otherwise the digit's numeric value is returned.
4910
4911
  The results will be odd if you use a radix < 2 or > 62, you are
4912
  expected to know what you're up to.
4913
 */
4914
int
4915
s_mp_tovalue(char ch, int r)
4916
0
{
4917
0
    int val, xch;
4918
4919
0
    if (r > 36)
4920
0
        xch = ch;
4921
0
    else
4922
0
        xch = toupper((unsigned char)ch);
4923
4924
0
    if (isdigit((unsigned char)xch))
4925
0
        val = xch - '0';
4926
0
    else if (isupper((unsigned char)xch))
4927
0
        val = xch - 'A' + 10;
4928
0
    else if (islower((unsigned char)xch))
4929
0
        val = xch - 'a' + 36;
4930
0
    else if (xch == '+')
4931
0
        val = 62;
4932
0
    else if (xch == '/')
4933
0
        val = 63;
4934
0
    else
4935
0
        return -1;
4936
4937
0
    if (val < 0 || val >= r)
4938
0
        return -1;
4939
4940
0
    return val;
4941
4942
0
} /* end s_mp_tovalue() */
4943
4944
/* }}} */
4945
4946
/* {{{ s_mp_todigit(val, r, low) */
4947
4948
/*
4949
  Convert val to a radix-r digit, if possible.  If val is out of range
4950
  for r, returns zero.  Otherwise, returns an ASCII character denoting
4951
  the value in the given radix.
4952
4953
  The results may be odd if you use a radix < 2 or > 64, you are
4954
  expected to know what you're doing.
4955
 */
4956
4957
char
4958
s_mp_todigit(mp_digit val, int r, int low)
4959
0
{
4960
0
    char ch;
4961
4962
0
    if (val >= r)
4963
0
        return 0;
4964
4965
0
    ch = s_dmap_1[val];
4966
4967
0
    if (r <= 36 && low)
4968
0
        ch = tolower((unsigned char)ch);
4969
4970
0
    return ch;
4971
4972
0
} /* end s_mp_todigit() */
4973
4974
/* }}} */
4975
4976
/* {{{ s_mp_outlen(bits, radix) */
4977
4978
/*
4979
   Return an estimate for how long a string is needed to hold a radix
4980
   r representation of a number with 'bits' significant bits, plus an
4981
   extra for a zero terminator (assuming C style strings here)
4982
 */
4983
int
4984
s_mp_outlen(int bits, int r)
4985
0
{
4986
0
    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4987
4988
0
} /* end s_mp_outlen() */
4989
4990
/* }}} */
4991
4992
/* }}} */
4993
4994
/* {{{ mp_read_unsigned_octets(mp, str, len) */
4995
/* mp_read_unsigned_octets(mp, str, len)
4996
   Read in a raw value (base 256) into the given mp_int
4997
   No sign bit, number is positive.  Leading zeros ignored.
4998
 */
4999
5000
mp_err
5001
mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
5002
428k
{
5003
428k
    int count;
5004
428k
    mp_err res;
5005
428k
    mp_digit d;
5006
5007
428k
    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
5008
5009
428k
    mp_zero(mp);
5010
5011
428k
    count = len % sizeof(mp_digit);
5012
428k
    if (count) {
5013
560k
        for (d = 0; count-- > 0; --len) {
5014
406k
            d = (d << 8) | *str++;
5015
406k
        }
5016
153k
        MP_DIGIT(mp, 0) = d;
5017
153k
    }
5018
5019
    /* Read the rest of the digits */
5020
7.21M
    for (; len > 0; len -= sizeof(mp_digit)) {
5021
61.0M
        for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
5022
54.2M
            d = (d << 8) | *str++;
5023
54.2M
        }
5024
6.78M
        if (MP_EQ == mp_cmp_z(mp)) {
5025
423k
            if (!d)
5026
138k
                continue;
5027
6.36M
        } else {
5028
6.36M
            if ((res = s_mp_lshd(mp, 1)) != MP_OKAY)
5029
0
                return res;
5030
6.36M
        }
5031
6.64M
        MP_DIGIT(mp, 0) = d;
5032
6.64M
    }
5033
428k
    return MP_OKAY;
5034
428k
} /* end mp_read_unsigned_octets() */
5035
/* }}} */
5036
5037
/* {{{ mp_unsigned_octet_size(mp) */
5038
unsigned int
5039
mp_unsigned_octet_size(const mp_int *mp)
5040
41.6k
{
5041
41.6k
    unsigned int bytes;
5042
41.6k
    int ix;
5043
41.6k
    mp_digit d = 0;
5044
5045
41.6k
    ARGCHK(mp != NULL, MP_BADARG);
5046
41.6k
    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
5047
5048
41.6k
    bytes = (USED(mp) * sizeof(mp_digit));
5049
5050
    /* subtract leading zeros. */
5051
    /* Iterate over each digit... */
5052
42.9k
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5053
41.6k
        d = DIGIT(mp, ix);
5054
41.6k
        if (d)
5055
40.3k
            break;
5056
1.30k
        bytes -= sizeof(d);
5057
1.30k
    }
5058
41.6k
    if (!bytes)
5059
1.30k
        return 1;
5060
5061
    /* Have MSD, check digit bytes, high order first */
5062
40.5k
    for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
5063
40.5k
        unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
5064
40.5k
        if (x)
5065
40.3k
            break;
5066
202
        --bytes;
5067
202
    }
5068
40.3k
    return bytes;
5069
41.6k
} /* end mp_unsigned_octet_size() */
5070
/* }}} */
5071
5072
/* {{{ mp_to_unsigned_octets(mp, str) */
5073
/* output a buffer of big endian octets no longer than specified. */
5074
mp_err
5075
mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5076
20.8k
{
5077
20.8k
    int ix, pos = 0;
5078
20.8k
    unsigned int bytes;
5079
5080
20.8k
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5081
5082
20.8k
    bytes = mp_unsigned_octet_size(mp);
5083
20.8k
    ARGCHK(bytes <= maxlen, MP_BADARG);
5084
5085
    /* Iterate over each digit... */
5086
927k
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5087
906k
        mp_digit d = DIGIT(mp, ix);
5088
906k
        int jx;
5089
5090
        /* Unpack digit bytes, high order first */
5091
8.15M
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5092
7.25M
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5093
7.25M
            if (!pos && !x) /* suppress leading zeros */
5094
5.33k
                continue;
5095
7.24M
            str[pos++] = x;
5096
7.24M
        }
5097
906k
    }
5098
20.8k
    if (!pos)
5099
654
        str[pos++] = 0;
5100
20.8k
    return pos;
5101
20.8k
} /* end mp_to_unsigned_octets() */
5102
/* }}} */
5103
5104
/* {{{ mp_to_signed_octets(mp, str) */
5105
/* output a buffer of big endian octets no longer than specified. */
5106
mp_err
5107
mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
5108
0
{
5109
0
    int ix, pos = 0;
5110
0
    unsigned int bytes;
5111
5112
0
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
5113
5114
0
    bytes = mp_unsigned_octet_size(mp);
5115
0
    ARGCHK(bytes <= maxlen, MP_BADARG);
5116
5117
    /* Iterate over each digit... */
5118
0
    for (ix = USED(mp) - 1; ix >= 0; ix--) {
5119
0
        mp_digit d = DIGIT(mp, ix);
5120
0
        int jx;
5121
5122
        /* Unpack digit bytes, high order first */
5123
0
        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
5124
0
            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
5125
0
            if (!pos) {
5126
0
                if (!x) /* suppress leading zeros */
5127
0
                    continue;
5128
0
                if (x & 0x80) { /* add one leading zero to make output positive.  */
5129
0
                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
5130
0
                    if (bytes + 1 > maxlen)
5131
0
                        return MP_BADARG;
5132
0
                    str[pos++] = 0;
5133
0
                }
5134
0
            }
5135
0
            str[pos++] = x;
5136
0
        }
5137
0
    }
5138
0
    if (!pos)
5139
0
        str[pos++] = 0;
5140
0
    return pos;
5141
0
} /* end mp_to_signed_octets() */
5142
/* }}} */
5143
5144
/* {{{ mp_to_fixlen_octets(mp, str) */
5145
/* output a buffer of big endian octets exactly as long as requested.
5146
   constant time on the value of mp. */
5147
mp_err
5148
mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
5149
55.6k
{
5150
55.6k
    int ix, jx;
5151
55.6k
    unsigned int bytes;
5152
5153
55.6k
    ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
5154
5155
    /* Constant time on the value of mp.  Don't use mp_unsigned_octet_size. */
5156
55.6k
    bytes = USED(mp) * MP_DIGIT_SIZE;
5157
5158
    /* If the output is shorter than the native size of mp, then check that any
5159
     * bytes not written have zero values.  This check isn't constant time on
5160
     * the assumption that timing-sensitive callers can guarantee that mp fits
5161
     * in the allocated space. */
5162
55.6k
    ix = USED(mp) - 1;
5163
55.6k
    if (bytes > length) {
5164
5.07k
        unsigned int zeros = bytes - length;
5165
5166
5.07k
        while (zeros >= MP_DIGIT_SIZE) {
5167
0
            ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
5168
0
            zeros -= MP_DIGIT_SIZE;
5169
0
            ix--;
5170
0
        }
5171
5172
5.07k
        if (zeros > 0) {
5173
5.07k
            mp_digit d = DIGIT(mp, ix);
5174
5.07k
            mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
5175
5.07k
            ARGCHK((d & m) == 0, MP_BADARG);
5176
10.4k
            for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
5177
5.35k
                *str++ = d >> (jx * CHAR_BIT);
5178
5.35k
            }
5179
5.07k
            ix--;
5180
5.07k
        }
5181
50.5k
    } else if (bytes < length) {
5182
        /* Place any needed leading zeros. */
5183
6.61k
        unsigned int zeros = length - bytes;
5184
6.61k
        memset(str, 0, zeros);
5185
6.61k
        str += zeros;
5186
6.61k
    }
5187
5188
    /* Iterate over each whole digit... */
5189
1.32M
    for (; ix >= 0; ix--) {
5190
1.26M
        mp_digit d = DIGIT(mp, ix);
5191
5192
        /* Unpack digit bytes, high order first */
5193
11.4M
        for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
5194
10.1M
            *str++ = d >> (jx * CHAR_BIT);
5195
10.1M
        }
5196
1.26M
    }
5197
55.6k
    return MP_OKAY;
5198
55.6k
} /* end mp_to_fixlen_octets() */
5199
/* }}} */
5200
5201
/* {{{ mp_cswap(condition, a, b, numdigits) */
5202
/* performs a conditional swap between mp_int. */
5203
mp_err
5204
mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
5205
40.2M
{
5206
40.2M
    mp_digit x;
5207
40.2M
    unsigned int i;
5208
40.2M
    mp_err res = 0;
5209
5210
    /* if pointers are equal return */
5211
40.2M
    if (a == b)
5212
0
        return res;
5213
5214
40.2M
    if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
5215
0
        MP_CHECKOK(s_mp_grow(a, numdigits));
5216
0
        MP_CHECKOK(s_mp_grow(b, numdigits));
5217
0
    }
5218
5219
40.2M
    condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
5220
5221
40.2M
    x = (USED(a) ^ USED(b)) & condition;
5222
40.2M
    USED(a) ^= x;
5223
40.2M
    USED(b) ^= x;
5224
5225
40.2M
    x = (SIGN(a) ^ SIGN(b)) & condition;
5226
40.2M
    SIGN(a) ^= x;
5227
40.2M
    SIGN(b) ^= x;
5228
5229
893M
    for (i = 0; i < numdigits; i++) {
5230
852M
        x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
5231
852M
        DIGIT(a, i) ^= x;
5232
852M
        DIGIT(b, i) ^= x;
5233
852M
    }
5234
5235
40.2M
CLEANUP:
5236
40.2M
    return res;
5237
40.2M
} /* end mp_cswap() */
5238
/* }}} */
5239
5240
/*------------------------------------------------------------------------*/
5241
/* HERE THERE BE DRAGONS                                                  */