Coverage Report

Created: 2025-04-24 06:18

/src/hostap/src/tls/libtommath.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Minimal code for RSA support from LibTomMath 0.41
3
 * http://libtom.org/
4
 * http://libtom.org/files/ltm-0.41.tar.bz2
5
 * This library was released in public domain by Tom St Denis.
6
 *
7
 * The combination in this file may not use all of the optimized algorithms
8
 * from LibTomMath and may be considerable slower than the LibTomMath with its
9
 * default settings. The main purpose of having this version here is to make it
10
 * easier to build bignum.c wrapper without having to install and build an
11
 * external library.
12
 *
13
 * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
14
 * libtommath.c file instead of using the external LibTomMath library.
15
 */
16
17
#ifndef CHAR_BIT
18
0
#define CHAR_BIT 8
19
#endif
20
21
#define BN_MP_INVMOD_C
22
#define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
23
         * require BN_MP_EXPTMOD_FAST_C instead */
24
#define BN_S_MP_MUL_DIGS_C
25
#define BN_MP_INVMOD_SLOW_C
26
#define BN_S_MP_SQR_C
27
#define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
28
         * would require other than mp_reduce */
29
30
#ifdef LTM_FAST
31
32
/* Use faster div at the cost of about 1 kB */
33
#define BN_MP_MUL_D_C
34
35
/* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
36
#define BN_MP_EXPTMOD_FAST_C
37
#define BN_MP_MONTGOMERY_SETUP_C
38
#define BN_FAST_MP_MONTGOMERY_REDUCE_C
39
#define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
40
#define BN_MP_MUL_2_C
41
42
/* Include faster sqr at the cost of about 0.5 kB in code */
43
#define BN_FAST_S_MP_SQR_C
44
45
/* About 0.25 kB of code, but ~1.7kB of stack space! */
46
#define BN_FAST_S_MP_MUL_DIGS_C
47
48
#else /* LTM_FAST */
49
50
#define BN_MP_DIV_SMALL
51
#define BN_MP_INIT_MULTI_C
52
#define BN_MP_CLEAR_MULTI_C
53
#define BN_MP_ABS_C
54
#endif /* LTM_FAST */
55
56
/* Current uses do not require support for negative exponent in exptmod, so we
57
 * can save about 1.5 kB in leaving out invmod. */
58
#define LTM_NO_NEG_EXP
59
60
/* from tommath.h */
61
62
#define  OPT_CAST(x)
63
64
#ifdef __x86_64__
65
typedef unsigned long mp_digit;
66
typedef unsigned long mp_word __attribute__((mode(TI)));
67
68
40.9k
#define DIGIT_BIT 60
69
#define MP_64BIT
70
#else
71
typedef unsigned long mp_digit;
72
typedef u64 mp_word;
73
74
#define DIGIT_BIT          28
75
#define MP_28BIT
76
#endif
77
78
79
36
#define XMALLOC  os_malloc
80
36
#define XFREE    os_free
81
18
#define XREALLOC os_realloc
82
83
84
31.5k
#define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
85
86
0
#define MP_LT        -1   /* less than */
87
4
#define MP_EQ         0   /* equal to */
88
0
#define MP_GT         1   /* greater than */
89
90
126
#define MP_ZPOS       0   /* positive integer */
91
4
#define MP_NEG        1   /* negative */
92
93
4.79k
#define MP_OKAY       0   /* ok result */
94
0
#define MP_MEM        -2  /* out of mem */
95
0
#define MP_VAL        -3  /* invalid input */
96
97
0
#define MP_YES        1   /* yes response */
98
0
#define MP_NO         0   /* no response */
99
100
typedef int           mp_err;
101
102
/* define this to use lower memory usage routines (exptmods mostly) */
103
#define MP_LOW_MEM
104
105
/* default precision */
106
#ifndef MP_PREC
107
   #ifndef MP_LOW_MEM
108
      #define MP_PREC                 32     /* default digits of precision */
109
   #else
110
396
      #define MP_PREC                 8      /* default digits of precision */
111
   #endif
112
#endif
113
114
/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
115
#define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
116
117
/* the infamous mp_int structure */
118
typedef struct  {
119
    int used, alloc, sign;
120
    mp_digit *dp;
121
} mp_int;
122
123
124
/* ---> Basic Manipulations <--- */
125
0
#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
126
#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
127
#define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
128
129
130
/* prototypes for copied functions */
131
0
#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
132
static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
133
static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
134
static int s_mp_sqr(mp_int * a, mp_int * b);
135
static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
136
137
#ifdef BN_FAST_S_MP_MUL_DIGS_C
138
static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
139
#endif
140
141
#ifdef BN_MP_INIT_MULTI_C
142
static int mp_init_multi(mp_int *mp, ...);
143
#endif
144
#ifdef BN_MP_CLEAR_MULTI_C
145
static void mp_clear_multi(mp_int *mp, ...);
146
#endif
147
static int mp_lshd(mp_int * a, int b);
148
static void mp_set(mp_int * a, mp_digit b);
149
static void mp_clamp(mp_int * a);
150
static void mp_exch(mp_int * a, mp_int * b);
151
static void mp_rshd(mp_int * a, int b);
152
static void mp_zero(mp_int * a);
153
static int mp_mod_2d(mp_int * a, int b, mp_int * c);
154
static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
155
static int mp_init_copy(mp_int * a, mp_int * b);
156
static int mp_mul_2d(mp_int * a, int b, mp_int * c);
157
#ifndef LTM_NO_NEG_EXP
158
static int mp_div_2(mp_int * a, mp_int * b);
159
static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
160
static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
161
#endif /* LTM_NO_NEG_EXP */
162
static int mp_copy(mp_int * a, mp_int * b);
163
static int mp_count_bits(mp_int * a);
164
static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
165
static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
166
static int mp_grow(mp_int * a, int size);
167
static int mp_cmp_mag(mp_int * a, mp_int * b);
168
#ifdef BN_MP_ABS_C
169
static int mp_abs(mp_int * a, mp_int * b);
170
#endif
171
static int mp_sqr(mp_int * a, mp_int * b);
172
static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
173
static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
174
static int mp_2expt(mp_int * a, int b);
175
static int mp_reduce_setup(mp_int * a, mp_int * b);
176
static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
177
static int mp_init_size(mp_int * a, int size);
178
#ifdef BN_MP_EXPTMOD_FAST_C
179
static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
180
#endif /* BN_MP_EXPTMOD_FAST_C */
181
#ifdef BN_FAST_S_MP_SQR_C
182
static int fast_s_mp_sqr (mp_int * a, mp_int * b);
183
#endif /* BN_FAST_S_MP_SQR_C */
184
#ifdef BN_MP_MUL_D_C
185
static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
186
#endif /* BN_MP_MUL_D_C */
187
188
189
190
/* functions from bn_<func name>.c */
191
192
193
/* reverse an array, used for radix code */
194
static void bn_reverse (unsigned char *s, int len)
195
0
{
196
0
  int     ix, iy;
197
0
  unsigned char t;
198
199
0
  ix = 0;
200
0
  iy = len - 1;
201
0
  while (ix < iy) {
202
0
    t     = s[ix];
203
0
    s[ix] = s[iy];
204
0
    s[iy] = t;
205
0
    ++ix;
206
0
    --iy;
207
0
  }
208
0
}
209
210
211
/* low level addition, based on HAC pp.594, Algorithm 14.7 */
212
static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
213
0
{
214
0
  mp_int *x;
215
0
  int     olduse, res, min, max;
216
217
  /* find sizes, we let |a| <= |b| which means we have to sort
218
   * them.  "x" will point to the input with the most digits
219
   */
220
0
  if (a->used > b->used) {
221
0
    min = b->used;
222
0
    max = a->used;
223
0
    x = a;
224
0
  } else {
225
0
    min = a->used;
226
0
    max = b->used;
227
0
    x = b;
228
0
  }
229
230
  /* init result */
231
0
  if (c->alloc < max + 1) {
232
0
    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
233
0
      return res;
234
0
    }
235
0
  }
236
237
  /* get old used digit count and set new one */
238
0
  olduse = c->used;
239
0
  c->used = max + 1;
240
241
0
  {
242
0
    register mp_digit u, *tmpa, *tmpb, *tmpc;
243
0
    register int i;
244
245
    /* alias for digit pointers */
246
247
    /* first input */
248
0
    tmpa = a->dp;
249
250
    /* second input */
251
0
    tmpb = b->dp;
252
253
    /* destination */
254
0
    tmpc = c->dp;
255
256
    /* zero the carry */
257
0
    u = 0;
258
0
    for (i = 0; i < min; i++) {
259
      /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
260
0
      *tmpc = *tmpa++ + *tmpb++ + u;
261
262
      /* U = carry bit of T[i] */
263
0
      u = *tmpc >> ((mp_digit)DIGIT_BIT);
264
265
      /* take away carry bit from T[i] */
266
0
      *tmpc++ &= MP_MASK;
267
0
    }
268
269
    /* now copy higher words if any, that is in A+B
270
     * if A or B has more digits add those in
271
     */
272
0
    if (min != max) {
273
0
      for (; i < max; i++) {
274
        /* T[i] = X[i] + U */
275
0
        *tmpc = x->dp[i] + u;
276
277
        /* U = carry bit of T[i] */
278
0
        u = *tmpc >> ((mp_digit)DIGIT_BIT);
279
280
        /* take away carry bit from T[i] */
281
0
        *tmpc++ &= MP_MASK;
282
0
      }
283
0
    }
284
285
    /* add carry */
286
0
    *tmpc++ = u;
287
288
    /* clear digits above oldused */
289
0
    for (i = c->used; i < olduse; i++) {
290
0
      *tmpc++ = 0;
291
0
    }
292
0
  }
293
294
0
  mp_clamp (c);
295
0
  return MP_OKAY;
296
0
}
297
298
299
/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
300
static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
301
0
{
302
0
  int     olduse, res, min, max;
303
304
  /* find sizes */
305
0
  min = b->used;
306
0
  max = a->used;
307
308
  /* init result */
309
0
  if (c->alloc < max) {
310
0
    if ((res = mp_grow (c, max)) != MP_OKAY) {
311
0
      return res;
312
0
    }
313
0
  }
314
0
  olduse = c->used;
315
0
  c->used = max;
316
317
0
  {
318
0
    register mp_digit u, *tmpa, *tmpb, *tmpc;
319
0
    register int i;
320
321
    /* alias for digit pointers */
322
0
    tmpa = a->dp;
323
0
    tmpb = b->dp;
324
0
    tmpc = c->dp;
325
326
    /* set carry to zero */
327
0
    u = 0;
328
0
    for (i = 0; i < min; i++) {
329
      /* T[i] = A[i] - B[i] - U */
330
0
      *tmpc = *tmpa++ - *tmpb++ - u;
331
332
      /* U = carry bit of T[i]
333
       * Note this saves performing an AND operation since
334
       * if a carry does occur it will propagate all the way to the
335
       * MSB.  As a result a single shift is enough to get the carry
336
       */
337
0
      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
338
339
      /* Clear carry from T[i] */
340
0
      *tmpc++ &= MP_MASK;
341
0
    }
342
343
    /* now copy higher words if any, e.g. if A has more digits than B  */
344
0
    for (; i < max; i++) {
345
      /* T[i] = A[i] - U */
346
0
      *tmpc = *tmpa++ - u;
347
348
      /* U = carry bit of T[i] */
349
0
      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
350
351
      /* Clear carry from T[i] */
352
0
      *tmpc++ &= MP_MASK;
353
0
    }
354
355
    /* clear digits above used (since we may not have grown result above) */
356
0
    for (i = c->used; i < olduse; i++) {
357
0
      *tmpc++ = 0;
358
0
    }
359
0
  }
360
361
0
  mp_clamp (c);
362
0
  return MP_OKAY;
363
0
}
364
365
366
/* init a new mp_int */
367
static int mp_init (mp_int * a)
368
36
{
369
36
  int i;
370
371
  /* allocate memory required and clear it */
372
36
  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
373
36
  if (a->dp == NULL) {
374
0
    return MP_MEM;
375
0
  }
376
377
  /* set the digits to zero */
378
324
  for (i = 0; i < MP_PREC; i++) {
379
288
      a->dp[i] = 0;
380
288
  }
381
382
  /* set the used to zero, allocated digits to the default precision
383
   * and sign to positive */
384
36
  a->used  = 0;
385
36
  a->alloc = MP_PREC;
386
36
  a->sign  = MP_ZPOS;
387
388
36
  return MP_OKAY;
389
36
}
390
391
392
/* clear one (frees)  */
393
static void mp_clear (mp_int * a)
394
36
{
395
36
  int i;
396
397
  /* only do anything if a hasn't been freed previously */
398
36
  if (a->dp != NULL) {
399
    /* first zero the digits */
400
356
    for (i = 0; i < a->used; i++) {
401
320
        a->dp[i] = 0;
402
320
    }
403
404
    /* free ram */
405
36
    XFREE(a->dp);
406
407
    /* reset members to make debugging easier */
408
36
    a->dp    = NULL;
409
36
    a->alloc = a->used = 0;
410
36
    a->sign  = MP_ZPOS;
411
36
  }
412
36
}
413
414
415
/* high level addition (handles signs) */
416
static int mp_add (mp_int * a, mp_int * b, mp_int * c)
417
0
{
418
0
  int     sa, sb, res;
419
420
  /* get sign of both inputs */
421
0
  sa = a->sign;
422
0
  sb = b->sign;
423
424
  /* handle two cases, not four */
425
0
  if (sa == sb) {
426
    /* both positive or both negative */
427
    /* add their magnitudes, copy the sign */
428
0
    c->sign = sa;
429
0
    res = s_mp_add (a, b, c);
430
0
  } else {
431
    /* one positive, the other negative */
432
    /* subtract the one with the greater magnitude from */
433
    /* the one of the lesser magnitude.  The result gets */
434
    /* the sign of the one with the greater magnitude. */
435
0
    if (mp_cmp_mag (a, b) == MP_LT) {
436
0
      c->sign = sb;
437
0
      res = s_mp_sub (b, a, c);
438
0
    } else {
439
0
      c->sign = sa;
440
0
      res = s_mp_sub (a, b, c);
441
0
    }
442
0
  }
443
0
  return res;
444
0
}
445
446
447
/* high level subtraction (handles signs) */
448
static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
449
0
{
450
0
  int     sa, sb, res;
451
452
0
  sa = a->sign;
453
0
  sb = b->sign;
454
455
0
  if (sa != sb) {
456
    /* subtract a negative from a positive, OR */
457
    /* subtract a positive from a negative. */
458
    /* In either case, ADD their magnitudes, */
459
    /* and use the sign of the first number. */
460
0
    c->sign = sa;
461
0
    res = s_mp_add (a, b, c);
462
0
  } else {
463
    /* subtract a positive from a positive, OR */
464
    /* subtract a negative from a negative. */
465
    /* First, take the difference between their */
466
    /* magnitudes, then... */
467
0
    if (mp_cmp_mag (a, b) != MP_LT) {
468
      /* Copy the sign from the first */
469
0
      c->sign = sa;
470
      /* The first has a larger or equal magnitude */
471
0
      res = s_mp_sub (a, b, c);
472
0
    } else {
473
      /* The result has the *opposite* sign from */
474
      /* the first number. */
475
0
      c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
476
      /* The second has a larger magnitude */
477
0
      res = s_mp_sub (b, a, c);
478
0
    }
479
0
  }
480
0
  return res;
481
0
}
482
483
484
/* high level multiplication (handles sign) */
485
static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
486
0
{
487
0
  int     res, neg;
488
0
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
489
490
  /* use Toom-Cook? */
491
#ifdef BN_MP_TOOM_MUL_C
492
  if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
493
    res = mp_toom_mul(a, b, c);
494
  } else
495
#endif
496
#ifdef BN_MP_KARATSUBA_MUL_C
497
  /* use Karatsuba? */
498
  if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
499
    res = mp_karatsuba_mul (a, b, c);
500
  } else
501
#endif
502
0
  {
503
    /* can we use the fast multiplier?
504
     *
505
     * The fast multiplier can be used if the output will
506
     * have less than MP_WARRAY digits and the number of
507
     * digits won't affect carry propagation
508
     */
509
#ifdef BN_FAST_S_MP_MUL_DIGS_C
510
    int     digs = a->used + b->used + 1;
511
512
    if ((digs < MP_WARRAY) &&
513
        MIN(a->used, b->used) <=
514
        (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
515
      res = fast_s_mp_mul_digs (a, b, c, digs);
516
    } else
517
#endif
518
0
#ifdef BN_S_MP_MUL_DIGS_C
519
0
      res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
520
#else
521
#error mp_mul could fail
522
      res = MP_VAL;
523
#endif
524
525
0
  }
526
0
  c->sign = (c->used > 0) ? neg : MP_ZPOS;
527
0
  return res;
528
0
}
529
530
531
/* d = a * b (mod c) */
532
static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
533
0
{
534
0
  int     res;
535
0
  mp_int  t;
536
537
0
  if ((res = mp_init (&t)) != MP_OKAY) {
538
0
    return res;
539
0
  }
540
541
0
  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
542
0
    mp_clear (&t);
543
0
    return res;
544
0
  }
545
0
  res = mp_mod (&t, c, d);
546
0
  mp_clear (&t);
547
0
  return res;
548
0
}
549
550
551
/* c = a mod b, 0 <= c < b */
552
static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
553
0
{
554
0
  mp_int  t;
555
0
  int     res;
556
557
0
  if ((res = mp_init (&t)) != MP_OKAY) {
558
0
    return res;
559
0
  }
560
561
0
  if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
562
0
    mp_clear (&t);
563
0
    return res;
564
0
  }
565
566
0
  if (t.sign != b->sign) {
567
0
    res = mp_add (b, &t, c);
568
0
  } else {
569
0
    res = MP_OKAY;
570
0
    mp_exch (&t, c);
571
0
  }
572
573
0
  mp_clear (&t);
574
0
  return res;
575
0
}
576
577
578
/* this is a shell function that calls either the normal or Montgomery
579
 * exptmod functions.  Originally the call to the montgomery code was
580
 * embedded in the normal function but that wasted a lot of stack space
581
 * for nothing (since 99% of the time the Montgomery code would be called)
582
 */
583
static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
584
0
{
585
0
  int dr;
586
587
  /* modulus P must be positive */
588
0
  if (P->sign == MP_NEG) {
589
0
     return MP_VAL;
590
0
  }
591
592
  /* if exponent X is negative we have to recurse */
593
0
  if (X->sign == MP_NEG) {
594
0
#ifdef LTM_NO_NEG_EXP
595
0
        return MP_VAL;
596
#else /* LTM_NO_NEG_EXP */
597
#ifdef BN_MP_INVMOD_C
598
     mp_int tmpG, tmpX;
599
     int err;
600
601
     /* first compute 1/G mod P */
602
     if ((err = mp_init(&tmpG)) != MP_OKAY) {
603
        return err;
604
     }
605
     if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
606
        mp_clear(&tmpG);
607
        return err;
608
     }
609
610
     /* now get |X| */
611
     if ((err = mp_init(&tmpX)) != MP_OKAY) {
612
        mp_clear(&tmpG);
613
        return err;
614
     }
615
     if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
616
        mp_clear_multi(&tmpG, &tmpX, NULL);
617
        return err;
618
     }
619
620
     /* and now compute (1/G)**|X| instead of G**X [X < 0] */
621
     err = mp_exptmod(&tmpG, &tmpX, P, Y);
622
     mp_clear_multi(&tmpG, &tmpX, NULL);
623
     return err;
624
#else
625
#error mp_exptmod would always fail
626
     /* no invmod */
627
     return MP_VAL;
628
#endif
629
#endif /* LTM_NO_NEG_EXP */
630
0
  }
631
632
/* modified diminished radix reduction */
633
#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
634
  if (mp_reduce_is_2k_l(P) == MP_YES) {
635
     return s_mp_exptmod(G, X, P, Y, 1);
636
  }
637
#endif
638
639
#ifdef BN_MP_DR_IS_MODULUS_C
640
  /* is it a DR modulus? */
641
  dr = mp_dr_is_modulus(P);
642
#else
643
  /* default to no */
644
0
  dr = 0;
645
0
#endif
646
647
#ifdef BN_MP_REDUCE_IS_2K_C
648
  /* if not, is it a unrestricted DR modulus? */
649
  if (dr == 0) {
650
     dr = mp_reduce_is_2k(P) << 1;
651
  }
652
#endif
653
654
  /* if the modulus is odd or dr != 0 use the montgomery method */
655
#ifdef BN_MP_EXPTMOD_FAST_C
656
  if (mp_isodd (P) == 1 || dr !=  0) {
657
    return mp_exptmod_fast (G, X, P, Y, dr);
658
  } else {
659
#endif
660
0
#ifdef BN_S_MP_EXPTMOD_C
661
    /* otherwise use the generic Barrett reduction technique */
662
0
    return s_mp_exptmod (G, X, P, Y, 0);
663
#else
664
#error mp_exptmod could fail
665
    /* no exptmod for evens */
666
    return MP_VAL;
667
#endif
668
#ifdef BN_MP_EXPTMOD_FAST_C
669
  }
670
#endif
671
0
  if (dr == 0) {
672
    /* avoid compiler warnings about possibly unused variable */
673
0
  }
674
0
}
675
676
677
/* compare two ints (signed)*/
678
static int mp_cmp (mp_int * a, mp_int * b)
679
0
{
680
  /* compare based on sign */
681
0
  if (a->sign != b->sign) {
682
0
     if (a->sign == MP_NEG) {
683
0
        return MP_LT;
684
0
     } else {
685
0
        return MP_GT;
686
0
     }
687
0
  }
688
689
  /* compare digits */
690
0
  if (a->sign == MP_NEG) {
691
     /* if negative compare opposite direction */
692
0
     return mp_cmp_mag(b, a);
693
0
  } else {
694
0
     return mp_cmp_mag(a, b);
695
0
  }
696
0
}
697
698
699
/* compare a digit */
700
static int mp_cmp_d(mp_int * a, mp_digit b)
701
4
{
702
  /* compare based on sign */
703
4
  if (a->sign == MP_NEG) {
704
0
    return MP_LT;
705
0
  }
706
707
  /* compare based on magnitude */
708
4
  if (a->used > 1) {
709
0
    return MP_GT;
710
0
  }
711
712
  /* compare the only digit of a to b */
713
4
  if (a->dp[0] > b) {
714
0
    return MP_GT;
715
4
  } else if (a->dp[0] < b) {
716
0
    return MP_LT;
717
4
  } else {
718
4
    return MP_EQ;
719
4
  }
720
4
}
721
722
723
#ifndef LTM_NO_NEG_EXP
724
/* hac 14.61, pp608 */
725
static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
726
{
727
  /* b cannot be negative */
728
  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
729
    return MP_VAL;
730
  }
731
732
#ifdef BN_FAST_MP_INVMOD_C
733
  /* if the modulus is odd we can use a faster routine instead */
734
  if (mp_isodd (b) == 1) {
735
    return fast_mp_invmod (a, b, c);
736
  }
737
#endif
738
739
#ifdef BN_MP_INVMOD_SLOW_C
740
  return mp_invmod_slow(a, b, c);
741
#endif
742
743
#ifndef BN_FAST_MP_INVMOD_C
744
#ifndef BN_MP_INVMOD_SLOW_C
745
#error mp_invmod would always fail
746
#endif
747
#endif
748
  return MP_VAL;
749
}
750
#endif /* LTM_NO_NEG_EXP */
751
752
753
/* get the size for an unsigned equivalent */
754
static int mp_unsigned_bin_size (mp_int * a)
755
0
{
756
0
  int     size = mp_count_bits (a);
757
0
  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
758
0
}
759
760
761
#ifndef LTM_NO_NEG_EXP
762
/* hac 14.61, pp608 */
763
static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
764
{
765
  mp_int  x, y, u, v, A, B, C, D;
766
  int     res;
767
768
  /* b cannot be negative */
769
  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
770
    return MP_VAL;
771
  }
772
773
  /* init temps */
774
  if ((res = mp_init_multi(&x, &y, &u, &v,
775
                           &A, &B, &C, &D, NULL)) != MP_OKAY) {
776
     return res;
777
  }
778
779
  /* x = a, y = b */
780
  if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
781
      goto LBL_ERR;
782
  }
783
  if ((res = mp_copy (b, &y)) != MP_OKAY) {
784
    goto LBL_ERR;
785
  }
786
787
  /* 2. [modified] if x,y are both even then return an error! */
788
  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
789
    res = MP_VAL;
790
    goto LBL_ERR;
791
  }
792
793
  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
794
  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
795
    goto LBL_ERR;
796
  }
797
  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
798
    goto LBL_ERR;
799
  }
800
  mp_set (&A, 1);
801
  mp_set (&D, 1);
802
803
top:
804
  /* 4.  while u is even do */
805
  while (mp_iseven (&u) == 1) {
806
    /* 4.1 u = u/2 */
807
    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
808
      goto LBL_ERR;
809
    }
810
    /* 4.2 if A or B is odd then */
811
    if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
812
      /* A = (A+y)/2, B = (B-x)/2 */
813
      if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
814
         goto LBL_ERR;
815
      }
816
      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
817
         goto LBL_ERR;
818
      }
819
    }
820
    /* A = A/2, B = B/2 */
821
    if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
822
      goto LBL_ERR;
823
    }
824
    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
825
      goto LBL_ERR;
826
    }
827
  }
828
829
  /* 5.  while v is even do */
830
  while (mp_iseven (&v) == 1) {
831
    /* 5.1 v = v/2 */
832
    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
833
      goto LBL_ERR;
834
    }
835
    /* 5.2 if C or D is odd then */
836
    if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
837
      /* C = (C+y)/2, D = (D-x)/2 */
838
      if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
839
         goto LBL_ERR;
840
      }
841
      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
842
         goto LBL_ERR;
843
      }
844
    }
845
    /* C = C/2, D = D/2 */
846
    if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
847
      goto LBL_ERR;
848
    }
849
    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
850
      goto LBL_ERR;
851
    }
852
  }
853
854
  /* 6.  if u >= v then */
855
  if (mp_cmp (&u, &v) != MP_LT) {
856
    /* u = u - v, A = A - C, B = B - D */
857
    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
858
      goto LBL_ERR;
859
    }
860
861
    if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
862
      goto LBL_ERR;
863
    }
864
865
    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
866
      goto LBL_ERR;
867
    }
868
  } else {
869
    /* v - v - u, C = C - A, D = D - B */
870
    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
871
      goto LBL_ERR;
872
    }
873
874
    if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
875
      goto LBL_ERR;
876
    }
877
878
    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
879
      goto LBL_ERR;
880
    }
881
  }
882
883
  /* if not zero goto step 4 */
884
  if (mp_iszero (&u) == 0)
885
    goto top;
886
887
  /* now a = C, b = D, gcd == g*v */
888
889
  /* if v != 1 then there is no inverse */
890
  if (mp_cmp_d (&v, 1) != MP_EQ) {
891
    res = MP_VAL;
892
    goto LBL_ERR;
893
  }
894
895
  /* if its too low */
896
  while (mp_cmp_d(&C, 0) == MP_LT) {
897
      if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
898
         goto LBL_ERR;
899
      }
900
  }
901
902
  /* too big */
903
  while (mp_cmp_mag(&C, b) != MP_LT) {
904
      if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
905
         goto LBL_ERR;
906
      }
907
  }
908
909
  /* C is now the inverse */
910
  mp_exch (&C, c);
911
  res = MP_OKAY;
912
LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
913
  return res;
914
}
915
#endif /* LTM_NO_NEG_EXP */
916
917
918
/* compare maginitude of two ints (unsigned) */
919
static int mp_cmp_mag (mp_int * a, mp_int * b)
920
0
{
921
0
  int     n;
922
0
  mp_digit *tmpa, *tmpb;
923
924
  /* compare based on # of non-zero digits */
925
0
  if (a->used > b->used) {
926
0
    return MP_GT;
927
0
  }
928
929
0
  if (a->used < b->used) {
930
0
    return MP_LT;
931
0
  }
932
933
  /* alias for a */
934
0
  tmpa = a->dp + (a->used - 1);
935
936
  /* alias for b */
937
0
  tmpb = b->dp + (a->used - 1);
938
939
  /* compare based on digits  */
940
0
  for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
941
0
    if (*tmpa > *tmpb) {
942
0
      return MP_GT;
943
0
    }
944
945
0
    if (*tmpa < *tmpb) {
946
0
      return MP_LT;
947
0
    }
948
0
  }
949
0
  return MP_EQ;
950
0
}
951
952
953
/* reads a unsigned char array, assumes the msb is stored first [big endian] */
954
static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
955
20
{
956
20
  int     res;
957
958
  /* make sure there are at least two digits */
959
20
  if (a->alloc < 2) {
960
0
     if ((res = mp_grow(a, 2)) != MP_OKAY) {
961
0
        return res;
962
0
     }
963
0
  }
964
965
  /* zero the int */
966
20
  mp_zero (a);
967
968
  /* read the bytes in */
969
2.34k
  while (c-- > 0) {
970
2.32k
    if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
971
0
      return res;
972
0
    }
973
974
2.32k
#ifndef MP_8BIT
975
2.32k
      a->dp[0] |= *b++;
976
2.32k
      a->used += 1;
977
#else
978
      a->dp[0] = (*b & MP_MASK);
979
      a->dp[1] |= ((*b++ >> 7U) & 1);
980
      a->used += 2;
981
#endif
982
2.32k
  }
983
20
  mp_clamp (a);
984
20
  return MP_OKAY;
985
20
}
986
987
988
/* store in unsigned [big endian] format */
989
static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
990
0
{
991
0
  int     x, res;
992
0
  mp_int  t;
993
994
0
  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
995
0
    return res;
996
0
  }
997
998
0
  x = 0;
999
0
  while (mp_iszero (&t) == 0) {
1000
0
#ifndef MP_8BIT
1001
0
      b[x++] = (unsigned char) (t.dp[0] & 255);
1002
#else
1003
      b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
1004
#endif
1005
0
    if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
1006
0
      mp_clear (&t);
1007
0
      return res;
1008
0
    }
1009
0
  }
1010
0
  bn_reverse (b, x);
1011
0
  mp_clear (&t);
1012
0
  return MP_OKAY;
1013
0
}
1014
1015
1016
/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1017
static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
1018
0
{
1019
0
  mp_digit D, r, rr;
1020
0
  int     x, res;
1021
0
  mp_int  t;
1022
1023
1024
  /* if the shift count is <= 0 then we do no work */
1025
0
  if (b <= 0) {
1026
0
    res = mp_copy (a, c);
1027
0
    if (d != NULL) {
1028
0
      mp_zero (d);
1029
0
    }
1030
0
    return res;
1031
0
  }
1032
1033
0
  if ((res = mp_init (&t)) != MP_OKAY) {
1034
0
    return res;
1035
0
  }
1036
1037
  /* get the remainder */
1038
0
  if (d != NULL) {
1039
0
    if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1040
0
      mp_clear (&t);
1041
0
      return res;
1042
0
    }
1043
0
  }
1044
1045
  /* copy */
1046
0
  if ((res = mp_copy (a, c)) != MP_OKAY) {
1047
0
    mp_clear (&t);
1048
0
    return res;
1049
0
  }
1050
1051
  /* shift by as many digits in the bit count */
1052
0
  if (b >= (int)DIGIT_BIT) {
1053
0
    mp_rshd (c, b / DIGIT_BIT);
1054
0
  }
1055
1056
  /* shift any bit count < DIGIT_BIT */
1057
0
  D = (mp_digit) (b % DIGIT_BIT);
1058
0
  if (D != 0) {
1059
0
    register mp_digit *tmpc, mask, shift;
1060
1061
    /* mask */
1062
0
    mask = (((mp_digit)1) << D) - 1;
1063
1064
    /* shift for lsb */
1065
0
    shift = DIGIT_BIT - D;
1066
1067
    /* alias */
1068
0
    tmpc = c->dp + (c->used - 1);
1069
1070
    /* carry */
1071
0
    r = 0;
1072
0
    for (x = c->used - 1; x >= 0; x--) {
1073
      /* get the lower  bits of this word in a temp */
1074
0
      rr = *tmpc & mask;
1075
1076
      /* shift the current word and mix in the carry bits from the previous word */
1077
0
      *tmpc = (*tmpc >> D) | (r << shift);
1078
0
      --tmpc;
1079
1080
      /* set the carry to the carry bits of the current word found above */
1081
0
      r = rr;
1082
0
    }
1083
0
  }
1084
0
  mp_clamp (c);
1085
0
  if (d != NULL) {
1086
0
    mp_exch (&t, d);
1087
0
  }
1088
0
  mp_clear (&t);
1089
0
  return MP_OKAY;
1090
0
}
1091
1092
1093
static int mp_init_copy (mp_int * a, mp_int * b)
1094
0
{
1095
0
  int     res;
1096
1097
0
  if ((res = mp_init (a)) != MP_OKAY) {
1098
0
    return res;
1099
0
  }
1100
0
  return mp_copy (b, a);
1101
0
}
1102
1103
1104
/* set to zero */
1105
static void mp_zero (mp_int * a)
1106
20
{
1107
20
  int       n;
1108
20
  mp_digit *tmp;
1109
1110
20
  a->sign = MP_ZPOS;
1111
20
  a->used = 0;
1112
1113
20
  tmp = a->dp;
1114
180
  for (n = 0; n < a->alloc; n++) {
1115
160
     *tmp++ = 0;
1116
160
  }
1117
20
}
1118
1119
1120
/* copy, b = a */
1121
static int mp_copy (mp_int * a, mp_int * b)
1122
0
{
1123
0
  int     res, n;
1124
1125
  /* if dst == src do nothing */
1126
0
  if (a == b) {
1127
0
    return MP_OKAY;
1128
0
  }
1129
1130
  /* grow dest */
1131
0
  if (b->alloc < a->used) {
1132
0
     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1133
0
        return res;
1134
0
     }
1135
0
  }
1136
1137
  /* zero b and copy the parameters over */
1138
0
  {
1139
0
    register mp_digit *tmpa, *tmpb;
1140
1141
    /* pointer aliases */
1142
1143
    /* source */
1144
0
    tmpa = a->dp;
1145
1146
    /* destination */
1147
0
    tmpb = b->dp;
1148
1149
    /* copy all the digits */
1150
0
    for (n = 0; n < a->used; n++) {
1151
0
      *tmpb++ = *tmpa++;
1152
0
    }
1153
1154
    /* clear high digits */
1155
0
    for (; n < b->used; n++) {
1156
0
      *tmpb++ = 0;
1157
0
    }
1158
0
  }
1159
1160
  /* copy used count and sign */
1161
0
  b->used = a->used;
1162
0
  b->sign = a->sign;
1163
0
  return MP_OKAY;
1164
0
}
1165
1166
1167
/* shift right a certain amount of digits */
1168
static void mp_rshd (mp_int * a, int b)
1169
0
{
1170
0
  int     x;
1171
1172
  /* if b <= 0 then ignore it */
1173
0
  if (b <= 0) {
1174
0
    return;
1175
0
  }
1176
1177
  /* if b > used then simply zero it and return */
1178
0
  if (a->used <= b) {
1179
0
    mp_zero (a);
1180
0
    return;
1181
0
  }
1182
1183
0
  {
1184
0
    register mp_digit *bottom, *top;
1185
1186
    /* shift the digits down */
1187
1188
    /* bottom */
1189
0
    bottom = a->dp;
1190
1191
    /* top [offset into digits] */
1192
0
    top = a->dp + b;
1193
1194
    /* this is implemented as a sliding window where
1195
     * the window is b-digits long and digits from
1196
     * the top of the window are copied to the bottom
1197
     *
1198
     * e.g.
1199
1200
     b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
1201
                 /\                   |      ---->
1202
                  \-------------------/      ---->
1203
     */
1204
0
    for (x = 0; x < (a->used - b); x++) {
1205
0
      *bottom++ = *top++;
1206
0
    }
1207
1208
    /* zero the top digits */
1209
0
    for (; x < a->used; x++) {
1210
0
      *bottom++ = 0;
1211
0
    }
1212
0
  }
1213
1214
  /* remove excess digits */
1215
0
  a->used -= b;
1216
0
}
1217
1218
1219
/* swap the elements of two integers, for cases where you can't simply swap the
1220
 * mp_int pointers around
1221
 */
1222
static void mp_exch (mp_int * a, mp_int * b)
1223
0
{
1224
0
  mp_int  t;
1225
1226
0
  t  = *a;
1227
0
  *a = *b;
1228
0
  *b = t;
1229
0
}
1230
1231
1232
/* trim unused digits
1233
 *
1234
 * This is used to ensure that leading zero digits are
1235
 * trimed and the leading "used" digit will be non-zero
1236
 * Typically very fast.  Also fixes the sign if there
1237
 * are no more leading digits
1238
 */
1239
static void mp_clamp (mp_int * a)
1240
2.34k
{
1241
  /* decrease used while the most significant digit is
1242
   * zero.
1243
   */
1244
4.34k
  while (a->used > 0 && a->dp[a->used - 1] == 0) {
1245
2.00k
    --(a->used);
1246
2.00k
  }
1247
1248
  /* reset the sign flag if used == 0 */
1249
2.34k
  if (a->used == 0) {
1250
34
    a->sign = MP_ZPOS;
1251
34
  }
1252
2.34k
}
1253
1254
1255
/* grow as required */
1256
static int mp_grow (mp_int * a, int size)
1257
18
{
1258
18
  int     i;
1259
18
  mp_digit *tmp;
1260
1261
  /* if the alloc size is smaller alloc more ram */
1262
18
  if (a->alloc < size) {
1263
    /* ensure there are always at least MP_PREC digits extra on top */
1264
18
    size += (MP_PREC * 2) - (size % MP_PREC);
1265
1266
    /* reallocate the array a->dp
1267
     *
1268
     * We store the return in a temporary variable
1269
     * in case the operation failed we don't want
1270
     * to overwrite the dp member of a.
1271
     */
1272
18
    tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
1273
18
    if (tmp == NULL) {
1274
      /* reallocation failed but "a" is still valid [can be freed] */
1275
0
      return MP_MEM;
1276
0
    }
1277
1278
    /* reallocation succeeded so set a->dp */
1279
18
    a->dp = tmp;
1280
1281
    /* zero excess digits */
1282
18
    i        = a->alloc;
1283
18
    a->alloc = size;
1284
306
    for (; i < a->alloc; i++) {
1285
288
      a->dp[i] = 0;
1286
288
    }
1287
18
  }
1288
18
  return MP_OKAY;
1289
18
}
1290
1291
1292
#ifdef BN_MP_ABS_C
1293
/* b = |a|
1294
 *
1295
 * Simple function copies the input and fixes the sign to positive
1296
 */
1297
static int mp_abs (mp_int * a, mp_int * b)
1298
0
{
1299
0
  int     res;
1300
1301
  /* copy a to b */
1302
0
  if (a != b) {
1303
0
     if ((res = mp_copy (a, b)) != MP_OKAY) {
1304
0
       return res;
1305
0
     }
1306
0
  }
1307
1308
  /* force the sign of b to positive */
1309
0
  b->sign = MP_ZPOS;
1310
1311
0
  return MP_OKAY;
1312
0
}
1313
#endif
1314
1315
1316
/* set to a digit */
1317
static void mp_set (mp_int * a, mp_digit b)
1318
0
{
1319
0
  mp_zero (a);
1320
0
  a->dp[0] = b & MP_MASK;
1321
0
  a->used  = (a->dp[0] != 0) ? 1 : 0;
1322
0
}
1323
1324
1325
#ifndef LTM_NO_NEG_EXP
1326
/* b = a/2 */
1327
static int mp_div_2(mp_int * a, mp_int * b)
1328
{
1329
  int     x, res, oldused;
1330
1331
  /* copy */
1332
  if (b->alloc < a->used) {
1333
    if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1334
      return res;
1335
    }
1336
  }
1337
1338
  oldused = b->used;
1339
  b->used = a->used;
1340
  {
1341
    register mp_digit r, rr, *tmpa, *tmpb;
1342
1343
    /* source alias */
1344
    tmpa = a->dp + b->used - 1;
1345
1346
    /* dest alias */
1347
    tmpb = b->dp + b->used - 1;
1348
1349
    /* carry */
1350
    r = 0;
1351
    for (x = b->used - 1; x >= 0; x--) {
1352
      /* get the carry for the next iteration */
1353
      rr = *tmpa & 1;
1354
1355
      /* shift the current digit, add in carry and store */
1356
      *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1357
1358
      /* forward carry to next iteration */
1359
      r = rr;
1360
    }
1361
1362
    /* zero excess digits */
1363
    tmpb = b->dp + b->used;
1364
    for (x = b->used; x < oldused; x++) {
1365
      *tmpb++ = 0;
1366
    }
1367
  }
1368
  b->sign = a->sign;
1369
  mp_clamp (b);
1370
  return MP_OKAY;
1371
}
1372
#endif /* LTM_NO_NEG_EXP */
1373
1374
1375
/* shift left by a certain bit count */
1376
static int mp_mul_2d (mp_int * a, int b, mp_int * c)
1377
2.32k
{
1378
2.32k
  mp_digit d;
1379
2.32k
  int      res;
1380
1381
  /* copy */
1382
2.32k
  if (a != c) {
1383
0
     if ((res = mp_copy (a, c)) != MP_OKAY) {
1384
0
       return res;
1385
0
     }
1386
0
  }
1387
1388
2.32k
  if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
1389
18
     if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
1390
0
       return res;
1391
0
     }
1392
18
  }
1393
1394
  /* shift by as many digits in the bit count */
1395
2.32k
  if (b >= (int)DIGIT_BIT) {
1396
0
    if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
1397
0
      return res;
1398
0
    }
1399
0
  }
1400
1401
  /* shift any bit count < DIGIT_BIT */
1402
2.32k
  d = (mp_digit) (b % DIGIT_BIT);
1403
2.32k
  if (d != 0) {
1404
2.32k
    register mp_digit *tmpc, shift, mask, r, rr;
1405
2.32k
    register int x;
1406
1407
    /* bitmask for carries */
1408
2.32k
    mask = (((mp_digit)1) << d) - 1;
1409
1410
    /* shift for msbs */
1411
2.32k
    shift = DIGIT_BIT - d;
1412
1413
    /* alias */
1414
2.32k
    tmpc = c->dp;
1415
1416
    /* carry */
1417
2.32k
    r    = 0;
1418
33.9k
    for (x = 0; x < c->used; x++) {
1419
      /* get the higher bits of the current word */
1420
31.5k
      rr = (*tmpc >> shift) & mask;
1421
1422
      /* shift the current word and OR in the carry */
1423
31.5k
      *tmpc = ((*tmpc << d) | r) & MP_MASK;
1424
31.5k
      ++tmpc;
1425
1426
      /* set the carry to the carry bits of the current word */
1427
31.5k
      r = rr;
1428
31.5k
    }
1429
1430
    /* set final carry */
1431
2.32k
    if (r != 0) {
1432
0
       c->dp[(c->used)++] = r;
1433
0
    }
1434
2.32k
  }
1435
2.32k
  mp_clamp (c);
1436
2.32k
  return MP_OKAY;
1437
2.32k
}
1438
1439
1440
#ifdef BN_MP_INIT_MULTI_C
1441
static int mp_init_multi(mp_int *mp, ...)
1442
0
{
1443
0
    mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
1444
0
    int n = 0;                 /* Number of ok inits */
1445
0
    mp_int* cur_arg = mp;
1446
0
    va_list args;
1447
1448
0
    va_start(args, mp);        /* init args to next argument from caller */
1449
0
    while (cur_arg != NULL) {
1450
0
        if (mp_init(cur_arg) != MP_OKAY) {
1451
            /* Oops - error! Back-track and mp_clear what we already
1452
               succeeded in init-ing, then return error.
1453
            */
1454
0
            va_list clean_args;
1455
1456
            /* end the current list */
1457
0
            va_end(args);
1458
1459
            /* now start cleaning up */
1460
0
            cur_arg = mp;
1461
0
            va_start(clean_args, mp);
1462
0
            while (n--) {
1463
0
                mp_clear(cur_arg);
1464
0
                cur_arg = va_arg(clean_args, mp_int*);
1465
0
            }
1466
0
            va_end(clean_args);
1467
0
            return MP_MEM;
1468
0
        }
1469
0
        n++;
1470
0
        cur_arg = va_arg(args, mp_int*);
1471
0
    }
1472
0
    va_end(args);
1473
0
    return res;                /* Assumed ok, if error flagged above. */
1474
0
}
1475
#endif
1476
1477
1478
#ifdef BN_MP_CLEAR_MULTI_C
1479
static void mp_clear_multi(mp_int *mp, ...)
1480
0
{
1481
0
    mp_int* next_mp = mp;
1482
0
    va_list args;
1483
0
    va_start(args, mp);
1484
0
    while (next_mp != NULL) {
1485
0
        mp_clear(next_mp);
1486
0
        next_mp = va_arg(args, mp_int*);
1487
0
    }
1488
0
    va_end(args);
1489
0
}
1490
#endif
1491
1492
1493
/* shift left a certain amount of digits */
1494
static int mp_lshd (mp_int * a, int b)
1495
0
{
1496
0
  int     x, res;
1497
1498
  /* if its less than zero return */
1499
0
  if (b <= 0) {
1500
0
    return MP_OKAY;
1501
0
  }
1502
1503
  /* grow to fit the new digits */
1504
0
  if (a->alloc < a->used + b) {
1505
0
     if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1506
0
       return res;
1507
0
     }
1508
0
  }
1509
1510
0
  {
1511
0
    register mp_digit *top, *bottom;
1512
1513
    /* increment the used by the shift amount then copy upwards */
1514
0
    a->used += b;
1515
1516
    /* top */
1517
0
    top = a->dp + a->used - 1;
1518
1519
    /* base */
1520
0
    bottom = a->dp + a->used - 1 - b;
1521
1522
    /* much like mp_rshd this is implemented using a sliding window
1523
     * except the window goes the otherway around.  Copying from
1524
     * the bottom to the top.  see bn_mp_rshd.c for more info.
1525
     */
1526
0
    for (x = a->used - 1; x >= b; x--) {
1527
0
      *top-- = *bottom--;
1528
0
    }
1529
1530
    /* zero the lower digits */
1531
0
    top = a->dp;
1532
0
    for (x = 0; x < b; x++) {
1533
0
      *top++ = 0;
1534
0
    }
1535
0
  }
1536
0
  return MP_OKAY;
1537
0
}
1538
1539
1540
/* returns the number of bits in an int */
1541
static int mp_count_bits (mp_int * a)
1542
0
{
1543
0
  int     r;
1544
0
  mp_digit q;
1545
1546
  /* shortcut */
1547
0
  if (a->used == 0) {
1548
0
    return 0;
1549
0
  }
1550
1551
  /* get number of digits and add that */
1552
0
  r = (a->used - 1) * DIGIT_BIT;
1553
1554
  /* take the last digit and count the bits in it */
1555
0
  q = a->dp[a->used - 1];
1556
0
  while (q > ((mp_digit) 0)) {
1557
0
    ++r;
1558
0
    q >>= ((mp_digit) 1);
1559
0
  }
1560
0
  return r;
1561
0
}
1562
1563
1564
/* calc a value mod 2**b */
1565
static int mp_mod_2d (mp_int * a, int b, mp_int * c)
1566
0
{
1567
0
  int     x, res;
1568
1569
  /* if b is <= 0 then zero the int */
1570
0
  if (b <= 0) {
1571
0
    mp_zero (c);
1572
0
    return MP_OKAY;
1573
0
  }
1574
1575
  /* if the modulus is larger than the value than return */
1576
0
  if (b >= (int) (a->used * DIGIT_BIT)) {
1577
0
    res = mp_copy (a, c);
1578
0
    return res;
1579
0
  }
1580
1581
  /* copy */
1582
0
  if ((res = mp_copy (a, c)) != MP_OKAY) {
1583
0
    return res;
1584
0
  }
1585
1586
  /* zero digits above the last digit of the modulus */
1587
0
  for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1588
0
    c->dp[x] = 0;
1589
0
  }
1590
  /* clear the digit that is not completely outside/inside the modulus */
1591
0
  c->dp[b / DIGIT_BIT] &=
1592
0
    (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
1593
0
  mp_clamp (c);
1594
0
  return MP_OKAY;
1595
0
}
1596
1597
1598
#ifdef BN_MP_DIV_SMALL
1599
1600
/* slower bit-bang division... also smaller */
1601
static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1602
0
{
1603
0
   mp_int ta, tb, tq, q;
1604
0
   int    res, n, n2;
1605
1606
  /* is divisor zero ? */
1607
0
  if (mp_iszero (b) == 1) {
1608
0
    return MP_VAL;
1609
0
  }
1610
1611
  /* if a < b then q=0, r = a */
1612
0
  if (mp_cmp_mag (a, b) == MP_LT) {
1613
0
    if (d != NULL) {
1614
0
      res = mp_copy (a, d);
1615
0
    } else {
1616
0
      res = MP_OKAY;
1617
0
    }
1618
0
    if (c != NULL) {
1619
0
      mp_zero (c);
1620
0
    }
1621
0
    return res;
1622
0
  }
1623
1624
  /* init our temps */
1625
0
  if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL)) != MP_OKAY) {
1626
0
     return res;
1627
0
  }
1628
1629
1630
0
  mp_set(&tq, 1);
1631
0
  n = mp_count_bits(a) - mp_count_bits(b);
1632
0
  if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1633
0
      ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1634
0
      ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1635
0
      ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1636
0
      goto LBL_ERR;
1637
0
  }
1638
1639
0
  while (n-- >= 0) {
1640
0
     if (mp_cmp(&tb, &ta) != MP_GT) {
1641
0
        if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1642
0
            ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1643
0
           goto LBL_ERR;
1644
0
        }
1645
0
     }
1646
0
     if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1647
0
         ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1648
0
           goto LBL_ERR;
1649
0
     }
1650
0
  }
1651
1652
  /* now q == quotient and ta == remainder */
1653
0
  n  = a->sign;
1654
0
  n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1655
0
  if (c != NULL) {
1656
0
     mp_exch(c, &q);
1657
0
     c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1658
0
  }
1659
0
  if (d != NULL) {
1660
0
     mp_exch(d, &ta);
1661
0
     d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1662
0
  }
1663
0
LBL_ERR:
1664
0
   mp_clear_multi(&ta, &tb, &tq, &q, NULL);
1665
0
   return res;
1666
0
}
1667
1668
#else
1669
1670
/* integer signed division.
1671
 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1672
 * HAC pp.598 Algorithm 14.20
1673
 *
1674
 * Note that the description in HAC is horribly
1675
 * incomplete.  For example, it doesn't consider
1676
 * the case where digits are removed from 'x' in
1677
 * the inner loop.  It also doesn't consider the
1678
 * case that y has fewer than three digits, etc..
1679
 *
1680
 * The overall algorithm is as described as
1681
 * 14.20 from HAC but fixed to treat these cases.
1682
*/
1683
static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1684
{
1685
  mp_int  q, x, y, t1, t2;
1686
  int     res, n, t, i, norm, neg;
1687
1688
  /* is divisor zero ? */
1689
  if (mp_iszero (b) == 1) {
1690
    return MP_VAL;
1691
  }
1692
1693
  /* if a < b then q=0, r = a */
1694
  if (mp_cmp_mag (a, b) == MP_LT) {
1695
    if (d != NULL) {
1696
      res = mp_copy (a, d);
1697
    } else {
1698
      res = MP_OKAY;
1699
    }
1700
    if (c != NULL) {
1701
      mp_zero (c);
1702
    }
1703
    return res;
1704
  }
1705
1706
  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1707
    return res;
1708
  }
1709
  q.used = a->used + 2;
1710
1711
  if ((res = mp_init (&t1)) != MP_OKAY) {
1712
    goto LBL_Q;
1713
  }
1714
1715
  if ((res = mp_init (&t2)) != MP_OKAY) {
1716
    goto LBL_T1;
1717
  }
1718
1719
  if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1720
    goto LBL_T2;
1721
  }
1722
1723
  if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1724
    goto LBL_X;
1725
  }
1726
1727
  /* fix the sign */
1728
  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1729
  x.sign = y.sign = MP_ZPOS;
1730
1731
  /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1732
  norm = mp_count_bits(&y) % DIGIT_BIT;
1733
  if (norm < (int)(DIGIT_BIT-1)) {
1734
     norm = (DIGIT_BIT-1) - norm;
1735
     if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1736
       goto LBL_Y;
1737
     }
1738
     if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1739
       goto LBL_Y;
1740
     }
1741
  } else {
1742
     norm = 0;
1743
  }
1744
1745
  /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1746
  n = x.used - 1;
1747
  t = y.used - 1;
1748
1749
  /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1750
  if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1751
    goto LBL_Y;
1752
  }
1753
1754
  while (mp_cmp (&x, &y) != MP_LT) {
1755
    ++(q.dp[n - t]);
1756
    if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1757
      goto LBL_Y;
1758
    }
1759
  }
1760
1761
  /* reset y by shifting it back down */
1762
  mp_rshd (&y, n - t);
1763
1764
  /* step 3. for i from n down to (t + 1) */
1765
  for (i = n; i >= (t + 1); i--) {
1766
    if (i > x.used) {
1767
      continue;
1768
    }
1769
1770
    /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1771
     * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1772
    if (x.dp[i] == y.dp[t]) {
1773
      q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1774
    } else {
1775
      mp_word tmp;
1776
      tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1777
      tmp |= ((mp_word) x.dp[i - 1]);
1778
      tmp /= ((mp_word) y.dp[t]);
1779
      if (tmp > (mp_word) MP_MASK)
1780
        tmp = MP_MASK;
1781
      q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1782
    }
1783
1784
    /* while (q{i-t-1} * (yt * b + y{t-1})) >
1785
             xi * b**2 + xi-1 * b + xi-2
1786
1787
       do q{i-t-1} -= 1;
1788
    */
1789
    q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1790
    do {
1791
      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1792
1793
      /* find left hand */
1794
      mp_zero (&t1);
1795
      t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1796
      t1.dp[1] = y.dp[t];
1797
      t1.used = 2;
1798
      if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1799
        goto LBL_Y;
1800
      }
1801
1802
      /* find right hand */
1803
      t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1804
      t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1805
      t2.dp[2] = x.dp[i];
1806
      t2.used = 3;
1807
    } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1808
1809
    /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1810
    if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1811
      goto LBL_Y;
1812
    }
1813
1814
    if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1815
      goto LBL_Y;
1816
    }
1817
1818
    if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1819
      goto LBL_Y;
1820
    }
1821
1822
    /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1823
    if (x.sign == MP_NEG) {
1824
      if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1825
        goto LBL_Y;
1826
      }
1827
      if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1828
        goto LBL_Y;
1829
      }
1830
      if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1831
        goto LBL_Y;
1832
      }
1833
1834
      q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1835
    }
1836
  }
1837
1838
  /* now q is the quotient and x is the remainder
1839
   * [which we have to normalize]
1840
   */
1841
1842
  /* get sign before writing to c */
1843
  x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1844
1845
  if (c != NULL) {
1846
    mp_clamp (&q);
1847
    mp_exch (&q, c);
1848
    c->sign = neg;
1849
  }
1850
1851
  if (d != NULL) {
1852
    mp_div_2d (&x, norm, &x, NULL);
1853
    mp_exch (&x, d);
1854
  }
1855
1856
  res = MP_OKAY;
1857
1858
LBL_Y:mp_clear (&y);
1859
LBL_X:mp_clear (&x);
1860
LBL_T2:mp_clear (&t2);
1861
LBL_T1:mp_clear (&t1);
1862
LBL_Q:mp_clear (&q);
1863
  return res;
1864
}
1865
1866
#endif
1867
1868
1869
#ifdef MP_LOW_MEM
1870
   #define TAB_SIZE 32
1871
#else
1872
   #define TAB_SIZE 256
1873
#endif
1874
1875
static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
1876
0
{
1877
0
  mp_int  M[TAB_SIZE], res, mu;
1878
0
  mp_digit buf;
1879
0
  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1880
0
  int (*redux)(mp_int*,mp_int*,mp_int*);
1881
1882
  /* find window size */
1883
0
  x = mp_count_bits (X);
1884
0
  if (x <= 7) {
1885
0
    winsize = 2;
1886
0
  } else if (x <= 36) {
1887
0
    winsize = 3;
1888
0
  } else if (x <= 140) {
1889
0
    winsize = 4;
1890
0
  } else if (x <= 450) {
1891
0
    winsize = 5;
1892
0
  } else if (x <= 1303) {
1893
0
    winsize = 6;
1894
0
  } else if (x <= 3529) {
1895
0
    winsize = 7;
1896
0
  } else {
1897
0
    winsize = 8;
1898
0
  }
1899
1900
0
#ifdef MP_LOW_MEM
1901
0
    if (winsize > 5) {
1902
0
       winsize = 5;
1903
0
    }
1904
0
#endif
1905
1906
  /* init M array */
1907
  /* init first cell */
1908
0
  if ((err = mp_init(&M[1])) != MP_OKAY) {
1909
0
     return err;
1910
0
  }
1911
1912
  /* now init the second half of the array */
1913
0
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1914
0
    if ((err = mp_init(&M[x])) != MP_OKAY) {
1915
0
      for (y = 1<<(winsize-1); y < x; y++) {
1916
0
        mp_clear (&M[y]);
1917
0
      }
1918
0
      mp_clear(&M[1]);
1919
0
      return err;
1920
0
    }
1921
0
  }
1922
1923
  /* create mu, used for Barrett reduction */
1924
0
  if ((err = mp_init (&mu)) != MP_OKAY) {
1925
0
    goto LBL_M;
1926
0
  }
1927
1928
0
  if (redmode == 0) {
1929
0
     if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
1930
0
        goto LBL_MU;
1931
0
     }
1932
0
     redux = mp_reduce;
1933
0
  } else {
1934
0
     if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
1935
0
        goto LBL_MU;
1936
0
     }
1937
0
     redux = mp_reduce_2k_l;
1938
0
  }
1939
1940
  /* create M table
1941
   *
1942
   * The M table contains powers of the base,
1943
   * e.g. M[x] = G**x mod P
1944
   *
1945
   * The first half of the table is not
1946
   * computed though accept for M[0] and M[1]
1947
   */
1948
0
  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
1949
0
    goto LBL_MU;
1950
0
  }
1951
1952
  /* compute the value at M[1<<(winsize-1)] by squaring
1953
   * M[1] (winsize-1) times
1954
   */
1955
0
  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1956
0
    goto LBL_MU;
1957
0
  }
1958
1959
0
  for (x = 0; x < (winsize - 1); x++) {
1960
    /* square it */
1961
0
    if ((err = mp_sqr (&M[1 << (winsize - 1)],
1962
0
                       &M[1 << (winsize - 1)])) != MP_OKAY) {
1963
0
      goto LBL_MU;
1964
0
    }
1965
1966
    /* reduce modulo P */
1967
0
    if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
1968
0
      goto LBL_MU;
1969
0
    }
1970
0
  }
1971
1972
  /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
1973
   * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
1974
   */
1975
0
  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1976
0
    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1977
0
      goto LBL_MU;
1978
0
    }
1979
0
    if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
1980
0
      goto LBL_MU;
1981
0
    }
1982
0
  }
1983
1984
  /* setup result */
1985
0
  if ((err = mp_init (&res)) != MP_OKAY) {
1986
0
    goto LBL_MU;
1987
0
  }
1988
0
  mp_set (&res, 1);
1989
1990
  /* set initial mode and bit cnt */
1991
0
  mode   = 0;
1992
0
  bitcnt = 1;
1993
0
  buf    = 0;
1994
0
  digidx = X->used - 1;
1995
0
  bitcpy = 0;
1996
0
  bitbuf = 0;
1997
1998
0
  for (;;) {
1999
    /* grab next digit as required */
2000
0
    if (--bitcnt == 0) {
2001
      /* if digidx == -1 we are out of digits */
2002
0
      if (digidx == -1) {
2003
0
        break;
2004
0
      }
2005
      /* read next digit and reset the bitcnt */
2006
0
      buf    = X->dp[digidx--];
2007
0
      bitcnt = (int) DIGIT_BIT;
2008
0
    }
2009
2010
    /* grab the next msb from the exponent */
2011
0
    y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
2012
0
    buf <<= (mp_digit)1;
2013
2014
    /* if the bit is zero and mode == 0 then we ignore it
2015
     * These represent the leading zero bits before the first 1 bit
2016
     * in the exponent.  Technically this opt is not required but it
2017
     * does lower the # of trivial squaring/reductions used
2018
     */
2019
0
    if (mode == 0 && y == 0) {
2020
0
      continue;
2021
0
    }
2022
2023
    /* if the bit is zero and mode == 1 then we square */
2024
0
    if (mode == 1 && y == 0) {
2025
0
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2026
0
        goto LBL_RES;
2027
0
      }
2028
0
      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
2029
0
        goto LBL_RES;
2030
0
      }
2031
0
      continue;
2032
0
    }
2033
2034
    /* else we add it to the window */
2035
0
    bitbuf |= (y << (winsize - ++bitcpy));
2036
0
    mode    = 2;
2037
2038
0
    if (bitcpy == winsize) {
2039
      /* ok window is filled so square as required and multiply  */
2040
      /* square first */
2041
0
      for (x = 0; x < winsize; x++) {
2042
0
        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2043
0
          goto LBL_RES;
2044
0
        }
2045
0
        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
2046
0
          goto LBL_RES;
2047
0
        }
2048
0
      }
2049
2050
      /* then multiply */
2051
0
      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2052
0
        goto LBL_RES;
2053
0
      }
2054
0
      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
2055
0
        goto LBL_RES;
2056
0
      }
2057
2058
      /* empty window and reset */
2059
0
      bitcpy = 0;
2060
0
      bitbuf = 0;
2061
0
      mode   = 1;
2062
0
    }
2063
0
  }
2064
2065
  /* if bits remain then square/multiply */
2066
0
  if (mode == 2 && bitcpy > 0) {
2067
    /* square then multiply if the bit is set */
2068
0
    for (x = 0; x < bitcpy; x++) {
2069
0
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2070
0
        goto LBL_RES;
2071
0
      }
2072
0
      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
2073
0
        goto LBL_RES;
2074
0
      }
2075
2076
0
      bitbuf <<= 1;
2077
0
      if ((bitbuf & (1 << winsize)) != 0) {
2078
        /* then multiply */
2079
0
        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2080
0
          goto LBL_RES;
2081
0
        }
2082
0
        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
2083
0
          goto LBL_RES;
2084
0
        }
2085
0
      }
2086
0
    }
2087
0
  }
2088
2089
0
  mp_exch (&res, Y);
2090
0
  err = MP_OKAY;
2091
0
LBL_RES:mp_clear (&res);
2092
0
LBL_MU:mp_clear (&mu);
2093
0
LBL_M:
2094
0
  mp_clear(&M[1]);
2095
0
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2096
0
    mp_clear (&M[x]);
2097
0
  }
2098
0
  return err;
2099
0
}
2100
2101
2102
/* computes b = a*a */
2103
static int mp_sqr (mp_int * a, mp_int * b)
2104
0
{
2105
0
  int     res;
2106
2107
#ifdef BN_MP_TOOM_SQR_C
2108
  /* use Toom-Cook? */
2109
  if (a->used >= TOOM_SQR_CUTOFF) {
2110
    res = mp_toom_sqr(a, b);
2111
  /* Karatsuba? */
2112
  } else
2113
#endif
2114
#ifdef BN_MP_KARATSUBA_SQR_C
2115
if (a->used >= KARATSUBA_SQR_CUTOFF) {
2116
    res = mp_karatsuba_sqr (a, b);
2117
  } else
2118
#endif
2119
0
  {
2120
#ifdef BN_FAST_S_MP_SQR_C
2121
    /* can we use the fast comba multiplier? */
2122
    if ((a->used * 2 + 1) < MP_WARRAY &&
2123
         a->used <
2124
         (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2125
      res = fast_s_mp_sqr (a, b);
2126
    } else
2127
#endif
2128
0
#ifdef BN_S_MP_SQR_C
2129
0
      res = s_mp_sqr (a, b);
2130
#else
2131
#error mp_sqr could fail
2132
      res = MP_VAL;
2133
#endif
2134
0
  }
2135
0
  b->sign = MP_ZPOS;
2136
0
  return res;
2137
0
}
2138
2139
2140
/* reduces a modulo n where n is of the form 2**p - d
2141
   This differs from reduce_2k since "d" can be larger
2142
   than a single digit.
2143
*/
2144
static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
2145
0
{
2146
0
   mp_int q;
2147
0
   int    p, res;
2148
2149
0
   if ((res = mp_init(&q)) != MP_OKAY) {
2150
0
      return res;
2151
0
   }
2152
2153
0
   p = mp_count_bits(n);
2154
0
top:
2155
   /* q = a/2**p, a = a mod 2**p */
2156
0
   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2157
0
      goto ERR;
2158
0
   }
2159
2160
   /* q = q * d */
2161
0
   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
2162
0
      goto ERR;
2163
0
   }
2164
2165
   /* a = a + q */
2166
0
   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2167
0
      goto ERR;
2168
0
   }
2169
2170
0
   if (mp_cmp_mag(a, n) != MP_LT) {
2171
0
      s_mp_sub(a, n, a);
2172
0
      goto top;
2173
0
   }
2174
2175
0
ERR:
2176
0
   mp_clear(&q);
2177
0
   return res;
2178
0
}
2179
2180
2181
/* determines the setup value */
2182
static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
2183
0
{
2184
0
   int    res;
2185
0
   mp_int tmp;
2186
2187
0
   if ((res = mp_init(&tmp)) != MP_OKAY) {
2188
0
      return res;
2189
0
   }
2190
2191
0
   if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
2192
0
      goto ERR;
2193
0
   }
2194
2195
0
   if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
2196
0
      goto ERR;
2197
0
   }
2198
2199
0
ERR:
2200
0
   mp_clear(&tmp);
2201
0
   return res;
2202
0
}
2203
2204
2205
/* computes a = 2**b
2206
 *
2207
 * Simple algorithm which zeroes the int, grows it then just sets one bit
2208
 * as required.
2209
 */
2210
static int mp_2expt (mp_int * a, int b)
2211
0
{
2212
0
  int     res;
2213
2214
  /* zero a as per default */
2215
0
  mp_zero (a);
2216
2217
  /* grow a to accommodate the single bit */
2218
0
  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
2219
0
    return res;
2220
0
  }
2221
2222
  /* set the used count of where the bit will go */
2223
0
  a->used = b / DIGIT_BIT + 1;
2224
2225
  /* put the single bit in its place */
2226
0
  a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
2227
2228
0
  return MP_OKAY;
2229
0
}
2230
2231
2232
/* pre-calculate the value required for Barrett reduction
2233
 * For a given modulus "b" it calulates the value required in "a"
2234
 */
2235
static int mp_reduce_setup (mp_int * a, mp_int * b)
2236
0
{
2237
0
  int     res;
2238
2239
0
  if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
2240
0
    return res;
2241
0
  }
2242
0
  return mp_div (a, b, a, NULL);
2243
0
}
2244
2245
2246
/* reduces x mod m, assumes 0 < x < m**2, mu is
2247
 * precomputed via mp_reduce_setup.
2248
 * From HAC pp.604 Algorithm 14.42
2249
 */
2250
static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
2251
0
{
2252
0
  mp_int  q;
2253
0
  int     res, um = m->used;
2254
2255
  /* q = x */
2256
0
  if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
2257
0
    return res;
2258
0
  }
2259
2260
  /* q1 = x / b**(k-1)  */
2261
0
  mp_rshd (&q, um - 1);
2262
2263
  /* according to HAC this optimization is ok */
2264
0
  if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2265
0
    if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
2266
0
      goto CLEANUP;
2267
0
    }
2268
0
  } else {
2269
0
#ifdef BN_S_MP_MUL_HIGH_DIGS_C
2270
0
    if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2271
0
      goto CLEANUP;
2272
0
    }
2273
#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
2274
    if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2275
      goto CLEANUP;
2276
    }
2277
#else
2278
    {
2279
#error mp_reduce would always fail
2280
      res = MP_VAL;
2281
      goto CLEANUP;
2282
    }
2283
#endif
2284
0
  }
2285
2286
  /* q3 = q2 / b**(k+1) */
2287
0
  mp_rshd (&q, um + 1);
2288
2289
  /* x = x mod b**(k+1), quick (no division) */
2290
0
  if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2291
0
    goto CLEANUP;
2292
0
  }
2293
2294
  /* q = q * m mod b**(k+1), quick (no division) */
2295
0
  if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
2296
0
    goto CLEANUP;
2297
0
  }
2298
2299
  /* x = x - q */
2300
0
  if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
2301
0
    goto CLEANUP;
2302
0
  }
2303
2304
  /* If x < 0, add b**(k+1) to it */
2305
0
  if (mp_cmp_d (x, 0) == MP_LT) {
2306
0
    mp_set (&q, 1);
2307
0
    if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
2308
0
      goto CLEANUP;
2309
0
    }
2310
0
    if ((res = mp_add (x, &q, x)) != MP_OKAY) {
2311
0
      goto CLEANUP;
2312
0
    }
2313
0
  }
2314
2315
  /* Back off if it's too big */
2316
0
  while (mp_cmp (x, m) != MP_LT) {
2317
0
    if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
2318
0
      goto CLEANUP;
2319
0
    }
2320
0
  }
2321
2322
0
CLEANUP:
2323
0
  mp_clear (&q);
2324
2325
0
  return res;
2326
0
}
2327
2328
2329
/* multiplies |a| * |b| and only computes up to digs digits of result
2330
 * HAC pp. 595, Algorithm 14.12  Modified so you can control how
2331
 * many digits of output are created.
2332
 */
2333
static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2334
0
{
2335
0
  mp_int  t;
2336
0
  int     res, pa, pb, ix, iy;
2337
0
  mp_digit u;
2338
0
  mp_word r;
2339
0
  mp_digit tmpx, *tmpt, *tmpy;
2340
2341
#ifdef BN_FAST_S_MP_MUL_DIGS_C
2342
  /* can we use the fast multiplier? */
2343
  if (((digs) < MP_WARRAY) &&
2344
      MIN (a->used, b->used) <
2345
          (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2346
    return fast_s_mp_mul_digs (a, b, c, digs);
2347
  }
2348
#endif
2349
2350
0
  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
2351
0
    return res;
2352
0
  }
2353
0
  t.used = digs;
2354
2355
  /* compute the digits of the product directly */
2356
0
  pa = a->used;
2357
0
  for (ix = 0; ix < pa; ix++) {
2358
    /* set the carry to zero */
2359
0
    u = 0;
2360
2361
    /* limit ourselves to making digs digits of output */
2362
0
    pb = MIN (b->used, digs - ix);
2363
2364
    /* setup some aliases */
2365
    /* copy of the digit from a used within the nested loop */
2366
0
    tmpx = a->dp[ix];
2367
2368
    /* an alias for the destination shifted ix places */
2369
0
    tmpt = t.dp + ix;
2370
2371
    /* an alias for the digits of b */
2372
0
    tmpy = b->dp;
2373
2374
    /* compute the columns of the output and propagate the carry */
2375
0
    for (iy = 0; iy < pb; iy++) {
2376
      /* compute the column as a mp_word */
2377
0
      r       = ((mp_word)*tmpt) +
2378
0
                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2379
0
                ((mp_word) u);
2380
2381
      /* the new column is the lower part of the result */
2382
0
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2383
2384
      /* get the carry word from the result */
2385
0
      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2386
0
    }
2387
    /* set carry if it is placed below digs */
2388
0
    if (ix + iy < digs) {
2389
0
      *tmpt = u;
2390
0
    }
2391
0
  }
2392
2393
0
  mp_clamp (&t);
2394
0
  mp_exch (&t, c);
2395
2396
0
  mp_clear (&t);
2397
0
  return MP_OKAY;
2398
0
}
2399
2400
2401
#ifdef BN_FAST_S_MP_MUL_DIGS_C
2402
/* Fast (comba) multiplier
2403
 *
2404
 * This is the fast column-array [comba] multiplier.  It is
2405
 * designed to compute the columns of the product first
2406
 * then handle the carries afterwards.  This has the effect
2407
 * of making the nested loops that compute the columns very
2408
 * simple and schedulable on super-scalar processors.
2409
 *
2410
 * This has been modified to produce a variable number of
2411
 * digits of output so if say only a half-product is required
2412
 * you don't have to compute the upper half (a feature
2413
 * required for fast Barrett reduction).
2414
 *
2415
 * Based on Algorithm 14.12 on pp.595 of HAC.
2416
 *
2417
 */
2418
static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2419
{
2420
  int     olduse, res, pa, ix, iz;
2421
  mp_digit W[MP_WARRAY];
2422
  register mp_word  _W;
2423
2424
  /* grow the destination as required */
2425
  if (c->alloc < digs) {
2426
    if ((res = mp_grow (c, digs)) != MP_OKAY) {
2427
      return res;
2428
    }
2429
  }
2430
2431
  /* number of output digits to produce */
2432
  pa = MIN(digs, a->used + b->used);
2433
2434
  /* clear the carry */
2435
  _W = 0;
2436
  os_memset(W, 0, sizeof(W));
2437
  for (ix = 0; ix < pa; ix++) {
2438
      int      tx, ty;
2439
      int      iy;
2440
      mp_digit *tmpx, *tmpy;
2441
2442
      /* get offsets into the two bignums */
2443
      ty = MIN(b->used-1, ix);
2444
      tx = ix - ty;
2445
2446
      /* setup temp aliases */
2447
      tmpx = a->dp + tx;
2448
      tmpy = b->dp + ty;
2449
2450
      /* this is the number of times the loop will iterrate, essentially
2451
         while (tx++ < a->used && ty-- >= 0) { ... }
2452
       */
2453
      iy = MIN(a->used-tx, ty+1);
2454
2455
      /* execute loop */
2456
      for (iz = 0; iz < iy; ++iz) {
2457
         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2458
2459
      }
2460
2461
      /* store term */
2462
      W[ix] = ((mp_digit)_W) & MP_MASK;
2463
2464
      /* make next carry */
2465
      _W = _W >> ((mp_word)DIGIT_BIT);
2466
 }
2467
2468
  /* setup dest */
2469
  olduse  = c->used;
2470
  c->used = pa;
2471
2472
  {
2473
    register mp_digit *tmpc;
2474
    tmpc = c->dp;
2475
    for (ix = 0; ix < pa+1; ix++) {
2476
      /* now extract the previous digit [below the carry] */
2477
      *tmpc++ = W[ix];
2478
    }
2479
2480
    /* clear unused digits [that existed in the old copy of c] */
2481
    for (; ix < olduse; ix++) {
2482
      *tmpc++ = 0;
2483
    }
2484
  }
2485
  mp_clamp (c);
2486
  return MP_OKAY;
2487
}
2488
#endif /* BN_FAST_S_MP_MUL_DIGS_C */
2489
2490
2491
/* init an mp_init for a given size */
2492
static int mp_init_size (mp_int * a, int size)
2493
0
{
2494
0
  int x;
2495
2496
  /* pad size so there are always extra digits */
2497
0
  size += (MP_PREC * 2) - (size % MP_PREC);
2498
2499
  /* alloc mem */
2500
0
  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
2501
0
  if (a->dp == NULL) {
2502
0
    return MP_MEM;
2503
0
  }
2504
2505
  /* set the members */
2506
0
  a->used  = 0;
2507
0
  a->alloc = size;
2508
0
  a->sign  = MP_ZPOS;
2509
2510
  /* zero the digits */
2511
0
  for (x = 0; x < size; x++) {
2512
0
      a->dp[x] = 0;
2513
0
  }
2514
2515
0
  return MP_OKAY;
2516
0
}
2517
2518
2519
/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
2520
static int s_mp_sqr (mp_int * a, mp_int * b)
2521
0
{
2522
0
  mp_int  t;
2523
0
  int     res, ix, iy, pa;
2524
0
  mp_word r;
2525
0
  mp_digit u, tmpx, *tmpt;
2526
2527
0
  pa = a->used;
2528
0
  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
2529
0
    return res;
2530
0
  }
2531
2532
  /* default used is maximum possible size */
2533
0
  t.used = 2*pa + 1;
2534
2535
0
  for (ix = 0; ix < pa; ix++) {
2536
    /* first calculate the digit at 2*ix */
2537
    /* calculate double precision result */
2538
0
    r = ((mp_word) t.dp[2*ix]) +
2539
0
        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
2540
2541
    /* store lower part in result */
2542
0
    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
2543
2544
    /* get the carry */
2545
0
    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2546
2547
    /* left hand side of A[ix] * A[iy] */
2548
0
    tmpx        = a->dp[ix];
2549
2550
    /* alias for where to store the results */
2551
0
    tmpt        = t.dp + (2*ix + 1);
2552
2553
0
    for (iy = ix + 1; iy < pa; iy++) {
2554
      /* first calculate the product */
2555
0
      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
2556
2557
      /* now calculate the double precision result, note we use
2558
       * addition instead of *2 since it's easier to optimize
2559
       */
2560
0
      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
2561
2562
      /* store lower part */
2563
0
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2564
2565
      /* get carry */
2566
0
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2567
0
    }
2568
    /* propagate upwards */
2569
0
    while (u != ((mp_digit) 0)) {
2570
0
      r       = ((mp_word) *tmpt) + ((mp_word) u);
2571
0
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2572
0
      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2573
0
    }
2574
0
  }
2575
2576
0
  mp_clamp (&t);
2577
0
  mp_exch (&t, b);
2578
0
  mp_clear (&t);
2579
0
  return MP_OKAY;
2580
0
}
2581
2582
2583
/* multiplies |a| * |b| and does not compute the lower digs digits
2584
 * [meant to get the higher part of the product]
2585
 */
2586
static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2587
0
{
2588
0
  mp_int  t;
2589
0
  int     res, pa, pb, ix, iy;
2590
0
  mp_digit u;
2591
0
  mp_word r;
2592
0
  mp_digit tmpx, *tmpt, *tmpy;
2593
2594
  /* can we use the fast multiplier? */
2595
#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
2596
  if (((a->used + b->used + 1) < MP_WARRAY)
2597
      && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2598
    return fast_s_mp_mul_high_digs (a, b, c, digs);
2599
  }
2600
#endif
2601
2602
0
  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
2603
0
    return res;
2604
0
  }
2605
0
  t.used = a->used + b->used + 1;
2606
2607
0
  pa = a->used;
2608
0
  pb = b->used;
2609
0
  for (ix = 0; ix < pa; ix++) {
2610
    /* clear the carry */
2611
0
    u = 0;
2612
2613
    /* left hand side of A[ix] * B[iy] */
2614
0
    tmpx = a->dp[ix];
2615
2616
    /* alias to the address of where the digits will be stored */
2617
0
    tmpt = &(t.dp[digs]);
2618
2619
    /* alias for where to read the right hand side from */
2620
0
    tmpy = b->dp + (digs - ix);
2621
2622
0
    for (iy = digs - ix; iy < pb; iy++) {
2623
      /* calculate the double precision result */
2624
0
      r       = ((mp_word)*tmpt) +
2625
0
                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2626
0
                ((mp_word) u);
2627
2628
      /* get the lower part */
2629
0
      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2630
2631
      /* carry the carry */
2632
0
      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2633
0
    }
2634
0
    *tmpt = u;
2635
0
  }
2636
0
  mp_clamp (&t);
2637
0
  mp_exch (&t, c);
2638
0
  mp_clear (&t);
2639
0
  return MP_OKAY;
2640
0
}
2641
2642
2643
#ifdef BN_MP_MONTGOMERY_SETUP_C
2644
/* setups the montgomery reduction stuff */
2645
static int
2646
mp_montgomery_setup (mp_int * n, mp_digit * rho)
2647
{
2648
  mp_digit x, b;
2649
2650
/* fast inversion mod 2**k
2651
 *
2652
 * Based on the fact that
2653
 *
2654
 * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
2655
 *                    =>  2*X*A - X*X*A*A = 1
2656
 *                    =>  2*(1) - (1)     = 1
2657
 */
2658
  b = n->dp[0];
2659
2660
  if ((b & 1) == 0) {
2661
    return MP_VAL;
2662
  }
2663
2664
  x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2665
  x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
2666
#if !defined(MP_8BIT)
2667
  x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
2668
#endif
2669
#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
2670
  x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
2671
#endif
2672
#ifdef MP_64BIT
2673
  x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
2674
#endif
2675
2676
  /* rho = -1/m mod b */
2677
  *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
2678
2679
  return MP_OKAY;
2680
}
2681
#endif
2682
2683
2684
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
2685
/* computes xR**-1 == x (mod N) via Montgomery Reduction
2686
 *
2687
 * This is an optimized implementation of montgomery_reduce
2688
 * which uses the comba method to quickly calculate the columns of the
2689
 * reduction.
2690
 *
2691
 * Based on Algorithm 14.32 on pp.601 of HAC.
2692
*/
2693
static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2694
{
2695
  int     ix, res, olduse;
2696
  mp_word W[MP_WARRAY];
2697
2698
  /* get old used count */
2699
  olduse = x->used;
2700
2701
  /* grow a as required */
2702
  if (x->alloc < n->used + 1) {
2703
    if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2704
      return res;
2705
    }
2706
  }
2707
2708
  /* first we have to get the digits of the input into
2709
   * an array of double precision words W[...]
2710
   */
2711
  {
2712
    register mp_word *_W;
2713
    register mp_digit *tmpx;
2714
2715
    /* alias for the W[] array */
2716
    _W   = W;
2717
2718
    /* alias for the digits of  x*/
2719
    tmpx = x->dp;
2720
2721
    /* copy the digits of a into W[0..a->used-1] */
2722
    for (ix = 0; ix < x->used; ix++) {
2723
      *_W++ = *tmpx++;
2724
    }
2725
2726
    /* zero the high words of W[a->used..m->used*2] */
2727
    for (; ix < n->used * 2 + 1; ix++) {
2728
      *_W++ = 0;
2729
    }
2730
  }
2731
2732
  /* now we proceed to zero successive digits
2733
   * from the least significant upwards
2734
   */
2735
  for (ix = 0; ix < n->used; ix++) {
2736
    /* mu = ai * m' mod b
2737
     *
2738
     * We avoid a double precision multiplication (which isn't required)
2739
     * by casting the value down to a mp_digit.  Note this requires
2740
     * that W[ix-1] have  the carry cleared (see after the inner loop)
2741
     */
2742
    register mp_digit mu;
2743
    mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2744
2745
    /* a = a + mu * m * b**i
2746
     *
2747
     * This is computed in place and on the fly.  The multiplication
2748
     * by b**i is handled by offseting which columns the results
2749
     * are added to.
2750
     *
2751
     * Note the comba method normally doesn't handle carries in the
2752
     * inner loop In this case we fix the carry from the previous
2753
     * column since the Montgomery reduction requires digits of the
2754
     * result (so far) [see above] to work.  This is
2755
     * handled by fixing up one carry after the inner loop.  The
2756
     * carry fixups are done in order so after these loops the
2757
     * first m->used words of W[] have the carries fixed
2758
     */
2759
    {
2760
      register int iy;
2761
      register mp_digit *tmpn;
2762
      register mp_word *_W;
2763
2764
      /* alias for the digits of the modulus */
2765
      tmpn = n->dp;
2766
2767
      /* Alias for the columns set by an offset of ix */
2768
      _W = W + ix;
2769
2770
      /* inner loop */
2771
      for (iy = 0; iy < n->used; iy++) {
2772
          *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
2773
      }
2774
    }
2775
2776
    /* now fix carry for next digit, W[ix+1] */
2777
    W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2778
  }
2779
2780
  /* now we have to propagate the carries and
2781
   * shift the words downward [all those least
2782
   * significant digits we zeroed].
2783
   */
2784
  {
2785
    register mp_digit *tmpx;
2786
    register mp_word *_W, *_W1;
2787
2788
    /* nox fix rest of carries */
2789
2790
    /* alias for current word */
2791
    _W1 = W + ix;
2792
2793
    /* alias for next word, where the carry goes */
2794
    _W = W + ++ix;
2795
2796
    for (; ix <= n->used * 2 + 1; ix++) {
2797
      *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
2798
    }
2799
2800
    /* copy out, A = A/b**n
2801
     *
2802
     * The result is A/b**n but instead of converting from an
2803
     * array of mp_word to mp_digit than calling mp_rshd
2804
     * we just copy them in the right order
2805
     */
2806
2807
    /* alias for destination word */
2808
    tmpx = x->dp;
2809
2810
    /* alias for shifted double precision result */
2811
    _W = W + n->used;
2812
2813
    for (ix = 0; ix < n->used + 1; ix++) {
2814
      *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
2815
    }
2816
2817
    /* zero oldused digits, if the input a was larger than
2818
     * m->used+1 we'll have to clear the digits
2819
     */
2820
    for (; ix < olduse; ix++) {
2821
      *tmpx++ = 0;
2822
    }
2823
  }
2824
2825
  /* set the max used and clamp */
2826
  x->used = n->used + 1;
2827
  mp_clamp (x);
2828
2829
  /* if A >= m then A = A - m */
2830
  if (mp_cmp_mag (x, n) != MP_LT) {
2831
    return s_mp_sub (x, n, x);
2832
  }
2833
  return MP_OKAY;
2834
}
2835
#endif
2836
2837
2838
#ifdef BN_MP_MUL_2_C
2839
/* b = a*2 */
2840
static int mp_mul_2(mp_int * a, mp_int * b)
2841
{
2842
  int     x, res, oldused;
2843
2844
  /* grow to accommodate result */
2845
  if (b->alloc < a->used + 1) {
2846
    if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2847
      return res;
2848
    }
2849
  }
2850
2851
  oldused = b->used;
2852
  b->used = a->used;
2853
2854
  {
2855
    register mp_digit r, rr, *tmpa, *tmpb;
2856
2857
    /* alias for source */
2858
    tmpa = a->dp;
2859
2860
    /* alias for dest */
2861
    tmpb = b->dp;
2862
2863
    /* carry */
2864
    r = 0;
2865
    for (x = 0; x < a->used; x++) {
2866
2867
      /* get what will be the *next* carry bit from the
2868
       * MSB of the current digit
2869
       */
2870
      rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2871
2872
      /* now shift up this digit, add in the carry [from the previous] */
2873
      *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2874
2875
      /* copy the carry that would be from the source
2876
       * digit into the next iteration
2877
       */
2878
      r = rr;
2879
    }
2880
2881
    /* new leading digit? */
2882
    if (r != 0) {
2883
      /* add a MSB which is always 1 at this point */
2884
      *tmpb = 1;
2885
      ++(b->used);
2886
    }
2887
2888
    /* now zero any excess digits on the destination
2889
     * that we didn't write to
2890
     */
2891
    tmpb = b->dp + b->used;
2892
    for (x = b->used; x < oldused; x++) {
2893
      *tmpb++ = 0;
2894
    }
2895
  }
2896
  b->sign = a->sign;
2897
  return MP_OKAY;
2898
}
2899
#endif
2900
2901
2902
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
2903
/*
2904
 * shifts with subtractions when the result is greater than b.
2905
 *
2906
 * The method is slightly modified to shift B unconditionally up to just under
2907
 * the leading bit of b.  This saves a lot of multiple precision shifting.
2908
 */
2909
static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
2910
{
2911
  int     x, bits, res;
2912
2913
  /* how many bits of last digit does b use */
2914
  bits = mp_count_bits (b) % DIGIT_BIT;
2915
2916
  if (b->used > 1) {
2917
     if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2918
        return res;
2919
     }
2920
  } else {
2921
     mp_set(a, 1);
2922
     bits = 1;
2923
  }
2924
2925
2926
  /* now compute C = A * B mod b */
2927
  for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
2928
    if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2929
      return res;
2930
    }
2931
    if (mp_cmp_mag (a, b) != MP_LT) {
2932
      if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2933
        return res;
2934
      }
2935
    }
2936
  }
2937
2938
  return MP_OKAY;
2939
}
2940
#endif
2941
2942
2943
#ifdef BN_MP_EXPTMOD_FAST_C
2944
/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
2945
 *
2946
 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
2947
 * The value of k changes based on the size of the exponent.
2948
 *
2949
 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
2950
 */
2951
2952
static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
2953
{
2954
  mp_int  M[TAB_SIZE], res;
2955
  mp_digit buf, mp;
2956
  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
2957
2958
  /* use a pointer to the reduction algorithm.  This allows us to use
2959
   * one of many reduction algorithms without modding the guts of
2960
   * the code with if statements everywhere.
2961
   */
2962
  int     (*redux)(mp_int*,mp_int*,mp_digit);
2963
2964
  /* find window size */
2965
  x = mp_count_bits (X);
2966
  if (x <= 7) {
2967
    winsize = 2;
2968
  } else if (x <= 36) {
2969
    winsize = 3;
2970
  } else if (x <= 140) {
2971
    winsize = 4;
2972
  } else if (x <= 450) {
2973
    winsize = 5;
2974
  } else if (x <= 1303) {
2975
    winsize = 6;
2976
  } else if (x <= 3529) {
2977
    winsize = 7;
2978
  } else {
2979
    winsize = 8;
2980
  }
2981
2982
#ifdef MP_LOW_MEM
2983
  if (winsize > 5) {
2984
     winsize = 5;
2985
  }
2986
#endif
2987
2988
  /* init M array */
2989
  /* init first cell */
2990
  if ((err = mp_init(&M[1])) != MP_OKAY) {
2991
     return err;
2992
  }
2993
2994
  /* now init the second half of the array */
2995
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2996
    if ((err = mp_init(&M[x])) != MP_OKAY) {
2997
      for (y = 1<<(winsize-1); y < x; y++) {
2998
        mp_clear (&M[y]);
2999
      }
3000
      mp_clear(&M[1]);
3001
      return err;
3002
    }
3003
  }
3004
3005
  /* determine and setup reduction code */
3006
  if (redmode == 0) {
3007
#ifdef BN_MP_MONTGOMERY_SETUP_C
3008
     /* now setup montgomery  */
3009
     if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
3010
        goto LBL_M;
3011
     }
3012
#else
3013
     err = MP_VAL;
3014
     goto LBL_M;
3015
#endif
3016
3017
     /* automatically pick the comba one if available (saves quite a few calls/ifs) */
3018
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
3019
     if (((P->used * 2 + 1) < MP_WARRAY) &&
3020
          P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3021
        redux = fast_mp_montgomery_reduce;
3022
     } else
3023
#endif
3024
     {
3025
#ifdef BN_MP_MONTGOMERY_REDUCE_C
3026
        /* use slower baseline Montgomery method */
3027
        redux = mp_montgomery_reduce;
3028
#else
3029
        err = MP_VAL;
3030
        goto LBL_M;
3031
#endif
3032
     }
3033
  } else if (redmode == 1) {
3034
#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
3035
     /* setup DR reduction for moduli of the form B**k - b */
3036
     mp_dr_setup(P, &mp);
3037
     redux = mp_dr_reduce;
3038
#else
3039
     err = MP_VAL;
3040
     goto LBL_M;
3041
#endif
3042
  } else {
3043
#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
3044
     /* setup DR reduction for moduli of the form 2**k - b */
3045
     if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
3046
        goto LBL_M;
3047
     }
3048
     redux = mp_reduce_2k;
3049
#else
3050
     err = MP_VAL;
3051
     goto LBL_M;
3052
#endif
3053
  }
3054
3055
  /* setup result */
3056
  if ((err = mp_init (&res)) != MP_OKAY) {
3057
    goto LBL_M;
3058
  }
3059
3060
  /* create M table
3061
   *
3062
3063
   *
3064
   * The first half of the table is not computed though accept for M[0] and M[1]
3065
   */
3066
3067
  if (redmode == 0) {
3068
#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
3069
     /* now we need R mod m */
3070
     if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
3071
       goto LBL_RES;
3072
     }
3073
#else
3074
     err = MP_VAL;
3075
     goto LBL_RES;
3076
#endif
3077
3078
     /* now set M[1] to G * R mod m */
3079
     if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
3080
       goto LBL_RES;
3081
     }
3082
  } else {
3083
     mp_set(&res, 1);
3084
     if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
3085
        goto LBL_RES;
3086
     }
3087
  }
3088
3089
  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
3090
  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3091
    goto LBL_RES;
3092
  }
3093
3094
  for (x = 0; x < (winsize - 1); x++) {
3095
    if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
3096
      goto LBL_RES;
3097
    }
3098
    if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
3099
      goto LBL_RES;
3100
    }
3101
  }
3102
3103
  /* create upper table */
3104
  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3105
    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3106
      goto LBL_RES;
3107
    }
3108
    if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
3109
      goto LBL_RES;
3110
    }
3111
  }
3112
3113
  /* set initial mode and bit cnt */
3114
  mode   = 0;
3115
  bitcnt = 1;
3116
  buf    = 0;
3117
  digidx = X->used - 1;
3118
  bitcpy = 0;
3119
  bitbuf = 0;
3120
3121
  for (;;) {
3122
    /* grab next digit as required */
3123
    if (--bitcnt == 0) {
3124
      /* if digidx == -1 we are out of digits so break */
3125
      if (digidx == -1) {
3126
        break;
3127
      }
3128
      /* read next digit and reset bitcnt */
3129
      buf    = X->dp[digidx--];
3130
      bitcnt = (int)DIGIT_BIT;
3131
    }
3132
3133
    /* grab the next msb from the exponent */
3134
    y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
3135
    buf <<= (mp_digit)1;
3136
3137
    /* if the bit is zero and mode == 0 then we ignore it
3138
     * These represent the leading zero bits before the first 1 bit
3139
     * in the exponent.  Technically this opt is not required but it
3140
     * does lower the # of trivial squaring/reductions used
3141
     */
3142
    if (mode == 0 && y == 0) {
3143
      continue;
3144
    }
3145
3146
    /* if the bit is zero and mode == 1 then we square */
3147
    if (mode == 1 && y == 0) {
3148
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3149
        goto LBL_RES;
3150
      }
3151
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
3152
        goto LBL_RES;
3153
      }
3154
      continue;
3155
    }
3156
3157
    /* else we add it to the window */
3158
    bitbuf |= (y << (winsize - ++bitcpy));
3159
    mode    = 2;
3160
3161
    if (bitcpy == winsize) {
3162
      /* ok window is filled so square as required and multiply  */
3163
      /* square first */
3164
      for (x = 0; x < winsize; x++) {
3165
        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3166
          goto LBL_RES;
3167
        }
3168
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
3169
          goto LBL_RES;
3170
        }
3171
      }
3172
3173
      /* then multiply */
3174
      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3175
        goto LBL_RES;
3176
      }
3177
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
3178
        goto LBL_RES;
3179
      }
3180
3181
      /* empty window and reset */
3182
      bitcpy = 0;
3183
      bitbuf = 0;
3184
      mode   = 1;
3185
    }
3186
  }
3187
3188
  /* if bits remain then square/multiply */
3189
  if (mode == 2 && bitcpy > 0) {
3190
    /* square then multiply if the bit is set */
3191
    for (x = 0; x < bitcpy; x++) {
3192
      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3193
        goto LBL_RES;
3194
      }
3195
      if ((err = redux (&res, P, mp)) != MP_OKAY) {
3196
        goto LBL_RES;
3197
      }
3198
3199
      /* get next bit of the window */
3200
      bitbuf <<= 1;
3201
      if ((bitbuf & (1 << winsize)) != 0) {
3202
        /* then multiply */
3203
        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3204
          goto LBL_RES;
3205
        }
3206
        if ((err = redux (&res, P, mp)) != MP_OKAY) {
3207
          goto LBL_RES;
3208
        }
3209
      }
3210
    }
3211
  }
3212
3213
  if (redmode == 0) {
3214
     /* fixup result if Montgomery reduction is used
3215
      * recall that any value in a Montgomery system is
3216
      * actually multiplied by R mod n.  So we have
3217
      * to reduce one more time to cancel out the factor
3218
      * of R.
3219
      */
3220
     if ((err = redux(&res, P, mp)) != MP_OKAY) {
3221
       goto LBL_RES;
3222
     }
3223
  }
3224
3225
  /* swap res with Y */
3226
  mp_exch (&res, Y);
3227
  err = MP_OKAY;
3228
LBL_RES:mp_clear (&res);
3229
LBL_M:
3230
  mp_clear(&M[1]);
3231
  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3232
    mp_clear (&M[x]);
3233
  }
3234
  return err;
3235
}
3236
#endif
3237
3238
3239
#ifdef BN_FAST_S_MP_SQR_C
3240
/* the jist of squaring...
3241
 * you do like mult except the offset of the tmpx [one that
3242
 * starts closer to zero] can't equal the offset of tmpy.
3243
 * So basically you set up iy like before then you min it with
3244
 * (ty-tx) so that it never happens.  You double all those
3245
 * you add in the inner loop
3246
3247
After that loop you do the squares and add them in.
3248
*/
3249
3250
static int fast_s_mp_sqr (mp_int * a, mp_int * b)
3251
{
3252
  int       olduse, res, pa, ix, iz;
3253
  mp_digit   W[MP_WARRAY], *tmpx;
3254
  mp_word   W1;
3255
3256
  /* grow the destination as required */
3257
  pa = a->used + a->used;
3258
  if (b->alloc < pa) {
3259
    if ((res = mp_grow (b, pa)) != MP_OKAY) {
3260
      return res;
3261
    }
3262
  }
3263
3264
  /* number of output digits to produce */
3265
  W1 = 0;
3266
  for (ix = 0; ix < pa; ix++) {
3267
      int      tx, ty, iy;
3268
      mp_word  _W;
3269
      mp_digit *tmpy;
3270
3271
      /* clear counter */
3272
      _W = 0;
3273
3274
      /* get offsets into the two bignums */
3275
      ty = MIN(a->used-1, ix);
3276
      tx = ix - ty;
3277
3278
      /* setup temp aliases */
3279
      tmpx = a->dp + tx;
3280
      tmpy = a->dp + ty;
3281
3282
      /* this is the number of times the loop will iterrate, essentially
3283
         while (tx++ < a->used && ty-- >= 0) { ... }
3284
       */
3285
      iy = MIN(a->used-tx, ty+1);
3286
3287
      /* now for squaring tx can never equal ty
3288
       * we halve the distance since they approach at a rate of 2x
3289
       * and we have to round because odd cases need to be executed
3290
       */
3291
      iy = MIN(iy, (ty-tx+1)>>1);
3292
3293
      /* execute loop */
3294
      for (iz = 0; iz < iy; iz++) {
3295
         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3296
      }
3297
3298
      /* double the inner product and add carry */
3299
      _W = _W + _W + W1;
3300
3301
      /* even columns have the square term in them */
3302
      if ((ix&1) == 0) {
3303
         _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3304
      }
3305
3306
      /* store it */
3307
      W[ix] = (mp_digit)(_W & MP_MASK);
3308
3309
      /* make next carry */
3310
      W1 = _W >> ((mp_word)DIGIT_BIT);
3311
  }
3312
3313
  /* setup dest */
3314
  olduse  = b->used;
3315
  b->used = a->used+a->used;
3316
3317
  {
3318
    mp_digit *tmpb;
3319
    tmpb = b->dp;
3320
    for (ix = 0; ix < pa; ix++) {
3321
      *tmpb++ = W[ix] & MP_MASK;
3322
    }
3323
3324
    /* clear unused digits [that existed in the old copy of c] */
3325
    for (; ix < olduse; ix++) {
3326
      *tmpb++ = 0;
3327
    }
3328
  }
3329
  mp_clamp (b);
3330
  return MP_OKAY;
3331
}
3332
#endif
3333
3334
3335
#ifdef BN_MP_MUL_D_C
3336
/* multiply by a digit */
3337
static int
3338
mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
3339
{
3340
  mp_digit u, *tmpa, *tmpc;
3341
  mp_word  r;
3342
  int      ix, res, olduse;
3343
3344
  /* make sure c is big enough to hold a*b */
3345
  if (c->alloc < a->used + 1) {
3346
    if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
3347
      return res;
3348
    }
3349
  }
3350
3351
  /* get the original destinations used count */
3352
  olduse = c->used;
3353
3354
  /* set the sign */
3355
  c->sign = a->sign;
3356
3357
  /* alias for a->dp [source] */
3358
  tmpa = a->dp;
3359
3360
  /* alias for c->dp [dest] */
3361
  tmpc = c->dp;
3362
3363
  /* zero carry */
3364
  u = 0;
3365
3366
  /* compute columns */
3367
  for (ix = 0; ix < a->used; ix++) {
3368
    /* compute product and carry sum for this term */
3369
    r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
3370
3371
    /* mask off higher bits to get a single digit */
3372
    *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
3373
3374
    /* send carry into next iteration */
3375
    u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3376
  }
3377
3378
  /* store final carry [if any] and increment ix offset  */
3379
  *tmpc++ = u;
3380
  ++ix;
3381
3382
  /* now zero digits above the top */
3383
  while (ix++ < olduse) {
3384
     *tmpc++ = 0;
3385
  }
3386
3387
  /* set used count */
3388
  c->used = a->used + 1;
3389
  mp_clamp(c);
3390
3391
  return MP_OKAY;
3392
}
3393
#endif