/src/dropbear/libtommath/bn_s_mp_exptmod_fast.c

Source (jump to first uncovered line)
#include "tommath_private.h"
#ifdef BN_S_MP_EXPTMOD_FAST_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */

/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
 *
 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
 * The value of k changes based on the size of the exponent.
 *
 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
 */

#ifdef MP_LOW_MEM
#   define TAB_SIZE 32
#   define MAX_WINSIZE 5
#else
#   define TAB_SIZE 256
#   define MAX_WINSIZE 0
#endif

mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode)
{
   mp_int  M[TAB_SIZE], res;
   mp_digit buf, mp;
   int     bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
   mp_err   err;

   /* use a pointer to the reduction algorithm.  This allows us to use
    * one of many reduction algorithms without modding the guts of
    * the code with if statements everywhere.
    */
   mp_err(*redux)(mp_int *x, const mp_int *n, mp_digit rho);

   /* find window size */
   x = mp_count_bits(X);
   if (x <= 7) {
      winsize = 2;
   } else if (x <= 36) {
      winsize = 3;
   } else if (x <= 140) {
      winsize = 4;
   } else if (x <= 450) {
      winsize = 5;
   } else if (x <= 1303) {
      winsize = 6;
   } else if (x <= 3529) {
      winsize = 7;
   } else {
      winsize = 8;
   }

   winsize = MAX_WINSIZE ? MP_MIN(MAX_WINSIZE, winsize) : winsize;

   /* init M array */
   /* init first cell */
   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
      return err;
   }

   /* now init the second half of the array */
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
      if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
         for (y = 1<<(winsize-1); y < x; y++) {
            mp_clear(&M[y]);
         }
         mp_clear(&M[1]);
         return err;
      }
   }

   /* determine and setup reduction code */
   if (redmode == 0) {
      if (MP_HAS(MP_MONTGOMERY_SETUP)) {
         /* now setup montgomery  */
         if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY)      goto LBL_M;
      } else {
         err = MP_VAL;
         goto LBL_M;
      }

      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
      if (MP_HAS(S_MP_MONTGOMERY_REDUCE_FAST) &&
          (((P->used * 2) + 1) < MP_WARRAY) &&
          (P->used < MP_MAXFAST)) {
         redux = s_mp_montgomery_reduce_fast;
      } else if (MP_HAS(MP_MONTGOMERY_REDUCE)) {
         /* use slower baseline Montgomery method */
         redux = mp_montgomery_reduce;
      } else {
         err = MP_VAL;
         goto LBL_M;
      }
   } else if (redmode == 1) {
      if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
         /* setup DR reduction for moduli of the form B**k - b */
         mp_dr_setup(P, &mp);
         redux = mp_dr_reduce;
      } else {
         err = MP_VAL;
         goto LBL_M;
      }
   } else if (MP_HAS(MP_REDUCE_2K_SETUP) && MP_HAS(MP_REDUCE_2K)) {
      /* setup DR reduction for moduli of the form 2**k - b */
      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY)          goto LBL_M;
      redux = mp_reduce_2k;
   } else {
      err = MP_VAL;
      goto LBL_M;
   }

   /* setup result */
   if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY)           goto LBL_M;

   /* create M table
    *

    *
    * The first half of the table is not computed though accept for M[0] and M[1]
    */

   if (redmode == 0) {
      if (MP_HAS(MP_MONTGOMERY_CALC_NORMALIZATION)) {
         /* now we need R mod m */
         if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) goto LBL_RES;

         /* now set M[1] to G * R mod m */
         if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY)     goto LBL_RES;
      } else {
         err = MP_VAL;
         goto LBL_RES;
      }
   } else {
      mp_set(&res, 1uL);
      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY)                 goto LBL_RES;
   }

   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
   if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;

   for (x = 0; x < (winsize - 1); x++) {
      if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
      if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) goto LBL_RES;
   }

   /* create upper table */
   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
      if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY)     goto LBL_RES;
      if ((err = redux(&M[x], P, mp)) != MP_OKAY)                 goto LBL_RES;
   }

   /* set initial mode and bit cnt */
   mode   = 0;
   bitcnt = 1;
   buf    = 0;
   digidx = X->used - 1;
   bitcpy = 0;
   bitbuf = 0;

   for (;;) {
      /* grab next digit as required */
      if (--bitcnt == 0) {
         /* if digidx == -1 we are out of digits so break */
         if (digidx == -1) {
            break;
         }
         /* read next digit and reset bitcnt */
         buf    = X->dp[digidx--];
         bitcnt = (int)MP_DIGIT_BIT;
      }

      /* grab the next msb from the exponent */
      y     = (mp_digit)(buf >> (MP_DIGIT_BIT - 1)) & 1uL;
      buf <<= (mp_digit)1;

      /* if the bit is zero and mode == 0 then we ignore it
       * These represent the leading zero bits before the first 1 bit
       * in the exponent.  Technically this opt is not required but it
       * does lower the # of trivial squaring/reductions used
       */
      if ((mode == 0) && (y == 0)) {
         continue;
      }

      /* if the bit is zero and mode == 1 then we square */
      if ((mode == 1) && (y == 0)) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY)               goto LBL_RES;
         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;
         continue;
      }

      /* else we add it to the window */
      bitbuf |= (y << (winsize - ++bitcpy));
      mode    = 2;

      if (bitcpy == winsize) {
         /* ok window is filled so square as required and multiply  */
         /* square first */
         for (x = 0; x < winsize; x++) {
            if ((err = mp_sqr(&res, &res)) != MP_OKAY)            goto LBL_RES;
            if ((err = redux(&res, P, mp)) != MP_OKAY)            goto LBL_RES;
         }

         /* then multiply */
         if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY)   goto LBL_RES;
         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;

         /* empty window and reset */
         bitcpy = 0;
         bitbuf = 0;
         mode   = 1;
      }
   }

   /* if bits remain then square/multiply */
   if ((mode == 2) && (bitcpy > 0)) {
      /* square then multiply if the bit is set */
      for (x = 0; x < bitcpy; x++) {
         if ((err = mp_sqr(&res, &res)) != MP_OKAY)               goto LBL_RES;
         if ((err = redux(&res, P, mp)) != MP_OKAY)               goto LBL_RES;

         /* get next bit of the window */
         bitbuf <<= 1;
         if ((bitbuf & (1 << winsize)) != 0) {
            /* then multiply */
            if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY)     goto LBL_RES;
            if ((err = redux(&res, P, mp)) != MP_OKAY)            goto LBL_RES;
         }
      }
   }

   if (redmode == 0) {
      /* fixup result if Montgomery reduction is used
       * recall that any value in a Montgomery system is
       * actually multiplied by R mod n.  So we have
       * to reduce one more time to cancel out the factor
       * of R.
       */
      if ((err = redux(&res, P, mp)) != MP_OKAY)                  goto LBL_RES;
   }

   /* swap res with Y */
   mp_exch(&res, Y);
   err = MP_OKAY;
LBL_RES:
   mp_clear(&res);
LBL_M:
   mp_clear(&M[1]);
   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
      mp_clear(&M[x]);
   }
   return err;
}
#endif

Coverage Report

Created: 2023-06-07 06:49

Line	Count	Source (jump to first uncovered line)
1		#include "tommath_private.h"
2		#ifdef BN_S_MP_EXPTMOD_FAST_C
3		/* LibTomMath, multiple-precision integer library -- Tom St Denis */
4		/* SPDX-License-Identifier: Unlicense */
5
6		/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
7		*
8		* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
9		* The value of k changes based on the size of the exponent.
10		*
11		* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
12		*/
13
14		#ifdef MP_LOW_MEM
15		# define TAB_SIZE 32
16		# define MAX_WINSIZE 5
17		#else
18		# define TAB_SIZE 256
19	43.4k	# define MAX_WINSIZE 0
20		#endif
21
22		mp_err s_mp_exptmod_fast(const mp_int G, const mp_int X, const mp_int P, mp_int Y, int redmode)
23	43.4k	{
24	43.4k	mp_int M[TAB_SIZE], res;
25	43.4k	mp_digit buf, mp;
26	43.4k	int bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
27	43.4k	mp_err err;
28
29		/* use a pointer to the reduction algorithm. This allows us to use
30		* one of many reduction algorithms without modding the guts of
31		* the code with if statements everywhere.
32		*/
33	43.4k	mp_err(redux)(mp_int x, const mp_int *n, mp_digit rho);
34
35		/* find window size */
36	43.4k	x = mp_count_bits(X);
37	43.4k	if (x <= 7) {
38	494	winsize = 2;
39	42.9k	} else if (x <= 36) {
40	4.33k	winsize = 3;
41	38.5k	} else if (x <= 140) {
42	1.20k	winsize = 4;
43	37.3k	} else if (x <= 450) {
44	9.72k	winsize = 5;
45	27.6k	} else if (x <= 1303) {
46	0	winsize = 6;
47	27.6k	} else if (x <= 3529) {
48	27.6k	winsize = 7;
49	27.6k	} else {
50	0	winsize = 8;
51	0	}
52
53	43.4k	winsize = MAX_WINSIZE ? MP_MIN(MAX_WINSIZE, winsize) : winsize;
54
55		/* init M array */
56		/* init first cell */
57	43.4k	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
58	0	return err;
59	0	}
60
61		/* now init the second half of the array */
62	1.99M	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
63	1.95M	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
64	0	for (y = 1<<(winsize-1); y < x; y++) {
65	0	mp_clear(&M[y]);
66	0	}
67	0	mp_clear(&M[1]);
68	0	return err;
69	0	}
70	1.95M	}
71
72		/* determine and setup reduction code */
73	43.4k	if (redmode == 0) {
74	43.4k	if (MP_HAS(MP_MONTGOMERY_SETUP)) {
75		/* now setup montgomery */
76	43.4k	if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) goto LBL_M;
77	43.4k	} else {
78	0	err = MP_VAL;
79	0	goto LBL_M;
80	0	}
81
82		/* automatically pick the comba one if available (saves quite a few calls/ifs) */
83	43.4k	if (MP_HAS(S_MP_MONTGOMERY_REDUCE_FAST) &&
84	43.4k	(((P->used * 2) + 1) < MP_WARRAY) &&
85	43.4k	(P->used < MP_MAXFAST)) {
86	43.4k	redux = s_mp_montgomery_reduce_fast;
87	43.4k	} else if (MP_HAS(MP_MONTGOMERY_REDUCE)) {
88		/* use slower baseline Montgomery method */
89	0	redux = mp_montgomery_reduce;
90	0	} else {
91	0	err = MP_VAL;
92	0	goto LBL_M;
93	0	}
94	43.4k	} else if (redmode == 1) {
95	0	if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) {
96		/* setup DR reduction for moduli of the form B*k - b /
97	0	mp_dr_setup(P, &mp);
98	0	redux = mp_dr_reduce;
99	0	} else {
100	0	err = MP_VAL;
101	0	goto LBL_M;
102	0	}
103	0	} else if (MP_HAS(MP_REDUCE_2K_SETUP) && MP_HAS(MP_REDUCE_2K)) {
104		/* setup DR reduction for moduli of the form 2*k - b /
105	0	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) goto LBL_M;
106	0	redux = mp_reduce_2k;
107	0	} else {
108	0	err = MP_VAL;
109	0	goto LBL_M;
110	0	}
111
112		/* setup result */
113	43.4k	if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) goto LBL_M;
114
115		/* create M table
116		*
117
118		*
119		* The first half of the table is not computed though accept for M[0] and M[1]
120		*/
121
122	43.4k	if (redmode == 0) {
123	43.4k	if (MP_HAS(MP_MONTGOMERY_CALC_NORMALIZATION)) {
124		/* now we need R mod m */
125	43.4k	if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) goto LBL_RES;
126
127		/* now set M[1] to G * R mod m */
128	43.4k	if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) goto LBL_RES;
129	43.4k	} else {
130	0	err = MP_VAL;
131	0	goto LBL_RES;
132	0	}
133	43.4k	} else {
134	0	mp_set(&res, 1uL);
135	0	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) goto LBL_RES;
136	0	}
137
138		/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
139	43.4k	if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
140
141	260k	for (x = 0; x < (winsize - 1); x++) {
142	217k	if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES;
143	217k	if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) goto LBL_RES;
144	217k	}
145
146		/* create upper table */
147	1.95M	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
148	1.90M	if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) goto LBL_RES;
149	1.90M	if ((err = redux(&M[x], P, mp)) != MP_OKAY) goto LBL_RES;
150	1.90M	}
151
152		/* set initial mode and bit cnt */
153	43.4k	mode = 0;
154	43.4k	bitcnt = 1;
155	43.4k	buf = 0;
156	43.4k	digidx = X->used - 1;
157	43.4k	bitcpy = 0;
158	43.4k	bitbuf = 0;
159
160	60.2M	for (;;) {
161		/* grab next digit as required */
162	60.2M	if (--bitcnt == 0) {
163		/* if digidx == -1 we are out of digits so break */
164	1.04M	if (digidx == -1) {
165	43.4k	break;
166	43.4k	}
167		/* read next digit and reset bitcnt */
168	1.00M	buf = X->dp[digidx--];
169	1.00M	bitcnt = (int)MP_DIGIT_BIT;
170	1.00M	}
171
172		/* grab the next msb from the exponent */
173	60.2M	y = (mp_digit)(buf >> (MP_DIGIT_BIT - 1)) & 1uL;
174	60.2M	buf <<= (mp_digit)1;
175
176		/* if the bit is zero and mode == 0 then we ignore it
177		* These represent the leading zero bits before the first 1 bit
178		* in the exponent. Technically this opt is not required but it
179		* does lower the # of trivial squaring/reductions used
180		*/
181	60.2M	if ((mode == 0) && (y == 0)) {
182	1.94M	continue;
183	1.94M	}
184
185		/* if the bit is zero and mode == 1 then we square */
186	58.2M	if ((mode == 1) && (y == 0)) {
187	7.47M	if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
188	7.47M	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
189	7.47M	continue;
190	7.47M	}
191
192		/* else we add it to the window */
193	50.7M	bitbuf \|= (y << (winsize - ++bitcpy));
194	50.7M	mode = 2;
195
196	50.7M	if (bitcpy == winsize) {
197		/* ok window is filled so square as required and multiply */
198		/* square first */
199	58.0M	for (x = 0; x < winsize; x++) {
200	50.6M	if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
201	50.6M	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
202	50.6M	}
203
204		/* then multiply */
205	7.32M	if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) goto LBL_RES;
206	7.32M	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
207
208		/* empty window and reset */
209	7.32M	bitcpy = 0;
210	7.32M	bitbuf = 0;
211	7.32M	mode = 1;
212	7.32M	}
213	50.7M	}
214
215		/* if bits remain then square/multiply */
216	43.4k	if ((mode == 2) && (bitcpy > 0)) {
217		/* square then multiply if the bit is set */
218	129k	for (x = 0; x < bitcpy; x++) {
219	97.2k	if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES;
220	97.2k	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
221
222		/* get next bit of the window */
223	97.2k	bitbuf <<= 1;
224	97.2k	if ((bitbuf & (1 << winsize)) != 0) {
225		/* then multiply */
226	65.1k	if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) goto LBL_RES;
227	65.1k	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
228	65.1k	}
229	97.2k	}
230	32.1k	}
231
232	43.4k	if (redmode == 0) {
233		/* fixup result if Montgomery reduction is used
234		* recall that any value in a Montgomery system is
235		* actually multiplied by R mod n. So we have
236		* to reduce one more time to cancel out the factor
237		* of R.
238		*/
239	43.4k	if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES;
240	43.4k	}
241
242		/* swap res with Y */
243	43.4k	mp_exch(&res, Y);
244	43.4k	err = MP_OKAY;
245	43.4k	LBL_RES:
246	43.4k	mp_clear(&res);
247	43.4k	LBL_M:
248	43.4k	mp_clear(&M[1]);
249	1.99M	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
250	1.95M	mp_clear(&M[x]);
251	1.95M	}
252	43.4k	return err;
253	43.4k	}
254		#endif