/src/dropbear/libtommath/bn_s_mp_exptmod_fast.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "tommath_private.h" |
2 | | #ifdef BN_S_MP_EXPTMOD_FAST_C |
3 | | /* LibTomMath, multiple-precision integer library -- Tom St Denis */ |
4 | | /* SPDX-License-Identifier: Unlicense */ |
5 | | |
6 | | /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 |
7 | | * |
8 | | * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. |
9 | | * The value of k changes based on the size of the exponent. |
10 | | * |
11 | | * Uses Montgomery or Diminished Radix reduction [whichever appropriate] |
12 | | */ |
13 | | |
14 | | #ifdef MP_LOW_MEM |
15 | | # define TAB_SIZE 32 |
16 | | # define MAX_WINSIZE 5 |
17 | | #else |
18 | | # define TAB_SIZE 256 |
19 | 43.4k | # define MAX_WINSIZE 0 |
20 | | #endif |
21 | | |
22 | | mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) |
23 | 43.4k | { |
24 | 43.4k | mp_int M[TAB_SIZE], res; |
25 | 43.4k | mp_digit buf, mp; |
26 | 43.4k | int bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; |
27 | 43.4k | mp_err err; |
28 | | |
29 | | /* use a pointer to the reduction algorithm. This allows us to use |
30 | | * one of many reduction algorithms without modding the guts of |
31 | | * the code with if statements everywhere. |
32 | | */ |
33 | 43.4k | mp_err(*redux)(mp_int *x, const mp_int *n, mp_digit rho); |
34 | | |
35 | | /* find window size */ |
36 | 43.4k | x = mp_count_bits(X); |
37 | 43.4k | if (x <= 7) { |
38 | 494 | winsize = 2; |
39 | 42.9k | } else if (x <= 36) { |
40 | 4.33k | winsize = 3; |
41 | 38.5k | } else if (x <= 140) { |
42 | 1.20k | winsize = 4; |
43 | 37.3k | } else if (x <= 450) { |
44 | 9.72k | winsize = 5; |
45 | 27.6k | } else if (x <= 1303) { |
46 | 0 | winsize = 6; |
47 | 27.6k | } else if (x <= 3529) { |
48 | 27.6k | winsize = 7; |
49 | 27.6k | } else { |
50 | 0 | winsize = 8; |
51 | 0 | } |
52 | | |
53 | 43.4k | winsize = MAX_WINSIZE ? MP_MIN(MAX_WINSIZE, winsize) : winsize; |
54 | | |
55 | | /* init M array */ |
56 | | /* init first cell */ |
57 | 43.4k | if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) { |
58 | 0 | return err; |
59 | 0 | } |
60 | | |
61 | | /* now init the second half of the array */ |
62 | 1.99M | for (x = 1<<(winsize-1); x < (1 << winsize); x++) { |
63 | 1.95M | if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) { |
64 | 0 | for (y = 1<<(winsize-1); y < x; y++) { |
65 | 0 | mp_clear(&M[y]); |
66 | 0 | } |
67 | 0 | mp_clear(&M[1]); |
68 | 0 | return err; |
69 | 0 | } |
70 | 1.95M | } |
71 | | |
72 | | /* determine and setup reduction code */ |
73 | 43.4k | if (redmode == 0) { |
74 | 43.4k | if (MP_HAS(MP_MONTGOMERY_SETUP)) { |
75 | | /* now setup montgomery */ |
76 | 43.4k | if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) goto LBL_M; |
77 | 43.4k | } else { |
78 | 0 | err = MP_VAL; |
79 | 0 | goto LBL_M; |
80 | 0 | } |
81 | | |
82 | | /* automatically pick the comba one if available (saves quite a few calls/ifs) */ |
83 | 43.4k | if (MP_HAS(S_MP_MONTGOMERY_REDUCE_FAST) && |
84 | 43.4k | (((P->used * 2) + 1) < MP_WARRAY) && |
85 | 43.4k | (P->used < MP_MAXFAST)) { |
86 | 43.4k | redux = s_mp_montgomery_reduce_fast; |
87 | 43.4k | } else if (MP_HAS(MP_MONTGOMERY_REDUCE)) { |
88 | | /* use slower baseline Montgomery method */ |
89 | 0 | redux = mp_montgomery_reduce; |
90 | 0 | } else { |
91 | 0 | err = MP_VAL; |
92 | 0 | goto LBL_M; |
93 | 0 | } |
94 | 43.4k | } else if (redmode == 1) { |
95 | 0 | if (MP_HAS(MP_DR_SETUP) && MP_HAS(MP_DR_REDUCE)) { |
96 | | /* setup DR reduction for moduli of the form B**k - b */ |
97 | 0 | mp_dr_setup(P, &mp); |
98 | 0 | redux = mp_dr_reduce; |
99 | 0 | } else { |
100 | 0 | err = MP_VAL; |
101 | 0 | goto LBL_M; |
102 | 0 | } |
103 | 0 | } else if (MP_HAS(MP_REDUCE_2K_SETUP) && MP_HAS(MP_REDUCE_2K)) { |
104 | | /* setup DR reduction for moduli of the form 2**k - b */ |
105 | 0 | if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) goto LBL_M; |
106 | 0 | redux = mp_reduce_2k; |
107 | 0 | } else { |
108 | 0 | err = MP_VAL; |
109 | 0 | goto LBL_M; |
110 | 0 | } |
111 | | |
112 | | /* setup result */ |
113 | 43.4k | if ((err = mp_init_size(&res, P->alloc)) != MP_OKAY) goto LBL_M; |
114 | | |
115 | | /* create M table |
116 | | * |
117 | | |
118 | | * |
119 | | * The first half of the table is not computed though accept for M[0] and M[1] |
120 | | */ |
121 | | |
122 | 43.4k | if (redmode == 0) { |
123 | 43.4k | if (MP_HAS(MP_MONTGOMERY_CALC_NORMALIZATION)) { |
124 | | /* now we need R mod m */ |
125 | 43.4k | if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) goto LBL_RES; |
126 | | |
127 | | /* now set M[1] to G * R mod m */ |
128 | 43.4k | if ((err = mp_mulmod(G, &res, P, &M[1])) != MP_OKAY) goto LBL_RES; |
129 | 43.4k | } else { |
130 | 0 | err = MP_VAL; |
131 | 0 | goto LBL_RES; |
132 | 0 | } |
133 | 43.4k | } else { |
134 | 0 | mp_set(&res, 1uL); |
135 | 0 | if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) goto LBL_RES; |
136 | 0 | } |
137 | | |
138 | | /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ |
139 | 43.4k | if ((err = mp_copy(&M[1], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES; |
140 | | |
141 | 260k | for (x = 0; x < (winsize - 1); x++) { |
142 | 217k | if ((err = mp_sqr(&M[(size_t)1 << (winsize - 1)], &M[(size_t)1 << (winsize - 1)])) != MP_OKAY) goto LBL_RES; |
143 | 217k | if ((err = redux(&M[(size_t)1 << (winsize - 1)], P, mp)) != MP_OKAY) goto LBL_RES; |
144 | 217k | } |
145 | | |
146 | | /* create upper table */ |
147 | 1.95M | for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { |
148 | 1.90M | if ((err = mp_mul(&M[x - 1], &M[1], &M[x])) != MP_OKAY) goto LBL_RES; |
149 | 1.90M | if ((err = redux(&M[x], P, mp)) != MP_OKAY) goto LBL_RES; |
150 | 1.90M | } |
151 | | |
152 | | /* set initial mode and bit cnt */ |
153 | 43.4k | mode = 0; |
154 | 43.4k | bitcnt = 1; |
155 | 43.4k | buf = 0; |
156 | 43.4k | digidx = X->used - 1; |
157 | 43.4k | bitcpy = 0; |
158 | 43.4k | bitbuf = 0; |
159 | | |
160 | 60.2M | for (;;) { |
161 | | /* grab next digit as required */ |
162 | 60.2M | if (--bitcnt == 0) { |
163 | | /* if digidx == -1 we are out of digits so break */ |
164 | 1.04M | if (digidx == -1) { |
165 | 43.4k | break; |
166 | 43.4k | } |
167 | | /* read next digit and reset bitcnt */ |
168 | 1.00M | buf = X->dp[digidx--]; |
169 | 1.00M | bitcnt = (int)MP_DIGIT_BIT; |
170 | 1.00M | } |
171 | | |
172 | | /* grab the next msb from the exponent */ |
173 | 60.2M | y = (mp_digit)(buf >> (MP_DIGIT_BIT - 1)) & 1uL; |
174 | 60.2M | buf <<= (mp_digit)1; |
175 | | |
176 | | /* if the bit is zero and mode == 0 then we ignore it |
177 | | * These represent the leading zero bits before the first 1 bit |
178 | | * in the exponent. Technically this opt is not required but it |
179 | | * does lower the # of trivial squaring/reductions used |
180 | | */ |
181 | 60.2M | if ((mode == 0) && (y == 0)) { |
182 | 1.94M | continue; |
183 | 1.94M | } |
184 | | |
185 | | /* if the bit is zero and mode == 1 then we square */ |
186 | 58.2M | if ((mode == 1) && (y == 0)) { |
187 | 7.47M | if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES; |
188 | 7.47M | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
189 | 7.47M | continue; |
190 | 7.47M | } |
191 | | |
192 | | /* else we add it to the window */ |
193 | 50.7M | bitbuf |= (y << (winsize - ++bitcpy)); |
194 | 50.7M | mode = 2; |
195 | | |
196 | 50.7M | if (bitcpy == winsize) { |
197 | | /* ok window is filled so square as required and multiply */ |
198 | | /* square first */ |
199 | 58.0M | for (x = 0; x < winsize; x++) { |
200 | 50.6M | if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES; |
201 | 50.6M | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
202 | 50.6M | } |
203 | | |
204 | | /* then multiply */ |
205 | 7.32M | if ((err = mp_mul(&res, &M[bitbuf], &res)) != MP_OKAY) goto LBL_RES; |
206 | 7.32M | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
207 | | |
208 | | /* empty window and reset */ |
209 | 7.32M | bitcpy = 0; |
210 | 7.32M | bitbuf = 0; |
211 | 7.32M | mode = 1; |
212 | 7.32M | } |
213 | 50.7M | } |
214 | | |
215 | | /* if bits remain then square/multiply */ |
216 | 43.4k | if ((mode == 2) && (bitcpy > 0)) { |
217 | | /* square then multiply if the bit is set */ |
218 | 129k | for (x = 0; x < bitcpy; x++) { |
219 | 97.2k | if ((err = mp_sqr(&res, &res)) != MP_OKAY) goto LBL_RES; |
220 | 97.2k | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
221 | | |
222 | | /* get next bit of the window */ |
223 | 97.2k | bitbuf <<= 1; |
224 | 97.2k | if ((bitbuf & (1 << winsize)) != 0) { |
225 | | /* then multiply */ |
226 | 65.1k | if ((err = mp_mul(&res, &M[1], &res)) != MP_OKAY) goto LBL_RES; |
227 | 65.1k | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
228 | 65.1k | } |
229 | 97.2k | } |
230 | 32.1k | } |
231 | | |
232 | 43.4k | if (redmode == 0) { |
233 | | /* fixup result if Montgomery reduction is used |
234 | | * recall that any value in a Montgomery system is |
235 | | * actually multiplied by R mod n. So we have |
236 | | * to reduce one more time to cancel out the factor |
237 | | * of R. |
238 | | */ |
239 | 43.4k | if ((err = redux(&res, P, mp)) != MP_OKAY) goto LBL_RES; |
240 | 43.4k | } |
241 | | |
242 | | /* swap res with Y */ |
243 | 43.4k | mp_exch(&res, Y); |
244 | 43.4k | err = MP_OKAY; |
245 | 43.4k | LBL_RES: |
246 | 43.4k | mp_clear(&res); |
247 | 43.4k | LBL_M: |
248 | 43.4k | mp_clear(&M[1]); |
249 | 1.99M | for (x = 1<<(winsize-1); x < (1 << winsize); x++) { |
250 | 1.95M | mp_clear(&M[x]); |
251 | 1.95M | } |
252 | 43.4k | return err; |
253 | 43.4k | } |
254 | | #endif |