/src/libgcrypt/mpi/mpih-mul.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpih-mul.c - MPI helper functions |
2 | | * Copyright (C) 1994, 1996, 1998, 1999, 2000, |
3 | | * 2001, 2002 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of Libgcrypt. |
6 | | * |
7 | | * Libgcrypt is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation; either version 2.1 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * Libgcrypt is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
20 | | * |
21 | | * Note: This code is heavily based on the GNU MP Library. |
22 | | * Actually it's the same code with only minor changes in the |
23 | | * way the data is stored; this is to support the abstraction |
24 | | * of an optional secure memory allocation which may be used |
25 | | * to avoid revealing of sensitive data due to paging etc. |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | #include "mpi-internal.h" |
33 | | #include "longlong.h" |
34 | | #include "g10lib.h" |
35 | | |
36 | | #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ |
37 | 0 | do { \ |
38 | 0 | if( (size) < KARATSUBA_THRESHOLD ) \ |
39 | 0 | mul_n_basecase (prodp, up, vp, size); \ |
40 | 0 | else \ |
41 | 0 | mul_n (prodp, up, vp, size, tspace); \ |
42 | 0 | } while (0) |
43 | | |
44 | | #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ |
45 | 0 | do { \ |
46 | 0 | if ((size) < KARATSUBA_THRESHOLD) \ |
47 | 0 | _gcry_mpih_sqr_n_basecase (prodp, up, size); \ |
48 | 0 | else \ |
49 | 0 | _gcry_mpih_sqr_n (prodp, up, size, tspace); \ |
50 | 0 | } while (0) |
51 | | |
52 | | |
53 | | |
54 | | |
55 | | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), |
56 | | * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are |
57 | | * always stored. Return the most significant limb. |
58 | | * |
59 | | * Argument constraints: |
60 | | * 1. PRODP != UP and PRODP != VP, i.e. the destination |
61 | | * must be distinct from the multiplier and the multiplicand. |
62 | | * |
63 | | * |
64 | | * Handle simple cases with traditional multiplication. |
65 | | * |
66 | | * This is the most critical code of multiplication. All multiplies rely |
67 | | * on this, both small and huge. Small ones arrive here immediately. Huge |
68 | | * ones arrive here as this is the base case for Karatsuba's recursive |
69 | | * algorithm below. |
70 | | */ |
71 | | |
72 | | static mpi_limb_t |
73 | | mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, |
74 | | mpi_ptr_t vp, mpi_size_t size) |
75 | 0 | { |
76 | 0 | mpi_size_t i; |
77 | 0 | mpi_limb_t cy; |
78 | 0 | mpi_limb_t v_limb; |
79 | | |
80 | | /* Multiply by the first limb in V separately, as the result can be |
81 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
82 | 0 | v_limb = vp[0]; |
83 | 0 | if( v_limb <= 1 ) { |
84 | 0 | if( v_limb == 1 ) |
85 | 0 | MPN_COPY( prodp, up, size ); |
86 | 0 | else |
87 | 0 | MPN_ZERO( prodp, size ); |
88 | 0 | cy = 0; |
89 | 0 | } |
90 | 0 | else |
91 | 0 | cy = _gcry_mpih_mul_1( prodp, up, size, v_limb ); |
92 | |
|
93 | 0 | prodp[size] = cy; |
94 | 0 | prodp++; |
95 | | |
96 | | /* For each iteration in the outer loop, multiply one limb from |
97 | | * U with one limb from V, and add it to PROD. */ |
98 | 0 | for( i = 1; i < size; i++ ) { |
99 | 0 | v_limb = vp[i]; |
100 | 0 | if( v_limb <= 1 ) { |
101 | 0 | cy = 0; |
102 | 0 | if( v_limb == 1 ) |
103 | 0 | cy = _gcry_mpih_add_n(prodp, prodp, up, size); |
104 | 0 | } |
105 | 0 | else |
106 | 0 | cy = _gcry_mpih_addmul_1(prodp, up, size, v_limb); |
107 | |
|
108 | 0 | prodp[size] = cy; |
109 | 0 | prodp++; |
110 | 0 | } |
111 | |
|
112 | 0 | return cy; |
113 | 0 | } |
114 | | |
115 | | |
116 | | static void |
117 | | mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, |
118 | | mpi_size_t size, mpi_ptr_t tspace ) |
119 | 0 | { |
120 | 0 | if( size & 1 ) { |
121 | | /* The size is odd, and the code below doesn't handle that. |
122 | | * Multiply the least significant (size - 1) limbs with a recursive |
123 | | * call, and handle the most significant limb of S1 and S2 |
124 | | * separately. |
125 | | * A slightly faster way to do this would be to make the Karatsuba |
126 | | * code below behave as if the size were even, and let it check for |
127 | | * odd size in the end. I.e., in essence move this code to the end. |
128 | | * Doing so would save us a recursive call, and potentially make the |
129 | | * stack grow a lot less. |
130 | | */ |
131 | 0 | mpi_size_t esize = size - 1; /* even size */ |
132 | 0 | mpi_limb_t cy_limb; |
133 | |
|
134 | 0 | MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace ); |
135 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, vp[esize] ); |
136 | 0 | prodp[esize + esize] = cy_limb; |
137 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, vp, size, up[esize] ); |
138 | 0 | prodp[esize + size] = cy_limb; |
139 | 0 | } |
140 | 0 | else { |
141 | | /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. |
142 | | * |
143 | | * Split U in two pieces, U1 and U0, such that |
144 | | * U = U0 + U1*(B**n), |
145 | | * and V in V1 and V0, such that |
146 | | * V = V0 + V1*(B**n). |
147 | | * |
148 | | * UV is then computed recursively using the identity |
149 | | * |
150 | | * 2n n n n |
151 | | * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V |
152 | | * 1 1 1 0 0 1 0 0 |
153 | | * |
154 | | * Where B = 2**BITS_PER_MP_LIMB. |
155 | | */ |
156 | 0 | mpi_size_t hsize = size >> 1; |
157 | 0 | mpi_limb_t cy; |
158 | 0 | int negflg; |
159 | | |
160 | | /* Product H. ________________ ________________ |
161 | | * |_____U1 x V1____||____U0 x V0_____| |
162 | | * Put result in upper part of PROD and pass low part of TSPACE |
163 | | * as new TSPACE. |
164 | | */ |
165 | 0 | MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace); |
166 | | |
167 | | /* Product M. ________________ |
168 | | * |_(U1-U0)(V0-V1)_| |
169 | | */ |
170 | 0 | if( _gcry_mpih_cmp(up + hsize, up, hsize) >= 0 ) { |
171 | 0 | _gcry_mpih_sub_n(prodp, up + hsize, up, hsize); |
172 | 0 | negflg = 0; |
173 | 0 | } |
174 | 0 | else { |
175 | 0 | _gcry_mpih_sub_n(prodp, up, up + hsize, hsize); |
176 | 0 | negflg = 1; |
177 | 0 | } |
178 | 0 | if( _gcry_mpih_cmp(vp + hsize, vp, hsize) >= 0 ) { |
179 | 0 | _gcry_mpih_sub_n(prodp + hsize, vp + hsize, vp, hsize); |
180 | 0 | negflg ^= 1; |
181 | 0 | } |
182 | 0 | else { |
183 | 0 | _gcry_mpih_sub_n(prodp + hsize, vp, vp + hsize, hsize); |
184 | | /* No change of NEGFLG. */ |
185 | 0 | } |
186 | | /* Read temporary operands from low part of PROD. |
187 | | * Put result in low part of TSPACE using upper part of TSPACE |
188 | | * as new TSPACE. |
189 | | */ |
190 | 0 | MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size); |
191 | | |
192 | | /* Add/copy product H. */ |
193 | 0 | MPN_COPY (prodp + hsize, prodp + size, hsize); |
194 | 0 | cy = _gcry_mpih_add_n( prodp + size, prodp + size, |
195 | 0 | prodp + size + hsize, hsize); |
196 | | |
197 | | /* Add product M (if NEGFLG M is a negative number) */ |
198 | 0 | if(negflg) |
199 | 0 | cy -= _gcry_mpih_sub_n(prodp + hsize, prodp + hsize, tspace, size); |
200 | 0 | else |
201 | 0 | cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); |
202 | | |
203 | | /* Product L. ________________ ________________ |
204 | | * |________________||____U0 x V0_____| |
205 | | * Read temporary operands from low part of PROD. |
206 | | * Put result in low part of TSPACE using upper part of TSPACE |
207 | | * as new TSPACE. |
208 | | */ |
209 | 0 | MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); |
210 | | |
211 | | /* Add/copy Product L (twice) */ |
212 | |
|
213 | 0 | cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); |
214 | 0 | if( cy ) |
215 | 0 | _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy); |
216 | |
|
217 | 0 | MPN_COPY(prodp, tspace, hsize); |
218 | 0 | cy = _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize); |
219 | 0 | if( cy ) |
220 | 0 | _gcry_mpih_add_1(prodp + size, prodp + size, size, 1); |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | |
225 | | void |
226 | | _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) |
227 | 0 | { |
228 | 0 | mpi_size_t i; |
229 | 0 | mpi_limb_t cy_limb; |
230 | 0 | mpi_limb_t v_limb; |
231 | | |
232 | | /* Multiply by the first limb in V separately, as the result can be |
233 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
234 | 0 | v_limb = up[0]; |
235 | 0 | if( v_limb <= 1 ) { |
236 | 0 | if( v_limb == 1 ) |
237 | 0 | MPN_COPY( prodp, up, size ); |
238 | 0 | else |
239 | 0 | MPN_ZERO(prodp, size); |
240 | 0 | cy_limb = 0; |
241 | 0 | } |
242 | 0 | else |
243 | 0 | cy_limb = _gcry_mpih_mul_1( prodp, up, size, v_limb ); |
244 | |
|
245 | 0 | prodp[size] = cy_limb; |
246 | 0 | prodp++; |
247 | | |
248 | | /* For each iteration in the outer loop, multiply one limb from |
249 | | * U with one limb from V, and add it to PROD. */ |
250 | 0 | for( i=1; i < size; i++) { |
251 | 0 | v_limb = up[i]; |
252 | 0 | if( v_limb <= 1 ) { |
253 | 0 | cy_limb = 0; |
254 | 0 | if( v_limb == 1 ) |
255 | 0 | cy_limb = _gcry_mpih_add_n(prodp, prodp, up, size); |
256 | 0 | } |
257 | 0 | else |
258 | 0 | cy_limb = _gcry_mpih_addmul_1(prodp, up, size, v_limb); |
259 | |
|
260 | 0 | prodp[size] = cy_limb; |
261 | 0 | prodp++; |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | | |
266 | | void |
267 | | _gcry_mpih_sqr_n( mpi_ptr_t prodp, |
268 | | mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) |
269 | 0 | { |
270 | 0 | if( size & 1 ) { |
271 | | /* The size is odd, and the code below doesn't handle that. |
272 | | * Multiply the least significant (size - 1) limbs with a recursive |
273 | | * call, and handle the most significant limb of S1 and S2 |
274 | | * separately. |
275 | | * A slightly faster way to do this would be to make the Karatsuba |
276 | | * code below behave as if the size were even, and let it check for |
277 | | * odd size in the end. I.e., in essence move this code to the end. |
278 | | * Doing so would save us a recursive call, and potentially make the |
279 | | * stack grow a lot less. |
280 | | */ |
281 | 0 | mpi_size_t esize = size - 1; /* even size */ |
282 | 0 | mpi_limb_t cy_limb; |
283 | |
|
284 | 0 | MPN_SQR_N_RECURSE( prodp, up, esize, tspace ); |
285 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, up[esize] ); |
286 | 0 | prodp[esize + esize] = cy_limb; |
287 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, size, up[esize] ); |
288 | |
|
289 | 0 | prodp[esize + size] = cy_limb; |
290 | 0 | } |
291 | 0 | else { |
292 | 0 | mpi_size_t hsize = size >> 1; |
293 | 0 | mpi_limb_t cy; |
294 | | |
295 | | /* Product H. ________________ ________________ |
296 | | * |_____U1 x U1____||____U0 x U0_____| |
297 | | * Put result in upper part of PROD and pass low part of TSPACE |
298 | | * as new TSPACE. |
299 | | */ |
300 | 0 | MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); |
301 | | |
302 | | /* Product M. ________________ |
303 | | * |_(U1-U0)(U0-U1)_| |
304 | | */ |
305 | 0 | if( _gcry_mpih_cmp( up + hsize, up, hsize) >= 0 ) |
306 | 0 | _gcry_mpih_sub_n( prodp, up + hsize, up, hsize); |
307 | 0 | else |
308 | 0 | _gcry_mpih_sub_n (prodp, up, up + hsize, hsize); |
309 | | |
310 | | /* Read temporary operands from low part of PROD. |
311 | | * Put result in low part of TSPACE using upper part of TSPACE |
312 | | * as new TSPACE. */ |
313 | 0 | MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); |
314 | | |
315 | | /* Add/copy product H */ |
316 | 0 | MPN_COPY(prodp + hsize, prodp + size, hsize); |
317 | 0 | cy = _gcry_mpih_add_n(prodp + size, prodp + size, |
318 | 0 | prodp + size + hsize, hsize); |
319 | | |
320 | | /* Add product M (if NEGFLG M is a negative number). */ |
321 | 0 | cy -= _gcry_mpih_sub_n (prodp + hsize, prodp + hsize, tspace, size); |
322 | | |
323 | | /* Product L. ________________ ________________ |
324 | | * |________________||____U0 x U0_____| |
325 | | * Read temporary operands from low part of PROD. |
326 | | * Put result in low part of TSPACE using upper part of TSPACE |
327 | | * as new TSPACE. */ |
328 | 0 | MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); |
329 | | |
330 | | /* Add/copy Product L (twice). */ |
331 | 0 | cy += _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace, size); |
332 | 0 | if( cy ) |
333 | 0 | _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, |
334 | 0 | hsize, cy); |
335 | |
|
336 | 0 | MPN_COPY(prodp, tspace, hsize); |
337 | 0 | cy = _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); |
338 | 0 | if( cy ) |
339 | 0 | _gcry_mpih_add_1 (prodp + size, prodp + size, size, 1); |
340 | 0 | } |
341 | 0 | } |
342 | | |
343 | | |
344 | | /* This should be made into an inline function in gmp.h. */ |
345 | | void |
346 | | _gcry_mpih_mul_n( mpi_ptr_t prodp, |
347 | | mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) |
348 | 0 | { |
349 | 0 | int secure; |
350 | |
|
351 | 0 | if( up == vp ) { |
352 | 0 | if( size < KARATSUBA_THRESHOLD ) |
353 | 0 | _gcry_mpih_sqr_n_basecase( prodp, up, size ); |
354 | 0 | else { |
355 | 0 | mpi_ptr_t tspace; |
356 | 0 | secure = _gcry_is_secure( up ); |
357 | 0 | tspace = mpi_alloc_limb_space( 2 * size, secure ); |
358 | 0 | _gcry_mpih_sqr_n( prodp, up, size, tspace ); |
359 | 0 | _gcry_mpi_free_limb_space (tspace, 2 * size ); |
360 | 0 | } |
361 | 0 | } |
362 | 0 | else { |
363 | 0 | if( size < KARATSUBA_THRESHOLD ) |
364 | 0 | mul_n_basecase( prodp, up, vp, size ); |
365 | 0 | else { |
366 | 0 | mpi_ptr_t tspace; |
367 | 0 | secure = _gcry_is_secure( up ) || _gcry_is_secure( vp ); |
368 | 0 | tspace = mpi_alloc_limb_space( 2 * size, secure ); |
369 | 0 | mul_n (prodp, up, vp, size, tspace); |
370 | 0 | _gcry_mpi_free_limb_space (tspace, 2 * size ); |
371 | 0 | } |
372 | 0 | } |
373 | 0 | } |
374 | | |
375 | | |
376 | | |
377 | | void |
378 | | _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, |
379 | | mpi_ptr_t up, mpi_size_t usize, |
380 | | mpi_ptr_t vp, mpi_size_t vsize, |
381 | | struct karatsuba_ctx *ctx ) |
382 | 0 | { |
383 | 0 | mpi_limb_t cy; |
384 | |
|
385 | 0 | if( !ctx->tspace || ctx->tspace_size < vsize ) { |
386 | 0 | if( ctx->tspace ) |
387 | 0 | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
388 | 0 | ctx->tspace_nlimbs = 2 * vsize; |
389 | 0 | ctx->tspace = mpi_alloc_limb_space (2 * vsize, |
390 | 0 | (_gcry_is_secure (up) |
391 | 0 | || _gcry_is_secure (vp))); |
392 | 0 | ctx->tspace_size = vsize; |
393 | 0 | } |
394 | |
|
395 | 0 | MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace ); |
396 | |
|
397 | 0 | prodp += vsize; |
398 | 0 | up += vsize; |
399 | 0 | usize -= vsize; |
400 | 0 | if( usize >= vsize ) { |
401 | 0 | if( !ctx->tp || ctx->tp_size < vsize ) { |
402 | 0 | if( ctx->tp ) |
403 | 0 | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
404 | 0 | ctx->tp_nlimbs = 2 * vsize; |
405 | 0 | ctx->tp = mpi_alloc_limb_space (2 * vsize, |
406 | 0 | (_gcry_is_secure (up) |
407 | 0 | || _gcry_is_secure (vp))); |
408 | 0 | ctx->tp_size = vsize; |
409 | 0 | } |
410 | |
|
411 | 0 | do { |
412 | 0 | MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace ); |
413 | 0 | cy = _gcry_mpih_add_n( prodp, prodp, ctx->tp, vsize ); |
414 | 0 | _gcry_mpih_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy ); |
415 | 0 | prodp += vsize; |
416 | 0 | up += vsize; |
417 | 0 | usize -= vsize; |
418 | 0 | } while( usize >= vsize ); |
419 | 0 | } |
420 | |
|
421 | 0 | if( usize ) { |
422 | 0 | if( usize < KARATSUBA_THRESHOLD ) { |
423 | 0 | _gcry_mpih_mul( ctx->tspace, vp, vsize, up, usize ); |
424 | 0 | } |
425 | 0 | else { |
426 | 0 | if( !ctx->next ) { |
427 | 0 | ctx->next = xcalloc( 1, sizeof *ctx ); |
428 | 0 | } |
429 | 0 | _gcry_mpih_mul_karatsuba_case( ctx->tspace, |
430 | 0 | vp, vsize, |
431 | 0 | up, usize, |
432 | 0 | ctx->next ); |
433 | 0 | } |
434 | |
|
435 | 0 | cy = _gcry_mpih_add_n( prodp, prodp, ctx->tspace, vsize); |
436 | 0 | _gcry_mpih_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy ); |
437 | 0 | } |
438 | 0 | } |
439 | | |
440 | | |
441 | | void |
442 | | _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ) |
443 | 0 | { |
444 | 0 | struct karatsuba_ctx *ctx2; |
445 | |
|
446 | 0 | if( ctx->tp ) |
447 | 0 | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
448 | 0 | if( ctx->tspace ) |
449 | 0 | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
450 | 0 | for( ctx=ctx->next; ctx; ctx = ctx2 ) { |
451 | 0 | ctx2 = ctx->next; |
452 | 0 | if( ctx->tp ) |
453 | 0 | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
454 | 0 | if( ctx->tspace ) |
455 | 0 | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
456 | 0 | xfree( ctx ); |
457 | 0 | } |
458 | 0 | } |
459 | | |
460 | | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) |
461 | | * and v (pointed to by VP, with VSIZE limbs), and store the result at |
462 | | * PRODP. USIZE + VSIZE limbs are always stored, but if the input |
463 | | * operands are normalized. Return the most significant limb of the |
464 | | * result. |
465 | | * |
466 | | * NOTE: The space pointed to by PRODP is overwritten before finished |
467 | | * with U and V, so overlap is an error. |
468 | | * |
469 | | * Argument constraints: |
470 | | * 1. USIZE >= VSIZE. |
471 | | * 2. PRODP != UP and PRODP != VP, i.e. the destination |
472 | | * must be distinct from the multiplier and the multiplicand. |
473 | | */ |
474 | | |
475 | | mpi_limb_t |
476 | | _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, |
477 | | mpi_ptr_t vp, mpi_size_t vsize) |
478 | 0 | { |
479 | 0 | mpi_ptr_t prod_endp = prodp + usize + vsize - 1; |
480 | 0 | mpi_limb_t cy; |
481 | 0 | struct karatsuba_ctx ctx; |
482 | |
|
483 | 0 | if( vsize < KARATSUBA_THRESHOLD ) { |
484 | 0 | mpi_size_t i; |
485 | 0 | mpi_limb_t v_limb; |
486 | |
|
487 | 0 | if( !vsize ) |
488 | 0 | return 0; |
489 | | |
490 | | /* Multiply by the first limb in V separately, as the result can be |
491 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
492 | 0 | v_limb = vp[0]; |
493 | 0 | if( v_limb <= 1 ) { |
494 | 0 | if( v_limb == 1 ) |
495 | 0 | MPN_COPY( prodp, up, usize ); |
496 | 0 | else |
497 | 0 | MPN_ZERO( prodp, usize ); |
498 | 0 | cy = 0; |
499 | 0 | } |
500 | 0 | else |
501 | 0 | cy = _gcry_mpih_mul_1( prodp, up, usize, v_limb ); |
502 | |
|
503 | 0 | prodp[usize] = cy; |
504 | 0 | prodp++; |
505 | | |
506 | | /* For each iteration in the outer loop, multiply one limb from |
507 | | * U with one limb from V, and add it to PROD. */ |
508 | 0 | for( i = 1; i < vsize; i++ ) { |
509 | 0 | v_limb = vp[i]; |
510 | 0 | if( v_limb <= 1 ) { |
511 | 0 | cy = 0; |
512 | 0 | if( v_limb == 1 ) |
513 | 0 | cy = _gcry_mpih_add_n(prodp, prodp, up, usize); |
514 | 0 | } |
515 | 0 | else |
516 | 0 | cy = _gcry_mpih_addmul_1(prodp, up, usize, v_limb); |
517 | |
|
518 | 0 | prodp[usize] = cy; |
519 | 0 | prodp++; |
520 | 0 | } |
521 | |
|
522 | 0 | return cy; |
523 | 0 | } |
524 | | |
525 | 0 | memset( &ctx, 0, sizeof ctx ); |
526 | 0 | _gcry_mpih_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx ); |
527 | 0 | _gcry_mpih_release_karatsuba_ctx( &ctx ); |
528 | 0 | return *prod_endp; |
529 | 0 | } |