/src/libgcrypt/mpi/mpih-mul.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* mpih-mul.c - MPI helper functions |
2 | | * Copyright (C) 1994, 1996, 1998, 1999, 2000, |
3 | | * 2001, 2002 Free Software Foundation, Inc. |
4 | | * |
5 | | * This file is part of Libgcrypt. |
6 | | * |
7 | | * Libgcrypt is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation; either version 2.1 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * Libgcrypt is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
20 | | * |
21 | | * Note: This code is heavily based on the GNU MP Library. |
22 | | * Actually it's the same code with only minor changes in the |
23 | | * way the data is stored; this is to support the abstraction |
24 | | * of an optional secure memory allocation which may be used |
25 | | * to avoid revealing of sensitive data due to paging etc. |
26 | | */ |
27 | | |
28 | | #include <config.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | #include "mpi-internal.h" |
33 | | #include "longlong.h" |
34 | | #include "g10lib.h" |
35 | | |
36 | | #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ |
37 | 8.51M | do { \ |
38 | 8.51M | if( (size) < KARATSUBA_THRESHOLD ) \ |
39 | 8.51M | mul_n_basecase (prodp, up, vp, size); \ |
40 | 8.51M | else \ |
41 | 8.51M | mul_n (prodp, up, vp, size, tspace); \ |
42 | 8.51M | } while (0) |
43 | | |
44 | | #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ |
45 | 0 | do { \ |
46 | 0 | if ((size) < KARATSUBA_THRESHOLD) \ |
47 | 0 | _gcry_mpih_sqr_n_basecase (prodp, up, size); \ |
48 | 0 | else \ |
49 | 0 | _gcry_mpih_sqr_n (prodp, up, size, tspace); \ |
50 | 0 | } while (0) |
51 | | |
52 | | |
53 | | |
54 | | |
55 | | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), |
56 | | * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are |
57 | | * always stored. Return the most significant limb. |
58 | | * |
59 | | * Argument constraints: |
60 | | * 1. PRODP != UP and PRODP != VP, i.e. the destination |
61 | | * must be distinct from the multiplier and the multiplicand. |
62 | | * |
63 | | * |
64 | | * Handle simple cases with traditional multiplication. |
65 | | * |
66 | | * This is the most critical code of multiplication. All multiplies rely |
67 | | * on this, both small and huge. Small ones arrive here immediately. Huge |
68 | | * ones arrive here as this is the base case for Karatsuba's recursive |
69 | | * algorithm below. |
70 | | */ |
71 | | |
72 | | static mpi_limb_t |
73 | | mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, |
74 | | mpi_ptr_t vp, mpi_size_t size) |
75 | 32.1M | { |
76 | 32.1M | mpi_size_t i; |
77 | 32.1M | mpi_limb_t cy; |
78 | 32.1M | mpi_limb_t v_limb; |
79 | | |
80 | | /* Multiply by the first limb in V separately, as the result can be |
81 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
82 | 32.1M | v_limb = vp[0]; |
83 | 32.1M | if( v_limb <= 1 ) { |
84 | 1.57M | if( v_limb == 1 ) |
85 | 1.54M | MPN_COPY( prodp, up, size ); |
86 | 36.7k | else |
87 | 36.7k | MPN_ZERO( prodp, size ); |
88 | 1.57M | cy = 0; |
89 | 1.57M | } |
90 | 30.5M | else |
91 | 30.5M | cy = _gcry_mpih_mul_1( prodp, up, size, v_limb ); |
92 | | |
93 | 32.1M | prodp[size] = cy; |
94 | 32.1M | prodp++; |
95 | | |
96 | | /* For each iteration in the outer loop, multiply one limb from |
97 | | * U with one limb from V, and add it to PROD. */ |
98 | 161M | for( i = 1; i < size; i++ ) { |
99 | 129M | v_limb = vp[i]; |
100 | 129M | if( v_limb <= 1 ) { |
101 | 5.00M | cy = 0; |
102 | 5.00M | if( v_limb == 1 ) |
103 | 12.9k | cy = _gcry_mpih_add_n(prodp, prodp, up, size); |
104 | 5.00M | } |
105 | 124M | else |
106 | 124M | cy = _gcry_mpih_addmul_1(prodp, up, size, v_limb); |
107 | | |
108 | 129M | prodp[size] = cy; |
109 | 129M | prodp++; |
110 | 129M | } |
111 | | |
112 | 32.1M | return cy; |
113 | 32.1M | } |
114 | | |
115 | | |
116 | | static void |
117 | | mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, |
118 | | mpi_size_t size, mpi_ptr_t tspace ) |
119 | 2.51M | { |
120 | 2.51M | if( size & 1 ) { |
121 | | /* The size is odd, and the code below doesn't handle that. |
122 | | * Multiply the least significant (size - 1) limbs with a recursive |
123 | | * call, and handle the most significant limb of S1 and S2 |
124 | | * separately. |
125 | | * A slightly faster way to do this would be to make the Karatsuba |
126 | | * code below behave as if the size were even, and let it check for |
127 | | * odd size in the end. I.e., in essence move this code to the end. |
128 | | * Doing so would save us a recursive call, and potentially make the |
129 | | * stack grow a lot less. |
130 | | */ |
131 | 248k | mpi_size_t esize = size - 1; /* even size */ |
132 | 248k | mpi_limb_t cy_limb; |
133 | | |
134 | 248k | MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace ); |
135 | 248k | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, vp[esize] ); |
136 | 248k | prodp[esize + esize] = cy_limb; |
137 | 248k | cy_limb = _gcry_mpih_addmul_1( prodp + esize, vp, size, up[esize] ); |
138 | 248k | prodp[esize + size] = cy_limb; |
139 | 248k | } |
140 | 2.26M | else { |
141 | | /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. |
142 | | * |
143 | | * Split U in two pieces, U1 and U0, such that |
144 | | * U = U0 + U1*(B**n), |
145 | | * and V in V1 and V0, such that |
146 | | * V = V0 + V1*(B**n). |
147 | | * |
148 | | * UV is then computed recursively using the identity |
149 | | * |
150 | | * 2n n n n |
151 | | * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V |
152 | | * 1 1 1 0 0 1 0 0 |
153 | | * |
154 | | * Where B = 2**BITS_PER_MP_LIMB. |
155 | | */ |
156 | 2.26M | mpi_size_t hsize = size >> 1; |
157 | 2.26M | mpi_limb_t cy; |
158 | 2.26M | int negflg; |
159 | | |
160 | | /* Product H. ________________ ________________ |
161 | | * |_____U1 x V1____||____U0 x V0_____| |
162 | | * Put result in upper part of PROD and pass low part of TSPACE |
163 | | * as new TSPACE. |
164 | | */ |
165 | 2.26M | MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace); |
166 | | |
167 | | /* Product M. ________________ |
168 | | * |_(U1-U0)(V0-V1)_| |
169 | | */ |
170 | 2.26M | if( _gcry_mpih_cmp(up + hsize, up, hsize) >= 0 ) { |
171 | 614k | _gcry_mpih_sub_n(prodp, up + hsize, up, hsize); |
172 | 614k | negflg = 0; |
173 | 614k | } |
174 | 1.65M | else { |
175 | 1.65M | _gcry_mpih_sub_n(prodp, up, up + hsize, hsize); |
176 | 1.65M | negflg = 1; |
177 | 1.65M | } |
178 | 2.26M | if( _gcry_mpih_cmp(vp + hsize, vp, hsize) >= 0 ) { |
179 | 604k | _gcry_mpih_sub_n(prodp + hsize, vp + hsize, vp, hsize); |
180 | 604k | negflg ^= 1; |
181 | 604k | } |
182 | 1.66M | else { |
183 | 1.66M | _gcry_mpih_sub_n(prodp + hsize, vp, vp + hsize, hsize); |
184 | | /* No change of NEGFLG. */ |
185 | 1.66M | } |
186 | | /* Read temporary operands from low part of PROD. |
187 | | * Put result in low part of TSPACE using upper part of TSPACE |
188 | | * as new TSPACE. |
189 | | */ |
190 | 2.26M | MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size); |
191 | | |
192 | | /* Add/copy product H. */ |
193 | 2.26M | MPN_COPY (prodp + hsize, prodp + size, hsize); |
194 | 2.26M | cy = _gcry_mpih_add_n( prodp + size, prodp + size, |
195 | 2.26M | prodp + size + hsize, hsize); |
196 | | |
197 | | /* Add product M (if NEGFLG M is a negative number) */ |
198 | 2.26M | if(negflg) |
199 | 1.98M | cy -= _gcry_mpih_sub_n(prodp + hsize, prodp + hsize, tspace, size); |
200 | 277k | else |
201 | 277k | cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); |
202 | | |
203 | | /* Product L. ________________ ________________ |
204 | | * |________________||____U0 x V0_____| |
205 | | * Read temporary operands from low part of PROD. |
206 | | * Put result in low part of TSPACE using upper part of TSPACE |
207 | | * as new TSPACE. |
208 | | */ |
209 | 2.26M | MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); |
210 | | |
211 | | /* Add/copy Product L (twice) */ |
212 | | |
213 | 2.26M | cy += _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace, size); |
214 | 2.26M | if( cy ) |
215 | 628k | _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy); |
216 | | |
217 | 2.26M | MPN_COPY(prodp, tspace, hsize); |
218 | 2.26M | cy = _gcry_mpih_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize); |
219 | 2.26M | if( cy ) |
220 | 718k | _gcry_mpih_add_1(prodp + size, prodp + size, size, 1); |
221 | 2.26M | } |
222 | 2.51M | } |
223 | | |
224 | | |
225 | | void |
226 | | _gcry_mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) |
227 | 15.4M | { |
228 | 15.4M | mpi_size_t i; |
229 | 15.4M | mpi_limb_t cy_limb; |
230 | 15.4M | mpi_limb_t v_limb; |
231 | | |
232 | | /* Multiply by the first limb in V separately, as the result can be |
233 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
234 | 15.4M | v_limb = up[0]; |
235 | 15.4M | if( v_limb <= 1 ) { |
236 | 136k | if( v_limb == 1 ) |
237 | 36.3k | MPN_COPY( prodp, up, size ); |
238 | 99.6k | else |
239 | 99.6k | MPN_ZERO(prodp, size); |
240 | 136k | cy_limb = 0; |
241 | 136k | } |
242 | 15.3M | else |
243 | 15.3M | cy_limb = _gcry_mpih_mul_1( prodp, up, size, v_limb ); |
244 | | |
245 | 15.4M | prodp[size] = cy_limb; |
246 | 15.4M | prodp++; |
247 | | |
248 | | /* For each iteration in the outer loop, multiply one limb from |
249 | | * U with one limb from V, and add it to PROD. */ |
250 | 61.9M | for( i=1; i < size; i++) { |
251 | 46.4M | v_limb = up[i]; |
252 | 46.4M | if( v_limb <= 1 ) { |
253 | 408k | cy_limb = 0; |
254 | 408k | if( v_limb == 1 ) |
255 | 0 | cy_limb = _gcry_mpih_add_n(prodp, prodp, up, size); |
256 | 408k | } |
257 | 46.0M | else |
258 | 46.0M | cy_limb = _gcry_mpih_addmul_1(prodp, up, size, v_limb); |
259 | | |
260 | 46.4M | prodp[size] = cy_limb; |
261 | 46.4M | prodp++; |
262 | 46.4M | } |
263 | 15.4M | } |
264 | | |
265 | | |
266 | | void |
267 | | _gcry_mpih_sqr_n( mpi_ptr_t prodp, |
268 | | mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) |
269 | 0 | { |
270 | 0 | if( size & 1 ) { |
271 | | /* The size is odd, and the code below doesn't handle that. |
272 | | * Multiply the least significant (size - 1) limbs with a recursive |
273 | | * call, and handle the most significant limb of S1 and S2 |
274 | | * separately. |
275 | | * A slightly faster way to do this would be to make the Karatsuba |
276 | | * code below behave as if the size were even, and let it check for |
277 | | * odd size in the end. I.e., in essence move this code to the end. |
278 | | * Doing so would save us a recursive call, and potentially make the |
279 | | * stack grow a lot less. |
280 | | */ |
281 | 0 | mpi_size_t esize = size - 1; /* even size */ |
282 | 0 | mpi_limb_t cy_limb; |
283 | |
|
284 | 0 | MPN_SQR_N_RECURSE( prodp, up, esize, tspace ); |
285 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, esize, up[esize] ); |
286 | 0 | prodp[esize + esize] = cy_limb; |
287 | 0 | cy_limb = _gcry_mpih_addmul_1( prodp + esize, up, size, up[esize] ); |
288 | |
|
289 | 0 | prodp[esize + size] = cy_limb; |
290 | 0 | } |
291 | 0 | else { |
292 | 0 | mpi_size_t hsize = size >> 1; |
293 | 0 | mpi_limb_t cy; |
294 | | |
295 | | /* Product H. ________________ ________________ |
296 | | * |_____U1 x U1____||____U0 x U0_____| |
297 | | * Put result in upper part of PROD and pass low part of TSPACE |
298 | | * as new TSPACE. |
299 | | */ |
300 | 0 | MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); |
301 | | |
302 | | /* Product M. ________________ |
303 | | * |_(U1-U0)(U0-U1)_| |
304 | | */ |
305 | 0 | if( _gcry_mpih_cmp( up + hsize, up, hsize) >= 0 ) |
306 | 0 | _gcry_mpih_sub_n( prodp, up + hsize, up, hsize); |
307 | 0 | else |
308 | 0 | _gcry_mpih_sub_n (prodp, up, up + hsize, hsize); |
309 | | |
310 | | /* Read temporary operands from low part of PROD. |
311 | | * Put result in low part of TSPACE using upper part of TSPACE |
312 | | * as new TSPACE. */ |
313 | 0 | MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); |
314 | | |
315 | | /* Add/copy product H */ |
316 | 0 | MPN_COPY(prodp + hsize, prodp + size, hsize); |
317 | 0 | cy = _gcry_mpih_add_n(prodp + size, prodp + size, |
318 | 0 | prodp + size + hsize, hsize); |
319 | | |
320 | | /* Add product M (if NEGFLG M is a negative number). */ |
321 | 0 | cy -= _gcry_mpih_sub_n (prodp + hsize, prodp + hsize, tspace, size); |
322 | | |
323 | | /* Product L. ________________ ________________ |
324 | | * |________________||____U0 x U0_____| |
325 | | * Read temporary operands from low part of PROD. |
326 | | * Put result in low part of TSPACE using upper part of TSPACE |
327 | | * as new TSPACE. */ |
328 | 0 | MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); |
329 | | |
330 | | /* Add/copy Product L (twice). */ |
331 | 0 | cy += _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace, size); |
332 | 0 | if( cy ) |
333 | 0 | _gcry_mpih_add_1(prodp + hsize + size, prodp + hsize + size, |
334 | 0 | hsize, cy); |
335 | |
|
336 | 0 | MPN_COPY(prodp, tspace, hsize); |
337 | 0 | cy = _gcry_mpih_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); |
338 | 0 | if( cy ) |
339 | 0 | _gcry_mpih_add_1 (prodp + size, prodp + size, size, 1); |
340 | 0 | } |
341 | 0 | } |
342 | | |
343 | | |
344 | | /* This should be made into an inline function in gmp.h. */ |
345 | | void |
346 | | _gcry_mpih_mul_n( mpi_ptr_t prodp, |
347 | | mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) |
348 | 41.6M | { |
349 | 41.6M | int secure; |
350 | | |
351 | 41.6M | if( up == vp ) { |
352 | 15.4M | if( size < KARATSUBA_THRESHOLD ) |
353 | 15.4M | _gcry_mpih_sqr_n_basecase( prodp, up, size ); |
354 | 0 | else { |
355 | 0 | mpi_ptr_t tspace; |
356 | 0 | secure = _gcry_is_secure( up ); |
357 | 0 | tspace = mpi_alloc_limb_space( 2 * size, secure ); |
358 | 0 | _gcry_mpih_sqr_n( prodp, up, size, tspace ); |
359 | 0 | _gcry_mpi_free_limb_space (tspace, 2 * size ); |
360 | 0 | } |
361 | 15.4M | } |
362 | 26.1M | else { |
363 | 26.1M | if( size < KARATSUBA_THRESHOLD ) |
364 | 26.1M | mul_n_basecase( prodp, up, vp, size ); |
365 | 0 | else { |
366 | 0 | mpi_ptr_t tspace; |
367 | 0 | secure = _gcry_is_secure( up ) || _gcry_is_secure( vp ); |
368 | 0 | tspace = mpi_alloc_limb_space( 2 * size, secure ); |
369 | 0 | mul_n (prodp, up, vp, size, tspace); |
370 | 0 | _gcry_mpi_free_limb_space (tspace, 2 * size ); |
371 | 0 | } |
372 | 26.1M | } |
373 | 41.6M | } |
374 | | |
375 | | |
376 | | |
377 | | void |
378 | | _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp, |
379 | | mpi_ptr_t up, mpi_size_t usize, |
380 | | mpi_ptr_t vp, mpi_size_t vsize, |
381 | | struct karatsuba_ctx *ctx ) |
382 | 1.46M | { |
383 | 1.46M | mpi_limb_t cy; |
384 | | |
385 | 1.46M | if( !ctx->tspace || ctx->tspace_size < vsize ) { |
386 | 1.18M | if( ctx->tspace ) |
387 | 387 | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
388 | 1.18M | ctx->tspace_nlimbs = 2 * vsize; |
389 | 1.18M | ctx->tspace = mpi_alloc_limb_space (2 * vsize, |
390 | 1.18M | (_gcry_is_secure (up) |
391 | 1.18M | || _gcry_is_secure (vp))); |
392 | 1.18M | ctx->tspace_size = vsize; |
393 | 1.18M | } |
394 | | |
395 | 1.46M | MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace ); |
396 | | |
397 | 1.46M | prodp += vsize; |
398 | 1.46M | up += vsize; |
399 | 1.46M | usize -= vsize; |
400 | 1.46M | if( usize >= vsize ) { |
401 | 8.16k | if( !ctx->tp || ctx->tp_size < vsize ) { |
402 | 7.91k | if( ctx->tp ) |
403 | 0 | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
404 | 7.91k | ctx->tp_nlimbs = 2 * vsize; |
405 | 7.91k | ctx->tp = mpi_alloc_limb_space (2 * vsize, |
406 | 7.91k | (_gcry_is_secure (up) |
407 | 7.91k | || _gcry_is_secure (vp))); |
408 | 7.91k | ctx->tp_size = vsize; |
409 | 7.91k | } |
410 | | |
411 | 8.19k | do { |
412 | 8.19k | MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace ); |
413 | 8.19k | cy = _gcry_mpih_add_n( prodp, prodp, ctx->tp, vsize ); |
414 | 8.19k | _gcry_mpih_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy ); |
415 | 8.19k | prodp += vsize; |
416 | 8.19k | up += vsize; |
417 | 8.19k | usize -= vsize; |
418 | 8.19k | } while( usize >= vsize ); |
419 | 8.16k | } |
420 | | |
421 | 1.46M | if( usize ) { |
422 | 208k | if( usize < KARATSUBA_THRESHOLD ) { |
423 | 208k | _gcry_mpih_mul( ctx->tspace, vp, vsize, up, usize ); |
424 | 208k | } |
425 | 175 | else { |
426 | 175 | if( !ctx->next ) { |
427 | 61 | ctx->next = xcalloc( 1, sizeof *ctx ); |
428 | 61 | } |
429 | 175 | _gcry_mpih_mul_karatsuba_case( ctx->tspace, |
430 | 175 | vp, vsize, |
431 | 175 | up, usize, |
432 | 175 | ctx->next ); |
433 | 175 | } |
434 | | |
435 | 208k | cy = _gcry_mpih_add_n( prodp, prodp, ctx->tspace, vsize); |
436 | 208k | _gcry_mpih_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy ); |
437 | 208k | } |
438 | 1.46M | } |
439 | | |
440 | | |
441 | | void |
442 | | _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx ) |
443 | 2.55M | { |
444 | 2.55M | struct karatsuba_ctx *ctx2; |
445 | | |
446 | 2.55M | if( ctx->tp ) |
447 | 7.90k | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
448 | 2.55M | if( ctx->tspace ) |
449 | 1.18M | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
450 | 2.55M | for( ctx=ctx->next; ctx; ctx = ctx2 ) { |
451 | 61 | ctx2 = ctx->next; |
452 | 61 | if( ctx->tp ) |
453 | 7 | _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs ); |
454 | 61 | if( ctx->tspace ) |
455 | 61 | _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs ); |
456 | 61 | xfree( ctx ); |
457 | 61 | } |
458 | 2.55M | } |
459 | | |
460 | | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) |
461 | | * and v (pointed to by VP, with VSIZE limbs), and store the result at |
462 | | * PRODP. USIZE + VSIZE limbs are always stored, but if the input |
463 | | * operands are normalized. Return the most significant limb of the |
464 | | * result. |
465 | | * |
466 | | * NOTE: The space pointed to by PRODP is overwritten before finished |
467 | | * with U and V, so overlap is an error. |
468 | | * |
469 | | * Argument constraints: |
470 | | * 1. USIZE >= VSIZE. |
471 | | * 2. PRODP != UP and PRODP != VP, i.e. the destination |
472 | | * must be distinct from the multiplier and the multiplicand. |
473 | | */ |
474 | | |
475 | | mpi_limb_t |
476 | | _gcry_mpih_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, |
477 | | mpi_ptr_t vp, mpi_size_t vsize) |
478 | 21.3M | { |
479 | 21.3M | mpi_ptr_t prod_endp = prodp + usize + vsize - 1; |
480 | 21.3M | mpi_limb_t cy; |
481 | 21.3M | struct karatsuba_ctx ctx; |
482 | | |
483 | 21.3M | if( vsize < KARATSUBA_THRESHOLD ) { |
484 | 20.2M | mpi_size_t i; |
485 | 20.2M | mpi_limb_t v_limb; |
486 | | |
487 | 20.2M | if( !vsize ) |
488 | 0 | return 0; |
489 | | |
490 | | /* Multiply by the first limb in V separately, as the result can be |
491 | | * stored (not added) to PROD. We also avoid a loop for zeroing. */ |
492 | 20.2M | v_limb = vp[0]; |
493 | 20.2M | if( v_limb <= 1 ) { |
494 | 1.97M | if( v_limb == 1 ) |
495 | 1.52M | MPN_COPY( prodp, up, usize ); |
496 | 448k | else |
497 | 448k | MPN_ZERO( prodp, usize ); |
498 | 1.97M | cy = 0; |
499 | 1.97M | } |
500 | 18.2M | else |
501 | 18.2M | cy = _gcry_mpih_mul_1( prodp, up, usize, v_limb ); |
502 | | |
503 | 20.2M | prodp[usize] = cy; |
504 | 20.2M | prodp++; |
505 | | |
506 | | /* For each iteration in the outer loop, multiply one limb from |
507 | | * U with one limb from V, and add it to PROD. */ |
508 | 81.7M | for( i = 1; i < vsize; i++ ) { |
509 | 61.5M | v_limb = vp[i]; |
510 | 61.5M | if( v_limb <= 1 ) { |
511 | 583k | cy = 0; |
512 | 583k | if( v_limb == 1 ) |
513 | 5.02k | cy = _gcry_mpih_add_n(prodp, prodp, up, usize); |
514 | 583k | } |
515 | 60.9M | else |
516 | 60.9M | cy = _gcry_mpih_addmul_1(prodp, up, usize, v_limb); |
517 | | |
518 | 61.5M | prodp[usize] = cy; |
519 | 61.5M | prodp++; |
520 | 61.5M | } |
521 | | |
522 | 20.2M | return cy; |
523 | 20.2M | } |
524 | | |
525 | 1.17M | memset( &ctx, 0, sizeof ctx ); |
526 | 1.17M | _gcry_mpih_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx ); |
527 | 1.17M | _gcry_mpih_release_karatsuba_ctx( &ctx ); |
528 | 1.17M | return *prod_endp; |
529 | 21.3M | } |