/src/openssl30/crypto/bn/bn_mul.c
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |  * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. | 
| 3 |  |  * | 
| 4 |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use | 
| 5 |  |  * this file except in compliance with the License.  You can obtain a copy | 
| 6 |  |  * in the file LICENSE in the source distribution or at | 
| 7 |  |  * https://www.openssl.org/source/license.html | 
| 8 |  |  */ | 
| 9 |  |  | 
| 10 |  | #include <assert.h> | 
| 11 |  | #include "internal/cryptlib.h" | 
| 12 |  | #include "bn_local.h" | 
| 13 |  |  | 
| 14 |  | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | 
| 15 |  | /* | 
| 16 |  |  * Here follows specialised variants of bn_add_words() and bn_sub_words(). | 
| 17 |  |  * They have the property performing operations on arrays of different sizes. | 
| 18 |  |  * The sizes of those arrays is expressed through cl, which is the common | 
| 19 |  |  * length ( basically, min(len(a),len(b)) ), and dl, which is the delta | 
| 20 |  |  * between the two lengths, calculated as len(a)-len(b). All lengths are the | 
| 21 |  |  * number of BN_ULONGs...  For the operations that require a result array as | 
| 22 |  |  * parameter, it must have the length cl+abs(dl). These functions should | 
| 23 |  |  * probably end up in bn_asm.c as soon as there are assembler counterparts | 
| 24 |  |  * for the systems that use assembler files. | 
| 25 |  |  */ | 
| 26 |  |  | 
| 27 |  | BN_ULONG bn_sub_part_words(BN_ULONG *r, | 
| 28 |  |                            const BN_ULONG *a, const BN_ULONG *b, | 
| 29 |  |                            int cl, int dl) | 
| 30 | 72.6M | { | 
| 31 | 72.6M |     BN_ULONG c, t; | 
| 32 |  |  | 
| 33 | 72.6M |     assert(cl >= 0); | 
| 34 | 72.6M |     c = bn_sub_words(r, a, b, cl); | 
| 35 |  |  | 
| 36 | 72.6M |     if (dl == 0) | 
| 37 | 71.9M |         return c; | 
| 38 |  |  | 
| 39 | 735k |     r += cl; | 
| 40 | 735k |     a += cl; | 
| 41 | 735k |     b += cl; | 
| 42 |  |  | 
| 43 | 735k |     if (dl < 0) { | 
| 44 | 468k |         for (;;) { | 
| 45 | 468k |             t = b[0]; | 
| 46 | 468k |             r[0] = (0 - t - c) & BN_MASK2; | 
| 47 | 468k |             if (t != 0) | 
| 48 | 0 |                 c = 1; | 
| 49 | 468k |             if (++dl >= 0) | 
| 50 | 16.0k |                 break; | 
| 51 |  |  | 
| 52 | 451k |             t = b[1]; | 
| 53 | 451k |             r[1] = (0 - t - c) & BN_MASK2; | 
| 54 | 451k |             if (t != 0) | 
| 55 | 0 |                 c = 1; | 
| 56 | 451k |             if (++dl >= 0) | 
| 57 | 21.2k |                 break; | 
| 58 |  |  | 
| 59 | 430k |             t = b[2]; | 
| 60 | 430k |             r[2] = (0 - t - c) & BN_MASK2; | 
| 61 | 430k |             if (t != 0) | 
| 62 | 0 |                 c = 1; | 
| 63 | 430k |             if (++dl >= 0) | 
| 64 | 30.5k |                 break; | 
| 65 |  |  | 
| 66 | 400k |             t = b[3]; | 
| 67 | 400k |             r[3] = (0 - t - c) & BN_MASK2; | 
| 68 | 400k |             if (t != 0) | 
| 69 | 0 |                 c = 1; | 
| 70 | 400k |             if (++dl >= 0) | 
| 71 | 8.95k |                 break; | 
| 72 |  |  | 
| 73 | 391k |             b += 4; | 
| 74 | 391k |             r += 4; | 
| 75 | 391k |         } | 
| 76 | 658k |     } else { | 
| 77 | 658k |         int save_dl = dl; | 
| 78 | 1.00M |         while (c) { | 
| 79 | 397k |             t = a[0]; | 
| 80 | 397k |             r[0] = (t - c) & BN_MASK2; | 
| 81 | 397k |             if (t != 0) | 
| 82 | 76.0k |                 c = 0; | 
| 83 | 397k |             if (--dl <= 0) | 
| 84 | 14.2k |                 break; | 
| 85 |  |  | 
| 86 | 383k |             t = a[1]; | 
| 87 | 383k |             r[1] = (t - c) & BN_MASK2; | 
| 88 | 383k |             if (t != 0) | 
| 89 | 68.1k |                 c = 0; | 
| 90 | 383k |             if (--dl <= 0) | 
| 91 | 11.5k |                 break; | 
| 92 |  |  | 
| 93 | 371k |             t = a[2]; | 
| 94 | 371k |             r[2] = (t - c) & BN_MASK2; | 
| 95 | 371k |             if (t != 0) | 
| 96 | 66.5k |                 c = 0; | 
| 97 | 371k |             if (--dl <= 0) | 
| 98 | 13.9k |                 break; | 
| 99 |  |  | 
| 100 | 357k |             t = a[3]; | 
| 101 | 357k |             r[3] = (t - c) & BN_MASK2; | 
| 102 | 357k |             if (t != 0) | 
| 103 | 57.7k |                 c = 0; | 
| 104 | 357k |             if (--dl <= 0) | 
| 105 | 7.97k |                 break; | 
| 106 |  |  | 
| 107 | 349k |             save_dl = dl; | 
| 108 | 349k |             a += 4; | 
| 109 | 349k |             r += 4; | 
| 110 | 349k |         } | 
| 111 | 658k |         if (dl > 0) { | 
| 112 | 610k |             if (save_dl > dl) { | 
| 113 | 0 |                 switch (save_dl - dl) { | 
| 114 | 0 |                 case 1: | 
| 115 | 0 |                     r[1] = a[1]; | 
| 116 | 0 |                     if (--dl <= 0) | 
| 117 | 0 |                         break; | 
| 118 |  |                     /* fall thru */ | 
| 119 | 0 |                 case 2: | 
| 120 | 0 |                     r[2] = a[2]; | 
| 121 | 0 |                     if (--dl <= 0) | 
| 122 | 0 |                         break; | 
| 123 |  |                     /* fall thru */ | 
| 124 | 0 |                 case 3: | 
| 125 | 0 |                     r[3] = a[3]; | 
| 126 | 0 |                     if (--dl <= 0) | 
| 127 | 0 |                         break; | 
| 128 | 0 |                 } | 
| 129 | 0 |                 a += 4; | 
| 130 | 0 |                 r += 4; | 
| 131 | 0 |             } | 
| 132 | 610k |         } | 
| 133 | 658k |         if (dl > 0) { | 
| 134 | 6.74M |             for (;;) { | 
| 135 | 6.74M |                 r[0] = a[0]; | 
| 136 | 6.74M |                 if (--dl <= 0) | 
| 137 | 116k |                     break; | 
| 138 | 6.62M |                 r[1] = a[1]; | 
| 139 | 6.62M |                 if (--dl <= 0) | 
| 140 | 118k |                     break; | 
| 141 | 6.50M |                 r[2] = a[2]; | 
| 142 | 6.50M |                 if (--dl <= 0) | 
| 143 | 237k |                     break; | 
| 144 | 6.27M |                 r[3] = a[3]; | 
| 145 | 6.27M |                 if (--dl <= 0) | 
| 146 | 138k |                     break; | 
| 147 |  |  | 
| 148 | 6.13M |                 a += 4; | 
| 149 | 6.13M |                 r += 4; | 
| 150 | 6.13M |             } | 
| 151 | 610k |         } | 
| 152 | 658k |     } | 
| 153 | 735k |     return c; | 
| 154 | 735k | } | 
| 155 |  | #endif | 
| 156 |  |  | 
| 157 |  | #ifdef BN_RECURSION | 
| 158 |  | /* | 
| 159 |  |  * Karatsuba recursive multiplication algorithm (cf. Knuth, The Art of | 
| 160 |  |  * Computer Programming, Vol. 2) | 
| 161 |  |  */ | 
| 162 |  |  | 
| 163 |  | /*- | 
| 164 |  |  * r is 2*n2 words in size, | 
| 165 |  |  * a and b are both n2 words in size. | 
| 166 |  |  * n2 must be a power of 2. | 
| 167 |  |  * We multiply and return the result. | 
| 168 |  |  * t must be 2*n2 words in size | 
| 169 |  |  * We calculate | 
| 170 |  |  * a[0]*b[0] | 
| 171 |  |  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) | 
| 172 |  |  * a[1]*b[1] | 
| 173 |  |  */ | 
| 174 |  | /* dnX may not be positive, but n2/2+dnX has to be */ | 
| 175 |  | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 
| 176 |  |                       int dna, int dnb, BN_ULONG *t) | 
| 177 | 37.9M | { | 
| 178 | 37.9M |     int n = n2 / 2, c1, c2; | 
| 179 | 37.9M |     int tna = n + dna, tnb = n + dnb; | 
| 180 | 37.9M |     unsigned int neg, zero; | 
| 181 | 37.9M |     BN_ULONG ln, lo, *p; | 
| 182 |  |  | 
| 183 | 37.9M | # ifdef BN_MUL_COMBA | 
| 184 |  | #  if 0 | 
| 185 |  |     if (n2 == 4) { | 
| 186 |  |         bn_mul_comba4(r, a, b); | 
| 187 |  |         return; | 
| 188 |  |     } | 
| 189 |  | #  endif | 
| 190 |  |     /* | 
| 191 |  |      * Only call bn_mul_comba 8 if n2 == 8 and the two arrays are complete | 
| 192 |  |      * [steve] | 
| 193 |  |      */ | 
| 194 | 37.9M |     if (n2 == 8 && dna == 0 && dnb == 0) { | 
| 195 | 29.1k |         bn_mul_comba8(r, a, b); | 
| 196 | 29.1k |         return; | 
| 197 | 29.1k |     } | 
| 198 | 37.9M | # endif                         /* BN_MUL_COMBA */ | 
| 199 |  |     /* Else do normal multiply */ | 
| 200 | 37.9M |     if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { | 
| 201 | 16.3k |         bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); | 
| 202 | 16.3k |         if ((dna + dnb) < 0) | 
| 203 | 16.3k |             memset(&r[2 * n2 + dna + dnb], 0, | 
| 204 | 16.3k |                    sizeof(BN_ULONG) * -(dna + dnb)); | 
| 205 | 16.3k |         return; | 
| 206 | 16.3k |     } | 
| 207 |  |     /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 
| 208 | 37.8M |     c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); | 
| 209 | 37.8M |     c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); | 
| 210 | 37.8M |     zero = neg = 0; | 
| 211 | 37.8M |     switch (c1 * 3 + c2) { | 
| 212 | 8.47M |     case -4: | 
| 213 | 8.47M |         bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ | 
| 214 | 8.47M |         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ | 
| 215 | 8.47M |         break; | 
| 216 | 284k |     case -3: | 
| 217 | 284k |         zero = 1; | 
| 218 | 284k |         break; | 
| 219 | 6.94M |     case -2: | 
| 220 | 6.94M |         bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ | 
| 221 | 6.94M |         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ | 
| 222 | 6.94M |         neg = 1; | 
| 223 | 6.94M |         break; | 
| 224 | 554k |     case -1: | 
| 225 | 785k |     case 0: | 
| 226 | 1.31M |     case 1: | 
| 227 | 1.31M |         zero = 1; | 
| 228 | 1.31M |         break; | 
| 229 | 12.2M |     case 2: | 
| 230 | 12.2M |         bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ | 
| 231 | 12.2M |         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ | 
| 232 | 12.2M |         neg = 1; | 
| 233 | 12.2M |         break; | 
| 234 | 318k |     case 3: | 
| 235 | 318k |         zero = 1; | 
| 236 | 318k |         break; | 
| 237 | 8.32M |     case 4: | 
| 238 | 8.32M |         bn_sub_part_words(t, a, &(a[n]), tna, n - tna); | 
| 239 | 8.32M |         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); | 
| 240 | 8.32M |         break; | 
| 241 | 37.8M |     } | 
| 242 |  |  | 
| 243 | 37.8M | # ifdef BN_MUL_COMBA | 
| 244 | 37.8M |     if (n == 4 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba4 could take | 
| 245 |  |                                            * extra args to do this well */ | 
| 246 | 0 |         if (!zero) | 
| 247 | 0 |             bn_mul_comba4(&(t[n2]), t, &(t[n])); | 
| 248 | 0 |         else | 
| 249 | 0 |             memset(&t[n2], 0, sizeof(*t) * 8); | 
| 250 |  | 
 | 
| 251 | 0 |         bn_mul_comba4(r, a, b); | 
| 252 | 0 |         bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); | 
| 253 | 37.8M |     } else if (n == 8 && dna == 0 && dnb == 0) { /* XXX: bn_mul_comba8 could | 
| 254 |  |                                                   * take extra args to do | 
| 255 |  |                                                   * this well */ | 
| 256 | 25.2M |         if (!zero) | 
| 257 | 23.9M |             bn_mul_comba8(&(t[n2]), t, &(t[n])); | 
| 258 | 1.30M |         else | 
| 259 | 1.30M |             memset(&t[n2], 0, sizeof(*t) * 16); | 
| 260 |  |  | 
| 261 | 25.2M |         bn_mul_comba8(r, a, b); | 
| 262 | 25.2M |         bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); | 
| 263 | 25.2M |     } else | 
| 264 | 12.6M | # endif                         /* BN_MUL_COMBA */ | 
| 265 | 12.6M |     { | 
| 266 | 12.6M |         p = &(t[n2 * 2]); | 
| 267 | 12.6M |         if (!zero) | 
| 268 | 11.9M |             bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); | 
| 269 | 611k |         else | 
| 270 | 611k |             memset(&t[n2], 0, sizeof(*t) * n2); | 
| 271 | 12.6M |         bn_mul_recursive(r, a, b, n, 0, 0, p); | 
| 272 | 12.6M |         bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); | 
| 273 | 12.6M |     } | 
| 274 |  |  | 
| 275 |  |     /*- | 
| 276 |  |      * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | 
| 277 |  |      * r[10] holds (a[0]*b[0]) | 
| 278 |  |      * r[32] holds (b[1]*b[1]) | 
| 279 |  |      */ | 
| 280 |  |  | 
| 281 | 37.8M |     c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); | 
| 282 |  |  | 
| 283 | 37.8M |     if (neg) {                  /* if t[32] is negative */ | 
| 284 | 19.1M |         c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); | 
| 285 | 19.1M |     } else { | 
| 286 |  |         /* Might have a carry */ | 
| 287 | 18.7M |         c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); | 
| 288 | 18.7M |     } | 
| 289 |  |  | 
| 290 |  |     /*- | 
| 291 |  |      * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | 
| 292 |  |      * r[10] holds (a[0]*b[0]) | 
| 293 |  |      * r[32] holds (b[1]*b[1]) | 
| 294 |  |      * c1 holds the carry bits | 
| 295 |  |      */ | 
| 296 | 37.8M |     c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); | 
| 297 | 37.8M |     if (c1) { | 
| 298 | 14.6M |         p = &(r[n + n2]); | 
| 299 | 14.6M |         lo = *p; | 
| 300 | 14.6M |         ln = (lo + c1) & BN_MASK2; | 
| 301 | 14.6M |         *p = ln; | 
| 302 |  |  | 
| 303 |  |         /* | 
| 304 |  |          * The overflow will stop before we over write words we should not | 
| 305 |  |          * overwrite | 
| 306 |  |          */ | 
| 307 | 14.6M |         if (ln < (BN_ULONG)c1) { | 
| 308 | 222k |             do { | 
| 309 | 222k |                 p++; | 
| 310 | 222k |                 lo = *p; | 
| 311 | 222k |                 ln = (lo + 1) & BN_MASK2; | 
| 312 | 222k |                 *p = ln; | 
| 313 | 222k |             } while (ln == 0); | 
| 314 | 45.0k |         } | 
| 315 | 14.6M |     } | 
| 316 | 37.8M | } | 
| 317 |  |  | 
| 318 |  | /* | 
| 319 |  |  * n+tn is the word length t needs to be n*4 is size, as does r | 
| 320 |  |  */ | 
| 321 |  | /* tnX may not be negative but less than n */ | 
| 322 |  | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, | 
| 323 |  |                            int tna, int tnb, BN_ULONG *t) | 
| 324 | 356k | { | 
| 325 | 356k |     int i, j, n2 = n * 2; | 
| 326 | 356k |     int c1, c2, neg; | 
| 327 | 356k |     BN_ULONG ln, lo, *p; | 
| 328 |  |  | 
| 329 | 356k |     if (n < 8) { | 
| 330 | 0 |         bn_mul_normal(r, a, n + tna, b, n + tnb); | 
| 331 | 0 |         return; | 
| 332 | 0 |     } | 
| 333 |  |  | 
| 334 |  |     /* r=(a[0]-a[1])*(b[1]-b[0]) */ | 
| 335 | 356k |     c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); | 
| 336 | 356k |     c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); | 
| 337 | 356k |     neg = 0; | 
| 338 | 356k |     switch (c1 * 3 + c2) { | 
| 339 | 43.3k |     case -4: | 
| 340 | 43.3k |         bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ | 
| 341 | 43.3k |         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ | 
| 342 | 43.3k |         break; | 
| 343 | 1.30k |     case -3: | 
| 344 | 8.83k |     case -2: | 
| 345 | 8.83k |         bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ | 
| 346 | 8.83k |         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ | 
| 347 | 8.83k |         neg = 1; | 
| 348 | 8.83k |         break; | 
| 349 | 5.99k |     case -1: | 
| 350 | 6.91k |     case 0: | 
| 351 | 8.18k |     case 1: | 
| 352 | 290k |     case 2: | 
| 353 | 290k |         bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ | 
| 354 | 290k |         bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ | 
| 355 | 290k |         neg = 1; | 
| 356 | 290k |         break; | 
| 357 | 4.31k |     case 3: | 
| 358 | 13.8k |     case 4: | 
| 359 | 13.8k |         bn_sub_part_words(t, a, &(a[n]), tna, n - tna); | 
| 360 | 13.8k |         bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); | 
| 361 | 13.8k |         break; | 
| 362 | 356k |     } | 
| 363 |  |     /* | 
| 364 |  |      * The zero case isn't yet implemented here. The speedup would probably | 
| 365 |  |      * be negligible. | 
| 366 |  |      */ | 
| 367 |  | # if 0 | 
| 368 |  |     if (n == 4) { | 
| 369 |  |         bn_mul_comba4(&(t[n2]), t, &(t[n])); | 
| 370 |  |         bn_mul_comba4(r, a, b); | 
| 371 |  |         bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn); | 
| 372 |  |         memset(&r[n2 + tn * 2], 0, sizeof(*r) * (n2 - tn * 2)); | 
| 373 |  |     } else | 
| 374 |  | # endif | 
| 375 | 356k |     if (n == 8) { | 
| 376 | 49.3k |         bn_mul_comba8(&(t[n2]), t, &(t[n])); | 
| 377 | 49.3k |         bn_mul_comba8(r, a, b); | 
| 378 | 49.3k |         bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); | 
| 379 | 49.3k |         memset(&r[n2 + tna + tnb], 0, sizeof(*r) * (n2 - tna - tnb)); | 
| 380 | 307k |     } else { | 
| 381 | 307k |         p = &(t[n2 * 2]); | 
| 382 | 307k |         bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); | 
| 383 | 307k |         bn_mul_recursive(r, a, b, n, 0, 0, p); | 
| 384 | 307k |         i = n / 2; | 
| 385 |  |         /* | 
| 386 |  |          * If there is only a bottom half to the number, just do it | 
| 387 |  |          */ | 
| 388 | 307k |         if (tna > tnb) | 
| 389 | 32.2k |             j = tna - i; | 
| 390 | 274k |         else | 
| 391 | 274k |             j = tnb - i; | 
| 392 | 307k |         if (j == 0) { | 
| 393 | 13.4k |             bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), | 
| 394 | 13.4k |                              i, tna - i, tnb - i, p); | 
| 395 | 13.4k |             memset(&r[n2 + i * 2], 0, sizeof(*r) * (n2 - i * 2)); | 
| 396 | 293k |         } else if (j > 0) {     /* eg, n == 16, i == 8 and tn == 11 */ | 
| 397 | 100k |             bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), | 
| 398 | 100k |                                   i, tna - i, tnb - i, p); | 
| 399 | 100k |             memset(&(r[n2 + tna + tnb]), 0, | 
| 400 | 100k |                    sizeof(BN_ULONG) * (n2 - tna - tnb)); | 
| 401 | 193k |         } else {                /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ | 
| 402 |  |  | 
| 403 | 193k |             memset(&r[n2], 0, sizeof(*r) * n2); | 
| 404 | 193k |             if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL | 
| 405 | 193k |                 && tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { | 
| 406 | 164k |                 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); | 
| 407 | 164k |             } else { | 
| 408 | 44.4k |                 for (;;) { | 
| 409 | 44.4k |                     i /= 2; | 
| 410 |  |                     /* | 
| 411 |  |                      * these simplified conditions work exclusively because | 
| 412 |  |                      * difference between tna and tnb is 1 or 0 | 
| 413 |  |                      */ | 
| 414 | 44.4k |                     if (i < tna || i < tnb) { | 
| 415 | 19.1k |                         bn_mul_part_recursive(&(r[n2]), | 
| 416 | 19.1k |                                               &(a[n]), &(b[n]), | 
| 417 | 19.1k |                                               i, tna - i, tnb - i, p); | 
| 418 | 19.1k |                         break; | 
| 419 | 25.2k |                     } else if (i == tna || i == tnb) { | 
| 420 | 9.15k |                         bn_mul_recursive(&(r[n2]), | 
| 421 | 9.15k |                                          &(a[n]), &(b[n]), | 
| 422 | 9.15k |                                          i, tna - i, tnb - i, p); | 
| 423 | 9.15k |                         break; | 
| 424 | 9.15k |                     } | 
| 425 | 44.4k |                 } | 
| 426 | 28.3k |             } | 
| 427 | 193k |         } | 
| 428 | 307k |     } | 
| 429 |  |  | 
| 430 |  |     /*- | 
| 431 |  |      * t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign | 
| 432 |  |      * r[10] holds (a[0]*b[0]) | 
| 433 |  |      * r[32] holds (b[1]*b[1]) | 
| 434 |  |      */ | 
| 435 |  |  | 
| 436 | 356k |     c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); | 
| 437 |  |  | 
| 438 | 356k |     if (neg) {                  /* if t[32] is negative */ | 
| 439 | 299k |         c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); | 
| 440 | 299k |     } else { | 
| 441 |  |         /* Might have a carry */ | 
| 442 | 57.1k |         c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); | 
| 443 | 57.1k |     } | 
| 444 |  |  | 
| 445 |  |     /*- | 
| 446 |  |      * t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) | 
| 447 |  |      * r[10] holds (a[0]*b[0]) | 
| 448 |  |      * r[32] holds (b[1]*b[1]) | 
| 449 |  |      * c1 holds the carry bits | 
| 450 |  |      */ | 
| 451 | 356k |     c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); | 
| 452 | 356k |     if (c1) { | 
| 453 | 6.80k |         p = &(r[n + n2]); | 
| 454 | 6.80k |         lo = *p; | 
| 455 | 6.80k |         ln = (lo + c1) & BN_MASK2; | 
| 456 | 6.80k |         *p = ln; | 
| 457 |  |  | 
| 458 |  |         /* | 
| 459 |  |          * The overflow will stop before we over write words we should not | 
| 460 |  |          * overwrite | 
| 461 |  |          */ | 
| 462 | 6.80k |         if (ln < (BN_ULONG)c1) { | 
| 463 | 17.2k |             do { | 
| 464 | 17.2k |                 p++; | 
| 465 | 17.2k |                 lo = *p; | 
| 466 | 17.2k |                 ln = (lo + 1) & BN_MASK2; | 
| 467 | 17.2k |                 *p = ln; | 
| 468 | 17.2k |             } while (ln == 0); | 
| 469 | 2.63k |         } | 
| 470 | 6.80k |     } | 
| 471 | 356k | } | 
| 472 |  |  | 
| 473 |  | /*- | 
| 474 |  |  * a and b must be the same size, which is n2. | 
| 475 |  |  * r needs to be n2 words and t needs to be n2*2 | 
| 476 |  |  */ | 
| 477 |  | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 
| 478 |  |                           BN_ULONG *t) | 
| 479 | 0 | { | 
| 480 | 0 |     int n = n2 / 2; | 
| 481 |  | 
 | 
| 482 | 0 |     bn_mul_recursive(r, a, b, n, 0, 0, &(t[0])); | 
| 483 | 0 |     if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) { | 
| 484 | 0 |         bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2])); | 
| 485 | 0 |         bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); | 
| 486 | 0 |         bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2])); | 
| 487 | 0 |         bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); | 
| 488 | 0 |     } else { | 
| 489 | 0 |         bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n); | 
| 490 | 0 |         bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n); | 
| 491 | 0 |         bn_add_words(&(r[n]), &(r[n]), &(t[0]), n); | 
| 492 | 0 |         bn_add_words(&(r[n]), &(r[n]), &(t[n]), n); | 
| 493 | 0 |     } | 
| 494 | 0 | } | 
| 495 |  | #endif                          /* BN_RECURSION */ | 
| 496 |  |  | 
| 497 |  | int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | 
| 498 | 18.0M | { | 
| 499 | 18.0M |     int ret = bn_mul_fixed_top(r, a, b, ctx); | 
| 500 |  |  | 
| 501 | 18.0M |     bn_correct_top(r); | 
| 502 | 18.0M |     bn_check_top(r); | 
| 503 |  |  | 
| 504 | 18.0M |     return ret; | 
| 505 | 18.0M | } | 
| 506 |  |  | 
| 507 |  | int bn_mul_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) | 
| 508 | 19.7M | { | 
| 509 | 19.7M |     int ret = 0; | 
| 510 | 19.7M |     int top, al, bl; | 
| 511 | 19.7M |     BIGNUM *rr; | 
| 512 | 19.7M | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 
| 513 | 19.7M |     int i; | 
| 514 | 19.7M | #endif | 
| 515 | 19.7M | #ifdef BN_RECURSION | 
| 516 | 19.7M |     BIGNUM *t = NULL; | 
| 517 | 19.7M |     int j = 0, k; | 
| 518 | 19.7M | #endif | 
| 519 |  |  | 
| 520 | 19.7M |     bn_check_top(a); | 
| 521 | 19.7M |     bn_check_top(b); | 
| 522 | 19.7M |     bn_check_top(r); | 
| 523 |  |  | 
| 524 | 19.7M |     al = a->top; | 
| 525 | 19.7M |     bl = b->top; | 
| 526 |  |  | 
| 527 | 19.7M |     if ((al == 0) || (bl == 0)) { | 
| 528 | 694k |         BN_zero(r); | 
| 529 | 694k |         return 1; | 
| 530 | 694k |     } | 
| 531 | 19.0M |     top = al + bl; | 
| 532 |  |  | 
| 533 | 19.0M |     BN_CTX_start(ctx); | 
| 534 | 19.0M |     if ((r == a) || (r == b)) { | 
| 535 | 68.5k |         if ((rr = BN_CTX_get(ctx)) == NULL) | 
| 536 | 0 |             goto err; | 
| 537 | 68.5k |     } else | 
| 538 | 18.9M |         rr = r; | 
| 539 |  |  | 
| 540 | 19.0M | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 
| 541 | 19.0M |     i = al - bl; | 
| 542 | 19.0M | #endif | 
| 543 | 19.0M | #ifdef BN_MUL_COMBA | 
| 544 | 19.0M |     if (i == 0) { | 
| 545 |  | # if 0 | 
| 546 |  |         if (al == 4) { | 
| 547 |  |             if (bn_wexpand(rr, 8) == NULL) | 
| 548 |  |                 goto err; | 
| 549 |  |             rr->top = 8; | 
| 550 |  |             bn_mul_comba4(rr->d, a->d, b->d); | 
| 551 |  |             goto end; | 
| 552 |  |         } | 
| 553 |  | # endif | 
| 554 | 11.8M |         if (al == 8) { | 
| 555 | 15.4k |             if (bn_wexpand(rr, 16) == NULL) | 
| 556 | 0 |                 goto err; | 
| 557 | 15.4k |             rr->top = 16; | 
| 558 | 15.4k |             bn_mul_comba8(rr->d, a->d, b->d); | 
| 559 | 15.4k |             goto end; | 
| 560 | 15.4k |         } | 
| 561 | 11.8M |     } | 
| 562 | 19.0M | #endif                          /* BN_MUL_COMBA */ | 
| 563 | 19.0M | #ifdef BN_RECURSION | 
| 564 | 19.0M |     if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { | 
| 565 | 365k |         if (i >= -1 && i <= 1) { | 
| 566 |  |             /* | 
| 567 |  |              * Find out the power of two lower or equal to the longest of the | 
| 568 |  |              * two numbers | 
| 569 |  |              */ | 
| 570 | 337k |             if (i >= 0) { | 
| 571 | 285k |                 j = BN_num_bits_word((BN_ULONG)al); | 
| 572 | 285k |             } | 
| 573 | 337k |             if (i == -1) { | 
| 574 | 52.3k |                 j = BN_num_bits_word((BN_ULONG)bl); | 
| 575 | 52.3k |             } | 
| 576 | 337k |             j = 1 << (j - 1); | 
| 577 | 337k |             assert(j <= al || j <= bl); | 
| 578 | 337k |             k = j + j; | 
| 579 | 337k |             t = BN_CTX_get(ctx); | 
| 580 | 337k |             if (t == NULL) | 
| 581 | 0 |                 goto err; | 
| 582 | 337k |             if (al > j || bl > j) { | 
| 583 | 236k |                 if (bn_wexpand(t, k * 4) == NULL) | 
| 584 | 0 |                     goto err; | 
| 585 | 236k |                 if (bn_wexpand(rr, k * 4) == NULL) | 
| 586 | 0 |                     goto err; | 
| 587 | 236k |                 bn_mul_part_recursive(rr->d, a->d, b->d, | 
| 588 | 236k |                                       j, al - j, bl - j, t->d); | 
| 589 | 236k |             } else {            /* al <= j || bl <= j */ | 
| 590 |  |  | 
| 591 | 100k |                 if (bn_wexpand(t, k * 2) == NULL) | 
| 592 | 0 |                     goto err; | 
| 593 | 100k |                 if (bn_wexpand(rr, k * 2) == NULL) | 
| 594 | 0 |                     goto err; | 
| 595 | 100k |                 bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d); | 
| 596 | 100k |             } | 
| 597 | 337k |             rr->top = top; | 
| 598 | 337k |             goto end; | 
| 599 | 337k |         } | 
| 600 | 365k |     } | 
| 601 | 18.6M | #endif                          /* BN_RECURSION */ | 
| 602 | 18.6M |     if (bn_wexpand(rr, top) == NULL) | 
| 603 | 0 |         goto err; | 
| 604 | 18.6M |     rr->top = top; | 
| 605 | 18.6M |     bn_mul_normal(rr->d, a->d, al, b->d, bl); | 
| 606 |  |  | 
| 607 | 18.6M | #if defined(BN_MUL_COMBA) || defined(BN_RECURSION) | 
| 608 | 19.0M |  end: | 
| 609 | 19.0M | #endif | 
| 610 | 19.0M |     rr->neg = a->neg ^ b->neg; | 
| 611 | 19.0M |     rr->flags |= BN_FLG_FIXED_TOP; | 
| 612 | 19.0M |     if (r != rr && BN_copy(r, rr) == NULL) | 
| 613 | 0 |         goto err; | 
| 614 |  |  | 
| 615 | 19.0M |     ret = 1; | 
| 616 | 19.0M |  err: | 
| 617 | 19.0M |     bn_check_top(r); | 
| 618 | 19.0M |     BN_CTX_end(ctx); | 
| 619 | 19.0M |     return ret; | 
| 620 | 19.0M | } | 
| 621 |  |  | 
| 622 |  | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) | 
| 623 | 18.9M | { | 
| 624 | 18.9M |     BN_ULONG *rr; | 
| 625 |  |  | 
| 626 | 18.9M |     if (na < nb) { | 
| 627 | 6.66M |         int itmp; | 
| 628 | 6.66M |         BN_ULONG *ltmp; | 
| 629 |  |  | 
| 630 | 6.66M |         itmp = na; | 
| 631 | 6.66M |         na = nb; | 
| 632 | 6.66M |         nb = itmp; | 
| 633 | 6.66M |         ltmp = a; | 
| 634 | 6.66M |         a = b; | 
| 635 | 6.66M |         b = ltmp; | 
| 636 |  |  | 
| 637 | 6.66M |     } | 
| 638 | 18.9M |     rr = &(r[na]); | 
| 639 | 18.9M |     if (nb <= 0) { | 
| 640 | 48.7k |         (void)bn_mul_words(r, a, na, 0); | 
| 641 | 48.7k |         return; | 
| 642 | 48.7k |     } else | 
| 643 | 18.8M |         rr[0] = bn_mul_words(r, a, na, b[0]); | 
| 644 |  |  | 
| 645 | 19.4M |     for (;;) { | 
| 646 | 19.4M |         if (--nb <= 0) | 
| 647 | 15.4M |             return; | 
| 648 | 4.00M |         rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); | 
| 649 | 4.00M |         if (--nb <= 0) | 
| 650 | 1.22M |             return; | 
| 651 | 2.78M |         rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); | 
| 652 | 2.78M |         if (--nb <= 0) | 
| 653 | 100k |             return; | 
| 654 | 2.68M |         rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); | 
| 655 | 2.68M |         if (--nb <= 0) | 
| 656 | 2.11M |             return; | 
| 657 | 571k |         rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); | 
| 658 | 571k |         rr += 4; | 
| 659 | 571k |         r += 4; | 
| 660 | 571k |         b += 4; | 
| 661 | 571k |     } | 
| 662 | 18.8M | } | 
| 663 |  |  | 
| 664 |  | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | 
| 665 | 0 | { | 
| 666 | 0 |     bn_mul_words(r, a, n, b[0]); | 
| 667 |  | 
 | 
| 668 | 0 |     for (;;) { | 
| 669 | 0 |         if (--n <= 0) | 
| 670 | 0 |             return; | 
| 671 | 0 |         bn_mul_add_words(&(r[1]), a, n, b[1]); | 
| 672 | 0 |         if (--n <= 0) | 
| 673 | 0 |             return; | 
| 674 | 0 |         bn_mul_add_words(&(r[2]), a, n, b[2]); | 
| 675 | 0 |         if (--n <= 0) | 
| 676 | 0 |             return; | 
| 677 | 0 |         bn_mul_add_words(&(r[3]), a, n, b[3]); | 
| 678 | 0 |         if (--n <= 0) | 
| 679 | 0 |             return; | 
| 680 | 0 |         bn_mul_add_words(&(r[4]), a, n, b[4]); | 
| 681 | 0 |         r += 4; | 
| 682 | 0 |         b += 4; | 
| 683 | 0 |     } | 
| 684 | 0 | } |