/src/openssl30/crypto/bn/bn_local.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |  * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. | 
| 3 |  |  * | 
| 4 |  |  * Licensed under the Apache License 2.0 (the "License").  You may not use | 
| 5 |  |  * this file except in compliance with the License.  You can obtain a copy | 
| 6 |  |  * in the file LICENSE in the source distribution or at | 
| 7 |  |  * https://www.openssl.org/source/license.html | 
| 8 |  |  */ | 
| 9 |  |  | 
| 10 |  | #ifndef OSSL_CRYPTO_BN_LOCAL_H | 
| 11 |  | # define OSSL_CRYPTO_BN_LOCAL_H | 
| 12 |  |  | 
| 13 |  | /* | 
| 14 |  |  * The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or | 
| 15 |  |  * SIXTY_FOUR_BIT in its own environment since it doesn't re-run our | 
| 16 |  |  * Configure script and needs to support both 32-bit and 64-bit. | 
| 17 |  |  */ | 
| 18 |  | # include <openssl/opensslconf.h> | 
| 19 |  |  | 
| 20 |  | # if !defined(OPENSSL_SYS_UEFI) | 
| 21 |  | #  include "crypto/bn_conf.h" | 
| 22 |  | # endif | 
| 23 |  |  | 
| 24 |  | # include "crypto/bn.h" | 
| 25 |  | # include "internal/cryptlib.h" | 
| 26 |  | # include "internal/numbers.h" | 
| 27 |  |  | 
| 28 |  | /* | 
| 29 |  |  * These preprocessor symbols control various aspects of the bignum headers | 
| 30 |  |  * and library code. They're not defined by any "normal" configuration, as | 
| 31 |  |  * they are intended for development and testing purposes. NB: defining | 
| 32 |  |  * them can be useful for debugging application code as well as openssl | 
| 33 |  |  * itself. BN_DEBUG - turn on various debugging alterations to the bignum | 
| 34 |  |  * code BN_RAND_DEBUG - uses random poisoning of unused words to trip up | 
| 35 |  |  * mismanagement of bignum internals. Enable BN_RAND_DEBUG is known to | 
| 36 |  |  * break some of the OpenSSL tests. | 
| 37 |  |  */ | 
| 38 |  | # if defined(BN_RAND_DEBUG) && !defined(BN_DEBUG) | 
| 39 |  | #  define BN_DEBUG | 
| 40 |  | # endif | 
| 41 |  | # if defined(BN_RAND_DEBUG) | 
| 42 |  | #  include <openssl/rand.h> | 
| 43 |  | # endif | 
| 44 |  |  | 
| 45 |  | /* | 
| 46 |  |  * This should limit the stack usage due to alloca to about 4K. | 
| 47 |  |  * BN_SOFT_LIMIT is a soft limit equivalent to 2*OPENSSL_RSA_MAX_MODULUS_BITS. | 
| 48 |  |  * Beyond that size bn_mul_mont is no longer used, and the constant time | 
| 49 |  |  * assembler code is disabled, due to the blatant alloca and bn_mul_mont usage. | 
| 50 |  |  * Note that bn_mul_mont does an alloca that is hidden away in assembly. | 
| 51 |  |  * It is not recommended to do computations with numbers exceeding this limit, | 
| 52 |  |  * since the result will be highly version dependent: | 
| 53 |  |  * While the current OpenSSL version will use non-optimized, but safe code, | 
| 54 |  |  * previous versions will use optimized code, that may crash due to unexpected | 
| 55 |  |  * stack overflow, and future versions may very well turn this into a hard | 
| 56 |  |  * limit. | 
| 57 |  |  * Note however, that it is possible to override the size limit using | 
| 58 |  |  * "./config -DBN_SOFT_LIMIT=<limit>" if necessary, and the O/S specific | 
| 59 |  |  * stack limit is known and taken into consideration. | 
| 60 |  |  */ | 
| 61 |  | # ifndef BN_SOFT_LIMIT | 
| 62 | 115M | #  define BN_SOFT_LIMIT         (4096 / BN_BYTES) | 
| 63 |  | # endif | 
| 64 |  |  | 
| 65 |  | # ifndef OPENSSL_SMALL_FOOTPRINT | 
| 66 |  | #  define BN_MUL_COMBA | 
| 67 |  | #  define BN_SQR_COMBA | 
| 68 |  | #  define BN_RECURSION | 
| 69 |  | # endif | 
| 70 |  |  | 
| 71 |  | /* | 
| 72 |  |  * This next option uses the C libraries (2 word)/(1 word) function. If it is | 
| 73 |  |  * not defined, I use my C version (which is slower). The reason for this | 
| 74 |  |  * flag is that when the particular C compiler library routine is used, and | 
| 75 |  |  * the library is linked with a different compiler, the library is missing. | 
| 76 |  |  * This mostly happens when the library is built with gcc and then linked | 
| 77 |  |  * using normal cc.  This would be a common occurrence because gcc normally | 
| 78 |  |  * produces code that is 2 times faster than system compilers for the big | 
| 79 |  |  * number stuff. For machines with only one compiler (or shared libraries), | 
| 80 |  |  * this should be on.  Again this in only really a problem on machines using | 
| 81 |  |  * "long long's", are 32bit, and are not using my assembler code. | 
| 82 |  |  */ | 
| 83 |  | # if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \ | 
| 84 |  |     defined(OPENSSL_SYS_WIN32) || defined(linux) | 
| 85 |  | #  define BN_DIV2W | 
| 86 |  | # endif | 
| 87 |  |  | 
| 88 |  | /* | 
| 89 |  |  * 64-bit processor with LP64 ABI | 
| 90 |  |  */ | 
| 91 |  | # ifdef SIXTY_FOUR_BIT_LONG | 
| 92 |  | #  define BN_ULLONG       unsigned long long | 
| 93 | 1.15G | #  define BN_BITS4        32 | 
| 94 | 7.15G | #  define BN_MASK2        (0xffffffffffffffffL) | 
| 95 | 1.15G | #  define BN_MASK2l       (0xffffffffL) | 
| 96 | 447k | #  define BN_MASK2h       (0xffffffff00000000L) | 
| 97 |  | #  define BN_MASK2h1      (0xffffffff80000000L) | 
| 98 | 1.57M | #  define BN_DEC_CONV     (10000000000000000000UL) | 
| 99 | 1.92M | #  define BN_DEC_NUM      19 | 
| 100 | 1.38M | #  define BN_DEC_FMT1     "%lu" | 
| 101 | 195k | #  define BN_DEC_FMT2     "%019lu" | 
| 102 |  | # endif | 
| 103 |  |  | 
| 104 |  | /* | 
| 105 |  |  * 64-bit processor other than LP64 ABI | 
| 106 |  |  */ | 
| 107 |  | # ifdef SIXTY_FOUR_BIT | 
| 108 |  | #  undef BN_LLONG | 
| 109 |  | #  undef BN_ULLONG | 
| 110 |  | #  define BN_BITS4        32 | 
| 111 |  | #  define BN_MASK2        (0xffffffffffffffffLL) | 
| 112 |  | #  define BN_MASK2l       (0xffffffffL) | 
| 113 |  | #  define BN_MASK2h       (0xffffffff00000000LL) | 
| 114 |  | #  define BN_MASK2h1      (0xffffffff80000000LL) | 
| 115 |  | #  define BN_DEC_CONV     (10000000000000000000ULL) | 
| 116 |  | #  define BN_DEC_NUM      19 | 
| 117 |  | #  define BN_DEC_FMT1     "%llu" | 
| 118 |  | #  define BN_DEC_FMT2     "%019llu" | 
| 119 |  | # endif | 
| 120 |  |  | 
| 121 |  | # ifdef THIRTY_TWO_BIT | 
| 122 |  | #  ifdef BN_LLONG | 
| 123 |  | #   if defined(_WIN32) && !defined(__GNUC__) | 
| 124 |  | #    define BN_ULLONG     unsigned __int64 | 
| 125 |  | #   else | 
| 126 |  | #    define BN_ULLONG     unsigned long long | 
| 127 |  | #   endif | 
| 128 |  | #  endif | 
| 129 |  | #  define BN_BITS4        16 | 
| 130 |  | #  define BN_MASK2        (0xffffffffL) | 
| 131 |  | #  define BN_MASK2l       (0xffff) | 
| 132 |  | #  define BN_MASK2h1      (0xffff8000L) | 
| 133 |  | #  define BN_MASK2h       (0xffff0000L) | 
| 134 |  | #  define BN_DEC_CONV     (1000000000L) | 
| 135 |  | #  define BN_DEC_NUM      9 | 
| 136 |  | #  define BN_DEC_FMT1     "%u" | 
| 137 |  | #  define BN_DEC_FMT2     "%09u" | 
| 138 |  | # endif | 
| 139 |  |  | 
| 140 |  |  | 
| 141 |  | /*- | 
| 142 |  |  * Bignum consistency macros | 
| 143 |  |  * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from | 
| 144 |  |  * bignum data after direct manipulations on the data. There is also an | 
| 145 |  |  * "internal" macro, bn_check_top(), for verifying that there are no leading | 
| 146 |  |  * zeroes. Unfortunately, some auditing is required due to the fact that | 
| 147 |  |  * bn_fix_top() has become an overabused duct-tape because bignum data is | 
| 148 |  |  * occasionally passed around in an inconsistent state. So the following | 
| 149 |  |  * changes have been made to sort this out; | 
| 150 |  |  * - bn_fix_top()s implementation has been moved to bn_correct_top() | 
| 151 |  |  * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and | 
| 152 |  |  *   bn_check_top() is as before. | 
| 153 |  |  * - if BN_DEBUG *is* defined; | 
| 154 |  |  *   - bn_check_top() tries to pollute unused words even if the bignum 'top' is | 
| 155 |  |  *     consistent. (ed: only if BN_RAND_DEBUG is defined) | 
| 156 |  |  *   - bn_fix_top() maps to bn_check_top() rather than "fixing" anything. | 
| 157 |  |  * The idea is to have debug builds flag up inconsistent bignums when they | 
| 158 |  |  * occur. If that occurs in a bn_fix_top(), we examine the code in question; if | 
| 159 |  |  * the use of bn_fix_top() was appropriate (ie. it follows directly after code | 
| 160 |  |  * that manipulates the bignum) it is converted to bn_correct_top(), and if it | 
| 161 |  |  * was not appropriate, we convert it permanently to bn_check_top() and track | 
| 162 |  |  * down the cause of the bug. Eventually, no internal code should be using the | 
| 163 |  |  * bn_fix_top() macro. External applications and libraries should try this with | 
| 164 |  |  * their own code too, both in terms of building against the openssl headers | 
| 165 |  |  * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it | 
| 166 |  |  * defined. This not only improves external code, it provides more test | 
| 167 |  |  * coverage for openssl's own code. | 
| 168 |  |  */ | 
| 169 |  |  | 
| 170 |  | # ifdef BN_DEBUG | 
| 171 |  | /* | 
| 172 |  |  * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with | 
| 173 |  |  * bn_correct_top, in other words such vectors are permitted to have zeros | 
| 174 |  |  * in most significant limbs. Such vectors are used internally to achieve | 
| 175 |  |  * execution time invariance for critical operations with private keys. | 
| 176 |  |  * It's BN_DEBUG-only flag, because user application is not supposed to | 
| 177 |  |  * observe it anyway. Moreover, optimizing compiler would actually remove | 
| 178 |  |  * all operations manipulating the bit in question in non-BN_DEBUG build. | 
| 179 |  |  */ | 
| 180 |  | #  define BN_FLG_FIXED_TOP 0x10000 | 
| 181 |  | #  ifdef BN_RAND_DEBUG | 
| 182 |  | #   define bn_pollute(a) \ | 
| 183 |  |         do { \ | 
| 184 |  |             const BIGNUM *_bnum1 = (a); \ | 
| 185 |  |             if (_bnum1->top < _bnum1->dmax) { \ | 
| 186 |  |                 unsigned char _tmp_char; \ | 
| 187 |  |                 /* We cast away const without the compiler knowing, any \ | 
| 188 |  |                  * *genuinely* constant variables that aren't mutable \ | 
| 189 |  |                  * wouldn't be constructed with top!=dmax. */ \ | 
| 190 |  |                 BN_ULONG *_not_const; \ | 
| 191 |  |                 memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \ | 
| 192 |  |                 (void)RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\ | 
| 193 |  |                 memset(_not_const + _bnum1->top, _tmp_char, \ | 
| 194 |  |                        sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \ | 
| 195 |  |             } \ | 
| 196 |  |         } while(0) | 
| 197 |  | #  else | 
| 198 |  | #   define bn_pollute(a) | 
| 199 |  | #  endif | 
| 200 |  | #  define bn_check_top(a) \ | 
| 201 |  |         do { \ | 
| 202 |  |                 const BIGNUM *_bnum2 = (a); \ | 
| 203 |  |                 if (_bnum2 != NULL) { \ | 
| 204 |  |                         int _top = _bnum2->top; \ | 
| 205 |  |                         (void)ossl_assert((_top == 0 && !_bnum2->neg) || \ | 
| 206 |  |                                   (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \ | 
| 207 |  |                                             || _bnum2->d[_top - 1] != 0))); \ | 
| 208 |  |                         bn_pollute(_bnum2); \ | 
| 209 |  |                 } \ | 
| 210 |  |         } while(0) | 
| 211 |  |  | 
| 212 |  | #  define bn_fix_top(a)           bn_check_top(a) | 
| 213 |  |  | 
| 214 |  | #  define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2) | 
| 215 |  | #  define bn_wcheck_size(bn, words) \ | 
| 216 |  |         do { \ | 
| 217 |  |                 const BIGNUM *_bnum2 = (bn); \ | 
| 218 |  |                 assert((words) <= (_bnum2)->dmax && \ | 
| 219 |  |                        (words) >= (_bnum2)->top); \ | 
| 220 |  |                 /* avoid unused variable warning with NDEBUG */ \ | 
| 221 |  |                 (void)(_bnum2); \ | 
| 222 |  |         } while(0) | 
| 223 |  |  | 
| 224 |  | # else                          /* !BN_DEBUG */ | 
| 225 |  |  | 
| 226 | 1.83G | #  define BN_FLG_FIXED_TOP 0 | 
| 227 |  | #  define bn_pollute(a) | 
| 228 |  | #  define bn_check_top(a) | 
| 229 |  | #  define bn_fix_top(a)           bn_correct_top(a) | 
| 230 |  | #  define bn_check_size(bn, bits) | 
| 231 |  | #  define bn_wcheck_size(bn, words) | 
| 232 |  |  | 
| 233 |  | # endif | 
| 234 |  |  | 
| 235 |  | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, | 
| 236 |  |                           BN_ULONG w); | 
| 237 |  | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); | 
| 238 |  | void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); | 
| 239 |  | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d); | 
| 240 |  | BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 
| 241 |  |                       int num); | 
| 242 |  | BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 
| 243 |  |                       int num); | 
| 244 |  |  | 
| 245 |  | struct bignum_st { | 
| 246 |  |     BN_ULONG *d;                /* Pointer to an array of 'BN_BITS2' bit | 
| 247 |  |                                  * chunks. */ | 
| 248 |  |     int top;                    /* Index of last used d +1. */ | 
| 249 |  |     /* The next are internal book keeping for bn_expand. */ | 
| 250 |  |     int dmax;                   /* Size of the d array. */ | 
| 251 |  |     int neg;                    /* one if the number is negative */ | 
| 252 |  |     int flags; | 
| 253 |  | }; | 
| 254 |  |  | 
| 255 |  | /* Used for montgomery multiplication */ | 
| 256 |  | struct bn_mont_ctx_st { | 
| 257 |  |     int ri;                     /* number of bits in R */ | 
| 258 |  |     BIGNUM RR;                  /* used to convert to montgomery form, | 
| 259 |  |                                    possibly zero-padded */ | 
| 260 |  |     BIGNUM N;                   /* The modulus */ | 
| 261 |  |     BIGNUM Ni;                  /* R*(1/R mod N) - N*Ni = 1 (Ni is only | 
| 262 |  |                                  * stored for bignum algorithm) */ | 
| 263 |  |     BN_ULONG n0[2];             /* least significant word(s) of Ni; (type | 
| 264 |  |                                  * changed with 0.9.9, was "BN_ULONG n0;" | 
| 265 |  |                                  * before) */ | 
| 266 |  |     int flags; | 
| 267 |  | }; | 
| 268 |  |  | 
| 269 |  | /* | 
| 270 |  |  * Used for reciprocal division/mod functions It cannot be shared between | 
| 271 |  |  * threads | 
| 272 |  |  */ | 
| 273 |  | struct bn_recp_ctx_st { | 
| 274 |  |     BIGNUM N;                   /* the divisor */ | 
| 275 |  |     BIGNUM Nr;                  /* the reciprocal */ | 
| 276 |  |     int num_bits; | 
| 277 |  |     int shift; | 
| 278 |  |     int flags; | 
| 279 |  | }; | 
| 280 |  |  | 
| 281 |  | /* Used for slow "generation" functions. */ | 
| 282 |  | struct bn_gencb_st { | 
| 283 |  |     unsigned int ver;           /* To handle binary (in)compatibility */ | 
| 284 |  |     void *arg;                  /* callback-specific data */ | 
| 285 |  |     union { | 
| 286 |  |         /* if (ver==1) - handles old style callbacks */ | 
| 287 |  |         void (*cb_1) (int, int, void *); | 
| 288 |  |         /* if (ver==2) - new callback style */ | 
| 289 |  |         int (*cb_2) (int, int, BN_GENCB *); | 
| 290 |  |     } cb; | 
| 291 |  | }; | 
| 292 |  |  | 
| 293 |  | /*- | 
| 294 |  |  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions | 
| 295 |  |  * | 
| 296 |  |  * | 
| 297 |  |  * For window size 'w' (w >= 2) and a random 'b' bits exponent, | 
| 298 |  |  * the number of multiplications is a constant plus on average | 
| 299 |  |  * | 
| 300 |  |  *    2^(w-1) + (b-w)/(w+1); | 
| 301 |  |  * | 
| 302 |  |  * here  2^(w-1)  is for precomputing the table (we actually need | 
| 303 |  |  * entries only for windows that have the lowest bit set), and | 
| 304 |  |  * (b-w)/(w+1)  is an approximation for the expected number of | 
| 305 |  |  * w-bit windows, not counting the first one. | 
| 306 |  |  * | 
| 307 |  |  * Thus we should use | 
| 308 |  |  * | 
| 309 |  |  *    w >= 6  if        b > 671 | 
| 310 |  |  *     w = 5  if  671 > b > 239 | 
| 311 |  |  *     w = 4  if  239 > b >  79 | 
| 312 |  |  *     w = 3  if   79 > b >  23 | 
| 313 |  |  *    w <= 2  if   23 > b | 
| 314 |  |  * | 
| 315 |  |  * (with draws in between).  Very small exponents are often selected | 
| 316 |  |  * with low Hamming weight, so we use  w = 1  for b <= 23. | 
| 317 |  |  */ | 
| 318 |  | # define BN_window_bits_for_exponent_size(b) \ | 
| 319 | 126k |                 ((b) > 671 ? 6 : \ | 
| 320 | 126k |                  (b) > 239 ? 5 : \ | 
| 321 | 123k |                  (b) >  79 ? 4 : \ | 
| 322 | 102k |                  (b) >  23 ? 3 : 1) | 
| 323 |  |  | 
| 324 |  | /* | 
| 325 |  |  * BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache | 
| 326 |  |  * line width of the target processor is at least the following value. | 
| 327 |  |  */ | 
| 328 | 56.4k | # define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH      ( 64 ) | 
| 329 | 28.2k | # define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK       (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) | 
| 330 |  |  | 
| 331 |  | /* | 
| 332 |  |  * Window sizes optimized for fixed window size modular exponentiation | 
| 333 |  |  * algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of | 
| 334 |  |  * BN_mode_exp_mont_consttime, the maximum size of the window must not exceed | 
| 335 |  |  * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are | 
| 336 |  |  * defined for cache line sizes of 32 and 64, cache line sizes where | 
| 337 |  |  * log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be | 
| 338 |  |  * used on processors that have a 128 byte or greater cache line size. | 
| 339 |  |  */ | 
| 340 |  | # if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 | 
| 341 |  |  | 
| 342 |  | #  define BN_window_bits_for_ctime_exponent_size(b) \ | 
| 343 | 28.2k |                 ((b) > 937 ? 6 : \ | 
| 344 | 28.2k |                  (b) > 306 ? 5 : \ | 
| 345 | 12.7k |                  (b) >  89 ? 4 : \ | 
| 346 | 11.0k |                  (b) >  22 ? 3 : 1) | 
| 347 |  | #  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (6) | 
| 348 |  |  | 
| 349 |  | # elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 | 
| 350 |  |  | 
| 351 |  | #  define BN_window_bits_for_ctime_exponent_size(b) \ | 
| 352 |  |                 ((b) > 306 ? 5 : \ | 
| 353 |  |                  (b) >  89 ? 4 : \ | 
| 354 |  |                  (b) >  22 ? 3 : 1) | 
| 355 |  | #  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (5) | 
| 356 |  |  | 
| 357 |  | # endif | 
| 358 |  |  | 
| 359 |  | /* Pentium pro 16,16,16,32,64 */ | 
| 360 |  | /* Alpha       16,16,16,16.64 */ | 
| 361 | 19.4M | # define BN_MULL_SIZE_NORMAL                     (16)/* 32 */ | 
| 362 | 38.4M | # define BN_MUL_RECURSIVE_SIZE_NORMAL            (16)/* 32 less than */ | 
| 363 | 10.1M | # define BN_SQR_RECURSIVE_SIZE_NORMAL            (16)/* 32 */ | 
| 364 | 0 | # define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL        (32)/* 32 */ | 
| 365 |  | # define BN_MONT_CTX_SET_SIZE_WORD               (64)/* 32 */ | 
| 366 |  |  | 
| 367 |  | # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) | 
| 368 |  | /* | 
| 369 |  |  * BN_UMULT_HIGH section. | 
| 370 |  |  * If the compiler doesn't support 2*N integer type, then you have to | 
| 371 |  |  * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some | 
| 372 |  |  * shifts and additions which unavoidably results in severe performance | 
| 373 |  |  * penalties. Of course provided that the hardware is capable of producing | 
| 374 |  |  * 2*N result... That's when you normally start considering assembler | 
| 375 |  |  * implementation. However! It should be pointed out that some CPUs (e.g., | 
| 376 |  |  * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating | 
| 377 |  |  * the upper half of the product placing the result into a general | 
| 378 |  |  * purpose register. Now *if* the compiler supports inline assembler, | 
| 379 |  |  * then it's not impossible to implement the "bignum" routines (and have | 
| 380 |  |  * the compiler optimize 'em) exhibiting "native" performance in C. That's | 
| 381 |  |  * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do | 
| 382 |  |  * support 2*64 integer type, which is also used here. | 
| 383 |  |  */ | 
| 384 |  | #  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \ | 
| 385 |  |       (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) | 
| 386 |  | #   define BN_UMULT_HIGH(a,b)          (((uint128_t)(a)*(b))>>64) | 
| 387 |  | #   define BN_UMULT_LOHI(low,high,a,b) ({       \ | 
| 388 |  |         uint128_t ret=(uint128_t)(a)*(b);   \ | 
| 389 |  |         (high)=ret>>64; (low)=ret;      }) | 
| 390 |  | #  elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) | 
| 391 |  | #   if defined(__DECC) | 
| 392 |  | #    include <c_asm.h> | 
| 393 |  | #    define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b)) | 
| 394 |  | #   elif defined(__GNUC__) && __GNUC__>=2 | 
| 395 |  | #    define BN_UMULT_HIGH(a,b)   ({     \ | 
| 396 |  |         register BN_ULONG ret;          \ | 
| 397 |  |         asm ("umulh     %1,%2,%0"       \ | 
| 398 |  |              : "=r"(ret)                \ | 
| 399 |  |              : "r"(a), "r"(b));         \ | 
| 400 |  |         ret;                      }) | 
| 401 |  | #   endif                       /* compiler */ | 
| 402 |  | #  elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG) | 
| 403 |  | #   if defined(__GNUC__) && __GNUC__>=2 | 
| 404 |  | #    define BN_UMULT_HIGH(a,b)   ({     \ | 
| 405 |  |         register BN_ULONG ret;          \ | 
| 406 |  |         asm ("mulhdu    %0,%1,%2"       \ | 
| 407 |  |              : "=r"(ret)                \ | 
| 408 |  |              : "r"(a), "r"(b));         \ | 
| 409 |  |         ret;                      }) | 
| 410 |  | #   endif                       /* compiler */ | 
| 411 |  | #  elif (defined(__x86_64) || defined(__x86_64__)) && \ | 
| 412 |  |        (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT)) | 
| 413 |  | #   if defined(__GNUC__) && __GNUC__>=2 | 
| 414 |  | #    define BN_UMULT_HIGH(a,b)   ({     \ | 
| 415 |  |         register BN_ULONG ret,discard;  \ | 
| 416 |  |         asm ("mulq      %3"             \ | 
| 417 |  |              : "=a"(discard),"=d"(ret)  \ | 
| 418 |  |              : "a"(a), "g"(b)           \ | 
| 419 |  |              : "cc");                   \ | 
| 420 |  |         ret;                      }) | 
| 421 |  | #    define BN_UMULT_LOHI(low,high,a,b) \ | 
| 422 |  |         asm ("mulq      %3"             \ | 
| 423 |  |                 : "=a"(low),"=d"(high)  \ | 
| 424 |  |                 : "a"(a),"g"(b)         \ | 
| 425 |  |                 : "cc"); | 
| 426 |  | #   endif | 
| 427 |  | #  elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT) | 
| 428 |  | #   if defined(_MSC_VER) && _MSC_VER>=1400 | 
| 429 |  | unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b); | 
| 430 |  | unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, | 
| 431 |  |                           unsigned __int64 *h); | 
| 432 |  | #    pragma intrinsic(__umulh,_umul128) | 
| 433 |  | #    define BN_UMULT_HIGH(a,b)           __umulh((a),(b)) | 
| 434 |  | #    define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high))) | 
| 435 |  | #   endif | 
| 436 |  | #  elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) | 
| 437 |  | #   if defined(__GNUC__) && __GNUC__>=2 | 
| 438 |  | #    define BN_UMULT_HIGH(a,b) ({       \ | 
| 439 |  |         register BN_ULONG ret;          \ | 
| 440 |  |         asm ("dmultu    %1,%2"          \ | 
| 441 |  |              : "=h"(ret)                \ | 
| 442 |  |              : "r"(a), "r"(b) : "l");   \ | 
| 443 |  |         ret;                    }) | 
| 444 |  | #    define BN_UMULT_LOHI(low,high,a,b) \ | 
| 445 |  |         asm ("dmultu    %2,%3"          \ | 
| 446 |  |              : "=l"(low),"=h"(high)     \ | 
| 447 |  |              : "r"(a), "r"(b)); | 
| 448 |  | #   endif | 
| 449 |  | #  elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) | 
| 450 |  | #   if defined(__GNUC__) && __GNUC__>=2 | 
| 451 |  | #    define BN_UMULT_HIGH(a,b)   ({     \ | 
| 452 |  |         register BN_ULONG ret;          \ | 
| 453 |  |         asm ("umulh     %0,%1,%2"       \ | 
| 454 |  |              : "=r"(ret)                \ | 
| 455 |  |              : "r"(a), "r"(b));         \ | 
| 456 |  |         ret;                      }) | 
| 457 |  | #   endif | 
| 458 |  | #  endif                        /* cpu */ | 
| 459 |  | # endif                         /* OPENSSL_NO_ASM */ | 
| 460 |  |  | 
| 461 |  | # ifdef BN_RAND_DEBUG | 
| 462 |  | #  define bn_clear_top2max(a) \ | 
| 463 |  |         { \ | 
| 464 |  |         int      ind = (a)->dmax - (a)->top; \ | 
| 465 |  |         BN_ULONG *ftl = &(a)->d[(a)->top-1]; \ | 
| 466 |  |         for (; ind != 0; ind--) \ | 
| 467 |  |                 *(++ftl) = 0x0; \ | 
| 468 |  |         } | 
| 469 |  | # else | 
| 470 |  | #  define bn_clear_top2max(a) | 
| 471 |  | # endif | 
| 472 |  |  | 
| 473 |  | # ifdef BN_LLONG | 
| 474 |  | /******************************************************************* | 
| 475 |  |  * Using the long long type, has to be twice as wide as BN_ULONG... | 
| 476 |  |  */ | 
| 477 |  | #  define Lw(t)    (((BN_ULONG)(t))&BN_MASK2) | 
| 478 |  | #  define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) | 
| 479 |  |  | 
| 480 |  | #  define mul_add(r,a,w,c) { \ | 
| 481 |  |         BN_ULLONG t; \ | 
| 482 |  |         t=(BN_ULLONG)w * (a) + (r) + (c); \ | 
| 483 |  |         (r)= Lw(t); \ | 
| 484 |  |         (c)= Hw(t); \ | 
| 485 |  |         } | 
| 486 |  |  | 
| 487 |  | #  define mul(r,a,w,c) { \ | 
| 488 |  |         BN_ULLONG t; \ | 
| 489 |  |         t=(BN_ULLONG)w * (a) + (c); \ | 
| 490 |  |         (r)= Lw(t); \ | 
| 491 |  |         (c)= Hw(t); \ | 
| 492 |  |         } | 
| 493 |  |  | 
| 494 |  | #  define sqr(r0,r1,a) { \ | 
| 495 |  |         BN_ULLONG t; \ | 
| 496 |  |         t=(BN_ULLONG)(a)*(a); \ | 
| 497 |  |         (r0)=Lw(t); \ | 
| 498 |  |         (r1)=Hw(t); \ | 
| 499 |  |         } | 
| 500 |  |  | 
| 501 |  | # elif defined(BN_UMULT_LOHI) | 
| 502 |  | #  define mul_add(r,a,w,c) {              \ | 
| 503 |  |         BN_ULONG high,low,ret,tmp=(a);  \ | 
| 504 |  |         ret =  (r);                     \ | 
| 505 |  |         BN_UMULT_LOHI(low,high,w,tmp);  \ | 
| 506 |  |         ret += (c);                     \ | 
| 507 |  |         (c) =  (ret<(c));               \ | 
| 508 |  |         (c) += high;                    \ | 
| 509 |  |         ret += low;                     \ | 
| 510 |  |         (c) += (ret<low);               \ | 
| 511 |  |         (r) =  ret;                     \ | 
| 512 |  |         } | 
| 513 |  |  | 
| 514 |  | #  define mul(r,a,w,c)    {               \ | 
| 515 |  |         BN_ULONG high,low,ret,ta=(a);   \ | 
| 516 |  |         BN_UMULT_LOHI(low,high,w,ta);   \ | 
| 517 |  |         ret =  low + (c);               \ | 
| 518 |  |         (c) =  high;                    \ | 
| 519 |  |         (c) += (ret<low);               \ | 
| 520 |  |         (r) =  ret;                     \ | 
| 521 |  |         } | 
| 522 |  |  | 
| 523 |  | #  define sqr(r0,r1,a)    {               \ | 
| 524 |  |         BN_ULONG tmp=(a);               \ | 
| 525 |  |         BN_UMULT_LOHI(r0,r1,tmp,tmp);   \ | 
| 526 |  |         } | 
| 527 |  |  | 
| 528 |  | # elif defined(BN_UMULT_HIGH) | 
| 529 |  | #  define mul_add(r,a,w,c) {              \ | 
| 530 |  |         BN_ULONG high,low,ret,tmp=(a);  \ | 
| 531 |  |         ret =  (r);                     \ | 
| 532 |  |         high=  BN_UMULT_HIGH(w,tmp);    \ | 
| 533 |  |         ret += (c);                     \ | 
| 534 |  |         low =  (w) * tmp;               \ | 
| 535 |  |         (c) =  (ret<(c));               \ | 
| 536 |  |         (c) += high;                    \ | 
| 537 |  |         ret += low;                     \ | 
| 538 |  |         (c) += (ret<low);               \ | 
| 539 |  |         (r) =  ret;                     \ | 
| 540 |  |         } | 
| 541 |  |  | 
| 542 |  | #  define mul(r,a,w,c)    {               \ | 
| 543 |  |         BN_ULONG high,low,ret,ta=(a);   \ | 
| 544 |  |         low =  (w) * ta;                \ | 
| 545 |  |         high=  BN_UMULT_HIGH(w,ta);     \ | 
| 546 |  |         ret =  low + (c);               \ | 
| 547 |  |         (c) =  high;                    \ | 
| 548 |  |         (c) += (ret<low);               \ | 
| 549 |  |         (r) =  ret;                     \ | 
| 550 |  |         } | 
| 551 |  |  | 
| 552 |  | #  define sqr(r0,r1,a)    {               \ | 
| 553 |  |         BN_ULONG tmp=(a);               \ | 
| 554 |  |         (r0) = tmp * tmp;               \ | 
| 555 |  |         (r1) = BN_UMULT_HIGH(tmp,tmp);  \ | 
| 556 |  |         } | 
| 557 |  |  | 
| 558 |  | # else | 
| 559 |  | /************************************************************* | 
| 560 |  |  * No long long type | 
| 561 |  |  */ | 
| 562 |  |  | 
| 563 | 455M | #  define LBITS(a)        ((a)&BN_MASK2l) | 
| 564 | 683M | #  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l) | 
| 565 | 455M | #  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2) | 
| 566 |  |  | 
| 567 |  | #  define LLBITS(a)       ((a)&BN_MASKl) | 
| 568 |  | #  define LHBITS(a)       (((a)>>BN_BITS2)&BN_MASKl) | 
| 569 |  | #  define LL2HBITS(a)     ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2) | 
| 570 |  |  | 
| 571 |  | #  define mul64(l,h,bl,bh) \ | 
| 572 | 227M |         { \ | 
| 573 | 227M |         BN_ULONG m,m1,lt,ht; \ | 
| 574 | 227M |  \ | 
| 575 | 227M |         lt=l; \ | 
| 576 | 227M |         ht=h; \ | 
| 577 | 227M |         m =(bh)*(lt); \ | 
| 578 | 227M |         lt=(bl)*(lt); \ | 
| 579 | 227M |         m1=(bl)*(ht); \ | 
| 580 | 227M |         ht =(bh)*(ht); \ | 
| 581 | 227M |         m=(m+m1)&BN_MASK2; ht += L2HBITS((BN_ULONG)(m < m1)); \ | 
| 582 | 227M |         ht+=HBITS(m); \ | 
| 583 | 227M |         m1=L2HBITS(m); \ | 
| 584 | 227M |         lt=(lt+m1)&BN_MASK2; ht += (lt < m1); \ | 
| 585 | 227M |         (l)=lt; \ | 
| 586 | 227M |         (h)=ht; \ | 
| 587 | 227M |         } | 
| 588 |  |  | 
| 589 |  | #  define sqr64(lo,ho,in) \ | 
| 590 |  |         { \ | 
| 591 |  |         BN_ULONG l,h,m; \ | 
| 592 |  |  \ | 
| 593 |  |         h=(in); \ | 
| 594 |  |         l=LBITS(h); \ | 
| 595 |  |         h=HBITS(h); \ | 
| 596 |  |         m =(l)*(h); \ | 
| 597 |  |         l*=l; \ | 
| 598 |  |         h*=h; \ | 
| 599 |  |         h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \ | 
| 600 |  |         m =(m&BN_MASK2l)<<(BN_BITS4+1); \ | 
| 601 |  |         l=(l+m)&BN_MASK2; h += (l < m); \ | 
| 602 |  |         (lo)=l; \ | 
| 603 |  |         (ho)=h; \ | 
| 604 |  |         } | 
| 605 |  |  | 
| 606 |  | #  define mul_add(r,a,bl,bh,c) { \ | 
| 607 |  |         BN_ULONG l,h; \ | 
| 608 |  |  \ | 
| 609 |  |         h= (a); \ | 
| 610 |  |         l=LBITS(h); \ | 
| 611 |  |         h=HBITS(h); \ | 
| 612 |  |         mul64(l,h,(bl),(bh)); \ | 
| 613 |  |  \ | 
| 614 |  |         /* non-multiply part */ \ | 
| 615 |  |         l=(l+(c))&BN_MASK2; h += (l < (c)); \ | 
| 616 |  |         (c)=(r); \ | 
| 617 |  |         l=(l+(c))&BN_MASK2; h += (l < (c)); \ | 
| 618 |  |         (c)=h&BN_MASK2; \ | 
| 619 |  |         (r)=l; \ | 
| 620 |  |         } | 
| 621 |  |  | 
| 622 |  | #  define mul(r,a,bl,bh,c) { \ | 
| 623 |  |         BN_ULONG l,h; \ | 
| 624 |  |  \ | 
| 625 |  |         h= (a); \ | 
| 626 |  |         l=LBITS(h); \ | 
| 627 |  |         h=HBITS(h); \ | 
| 628 |  |         mul64(l,h,(bl),(bh)); \ | 
| 629 |  |  \ | 
| 630 |  |         /* non-multiply part */ \ | 
| 631 |  |         l+=(c); h += ((l&BN_MASK2) < (c)); \ | 
| 632 |  |         (c)=h&BN_MASK2; \ | 
| 633 |  |         (r)=l&BN_MASK2; \ | 
| 634 |  |         } | 
| 635 |  | # endif                         /* !BN_LLONG */ | 
| 636 |  |  | 
| 637 |  | void BN_RECP_CTX_init(BN_RECP_CTX *recp); | 
| 638 |  | void BN_MONT_CTX_init(BN_MONT_CTX *ctx); | 
| 639 |  |  | 
| 640 |  | void bn_init(BIGNUM *a); | 
| 641 |  | void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); | 
| 642 |  | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 
| 643 |  | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); | 
| 644 |  | void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp); | 
| 645 |  | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a); | 
| 646 |  | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); | 
| 647 |  | int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n); | 
| 648 |  | int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl); | 
| 649 |  | void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 
| 650 |  |                       int dna, int dnb, BN_ULONG *t); | 
| 651 |  | void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, | 
| 652 |  |                            int n, int tna, int tnb, BN_ULONG *t); | 
| 653 |  | void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); | 
| 654 |  | void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n); | 
| 655 |  | void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, | 
| 656 |  |                           BN_ULONG *t); | 
| 657 |  | BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, | 
| 658 |  |                            int cl, int dl); | 
| 659 |  | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 
| 660 |  |                 const BN_ULONG *np, const BN_ULONG *n0, int num); | 
| 661 |  | void bn_correct_top_consttime(BIGNUM *a); | 
| 662 |  | BIGNUM *int_bn_mod_inverse(BIGNUM *in, | 
| 663 |  |                            const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, | 
| 664 |  |                            int *noinv); | 
| 665 |  |  | 
| 666 |  | static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits) | 
| 667 | 176M | { | 
| 668 | 176M |     if (bits > (INT_MAX - BN_BITS2 + 1)) | 
| 669 | 0 |         return NULL; | 
| 670 |  |  | 
| 671 | 176M |     if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) | 
| 672 | 173M |         return a; | 
| 673 |  |  | 
| 674 | 2.84M |     return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); | 
| 675 | 176M | } Unexecuted instantiation: bn_dh.c:bn_expand| Line | Count | Source |  | 667 | 176M | { |  | 668 | 176M |     if (bits > (INT_MAX - BN_BITS2 + 1)) |  | 669 | 0 |         return NULL; |  | 670 |  |  |  | 671 | 176M |     if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) |  | 672 | 173M |         return a; |  | 673 |  |  |  | 674 | 2.84M |     return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2); |  | 675 | 176M | } | 
Unexecuted instantiation: bn_mont.c:bn_expandUnexecuted instantiation: bn_mul.c:bn_expandUnexecuted instantiation: bn_shift.c:bn_expandUnexecuted instantiation: bn_sqr.c:bn_expandUnexecuted instantiation: bn_word.c:bn_expandUnexecuted instantiation: x86_64-gcc.c:bn_expandUnexecuted instantiation: bn_add.c:bn_expandUnexecuted instantiation: bn_blind.c:bn_expandUnexecuted instantiation: bn_conv.c:bn_expandUnexecuted instantiation: bn_ctx.c:bn_expandUnexecuted instantiation: bn_div.c:bn_expandUnexecuted instantiation: bn_exp.c:bn_expandUnexecuted instantiation: bn_exp2.c:bn_expandUnexecuted instantiation: bn_gcd.c:bn_expandUnexecuted instantiation: bn_intern.c:bn_expandUnexecuted instantiation: bn_kron.c:bn_expandUnexecuted instantiation: bn_mod.c:bn_expandUnexecuted instantiation: bn_nist.c:bn_expandUnexecuted instantiation: bn_prime.c:bn_expandUnexecuted instantiation: bn_print.c:bn_expandUnexecuted instantiation: bn_rand.c:bn_expandUnexecuted instantiation: bn_recp.c:bn_expandUnexecuted instantiation: bn_rsa_fips186_4.c:bn_expandUnexecuted instantiation: bn_sqrt.c:bn_expandUnexecuted instantiation: bn_srp.c:bn_expandUnexecuted instantiation: rsaz_exp.c:bn_expandUnexecuted instantiation: rsaz_exp_x2.c:bn_expandUnexecuted instantiation: bn_gf2m.c:bn_expand | 
| 676 |  |  | 
| 677 |  | int ossl_bn_check_prime(const BIGNUM *w, int checks, BN_CTX *ctx, | 
| 678 |  |                         int do_trial_division, BN_GENCB *cb); | 
| 679 |  |  | 
| 680 |  | #endif |