/src/botan/build/include/botan/internal/mul128.h
/*
* 64x64->128 bit multiply operation
* (C) 2013,2015 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#ifndef BOTAN_UTIL_MUL128_H_
#define BOTAN_UTIL_MUL128_H_

#include <botan/types.h>

#if defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif

namespace Botan {

#if defined(__SIZEOF_INT128__) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
#define BOTAN_TARGET_HAS_NATIVE_UINT128
// Prefer TI mode over __int128 as GCC rejects the latter in pedantic mode
#if defined(__GNUG__)
typedef unsigned int uint128_t __attribute__((mode(TI)));
#else
typedef unsigned __int128 uint128_t;
#endif
#endif

/**
* Perform a 64x64->128 bit multiplication
*
* @param a the first 64-bit operand
* @param b the second 64-bit operand
* @param lo where the low 64 bits of the product are written
* @param hi where the high 64 bits of the product are written
*/
inline void mul64x64_128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi)
   {
#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)

   const uint128_t r = static_cast<uint128_t>(a) * b;
   *hi = (r >> 64) & 0xFFFFFFFFFFFFFFFF;
   *lo = (r      ) & 0xFFFFFFFFFFFFFFFF;

#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   // _umul128 returns the low 64 bits and writes the high 64 bits through hi
   *lo = _umul128(a, b, hi);

#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
   // mulq multiplies RAX by the operand, leaving the 128-bit product in RDX:RAX
   asm("mulq %3"
       : "=d" (*hi), "=a" (*lo)
       : "a" (a), "rm" (b)
       : "cc");

#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_PPC64)
   // mulhdu yields the high 64 bits of the unsigned product;
   // the low 64 bits are just a truncating multiply
   asm("mulhdu %0,%1,%2"
       : "=r" (*hi)
       : "r" (a), "r" (b)
       : "cc");
   *lo = a * b;

#else

   /*
   * Do a 64x64->128 multiply using four 32x32->64 multiplies plus
   * some adds and shifts. Last resort for CPUs like UltraSPARC (with
   * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs.
   */
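   /*
   * Writing a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, the product
   * expands into the four partial products computed below:
   *
   *   a*b = (a_hi*b_hi)*2^64 + (a_hi*b_lo + a_lo*b_hi)*2^32 + (a_lo*b_lo)
   *       =      x0    *2^64 + (    x2    +     x1   )*2^32 +     x3
   */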
   const size_t HWORD_BITS = 32;
   const uint32_t HWORD_MASK = 0xFFFFFFFF;

   const uint32_t a_hi = (a >> HWORD_BITS);
   const uint32_t a_lo = (a & HWORD_MASK);
   const uint32_t b_hi = (b >> HWORD_BITS);
   const uint32_t b_lo = (b & HWORD_MASK);

   uint64_t x0 = static_cast<uint64_t>(a_hi) * b_hi;
   uint64_t x1 = static_cast<uint64_t>(a_lo) * b_hi;
   uint64_t x2 = static_cast<uint64_t>(a_hi) * b_lo;
   uint64_t x3 = static_cast<uint64_t>(a_lo) * b_lo;

   // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1
   x2 += x3 >> HWORD_BITS;

   // this one can overflow
   x2 += x1;

   // propagate the carry if any; a wrap of x2 (weight 2^32) is worth
   // 2^96 overall, which is 2^32 at the 2^64 position accumulated in x0
   x0 += static_cast<uint64_t>(static_cast<bool>(x2 < x1)) << HWORD_BITS;

   *hi = x0 + (x2 >> HWORD_BITS);
   *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
#endif
   }
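
/*
* Illustrative usage sketch (the operand values are arbitrary examples):
*
*   uint64_t lo = 0, hi = 0;
*   Botan::mul64x64_128(0xDEADBEEFCAFEBABE, 0x0123456789ABCDEF, &lo, &hi);
*   // hi:lo now holds the full 128-bit product
*/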

}

#endif