/src/botan/build/include/botan/mul128.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * 64x64->128 bit multiply operation |
3 | | * (C) 2013,2015 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #ifndef BOTAN_UTIL_MUL128_H_ |
9 | | #define BOTAN_UTIL_MUL128_H_ |
10 | | |
11 | | #include <botan/types.h> |
12 | | |
13 | | BOTAN_FUTURE_INTERNAL_HEADER(mul128.h) |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | #if defined(__SIZEOF_INT128__) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
18 | | #define BOTAN_TARGET_HAS_NATIVE_UINT128 |
19 | | |
20 | | // Prefer TI mode over __int128 as GCC rejects the latter in pendantic mode |
21 | | #if defined(__GNUG__) |
22 | | typedef unsigned int uint128_t __attribute__((mode(TI))); |
23 | | #else |
24 | | typedef unsigned __int128 uint128_t; |
25 | | #endif |
26 | | #endif |
27 | | |
28 | | } |
29 | | |
30 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
31 | | |
32 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \ |
33 | | do { \ |
34 | | const uint128_t r = static_cast<uint128_t>(a) * b; \ |
35 | | *hi = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \ |
36 | | *lo = (r ) & 0xFFFFFFFFFFFFFFFF; \ |
37 | | } while(0) |
38 | | |
39 | | #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
40 | | |
41 | | #include <intrin.h> |
42 | | #pragma intrinsic(_umul128) |
43 | | |
44 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \ |
45 | | do { *lo = _umul128(a, b, hi); } while(0) |
46 | | |
47 | | #elif defined(BOTAN_USE_GCC_INLINE_ASM) |
48 | | |
49 | | #if defined(BOTAN_TARGET_ARCH_IS_X86_64) |
50 | | |
51 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
52 | | asm("mulq %3" : "=d" (*hi), "=a" (*lo) : "a" (a), "rm" (b) : "cc"); \ |
53 | | } while(0) |
54 | | |
55 | | #elif defined(BOTAN_TARGET_ARCH_IS_ALPHA) |
56 | | |
57 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
58 | | asm("umulh %1,%2,%0" : "=r" (*hi) : "r" (a), "r" (b)); \ |
59 | | *lo = a * b; \ |
60 | | } while(0) |
61 | | |
62 | | #elif defined(BOTAN_TARGET_ARCH_IS_IA64) |
63 | | |
64 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
65 | | asm("xmpy.hu %0=%1,%2" : "=f" (*hi) : "f" (a), "f" (b)); \ |
66 | | *lo = a * b; \ |
67 | | } while(0) |
68 | | |
69 | | #elif defined(BOTAN_TARGET_ARCH_IS_PPC64) |
70 | | |
71 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
72 | | asm("mulhdu %0,%1,%2" : "=r" (*hi) : "r" (a), "r" (b) : "cc"); \ |
73 | | *lo = a * b; \ |
74 | | } while(0) |
75 | | |
76 | | #endif |
77 | | |
78 | | #endif |
79 | | |
80 | | namespace Botan { |
81 | | |
82 | | /** |
83 | | * Perform a 64x64->128 bit multiplication |
84 | | */ |
85 | | inline void mul64x64_128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi) |
86 | 0 | { |
87 | 0 | #if defined(BOTAN_FAST_64X64_MUL) |
88 | 0 | BOTAN_FAST_64X64_MUL(a, b, lo, hi); |
89 | 0 | #else |
90 | 0 |
|
91 | 0 | /* |
92 | 0 | * Do a 64x64->128 multiply using four 32x32->64 multiplies plus |
93 | 0 | * some adds and shifts. Last resort for CPUs like UltraSPARC (with |
94 | 0 | * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs. |
95 | 0 | */ |
96 | 0 | const size_t HWORD_BITS = 32; |
97 | 0 | const uint32_t HWORD_MASK = 0xFFFFFFFF; |
98 | 0 |
|
99 | 0 | const uint32_t a_hi = (a >> HWORD_BITS); |
100 | 0 | const uint32_t a_lo = (a & HWORD_MASK); |
101 | 0 | const uint32_t b_hi = (b >> HWORD_BITS); |
102 | 0 | const uint32_t b_lo = (b & HWORD_MASK); |
103 | 0 |
|
104 | 0 | uint64_t x0 = static_cast<uint64_t>(a_hi) * b_hi; |
105 | 0 | uint64_t x1 = static_cast<uint64_t>(a_lo) * b_hi; |
106 | 0 | uint64_t x2 = static_cast<uint64_t>(a_hi) * b_lo; |
107 | 0 | uint64_t x3 = static_cast<uint64_t>(a_lo) * b_lo; |
108 | 0 |
|
109 | 0 | // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1 |
110 | 0 | x2 += x3 >> HWORD_BITS; |
111 | 0 |
|
112 | 0 | // this one can overflow |
113 | 0 | x2 += x1; |
114 | 0 |
|
115 | 0 | // propagate the carry if any |
116 | 0 | x0 += static_cast<uint64_t>(static_cast<bool>(x2 < x1)) << HWORD_BITS; |
117 | 0 |
|
118 | 0 | *hi = x0 + (x2 >> HWORD_BITS); |
119 | 0 | *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK); |
120 | 0 | #endif |
121 | 0 | } |
122 | | |
123 | | } |
124 | | |
125 | | #endif |