/src/botan/build/include/botan/internal/mul128.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * 64x64->128 bit multiply operation |
3 | | * (C) 2013,2015 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #ifndef BOTAN_UTIL_MUL128_H_ |
9 | | #define BOTAN_UTIL_MUL128_H_ |
10 | | |
11 | | #include <botan/types.h> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | #if defined(__SIZEOF_INT128__) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
16 | | #define BOTAN_TARGET_HAS_NATIVE_UINT128 |
17 | | |
  18 |  | // Prefer TI mode over __int128 as GCC rejects the latter in pedantic mode |
19 | | #if defined(__GNUG__) |
20 | | typedef unsigned int uint128_t __attribute__((mode(TI))); |
21 | | #else |
22 | | typedef unsigned __int128 uint128_t; |
23 | | #endif |
24 | | #endif |
25 | | |
26 | | } |
27 | | |
28 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
29 | | |
30 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \ |
31 | | do { \ |
32 | | const uint128_t r = static_cast<uint128_t>(a) * b; \ |
33 | | *hi = (r >> 64) & 0xFFFFFFFFFFFFFFFF; \ |
34 | | *lo = (r ) & 0xFFFFFFFFFFFFFFFF; \ |
35 | | } while(0) |
36 | | |
37 | | #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT) |
38 | | |
39 | | #include <intrin.h> |
40 | | #pragma intrinsic(_umul128) |
41 | | |
42 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \ |
43 | | do { *lo = _umul128(a, b, hi); } while(0) |
44 | | |
45 | | #elif defined(BOTAN_USE_GCC_INLINE_ASM) |
46 | | |
47 | | #if defined(BOTAN_TARGET_ARCH_IS_X86_64) |
48 | | |
49 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
50 | | asm("mulq %3" : "=d" (*hi), "=a" (*lo) : "a" (a), "rm" (b) : "cc"); \ |
51 | | } while(0) |
52 | | |
53 | | #elif defined(BOTAN_TARGET_ARCH_IS_ALPHA) |
54 | | |
55 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
56 | | asm("umulh %1,%2,%0" : "=r" (*hi) : "r" (a), "r" (b)); \ |
57 | | *lo = a * b; \ |
58 | | } while(0) |
59 | | |
60 | | #elif defined(BOTAN_TARGET_ARCH_IS_IA64) |
61 | | |
62 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
63 | | asm("xmpy.hu %0=%1,%2" : "=f" (*hi) : "f" (a), "f" (b)); \ |
64 | | *lo = a * b; \ |
65 | | } while(0) |
66 | | |
67 | | #elif defined(BOTAN_TARGET_ARCH_IS_PPC64) |
68 | | |
69 | | #define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do { \ |
70 | | asm("mulhdu %0,%1,%2" : "=r" (*hi) : "r" (a), "r" (b) : "cc"); \ |
71 | | *lo = a * b; \ |
72 | | } while(0) |
73 | | |
74 | | #endif |
75 | | |
76 | | #endif |
77 | | |
78 | | namespace Botan { |
79 | | |
80 | | /** |
81 | | * Perform a 64x64->128 bit multiplication |
82 | | */ |
83 | | inline void mul64x64_128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi) |
84 | 0 | { |
85 | 0 | #if defined(BOTAN_FAST_64X64_MUL) |
86 | 0 | BOTAN_FAST_64X64_MUL(a, b, lo, hi); |
87 | 0 | #else |
88 | 0 |
|
89 | 0 | /* |
90 | 0 | * Do a 64x64->128 multiply using four 32x32->64 multiplies plus |
91 | 0 | * some adds and shifts. Last resort for CPUs like UltraSPARC (with |
92 | 0 | * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs. |
93 | 0 | */ |
94 | 0 | const size_t HWORD_BITS = 32; |
95 | 0 | const uint32_t HWORD_MASK = 0xFFFFFFFF; |
96 | 0 |
|
97 | 0 | const uint32_t a_hi = (a >> HWORD_BITS); |
98 | 0 | const uint32_t a_lo = (a & HWORD_MASK); |
99 | 0 | const uint32_t b_hi = (b >> HWORD_BITS); |
100 | 0 | const uint32_t b_lo = (b & HWORD_MASK); |
101 | 0 |
|
102 | 0 | uint64_t x0 = static_cast<uint64_t>(a_hi) * b_hi; |
103 | 0 | uint64_t x1 = static_cast<uint64_t>(a_lo) * b_hi; |
104 | 0 | uint64_t x2 = static_cast<uint64_t>(a_hi) * b_lo; |
105 | 0 | uint64_t x3 = static_cast<uint64_t>(a_lo) * b_lo; |
106 | 0 |
|
107 | 0 | // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1 |
108 | 0 | x2 += x3 >> HWORD_BITS; |
109 | 0 |
|
110 | 0 | // this one can overflow |
111 | 0 | x2 += x1; |
112 | 0 |
|
113 | 0 | // propagate the carry if any |
114 | 0 | x0 += static_cast<uint64_t>(static_cast<bool>(x2 < x1)) << HWORD_BITS; |
115 | 0 |
|
116 | 0 | *hi = x0 + (x2 >> HWORD_BITS); |
117 | 0 | *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK); |
118 | 0 | #endif |
119 | 0 | } |
120 | | |
121 | | } |
122 | | |
123 | | #endif |