/src/cryptopp/integer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // integer.cpp - originally written and placed in the public domain by Wei Dai |
2 | | // contains public domain code contributed by Alister Lee and Leonard Janke |
3 | | |
4 | | // Notes by JW: The Integer class needs to do two things. First, it needs |
5 | | // to set function pointers on some platforms, like X86 and X64. The |
6 | | // function pointers select a fast multiply and addition based on the cpu. |
7 | | // Second, it wants to create Integer::Zero(), Integer::One() and |
8 | | // Integer::Two(). |
9 | | // The function pointers are initialized in the InitializeInteger class by |
10 | | // calling SetFunctionPointers(). The call to SetFunctionPointers() is |
11 | | // guarded to run once using a double-checked pattern. We don't use C++ |
12 | | // std::call_once due to bad interactions between libstdc++, glibc and |
13 | | // pthreads. The bad interactions were causing crashes for us on platforms |
14 | | // like Sparc and PowerPC. Since we are only setting function pointers we |
15 | | // don't have to worry about leaking memory. The worst case seems to be the |
16 | | // pointers gets written twice until the init flag is set and visible to |
17 | | // all threads. |
18 | | // For Integer::Zero(), Integer::One() and Integer::Two(), we use one of three |
19 | | // strategies. First, if initialization priorities are available then we use |
20 | | // them. Initialization priorities are init_priority() on Linux and init_seg() |
21 | | // on Windows. OS X and several other platforms lack them. Initialization |
22 | | // priorities are platform specific but they are also the most trouble free |
23 | | // with deterministic destruction. |
24 | | // Second, if C++11 dynamic initialization is available, then we use it. After |
25 | | // the std::call_once fiasco we moved to dynamic initialization to avoid |
26 | | // unknown troubles platforms that are tested less frequently. In addition |
27 | | // Microsoft platforms mostly do not provide dynamic initialization. |
28 | | // The MSDN docs claim they do but they don't in practice because we need |
29 | | // Visual Studio 2017 and Windows 10 or above. |
30 | | // Third, we fall back to Wei's original code of a Singleton. Wei's original |
31 | | // code was much simpler. It simply used the Singleton pattern, but it always |
32 | | // produced memory findings on some platforms. The Singleton generates memory |
33 | | // findings because it uses a Create On First Use pattern (a dumb Nifty |
34 | | // Counter) and the compiler had to be smart enough to fold them to return |
35 | | // the same object. Unix and Linux compilers do a good job of folding objects, |
36 | | // but Microsoft compilers do a rather poor job for some versions of the |
37 | | // compiler. |
38 | | // Another problem with the Singleton is resource destruction requires running |
39 | | // resource acquisition in reverse. For resources provided through the |
40 | | // Singletons, there is no way to express the dependency order to safely |
41 | | // destroy resources. (That's one of the problems C++11 dynamic |
42 | | // initialization with concurrent execution is supposed to solve). |
43 | | // The final problem with Singletons is resource/memory exhaustion in languages |
44 | | // like Java and .Net. Java and .Net load and unload a native DLL hundreds or |
45 | | // thousands of times during the life of a program. Each load produces a |
46 | | // memory leak and they can add up quickly. If they library is being used in |
47 | | // Java or .Net then Singleton must be avoided at all costs. |
48 | | // |
49 | | // The code below has a path cut-in for BMI2 using mulx and adcx instructions. |
50 | | // There was a modest speedup of approximately 0.03 ms in public key Integer |
51 | | // operations. We had to disable BMI2 for the moment because some OS X machines |
52 | | // were advertising BMI/BMI2 support but caused SIGILL's at runtime. Also see |
53 | | // https://github.com/weidai11/cryptopp/issues/850. |
54 | | |
55 | | #include "pch.h" |
56 | | #include "config.h" |
57 | | |
58 | | #ifndef CRYPTOPP_IMPORTS |
59 | | |
60 | | #include "integer.h" |
61 | | #include "secblock.h" |
62 | | #include "modarith.h" |
63 | | #include "nbtheory.h" |
64 | | #include "smartptr.h" |
65 | | #include "algparam.h" |
66 | | #include "filters.h" |
67 | | #include "stdcpp.h" |
68 | | #include "asn.h" |
69 | | #include "oids.h" |
70 | | #include "words.h" |
71 | | #include "pubkey.h" // for P1363_KDF2 |
72 | | #include "sha.h" |
73 | | #include "cpu.h" |
74 | | #include "misc.h" |
75 | | |
76 | | #include <iostream> |
77 | | |
78 | | #if (CRYPTOPP_MSC_VERSION >= 1400) && !defined(_M_ARM) |
79 | | #include <intrin.h> |
80 | | #endif |
81 | | |
82 | | #ifdef __DECCXX |
83 | | #include <c_asm.h> |
84 | | #endif |
85 | | |
86 | | // "Error: The operand ___LKDB cannot be assigned to", |
87 | | // http://github.com/weidai11/cryptopp/issues/188 |
88 | | #if (__SUNPRO_CC >= 0x5130) |
89 | | # define MAYBE_CONST |
90 | | # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x) |
91 | | #else |
92 | 856M | # define MAYBE_CONST const |
93 | 856M | # define MAYBE_UNCONST_CAST(x) x |
94 | | #endif |
95 | | |
96 | | // "Inline assembly operands don't work with .intel_syntax", |
97 | | // http://llvm.org/bugs/show_bug.cgi?id=24232 |
98 | | #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM) |
99 | | # undef CRYPTOPP_X86_ASM_AVAILABLE |
100 | | # undef CRYPTOPP_X32_ASM_AVAILABLE |
101 | | # undef CRYPTOPP_X64_ASM_AVAILABLE |
102 | | # undef CRYPTOPP_SSE2_ASM_AVAILABLE |
103 | | # undef CRYPTOPP_SSSE3_ASM_AVAILABLE |
104 | | #else |
105 | | # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)) |
106 | | #endif |
107 | | |
108 | | // ***************** C++ Static Initialization ******************** |
109 | | |
110 | | NAMESPACE_BEGIN(CryptoPP) |
111 | | |
112 | | // Function body near the middle of the file |
113 | | static void SetFunctionPointers(); |
114 | | |
115 | | // Use a double-checked pattern. We are not leaking anything so it |
116 | | // does not matter if a pointer is written twice during a race. |
117 | | // Avoid std::call_once due to too many problems on platforms like |
118 | | // Solaris and Sparc. Also see |
119 | | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=66146 and |
120 | | // http://github.com/weidai11/cryptopp/issues/707. |
121 | | InitializeInteger::InitializeInteger() |
122 | 57.2M | { |
123 | 57.2M | static bool s_flag; |
124 | 57.2M | MEMORY_BARRIER(); |
125 | 57.2M | if (s_flag == false) |
126 | 10 | { |
127 | 10 | SetFunctionPointers(); |
128 | 10 | s_flag = true; |
129 | 10 | MEMORY_BARRIER(); |
130 | 10 | } |
131 | 57.2M | } |
132 | | |
133 | | template <long i> |
134 | | struct NewInteger |
135 | | { |
136 | | Integer * operator()() const |
137 | | { |
138 | | return new Integer(i); |
139 | | } |
140 | | }; |
141 | | |
142 | | // ***************** Library code ******************** |
143 | | |
144 | | inline static int Compare(const word *A, const word *B, size_t N) |
145 | 33.2M | { |
146 | 40.5M | while (N--) |
147 | 40.5M | if (A[N] > B[N]) |
148 | 10.2M | return 1; |
149 | 30.2M | else if (A[N] < B[N]) |
150 | 22.9M | return -1; |
151 | | |
152 | 6.57k | return 0; |
153 | 33.2M | } |
154 | | |
155 | | inline static int Increment(word *A, size_t N, word B=1) |
156 | 28.5M | { |
157 | 28.5M | CRYPTOPP_ASSERT(N); |
158 | 28.5M | word t = A[0]; |
159 | 28.5M | A[0] = t+B; |
160 | 28.5M | if (A[0] >= t) |
161 | 28.4M | return 0; |
162 | 96.2k | for (unsigned i=1; i<N; i++) |
163 | 90.9k | if (++A[i]) |
164 | 27.6k | return 0; |
165 | 5.29k | return 1; |
166 | 32.9k | } |
167 | | |
168 | | inline static int Decrement(word *A, size_t N, word B=1) |
169 | 126k | { |
170 | 126k | CRYPTOPP_ASSERT(N); |
171 | 126k | word t = A[0]; |
172 | 126k | A[0] = t-B; |
173 | 126k | if (A[0] <= t) |
174 | 125k | return 0; |
175 | 1.43k | for (unsigned i=1; i<N; i++) |
176 | 1.43k | if (A[i]--) |
177 | 1.27k | return 0; |
178 | 4 | return 1; |
179 | 1.27k | } |
180 | | |
181 | | static void TwosComplement(word *A, size_t N) |
182 | 62.8k | { |
183 | 62.8k | Decrement(A, N); |
184 | 238k | for (unsigned i=0; i<N; i++) |
185 | 176k | A[i] = ~A[i]; |
186 | 62.8k | } |
187 | | |
188 | | static word AtomicInverseModPower2(word A) |
189 | 28.8k | { |
190 | 28.8k | CRYPTOPP_ASSERT(A%2==1); |
191 | | |
192 | 28.8k | word R=A%8; |
193 | | |
194 | 173k | for (unsigned i=3; i<WORD_BITS; i*=2) |
195 | 144k | R = R*(2-R*A); |
196 | | |
197 | 28.8k | CRYPTOPP_ASSERT(R*A==1); |
198 | 28.8k | return R; |
199 | 28.8k | } |
200 | | |
201 | | // ******************************************************** |
202 | | |
203 | | #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || ((defined(__aarch64__) || defined(__x86_64__)) && defined(CRYPTOPP_WORD128_AVAILABLE)) |
204 | | #define TWO_64_BIT_WORDS 1 |
205 | 1.84G | #define Declare2Words(x) word x##0, x##1; |
206 | 1.47G | #define AssignWord(a, b) a##0 = b; a##1 = 0; |
207 | | #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); |
208 | 5.78G | #define LowWord(a) a##0 |
209 | 1.31G | #define HighWord(a) a##1 |
210 | | #ifdef CRYPTOPP_MSC_VERSION |
211 | | #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1); |
212 | | #ifndef __INTEL_COMPILER |
213 | | #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; |
214 | | #endif |
215 | | #elif defined(__aarch32__) || defined(__aarch64__) |
216 | | #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; asm ("umulh %0,%1,%2" : "=r"(p1) : "r"(a), "r"(b)); |
217 | | #elif defined(__DECCXX) |
218 | | #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b); |
219 | | #elif defined(__x86_64__) |
220 | | #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100 |
221 | | // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works |
222 | | #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc"); |
223 | | #elif defined(__BMI2__) && 0 |
224 | | #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulxq %3, %0, %1" : "=r"(p0), "=r"(p1) : "d"(a), "r"(b)); |
225 | | #define MulAcc(c, d, a, b) asm ("mulxq %6, %3, %4; addq %3, %0; adcxq %4, %1; adcxq %7, %2;" : "+&r"(c), "+&r"(d##0), "+&r"(d##1), "=&r"(p0), "=&r"(p1) : "d"(a), "r"(b), "r"(W64LIT(0)) : "cc"); |
226 | | #define Double3Words(c, d) asm ("addq %0, %0; adcxq %1, %1; adcxq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); |
227 | | #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+&r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc"); |
228 | | #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); |
229 | | #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=&r"(e##0), "=&r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc"); |
230 | | #else |
231 | 1.87G | #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); |
232 | 4.87G | #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); |
233 | 131M | #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); |
234 | 892M | #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); |
235 | 421M | #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); |
236 | 131M | #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); |
237 | | #endif |
238 | | #endif |
239 | 1.87G | #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b) |
240 | | #ifndef Double3Words |
241 | | #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; |
242 | | #endif |
243 | | #ifndef Acc2WordsBy2 |
244 | | #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; |
245 | | #endif |
246 | 1.62G | #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);} |
247 | 1.47G | #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);} |
248 | 61.4M | #define GetCarry(u) u##1 |
249 | 37.5M | #define GetBorrow(u) u##1 |
250 | | #else |
251 | | #define Declare2Words(x) dword x; |
252 | | #if CRYPTOPP_MSC_VERSION >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64)) |
253 | | #define MultiplyWords(p, a, b) p = __emulu(a, b); |
254 | | #else |
255 | | #define MultiplyWords(p, a, b) p = (dword)a*b; |
256 | | #endif |
257 | | #define AssignWord(a, b) a = b; |
258 | | #define Add2WordsBy1(a, b, c) a = b + c; |
259 | | #define Acc2WordsBy2(a, b) a += b; |
260 | | #define LowWord(a) word(a) |
261 | | #define HighWord(a) word(a>>WORD_BITS) |
262 | | #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2; |
263 | | #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u); |
264 | | #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); |
265 | | #define GetCarry(u) HighWord(u) |
266 | | #define GetBorrow(u) word(u>>(WORD_BITS*2-1)) |
267 | | #endif |
268 | | #ifndef MulAcc |
269 | | #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); |
270 | | #endif |
271 | | #ifndef Acc2WordsBy1 |
272 | | #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b) |
273 | | #endif |
274 | | #ifndef Acc3WordsBy2 |
275 | | #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); |
276 | | #endif |
277 | | |
278 | | class DWord |
279 | | { |
280 | | public: |
281 | | #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) |
282 | 270M | DWord() {std::memset(&m_whole, 0x00, sizeof(m_whole));} |
283 | | #else |
284 | | DWord() {std::memset(&m_halfs, 0x00, sizeof(m_halfs));} |
285 | | #endif |
286 | | |
287 | | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
288 | 105M | explicit DWord(word low) : m_whole(low) { } |
289 | | #else |
290 | | explicit DWord(word low) |
291 | | { |
292 | | m_halfs.high = 0; |
293 | | m_halfs.low = low; |
294 | | } |
295 | | #endif |
296 | | |
297 | | #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) |
298 | | DWord(word low, word high) : m_whole() |
299 | | #else |
300 | | DWord(word low, word high) : m_halfs() |
301 | | #endif |
302 | 395M | { |
303 | 395M | #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) |
304 | 395M | # if (CRYPTOPP_LITTLE_ENDIAN) |
305 | 395M | const word t[2] = {low,high}; |
306 | 395M | std::memcpy(&m_whole, t, sizeof(m_whole)); |
307 | | # else |
308 | | const word t[2] = {high,low}; |
309 | | std::memcpy(&m_whole, t, sizeof(m_whole)); |
310 | | # endif |
311 | | #else |
312 | | m_halfs.low = low; |
313 | | m_halfs.high = high; |
314 | | #endif |
315 | 395M | } |
316 | | |
317 | | static DWord Multiply(word a, word b) |
318 | 74.8M | { |
319 | 74.8M | DWord r; |
320 | 74.8M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
321 | 74.8M | r.m_whole = (dword)a * b; |
322 | | #elif defined(MultiplyWordsLoHi) |
323 | | MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b); |
324 | | #else |
325 | | CRYPTOPP_ASSERT(0); |
326 | | #endif |
327 | 74.8M | return r; |
328 | 74.8M | } |
329 | | |
330 | | static DWord MultiplyAndAdd(word a, word b, word c) |
331 | 0 | { |
332 | 0 | DWord r = Multiply(a, b); |
333 | 0 | return r += c; |
334 | 0 | } |
335 | | |
336 | | DWord & operator+=(word a) |
337 | 0 | { |
338 | 0 | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
339 | 0 | m_whole = m_whole + a; |
340 | | #else |
341 | | m_halfs.low += a; |
342 | | m_halfs.high += (m_halfs.low < a); |
343 | | #endif |
344 | 0 | return *this; |
345 | 0 | } |
346 | | |
347 | | DWord operator+(word a) |
348 | 0 | { |
349 | 0 | DWord r; |
350 | 0 | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
351 | 0 | r.m_whole = m_whole + a; |
352 | 0 | #else |
353 | 0 | r.m_halfs.low = m_halfs.low + a; |
354 | 0 | r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); |
355 | 0 | #endif |
356 | 0 | return r; |
357 | 0 | } |
358 | | |
359 | | DWord operator-(DWord a) |
360 | 37.4M | { |
361 | 37.4M | DWord r; |
362 | 37.4M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
363 | 37.4M | r.m_whole = m_whole - a.m_whole; |
364 | | #else |
365 | | r.m_halfs.low = m_halfs.low - a.m_halfs.low; |
366 | | r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); |
367 | | #endif |
368 | 37.4M | return r; |
369 | 37.4M | } |
370 | | |
371 | | DWord operator-(word a) |
372 | 157M | { |
373 | 157M | DWord r; |
374 | 157M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
375 | 157M | r.m_whole = m_whole - a; |
376 | | #else |
377 | | r.m_halfs.low = m_halfs.low - a; |
378 | | r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); |
379 | | #endif |
380 | 157M | return r; |
381 | 157M | } |
382 | | |
383 | | // returns quotient, which must fit in a word |
384 | | word operator/(word divisor); |
385 | | |
386 | | word operator%(word a); |
387 | | |
388 | | bool operator!() const |
389 | 18.8M | { |
390 | 18.8M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
391 | 18.8M | return !m_whole; |
392 | | #else |
393 | | return !m_halfs.high && !m_halfs.low; |
394 | | #endif |
395 | 18.8M | } |
396 | | |
397 | | // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive |
398 | | // union members, and that's UB. Also see http://stackoverflow.com/q/11373203. |
399 | 236M | word GetLowHalf() const {return m_halfs.low;} |
400 | 183M | word GetHighHalf() const {return m_halfs.high;} |
401 | 52.5M | word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} |
402 | | |
403 | | private: |
404 | | // Issue 274, "Types cannot be declared in anonymous union", |
405 | | // http://github.com/weidai11/cryptopp/issues/274 |
406 | | // Thanks to Martin Bonner at http://stackoverflow.com/a/39507183 |
407 | | struct half_words |
408 | | { |
409 | | #if (CRYPTOPP_LITTLE_ENDIAN) |
410 | | word low; |
411 | | word high; |
412 | | #else |
413 | | word high; |
414 | | word low; |
415 | | #endif |
416 | | }; |
417 | | union |
418 | | { |
419 | | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
420 | | dword m_whole; |
421 | | #endif |
422 | | half_words m_halfs; |
423 | | }; |
424 | | }; |
425 | | |
426 | | class Word |
427 | | { |
428 | | public: |
429 | 0 | Word() : m_whole(0) {} |
430 | 0 | Word(word value) : m_whole(value) {} |
431 | 0 | Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {} |
432 | | |
433 | | static Word Multiply(hword a, hword b) |
434 | 0 | { |
435 | 0 | Word r; |
436 | 0 | r.m_whole = (word)a * b; |
437 | 0 | return r; |
438 | 0 | } |
439 | | |
440 | | Word operator-(Word a) |
441 | 0 | { |
442 | 0 | Word r; |
443 | 0 | r.m_whole = m_whole - a.m_whole; |
444 | 0 | return r; |
445 | 0 | } |
446 | | |
447 | | Word operator-(hword a) |
448 | 0 | { |
449 | 0 | Word r; |
450 | 0 | r.m_whole = m_whole - a; |
451 | 0 | return r; |
452 | 0 | } |
453 | | |
454 | | // returns quotient, which must fit in a word |
455 | | hword operator/(hword divisor) |
456 | 0 | { |
457 | 0 | return hword(m_whole / divisor); |
458 | 0 | } |
459 | | |
460 | | bool operator!() const |
461 | 0 | { |
462 | 0 | return !m_whole; |
463 | 0 | } |
464 | | |
465 | 0 | word GetWhole() const {return m_whole;} |
466 | 0 | hword GetLowHalf() const {return hword(m_whole);} |
467 | 0 | hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} |
468 | 0 | hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} |
469 | | |
470 | | private: |
471 | | word m_whole; |
472 | | }; |
473 | | |
474 | | // do a 3 word by 2 word divide, returns quotient and leaves remainder in A |
475 | | template <class S, class D> |
476 | | S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR) |
477 | 37.4M | { |
478 | 37.4M | CRYPTOPP_UNUSED(dummy); |
479 | | |
480 | | // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S |
481 | 37.4M | CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0)); |
482 | | |
483 | | // Profiling guided the flow below. |
484 | | |
485 | | // estimate the quotient: do a 2 S by 1 S divide. |
486 | 37.4M | S Q; bool pre = (S(B1+1) == 0); |
487 | 37.4M | if (B1 > 0 && !pre) |
488 | 37.3M | Q = D(A[1], A[2]) / S(B1+1); |
489 | 55.2k | else if (pre) |
490 | 55.2k | Q = A[2]; |
491 | 0 | else |
492 | 0 | Q = D(A[0], A[1]) / B0; |
493 | | |
494 | | // now subtract Q*B from A |
495 | 37.4M | D p = D::Multiply(B0, Q); |
496 | 37.4M | D u = (D) A[0] - p.GetLowHalf(); |
497 | 37.4M | A[0] = u.GetLowHalf(); |
498 | 37.4M | u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); |
499 | 37.4M | A[1] = u.GetLowHalf(); |
500 | 37.4M | A[2] += u.GetHighHalf(); |
501 | | |
502 | | // Q <= actual quotient, so fix it |
503 | 52.5M | while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) |
504 | 15.1M | { |
505 | 15.1M | u = (D) A[0] - B0; |
506 | 15.1M | A[0] = u.GetLowHalf(); |
507 | 15.1M | u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); |
508 | 15.1M | A[1] = u.GetLowHalf(); |
509 | 15.1M | A[2] += u.GetHighHalf(); |
510 | 15.1M | Q++; |
511 | 15.1M | CRYPTOPP_ASSERT(Q); // shouldn't overflow |
512 | 15.1M | } |
513 | | |
514 | 37.4M | return Q; |
515 | 37.4M | } |
516 | | |
517 | | // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 |
518 | | template <class S, class D> |
519 | | inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) |
520 | 18.8M | { |
521 | | // Profiling guided the flow below. |
522 | | |
523 | 18.8M | if (!!B) |
524 | 18.7M | { |
525 | 18.7M | S Q[2]; |
526 | 18.7M | T[0] = Al.GetLowHalf(); |
527 | 18.7M | T[1] = Al.GetHighHalf(); |
528 | 18.7M | T[2] = Ah.GetLowHalf(); |
529 | 18.7M | T[3] = Ah.GetHighHalf(); |
530 | 18.7M | Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); |
531 | 18.7M | Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); |
532 | 18.7M | return D(Q[0], Q[1]); |
533 | 18.7M | } |
534 | 98.2k | else // if divisor is 0, we assume divisor==2**(2*WORD_BITS) |
535 | 98.2k | { |
536 | 98.2k | return D(Ah.GetLowHalf(), Ah.GetHighHalf()); |
537 | 98.2k | } |
538 | 18.8M | } |
539 | | |
540 | | // returns quotient, which must fit in a word |
541 | | inline word DWord::operator/(word a) |
542 | 178M | { |
543 | 178M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
544 | 178M | return word(m_whole / a); |
545 | | #else |
546 | | hword r[4]; |
547 | | return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); |
548 | | #endif |
549 | 178M | } |
550 | | |
551 | | inline word DWord::operator%(word a) |
552 | 141M | { |
553 | 141M | #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE |
554 | 141M | return word(m_whole % a); |
555 | | #else |
556 | | if (a < (word(1) << (WORD_BITS/2))) |
557 | | { |
558 | | hword h = hword(a); |
559 | | word r = m_halfs.high % h; |
560 | | r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; |
561 | | return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); |
562 | | } |
563 | | else |
564 | | { |
565 | | hword r[4]; |
566 | | DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); |
567 | | return Word(r[0], r[1]).GetWhole(); |
568 | | } |
569 | | #endif |
570 | 141M | } |
571 | | |
572 | | // ******************************************************** |
573 | | |
574 | | // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC. |
575 | | #if defined(__GNUC__) |
576 | | #define AddPrologue \ |
577 | | int result; \ |
578 | | __asm__ __volatile__ \ |
579 | | ( \ |
580 | | INTEL_NOPREFIX |
581 | | #define AddEpilogue \ |
582 | | ATT_PREFIX \ |
583 | | : "=a" (result)\ |
584 | | : "d" (C), "a" (A), "D" (B), "c" (N) \ |
585 | | : "%esi", "memory", "cc" \ |
586 | | );\ |
587 | | return result; |
588 | | #define MulPrologue \ |
589 | | __asm__ __volatile__ \ |
590 | | ( \ |
591 | | INTEL_NOPREFIX \ |
592 | | AS1( push ebx) \ |
593 | | AS2( mov ebx, edx) |
594 | | #define MulEpilogue \ |
595 | | AS1( pop ebx) \ |
596 | | ATT_PREFIX \ |
597 | | : \ |
598 | | : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ |
599 | | : "%esi", "memory", "cc" \ |
600 | | ); |
601 | | #define SquPrologue MulPrologue |
602 | | #define SquEpilogue \ |
603 | | AS1( pop ebx) \ |
604 | | ATT_PREFIX \ |
605 | | : \ |
606 | | : "d" (s_maskLow16), "c" (C), "a" (A) \ |
607 | | : "%esi", "%edi", "memory", "cc" \ |
608 | | ); |
609 | | #define TopPrologue MulPrologue |
610 | | #define TopEpilogue \ |
611 | | AS1( pop ebx) \ |
612 | | ATT_PREFIX \ |
613 | | : \ |
614 | | : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ |
615 | | : "memory", "cc" \ |
616 | | ); |
617 | | #else |
618 | | #define AddPrologue \ |
619 | | __asm push edi \ |
620 | | __asm push esi \ |
621 | | __asm mov eax, [esp+12] \ |
622 | | __asm mov edi, [esp+16] |
623 | | #define AddEpilogue \ |
624 | | __asm pop esi \ |
625 | | __asm pop edi \ |
626 | | __asm ret 8 |
627 | | #define SaveEBX |
628 | | #define RestoreEBX |
629 | | #define SquPrologue \ |
630 | | AS2( mov eax, A) \ |
631 | | AS2( mov ecx, C) \ |
632 | | SaveEBX \ |
633 | | AS2( lea ebx, s_maskLow16) |
634 | | #define MulPrologue \ |
635 | | AS2( mov eax, A) \ |
636 | | AS2( mov edi, B) \ |
637 | | AS2( mov ecx, C) \ |
638 | | SaveEBX \ |
639 | | AS2( lea ebx, s_maskLow16) |
640 | | #define TopPrologue \ |
641 | | AS2( mov eax, A) \ |
642 | | AS2( mov edi, B) \ |
643 | | AS2( mov ecx, C) \ |
644 | | AS2( mov esi, L) \ |
645 | | SaveEBX \ |
646 | | AS2( lea ebx, s_maskLow16) |
647 | | #define SquEpilogue RestoreEBX |
648 | | #define MulEpilogue RestoreEBX |
649 | | #define TopEpilogue RestoreEBX |
650 | | #endif |
651 | | |
652 | | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
653 | | extern "C" { |
654 | | int Baseline_Add(size_t N, word *C, const word *A, const word *B); |
655 | | int Baseline_Sub(size_t N, word *C, const word *A, const word *B); |
656 | | } |
657 | | #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE) |
658 | | int Baseline_Add(size_t N, word *C, const word *A, const word *B) |
659 | | { |
660 | | word result; |
661 | | __asm__ __volatile__ |
662 | | ( |
663 | | INTEL_NOPREFIX |
664 | | AS1( neg %1) |
665 | | ASJ( jz, 1, f) |
666 | | AS2( mov %0,[%3+8*%1]) |
667 | | AS2( add %0,[%4+8*%1]) |
668 | | AS2( mov [%2+8*%1],%0) |
669 | | ASL(0) |
670 | | AS2( mov %0,[%3+8*%1+8]) |
671 | | AS2( adc %0,[%4+8*%1+8]) |
672 | | AS2( mov [%2+8*%1+8],%0) |
673 | | AS2( lea %1,[%1+2]) |
674 | | ASJ( jrcxz, 1, f) |
675 | | AS2( mov %0,[%3+8*%1]) |
676 | | AS2( adc %0,[%4+8*%1]) |
677 | | AS2( mov [%2+8*%1],%0) |
678 | | ASJ( jmp, 0, b) |
679 | | ASL(1) |
680 | | AS2( mov %0, 0) |
681 | | AS2( adc %0, %0) |
682 | | ATT_NOPREFIX |
683 | | : "=&r" (result), "+c" (N) |
684 | | : "r" (C+N), "r" (A+N), "r" (B+N) |
685 | | : "memory", "cc" |
686 | | ); |
687 | | return (int)result; |
688 | | } |
689 | | |
690 | | int Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
691 | | { |
692 | | word result; |
693 | | __asm__ __volatile__ |
694 | | ( |
695 | | INTEL_NOPREFIX |
696 | | AS1( neg %1) |
697 | | ASJ( jz, 1, f) |
698 | | AS2( mov %0,[%3+8*%1]) |
699 | | AS2( sub %0,[%4+8*%1]) |
700 | | AS2( mov [%2+8*%1],%0) |
701 | | ASL(0) |
702 | | AS2( mov %0,[%3+8*%1+8]) |
703 | | AS2( sbb %0,[%4+8*%1+8]) |
704 | | AS2( mov [%2+8*%1+8],%0) |
705 | | AS2( lea %1,[%1+2]) |
706 | | ASJ( jrcxz, 1, f) |
707 | | AS2( mov %0,[%3+8*%1]) |
708 | | AS2( sbb %0,[%4+8*%1]) |
709 | | AS2( mov [%2+8*%1],%0) |
710 | | ASJ( jmp, 0, b) |
711 | | ASL(1) |
712 | | AS2( mov %0, 0) |
713 | | AS2( adc %0, %0) |
714 | | ATT_NOPREFIX |
715 | | : "=&r" (result), "+c" (N) |
716 | | : "r" (C+N), "r" (A+N), "r" (B+N) |
717 | | : "memory", "cc" |
718 | | ); |
719 | | return (int)result; |
720 | | } |
721 | | #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 |
722 | | CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) |
723 | | { |
724 | | AddPrologue |
725 | | |
726 | | // now: eax = A, edi = B, edx = C, ecx = N |
727 | | AS2( lea eax, [eax+4*ecx]) |
728 | | AS2( lea edi, [edi+4*ecx]) |
729 | | AS2( lea edx, [edx+4*ecx]) |
730 | | |
731 | | AS1( neg ecx) // ecx is negative index |
732 | | AS2( test ecx, 2) // this clears carry flag |
733 | | ASJ( jz, 0, f) |
734 | | AS2( sub ecx, 2) |
735 | | ASJ( jmp, 1, f) |
736 | | |
737 | | ASL(0) |
738 | | ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero |
739 | | AS2( mov esi,[eax+4*ecx]) |
740 | | AS2( adc esi,[edi+4*ecx]) |
741 | | AS2( mov [edx+4*ecx],esi) |
742 | | AS2( mov esi,[eax+4*ecx+4]) |
743 | | AS2( adc esi,[edi+4*ecx+4]) |
744 | | AS2( mov [edx+4*ecx+4],esi) |
745 | | ASL(1) |
746 | | AS2( mov esi,[eax+4*ecx+8]) |
747 | | AS2( adc esi,[edi+4*ecx+8]) |
748 | | AS2( mov [edx+4*ecx+8],esi) |
749 | | AS2( mov esi,[eax+4*ecx+12]) |
750 | | AS2( adc esi,[edi+4*ecx+12]) |
751 | | AS2( mov [edx+4*ecx+12],esi) |
752 | | |
753 | | AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 |
754 | | ASJ( jmp, 0, b) |
755 | | |
756 | | ASL(2) |
757 | | AS2( mov eax, 0) |
758 | | AS1( setc al) // store carry into eax (return result register) |
759 | | |
760 | | AddEpilogue |
761 | | |
762 | | // http://github.com/weidai11/cryptopp/issues/340 |
763 | | CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B); |
764 | | CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N); |
765 | | } |
766 | | |
767 | | CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
768 | | { |
769 | | AddPrologue |
770 | | |
771 | | // now: eax = A, edi = B, edx = C, ecx = N |
772 | | AS2( lea eax, [eax+4*ecx]) |
773 | | AS2( lea edi, [edi+4*ecx]) |
774 | | AS2( lea edx, [edx+4*ecx]) |
775 | | |
776 | | AS1( neg ecx) // ecx is negative index |
777 | | AS2( test ecx, 2) // this clears carry flag |
778 | | ASJ( jz, 0, f) |
779 | | AS2( sub ecx, 2) |
780 | | ASJ( jmp, 1, f) |
781 | | |
782 | | ASL(0) |
783 | | ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero |
784 | | AS2( mov esi,[eax+4*ecx]) |
785 | | AS2( sbb esi,[edi+4*ecx]) |
786 | | AS2( mov [edx+4*ecx],esi) |
787 | | AS2( mov esi,[eax+4*ecx+4]) |
788 | | AS2( sbb esi,[edi+4*ecx+4]) |
789 | | AS2( mov [edx+4*ecx+4],esi) |
790 | | ASL(1) |
791 | | AS2( mov esi,[eax+4*ecx+8]) |
792 | | AS2( sbb esi,[edi+4*ecx+8]) |
793 | | AS2( mov [edx+4*ecx+8],esi) |
794 | | AS2( mov esi,[eax+4*ecx+12]) |
795 | | AS2( sbb esi,[edi+4*ecx+12]) |
796 | | AS2( mov [edx+4*ecx+12],esi) |
797 | | |
798 | | AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 |
799 | | ASJ( jmp, 0, b) |
800 | | |
801 | | ASL(2) |
802 | | AS2( mov eax, 0) |
803 | | AS1( setc al) // store carry into eax (return result register) |
804 | | |
805 | | AddEpilogue |
806 | | |
807 | | // http://github.com/weidai11/cryptopp/issues/340 |
808 | | CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B); |
809 | | CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N); |
810 | | } |
811 | | |
812 | | #if CRYPTOPP_INTEGER_SSE2 |
813 | | CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) |
814 | | { |
815 | | AddPrologue |
816 | | |
817 | | // now: eax = A, edi = B, edx = C, ecx = N |
818 | | AS2( lea eax, [eax+4*ecx]) |
819 | | AS2( lea edi, [edi+4*ecx]) |
820 | | AS2( lea edx, [edx+4*ecx]) |
821 | | |
822 | | AS1( neg ecx) // ecx is negative index |
823 | | AS2( pxor mm2, mm2) |
824 | | ASJ( jz, 2, f) |
825 | | AS2( test ecx, 2) // this clears carry flag |
826 | | ASJ( jz, 0, f) |
827 | | AS2( sub ecx, 2) |
828 | | ASJ( jmp, 1, f) |
829 | | |
830 | | ASL(0) |
831 | | AS2( movd mm0, DWORD PTR [eax+4*ecx]) |
832 | | AS2( movd mm1, DWORD PTR [edi+4*ecx]) |
833 | | AS2( paddq mm0, mm1) |
834 | | AS2( paddq mm2, mm0) |
835 | | AS2( movd DWORD PTR [edx+4*ecx], mm2) |
836 | | AS2( psrlq mm2, 32) |
837 | | |
838 | | AS2( movd mm0, DWORD PTR [eax+4*ecx+4]) |
839 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) |
840 | | AS2( paddq mm0, mm1) |
841 | | AS2( paddq mm2, mm0) |
842 | | AS2( movd DWORD PTR [edx+4*ecx+4], mm2) |
843 | | AS2( psrlq mm2, 32) |
844 | | |
845 | | ASL(1) |
846 | | AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) |
847 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) |
848 | | AS2( paddq mm0, mm1) |
849 | | AS2( paddq mm2, mm0) |
850 | | AS2( movd DWORD PTR [edx+4*ecx+8], mm2) |
851 | | AS2( psrlq mm2, 32) |
852 | | |
853 | | AS2( movd mm0, DWORD PTR [eax+4*ecx+12]) |
854 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) |
855 | | AS2( paddq mm0, mm1) |
856 | | AS2( paddq mm2, mm0) |
857 | | AS2( movd DWORD PTR [edx+4*ecx+12], mm2) |
858 | | AS2( psrlq mm2, 32) |
859 | | |
860 | | AS2( add ecx, 4) |
861 | | ASJ( jnz, 0, b) |
862 | | |
863 | | ASL(2) |
864 | | AS2( movd eax, mm2) |
865 | | AS1( emms) |
866 | | |
867 | | AddEpilogue |
868 | | |
869 | | // http://github.com/weidai11/cryptopp/issues/340 |
870 | | CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B); |
871 | | CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N); |
872 | | } |
873 | | CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) |
874 | | { |
875 | | AddPrologue |
876 | | |
877 | | // now: eax = A, edi = B, edx = C, ecx = N |
878 | | AS2( lea eax, [eax+4*ecx]) |
879 | | AS2( lea edi, [edi+4*ecx]) |
880 | | AS2( lea edx, [edx+4*ecx]) |
881 | | |
882 | | AS1( neg ecx) // ecx is negative index |
883 | | AS2( pxor mm2, mm2) |
884 | | ASJ( jz, 2, f) |
885 | | AS2( test ecx, 2) // this clears carry flag |
886 | | ASJ( jz, 0, f) |
887 | | AS2( sub ecx, 2) |
888 | | ASJ( jmp, 1, f) |
889 | | |
890 | | ASL(0) |
891 | | AS2( movd mm0, DWORD PTR [eax+4*ecx]) |
892 | | AS2( movd mm1, DWORD PTR [edi+4*ecx]) |
893 | | AS2( psubq mm0, mm1) |
894 | | AS2( psubq mm0, mm2) |
895 | | AS2( movd DWORD PTR [edx+4*ecx], mm0) |
896 | | AS2( psrlq mm0, 63) |
897 | | |
898 | | AS2( movd mm2, DWORD PTR [eax+4*ecx+4]) |
899 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) |
900 | | AS2( psubq mm2, mm1) |
901 | | AS2( psubq mm2, mm0) |
902 | | AS2( movd DWORD PTR [edx+4*ecx+4], mm2) |
903 | | AS2( psrlq mm2, 63) |
904 | | |
905 | | ASL(1) |
906 | | AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) |
907 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) |
908 | | AS2( psubq mm0, mm1) |
909 | | AS2( psubq mm0, mm2) |
910 | | AS2( movd DWORD PTR [edx+4*ecx+8], mm0) |
911 | | AS2( psrlq mm0, 63) |
912 | | |
913 | | AS2( movd mm2, DWORD PTR [eax+4*ecx+12]) |
914 | | AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) |
915 | | AS2( psubq mm2, mm1) |
916 | | AS2( psubq mm2, mm0) |
917 | | AS2( movd DWORD PTR [edx+4*ecx+12], mm2) |
918 | | AS2( psrlq mm2, 63) |
919 | | |
920 | | AS2( add ecx, 4) |
921 | | ASJ( jnz, 0, b) |
922 | | |
923 | | ASL(2) |
924 | | AS2( movd eax, mm2) |
925 | | AS1( emms) |
926 | | |
927 | | AddEpilogue |
928 | | |
929 | | // http://github.com/weidai11/cryptopp/issues/340 |
930 | | CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B); |
931 | | CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N); |
932 | | } |
933 | | #endif // CRYPTOPP_INTEGER_SSE2 |
934 | | #else // CRYPTOPP_SSE2_ASM_AVAILABLE |
935 | | int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) |
936 | 61.4M | { |
937 | 61.4M | CRYPTOPP_ASSERT (N%2 == 0); |
938 | | |
939 | 61.4M | Declare2Words(u); |
940 | 61.4M | AssignWord(u, 0); |
941 | 874M | for (size_t i=0; i<N; i+=2) |
942 | 812M | { |
943 | 812M | AddWithCarry(u, A[i], B[i]); |
944 | 812M | C[i] = LowWord(u); |
945 | 812M | AddWithCarry(u, A[i+1], B[i+1]); |
946 | 812M | C[i+1] = LowWord(u); |
947 | 812M | } |
948 | 61.4M | return int(GetCarry(u)); |
949 | 61.4M | } |
950 | | |
951 | | int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) |
952 | 37.5M | { |
953 | 37.5M | CRYPTOPP_ASSERT (N%2 == 0); |
954 | | |
955 | 37.5M | Declare2Words(u); |
956 | 37.5M | AssignWord(u, 0); |
957 | 772M | for (size_t i=0; i<N; i+=2) |
958 | 735M | { |
959 | 735M | SubtractWithBorrow(u, A[i], B[i]); |
960 | 735M | C[i] = LowWord(u); |
961 | 735M | SubtractWithBorrow(u, A[i+1], B[i+1]); |
962 | 735M | C[i+1] = LowWord(u); |
963 | 735M | } |
964 | 37.5M | return int(GetBorrow(u)); |
965 | 37.5M | } |
966 | | #endif |
967 | | |
968 | | static word LinearMultiply(word *C, const word *AA, word B, size_t N) |
969 | 12.7M | { |
970 | | // http://github.com/weidai11/cryptopp/issues/188 |
971 | 12.7M | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
972 | | |
973 | 12.7M | word carry=0; |
974 | 904M | for(unsigned i=0; i<N; i++) |
975 | 891M | { |
976 | 891M | Declare2Words(p); |
977 | 891M | MultiplyWords(p, A[i], B); |
978 | 891M | Acc2WordsBy1(p, carry); |
979 | 891M | C[i] = LowWord(p); |
980 | 891M | carry = HighWord(p); |
981 | 891M | } |
982 | 12.7M | return carry; |
983 | 12.7M | } |
984 | | |
985 | | #ifndef CRYPTOPP_DOXYGEN_PROCESSING |
986 | | |
987 | | #define Mul_2 \ |
988 | 403M | Mul_Begin(2) \ |
989 | 403M | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
990 | 403M | Mul_End(1, 1) |
991 | | |
992 | | #define Mul_4 \ |
993 | 33.7k | Mul_Begin(4) \ |
994 | 33.7k | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
995 | 33.7k | Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ |
996 | 33.7k | Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
997 | 33.7k | Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ |
998 | 33.7k | Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ |
999 | 33.7k | Mul_End(5, 3) |
1000 | | |
1001 | | #define Mul_8 \ |
1002 | 23.9k | Mul_Begin(8) \ |
1003 | 23.9k | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
1004 | 23.9k | Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ |
1005 | 23.9k | Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
1006 | 23.9k | Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ |
1007 | 23.9k | Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ |
1008 | 23.9k | Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ |
1009 | 23.9k | Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ |
1010 | 23.9k | Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ |
1011 | 23.9k | Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ |
1012 | 23.9k | Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ |
1013 | 23.9k | Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ |
1014 | 23.9k | Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ |
1015 | 23.9k | Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ |
1016 | 23.9k | Mul_End(13, 7) |
1017 | | |
1018 | | #define Mul_16 \ |
1019 | 13.0M | Mul_Begin(16) \ |
1020 | 13.0M | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
1021 | 13.0M | Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ |
1022 | 13.0M | Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
1023 | 13.0M | Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ |
1024 | 13.0M | Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ |
1025 | 13.0M | Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ |
1026 | 13.0M | Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ |
1027 | 13.0M | Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ |
1028 | 13.0M | Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ |
1029 | 13.0M | Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ |
1030 | 13.0M | Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ |
1031 | 13.0M | Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ |
1032 | 13.0M | Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ |
1033 | 13.0M | Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ |
1034 | 13.0M | Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ |
1035 | 13.0M | Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ |
1036 | 13.0M | Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ |
1037 | 13.0M | Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ |
1038 | 13.0M | Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ |
1039 | 13.0M | Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ |
1040 | 13.0M | Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ |
1041 | 13.0M | Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ |
1042 | 13.0M | Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ |
1043 | 13.0M | Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ |
1044 | 13.0M | Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ |
1045 | 13.0M | Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ |
1046 | 13.0M | Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ |
1047 | 13.0M | Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ |
1048 | 13.0M | Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ |
1049 | 13.0M | Mul_End(29, 15) |
1050 | | |
1051 | | #define Squ_2 \ |
1052 | 435k | Squ_Begin(2) \ |
1053 | 435k | Squ_End(2) |
1054 | | |
1055 | | #define Squ_4 \ |
1056 | 132k | Squ_Begin(4) \ |
1057 | 132k | Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ |
1058 | 132k | Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ |
1059 | 132k | Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ |
1060 | 132k | Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ |
1061 | 132k | Squ_End(4) |
1062 | | |
1063 | | #define Squ_8 \ |
1064 | 107k | Squ_Begin(8) \ |
1065 | 107k | Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ |
1066 | 107k | Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ |
1067 | 107k | Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ |
1068 | 107k | Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ |
1069 | 107k | Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ |
1070 | 107k | Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ |
1071 | 107k | Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ |
1072 | 107k | Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ |
1073 | 107k | Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ |
1074 | 107k | Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ |
1075 | 107k | Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ |
1076 | 107k | Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ |
1077 | 107k | Squ_End(8) |
1078 | | |
1079 | | #define Squ_16 \ |
1080 | 4.45M | Squ_Begin(16) \ |
1081 | 4.45M | Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ |
1082 | 4.45M | Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ |
1083 | 4.45M | Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ |
1084 | 4.45M | Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ |
1085 | 4.45M | Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ |
1086 | 4.45M | Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ |
1087 | 4.45M | Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ |
1088 | 4.45M | Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ |
1089 | 4.45M | Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ |
1090 | 4.45M | Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ |
1091 | 4.45M | Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ |
1092 | 4.45M | Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ |
1093 | 4.45M | Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ |
1094 | 4.45M | Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ |
1095 | 4.45M | Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ |
1096 | 4.45M | Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ |
1097 | 4.45M | Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ |
1098 | 4.45M | Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ |
1099 | 4.45M | Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ |
1100 | 4.45M | Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ |
1101 | 4.45M | Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ |
1102 | 4.45M | Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ |
1103 | 4.45M | Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ |
1104 | 4.45M | Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ |
1105 | 4.45M | Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ |
1106 | 4.45M | Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ |
1107 | 4.45M | Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ |
1108 | 4.45M | Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ |
1109 | 4.45M | Squ_End(16) |
1110 | | |
1111 | | #define Bot_2 \ |
1112 | 360k | Mul_Begin(2) \ |
1113 | 360k | Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ |
1114 | 360k | Bot_End(2) |
1115 | | |
1116 | | #define Bot_4 \ |
1117 | 111k | Mul_Begin(4) \ |
1118 | 111k | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
1119 | 111k | Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ |
1120 | 111k | Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ |
1121 | 111k | Bot_End(4) |
1122 | | |
1123 | | #define Bot_8 \ |
1124 | 80.2k | Mul_Begin(8) \ |
1125 | 80.2k | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
1126 | 80.2k | Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ |
1127 | 80.2k | Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
1128 | 80.2k | Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ |
1129 | 80.2k | Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ |
1130 | 80.2k | Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ |
1131 | 80.2k | Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ |
1132 | 80.2k | Bot_End(8) |
1133 | | |
1134 | | #define Bot_16 \ |
1135 | 2.28M | Mul_Begin(16) \ |
1136 | 2.28M | Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ |
1137 | 2.28M | Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ |
1138 | 2.28M | Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
1139 | 2.28M | Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ |
1140 | 2.28M | Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ |
1141 | 2.28M | Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ |
1142 | 2.28M | Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ |
1143 | 2.28M | Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ |
1144 | 2.28M | Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ |
1145 | 2.28M | Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ |
1146 | 2.28M | Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ |
1147 | 2.28M | Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ |
1148 | 2.28M | Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ |
1149 | 2.28M | Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ |
1150 | 2.28M | Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ |
1151 | 2.28M | Bot_End(16) |
1152 | | |
1153 | | #endif |
1154 | | |
1155 | | #if 0 |
1156 | | #define Mul_Begin(n) \ |
1157 | | Declare2Words(p) \ |
1158 | | Declare2Words(c) \ |
1159 | | Declare2Words(d) \ |
1160 | | MultiplyWords(p, A[0], B[0]) \ |
1161 | | AssignWord(c, LowWord(p)) \ |
1162 | | AssignWord(d, HighWord(p)) |
1163 | | |
1164 | | #define Mul_Acc(i, j) \ |
1165 | | MultiplyWords(p, A[i], B[j]) \ |
1166 | | Acc2WordsBy1(c, LowWord(p)) \ |
1167 | | Acc2WordsBy1(d, HighWord(p)) |
1168 | | |
1169 | | #define Mul_SaveAcc(k, i, j) \ |
1170 | | R[k] = LowWord(c); \ |
1171 | | Add2WordsBy1(c, d, HighWord(c)) \ |
1172 | | MultiplyWords(p, A[i], B[j]) \ |
1173 | | AssignWord(d, HighWord(p)) \ |
1174 | | Acc2WordsBy1(c, LowWord(p)) |
1175 | | |
1176 | | #define Mul_End(n) \ |
1177 | | R[2*n-3] = LowWord(c); \ |
1178 | | Acc2WordsBy1(d, HighWord(c)) \ |
1179 | | MultiplyWords(p, A[n-1], B[n-1])\ |
1180 | | Acc2WordsBy2(d, p) \ |
1181 | | R[2*n-2] = LowWord(d); \ |
1182 | | R[2*n-1] = HighWord(d); |
1183 | | |
1184 | | #define Bot_SaveAcc(k, i, j) \ |
1185 | | R[k] = LowWord(c); \ |
1186 | | word e = LowWord(d) + HighWord(c); \ |
1187 | | e += A[i] * B[j]; |
1188 | | |
1189 | | #define Bot_Acc(i, j) \ |
1190 | | e += A[i] * B[j]; |
1191 | | |
1192 | | #define Bot_End(n) \ |
1193 | | R[n-1] = e; |
1194 | | #else |
1195 | | #define Mul_Begin(n) \ |
1196 | 419M | Declare2Words(p) \ |
1197 | 419M | word c; \ |
1198 | 419M | Declare2Words(d) \ |
1199 | 419M | MultiplyWords(p, A[0], B[0]) \ |
1200 | 419M | c = LowWord(p); \ |
1201 | 419M | AssignWord(d, HighWord(p)) |
1202 | | |
1203 | | #define Mul_Acc(i, j) \ |
1204 | 3.58G | MulAcc(c, d, A[i], B[j]) |
1205 | | |
1206 | | #define Mul_SaveAcc(k, i, j) \ |
1207 | 815M | R[k] = c; \ |
1208 | 815M | c = LowWord(d); \ |
1209 | 815M | AssignWord(d, HighWord(d)) \ |
1210 | 815M | MulAcc(c, d, A[i], B[j]) |
1211 | | |
1212 | | #define Mul_End(k, i) \ |
1213 | 416M | R[k] = c; \ |
1214 | 416M | MultiplyWords(p, A[i], B[i]) \ |
1215 | 416M | Acc2WordsBy2(p, d) \ |
1216 | 416M | R[k+1] = LowWord(p); \ |
1217 | 416M | R[k+2] = HighWord(p); |
1218 | | |
1219 | | #define Bot_SaveAcc(k, i, j) \ |
1220 | 2.83M | R[k] = c; \ |
1221 | 2.83M | c = LowWord(d); \ |
1222 | 2.83M | c += A[i] * B[j]; |
1223 | | |
1224 | | #define Bot_Acc(i, j) \ |
1225 | 35.4M | c += A[i] * B[j]; |
1226 | | |
1227 | | #define Bot_End(n) \ |
1228 | 2.83M | R[n-1] = c; |
1229 | | #endif |
1230 | | |
1231 | | #define Squ_Begin(n) \ |
1232 | 5.12M | Declare2Words(p) \ |
1233 | 5.12M | word c; \ |
1234 | 5.12M | Declare2Words(d) \ |
1235 | 5.12M | Declare2Words(e) \ |
1236 | 5.12M | MultiplyWords(p, A[0], A[0]) \ |
1237 | 5.12M | R[0] = LowWord(p); \ |
1238 | 5.12M | AssignWord(e, HighWord(p)) \ |
1239 | 5.12M | MultiplyWords(p, A[0], A[1]) \ |
1240 | 5.12M | c = LowWord(p); \ |
1241 | 5.12M | AssignWord(d, HighWord(p)) \ |
1242 | 5.12M | Squ_NonDiag \ |
1243 | | |
1244 | | #define Squ_NonDiag \ |
1245 | 131M | Double3Words(c, d) |
1246 | | |
1247 | | #define Squ_SaveAcc(k, i, j) \ |
1248 | 126M | Acc3WordsBy2(c, d, e) \ |
1249 | 126M | R[k] = c; \ |
1250 | 126M | MultiplyWords(p, A[i], A[j]) \ |
1251 | 126M | c = LowWord(p); \ |
1252 | 126M | AssignWord(d, HighWord(p)) \ |
1253 | | |
1254 | | #define Squ_Acc(i, j) \ |
1255 | 406M | MulAcc(c, d, A[i], A[j]) |
1256 | | |
1257 | | #define Squ_Diag(i) \ |
1258 | 63.2M | Squ_NonDiag \ |
1259 | 63.2M | MulAcc(c, d, A[i], A[i]) |
1260 | | |
1261 | | #define Squ_End(n) \ |
1262 | 5.12M | Acc3WordsBy2(c, d, e) \ |
1263 | 5.12M | R[2*n-3] = c; \ |
1264 | 5.12M | MultiplyWords(p, A[n-1], A[n-1])\ |
1265 | 5.12M | Acc2WordsBy2(p, e) \ |
1266 | 5.12M | R[2*n-2] = LowWord(p); \ |
1267 | 5.12M | R[2*n-1] = HighWord(p); |
1268 | | |
1269 | | |
1270 | | void Baseline_Multiply2(word *R, const word *AA, const word *BB) |
1271 | 403M | { |
1272 | | // http://github.com/weidai11/cryptopp/issues/188 |
1273 | 403M | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1274 | 403M | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1275 | | |
1276 | 403M | Mul_2 |
1277 | 403M | } |
1278 | | |
1279 | | void Baseline_Multiply4(word *R, const word *AA, const word *BB) |
1280 | 33.7k | { |
1281 | | // http://github.com/weidai11/cryptopp/issues/188 |
1282 | 33.7k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1283 | 33.7k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1284 | | |
1285 | 33.7k | Mul_4 |
1286 | 33.7k | } |
1287 | | |
1288 | | void Baseline_Multiply8(word *R, const word *AA, const word *BB) |
1289 | 23.9k | { |
1290 | | // http://github.com/weidai11/cryptopp/issues/188 |
1291 | 23.9k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1292 | 23.9k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1293 | | |
1294 | 23.9k | Mul_8 |
1295 | 23.9k | } |
1296 | | |
1297 | | void Baseline_Square2(word *R, const word *AA) |
1298 | 435k | { |
1299 | | // http://github.com/weidai11/cryptopp/issues/188 |
1300 | 435k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1301 | | |
1302 | 435k | Squ_2 |
1303 | 435k | } |
1304 | | |
1305 | | void Baseline_Square4(word *R, const word *AA) |
1306 | 132k | { |
1307 | | // http://github.com/weidai11/cryptopp/issues/188 |
1308 | 132k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1309 | | |
1310 | 132k | Squ_4 |
1311 | 132k | } |
1312 | | |
1313 | | void Baseline_Square8(word *R, const word *AA) |
1314 | 107k | { |
1315 | | // http://github.com/weidai11/cryptopp/issues/188 |
1316 | 107k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1317 | | |
1318 | 107k | Squ_8 |
1319 | 107k | } |
1320 | | |
1321 | | void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB) |
1322 | 360k | { |
1323 | | // http://github.com/weidai11/cryptopp/issues/188 |
1324 | 360k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1325 | 360k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1326 | | |
1327 | 360k | Bot_2 |
1328 | | |
1329 | | // http://github.com/weidai11/cryptopp/issues/340 |
1330 | 360k | #if defined(TWO_64_BIT_WORDS) |
1331 | 360k | CRYPTOPP_UNUSED(d0); CRYPTOPP_UNUSED(d1); |
1332 | 360k | #endif |
1333 | 360k | } |
1334 | | |
1335 | | void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB) |
1336 | 111k | { |
1337 | | // http://github.com/weidai11/cryptopp/issues/188 |
1338 | 111k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1339 | 111k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1340 | | |
1341 | 111k | Bot_4 |
1342 | 111k | } |
1343 | | |
1344 | | void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB) |
1345 | 80.2k | { |
1346 | | // http://github.com/weidai11/cryptopp/issues/188 |
1347 | 80.2k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1348 | 80.2k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1349 | | |
1350 | 80.2k | Bot_8 |
1351 | 80.2k | } |
1352 | | |
1353 | | #define Top_Begin(n) \ |
1354 | 186k | Declare2Words(p) \ |
1355 | 186k | word c; \ |
1356 | 186k | Declare2Words(d) \ |
1357 | 186k | MultiplyWords(p, A[0], B[n-2]);\ |
1358 | 186k | AssignWord(d, HighWord(p)); |
1359 | | |
1360 | | #define Top_Acc(i, j) \ |
1361 | 735k | MultiplyWords(p, A[i], B[j]);\ |
1362 | 735k | Acc2WordsBy1(d, HighWord(p)); |
1363 | | |
1364 | | #define Top_SaveAcc0(i, j) \ |
1365 | 186k | c = LowWord(d); \ |
1366 | 186k | AssignWord(d, HighWord(d)) \ |
1367 | 186k | MulAcc(c, d, A[i], B[j]) |
1368 | | |
1369 | | #define Top_SaveAcc1(i, j) \ |
1370 | 186k | c = L<c; \ |
1371 | 186k | Acc2WordsBy1(d, c); \ |
1372 | 186k | c = LowWord(d); \ |
1373 | 186k | AssignWord(d, HighWord(d)) \ |
1374 | 186k | MulAcc(c, d, A[i], B[j]) |
1375 | | |
1376 | | void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L) |
1377 | 279k | { |
1378 | 279k | CRYPTOPP_UNUSED(L); |
1379 | 279k | word T[4]; |
1380 | 279k | Baseline_Multiply2(T, A, B); |
1381 | 279k | R[0] = T[2]; |
1382 | 279k | R[1] = T[3]; |
1383 | 279k | } |
1384 | | |
1385 | | void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L) |
1386 | 104k | { |
1387 | | // http://github.com/weidai11/cryptopp/issues/188 |
1388 | 104k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1389 | 104k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1390 | | |
1391 | 104k | Top_Begin(4) |
1392 | 104k | Top_Acc(1, 1) Top_Acc(2, 0) \ |
1393 | 104k | Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ |
1394 | 104k | Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ |
1395 | 104k | Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ |
1396 | 104k | Mul_End(1, 3) |
1397 | 104k | } |
1398 | | |
1399 | | void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L) |
1400 | 78.1k | { |
1401 | | // http://github.com/weidai11/cryptopp/issues/188 |
1402 | 78.1k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1403 | 78.1k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1404 | | |
1405 | 78.1k | Top_Begin(8) |
1406 | 78.1k | Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ |
1407 | 78.1k | Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ |
1408 | 78.1k | Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ |
1409 | 78.1k | Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ |
1410 | 78.1k | Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ |
1411 | 78.1k | Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ |
1412 | 78.1k | Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ |
1413 | 78.1k | Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ |
1414 | 78.1k | Mul_End(5, 7) |
1415 | 78.1k | } |
1416 | | |
1417 | | #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available |
1418 | | void Baseline_Multiply16(word *R, const word *AA, const word *BB) |
1419 | 13.0M | { |
1420 | | // http://github.com/weidai11/cryptopp/issues/188 |
1421 | 13.0M | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1422 | 13.0M | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1423 | | |
1424 | 13.0M | Mul_16 |
1425 | 13.0M | } |
1426 | | |
1427 | | void Baseline_Square16(word *R, const word *AA) |
1428 | 4.45M | { |
1429 | | // http://github.com/weidai11/cryptopp/issues/188 |
1430 | 4.45M | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1431 | | |
1432 | 4.45M | Squ_16 |
1433 | 4.45M | } |
1434 | | |
1435 | | void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB) |
1436 | 2.28M | { |
1437 | | // http://github.com/weidai11/cryptopp/issues/188 |
1438 | 2.28M | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1439 | 2.28M | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1440 | | |
1441 | 2.28M | Bot_16 |
1442 | 2.28M | } |
1443 | | |
1444 | | void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L) |
1445 | 4.20k | { |
1446 | | // http://github.com/weidai11/cryptopp/issues/188 |
1447 | 4.20k | MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA); |
1448 | 4.20k | MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB); |
1449 | | |
1450 | 4.20k | Top_Begin(16) |
1451 | 4.20k | Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ |
1452 | 4.20k | Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ |
1453 | 4.20k | Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ |
1454 | 4.20k | Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ |
1455 | 4.20k | Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ |
1456 | 4.20k | Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ |
1457 | 4.20k | Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ |
1458 | 4.20k | Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ |
1459 | 4.20k | Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ |
1460 | 4.20k | Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ |
1461 | 4.20k | Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ |
1462 | 4.20k | Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ |
1463 | 4.20k | Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ |
1464 | 4.20k | Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ |
1465 | 4.20k | Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ |
1466 | 4.20k | Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ |
1467 | 4.20k | Mul_End(13, 15) |
1468 | 4.20k | } |
1469 | | #endif |
1470 | | |
1471 | | // ******************************************************** |
1472 | | |
1473 | | #if CRYPTOPP_INTEGER_SSE2 |
1474 | | |
1475 | | CRYPTOPP_ALIGN_DATA(16) |
1476 | | CRYPTOPP_TABLE |
1477 | | const word32 s_maskLow16[4] = { |
1478 | | 0xffff,0xffff,0xffff,0xffff |
1479 | | }; |
1480 | | |
1481 | | #undef Mul_Begin |
1482 | | #undef Mul_Acc |
1483 | | #undef Top_Begin |
1484 | | #undef Top_Acc |
1485 | | #undef Squ_Acc |
1486 | | #undef Squ_NonDiag |
1487 | | #undef Squ_Diag |
1488 | | #undef Squ_SaveAcc |
1489 | | #undef Squ_Begin |
1490 | | #undef Mul_SaveAcc |
1491 | | #undef Bot_Acc |
1492 | | #undef Bot_SaveAcc |
1493 | | #undef Bot_End |
1494 | | #undef Squ_End |
1495 | | #undef Mul_End |
1496 | | |
1497 | | #define SSE2_FinalSave(k) \ |
1498 | | AS2( psllq xmm5, 16) \ |
1499 | | AS2( paddq xmm4, xmm5) \ |
1500 | | AS2( movq QWORD PTR [ecx+8*(k)], xmm4) |
1501 | | |
1502 | | #define SSE2_SaveShift(k) \ |
1503 | | AS2( movq xmm0, xmm6) \ |
1504 | | AS2( punpckhqdq xmm6, xmm0) \ |
1505 | | AS2( movq xmm1, xmm7) \ |
1506 | | AS2( punpckhqdq xmm7, xmm1) \ |
1507 | | AS2( paddd xmm6, xmm0) \ |
1508 | | AS2( pslldq xmm6, 4) \ |
1509 | | AS2( paddd xmm7, xmm1) \ |
1510 | | AS2( paddd xmm4, xmm6) \ |
1511 | | AS2( pslldq xmm7, 4) \ |
1512 | | AS2( movq xmm6, xmm4) \ |
1513 | | AS2( paddd xmm5, xmm7) \ |
1514 | | AS2( movq xmm7, xmm5) \ |
1515 | | AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ |
1516 | | AS2( psrlq xmm6, 16) \ |
1517 | | AS2( paddq xmm6, xmm7) \ |
1518 | | AS2( punpckhqdq xmm4, xmm0) \ |
1519 | | AS2( punpckhqdq xmm5, xmm0) \ |
1520 | | AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ |
1521 | | AS2( psrlq xmm6, 3*16) \ |
1522 | | AS2( paddd xmm4, xmm6) \ |
1523 | | |
1524 | | #define Squ_SSE2_SaveShift(k) \ |
1525 | | AS2( movq xmm0, xmm6) \ |
1526 | | AS2( punpckhqdq xmm6, xmm0) \ |
1527 | | AS2( movq xmm1, xmm7) \ |
1528 | | AS2( punpckhqdq xmm7, xmm1) \ |
1529 | | AS2( paddd xmm6, xmm0) \ |
1530 | | AS2( pslldq xmm6, 4) \ |
1531 | | AS2( paddd xmm7, xmm1) \ |
1532 | | AS2( paddd xmm4, xmm6) \ |
1533 | | AS2( pslldq xmm7, 4) \ |
1534 | | AS2( movhlps xmm6, xmm4) \ |
1535 | | AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ |
1536 | | AS2( paddd xmm5, xmm7) \ |
1537 | | AS2( movhps QWORD PTR [esp+12], xmm5)\ |
1538 | | AS2( psrlq xmm4, 16) \ |
1539 | | AS2( paddq xmm4, xmm5) \ |
1540 | | AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ |
1541 | | AS2( psrlq xmm4, 3*16) \ |
1542 | | AS2( paddd xmm4, xmm6) \ |
1543 | | AS2( movq QWORD PTR [esp+4], xmm4)\ |
1544 | | |
1545 | | #define SSE2_FirstMultiply(i) \ |
1546 | | AS2( movdqa xmm7, [esi+(i)*16])\ |
1547 | | AS2( movdqa xmm5, [edi-(i)*16])\ |
1548 | | AS2( pmuludq xmm5, xmm7) \ |
1549 | | AS2( movdqa xmm4, [ebx])\ |
1550 | | AS2( movdqa xmm6, xmm4) \ |
1551 | | AS2( pand xmm4, xmm5) \ |
1552 | | AS2( psrld xmm5, 16) \ |
1553 | | AS2( pmuludq xmm7, [edx-(i)*16])\ |
1554 | | AS2( pand xmm6, xmm7) \ |
1555 | | AS2( psrld xmm7, 16) |
1556 | | |
1557 | | #define Squ_Begin(n) \ |
1558 | | SquPrologue \ |
1559 | | AS2( mov esi, esp)\ |
1560 | | AS2( and esp, 0xfffffff0)\ |
1561 | | AS2( lea edi, [esp-32*n])\ |
1562 | | AS2( sub esp, 32*n+16)\ |
1563 | | AS1( push esi)\ |
1564 | | AS2( mov esi, edi) \ |
1565 | | AS2( xor edx, edx) \ |
1566 | | ASL(1) \ |
1567 | | ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ |
1568 | | ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ |
1569 | | AS2( movdqa [edi+2*edx], xmm0) \ |
1570 | | AS2( psrlq xmm0, 32) \ |
1571 | | AS2( movdqa [edi+2*edx+16], xmm0) \ |
1572 | | AS2( movdqa [edi+16*n+2*edx], xmm1) \ |
1573 | | AS2( psrlq xmm1, 32) \ |
1574 | | AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ |
1575 | | AS2( add edx, 16) \ |
1576 | | AS2( cmp edx, 8*(n)) \ |
1577 | | ASJ( jne, 1, b) \ |
1578 | | AS2( lea edx, [edi+16*n])\ |
1579 | | SSE2_FirstMultiply(0) \ |
1580 | | |
1581 | | #define Squ_Acc(i) \ |
1582 | | ASL(LSqu##i) \ |
1583 | | AS2( movdqa xmm1, [esi+(i)*16]) \ |
1584 | | AS2( movdqa xmm0, [edi-(i)*16]) \ |
1585 | | AS2( movdqa xmm2, [ebx]) \ |
1586 | | AS2( pmuludq xmm0, xmm1) \ |
1587 | | AS2( pmuludq xmm1, [edx-(i)*16]) \ |
1588 | | AS2( movdqa xmm3, xmm2) \ |
1589 | | AS2( pand xmm2, xmm0) \ |
1590 | | AS2( psrld xmm0, 16) \ |
1591 | | AS2( paddd xmm4, xmm2) \ |
1592 | | AS2( paddd xmm5, xmm0) \ |
1593 | | AS2( pand xmm3, xmm1) \ |
1594 | | AS2( psrld xmm1, 16) \ |
1595 | | AS2( paddd xmm6, xmm3) \ |
1596 | | AS2( paddd xmm7, xmm1) \ |
1597 | | |
1598 | | #define Squ_Acc1(i) |
1599 | | #define Squ_Acc2(i) ASC(call, LSqu##i) |
1600 | | #define Squ_Acc3(i) Squ_Acc2(i) |
1601 | | #define Squ_Acc4(i) Squ_Acc2(i) |
1602 | | #define Squ_Acc5(i) Squ_Acc2(i) |
1603 | | #define Squ_Acc6(i) Squ_Acc2(i) |
1604 | | #define Squ_Acc7(i) Squ_Acc2(i) |
1605 | | #define Squ_Acc8(i) Squ_Acc2(i) |
1606 | | |
1607 | | #define SSE2_End(E, n) \ |
1608 | | SSE2_SaveShift(2*(n)-3) \ |
1609 | | AS2( movdqa xmm7, [esi+16]) \ |
1610 | | AS2( movdqa xmm0, [edi]) \ |
1611 | | AS2( pmuludq xmm0, xmm7) \ |
1612 | | AS2( movdqa xmm2, [ebx]) \ |
1613 | | AS2( pmuludq xmm7, [edx]) \ |
1614 | | AS2( movdqa xmm6, xmm2) \ |
1615 | | AS2( pand xmm2, xmm0) \ |
1616 | | AS2( psrld xmm0, 16) \ |
1617 | | AS2( paddd xmm4, xmm2) \ |
1618 | | AS2( paddd xmm5, xmm0) \ |
1619 | | AS2( pand xmm6, xmm7) \ |
1620 | | AS2( psrld xmm7, 16) \ |
1621 | | SSE2_SaveShift(2*(n)-2) \ |
1622 | | SSE2_FinalSave(2*(n)-1) \ |
1623 | | AS1( pop esp)\ |
1624 | | E |
1625 | | |
1626 | | #define Squ_End(n) SSE2_End(SquEpilogue, n) |
1627 | | #define Mul_End(n) SSE2_End(MulEpilogue, n) |
1628 | | #define Top_End(n) SSE2_End(TopEpilogue, n) |
1629 | | |
1630 | | #define Squ_Column1(k, i) \ |
1631 | | Squ_SSE2_SaveShift(k) \ |
1632 | | AS2( add esi, 16) \ |
1633 | | SSE2_FirstMultiply(1)\ |
1634 | | Squ_Acc##i(i) \ |
1635 | | AS2( paddd xmm4, xmm4) \ |
1636 | | AS2( paddd xmm5, xmm5) \ |
1637 | | AS2( movdqa xmm3, [esi]) \ |
1638 | | AS2( movq xmm1, QWORD PTR [esi+8]) \ |
1639 | | AS2( pmuludq xmm1, xmm3) \ |
1640 | | AS2( pmuludq xmm3, xmm3) \ |
1641 | | AS2( movdqa xmm0, [ebx])\ |
1642 | | AS2( movdqa xmm2, xmm0) \ |
1643 | | AS2( pand xmm0, xmm1) \ |
1644 | | AS2( psrld xmm1, 16) \ |
1645 | | AS2( paddd xmm6, xmm0) \ |
1646 | | AS2( paddd xmm7, xmm1) \ |
1647 | | AS2( pand xmm2, xmm3) \ |
1648 | | AS2( psrld xmm3, 16) \ |
1649 | | AS2( paddd xmm6, xmm6) \ |
1650 | | AS2( paddd xmm7, xmm7) \ |
1651 | | AS2( paddd xmm4, xmm2) \ |
1652 | | AS2( paddd xmm5, xmm3) \ |
1653 | | AS2( movq xmm0, QWORD PTR [esp+4])\ |
1654 | | AS2( movq xmm1, QWORD PTR [esp+12])\ |
1655 | | AS2( paddd xmm4, xmm0)\ |
1656 | | AS2( paddd xmm5, xmm1)\ |
1657 | | |
1658 | | #define Squ_Column0(k, i) \ |
1659 | | Squ_SSE2_SaveShift(k) \ |
1660 | | AS2( add edi, 16) \ |
1661 | | AS2( add edx, 16) \ |
1662 | | SSE2_FirstMultiply(1)\ |
1663 | | Squ_Acc##i(i) \ |
1664 | | AS2( paddd xmm6, xmm6) \ |
1665 | | AS2( paddd xmm7, xmm7) \ |
1666 | | AS2( paddd xmm4, xmm4) \ |
1667 | | AS2( paddd xmm5, xmm5) \ |
1668 | | AS2( movq xmm0, QWORD PTR [esp+4])\ |
1669 | | AS2( movq xmm1, QWORD PTR [esp+12])\ |
1670 | | AS2( paddd xmm4, xmm0)\ |
1671 | | AS2( paddd xmm5, xmm1)\ |
1672 | | |
1673 | | #define SSE2_MulAdd45 \ |
1674 | | AS2( movdqa xmm7, [esi]) \ |
1675 | | AS2( movdqa xmm0, [edi]) \ |
1676 | | AS2( pmuludq xmm0, xmm7) \ |
1677 | | AS2( movdqa xmm2, [ebx]) \ |
1678 | | AS2( pmuludq xmm7, [edx]) \ |
1679 | | AS2( movdqa xmm6, xmm2) \ |
1680 | | AS2( pand xmm2, xmm0) \ |
1681 | | AS2( psrld xmm0, 16) \ |
1682 | | AS2( paddd xmm4, xmm2) \ |
1683 | | AS2( paddd xmm5, xmm0) \ |
1684 | | AS2( pand xmm6, xmm7) \ |
1685 | | AS2( psrld xmm7, 16) |
1686 | | |
1687 | | #define Mul_Begin(n) \ |
1688 | | MulPrologue \ |
1689 | | AS2( mov esi, esp)\ |
1690 | | AS2( and esp, 0xfffffff0)\ |
1691 | | AS2( sub esp, 48*n+16)\ |
1692 | | AS1( push esi)\ |
1693 | | AS2( xor edx, edx) \ |
1694 | | ASL(1) \ |
1695 | | ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ |
1696 | | ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ |
1697 | | ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ |
1698 | | AS2( movdqa [esp+20+2*edx], xmm0) \ |
1699 | | AS2( psrlq xmm0, 32) \ |
1700 | | AS2( movdqa [esp+20+2*edx+16], xmm0) \ |
1701 | | AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ |
1702 | | AS2( psrlq xmm1, 32) \ |
1703 | | AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ |
1704 | | AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ |
1705 | | AS2( psrlq xmm2, 32) \ |
1706 | | AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ |
1707 | | AS2( add edx, 16) \ |
1708 | | AS2( cmp edx, 8*(n)) \ |
1709 | | ASJ( jne, 1, b) \ |
1710 | | AS2( lea edi, [esp+20])\ |
1711 | | AS2( lea edx, [esp+20+16*n])\ |
1712 | | AS2( lea esi, [esp+20+32*n])\ |
1713 | | SSE2_FirstMultiply(0) \ |
1714 | | |
1715 | | #define Mul_Acc(i) \ |
1716 | | ASL(LMul##i) \ |
1717 | | AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ |
1718 | | AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ |
1719 | | AS2( movdqa xmm2, [ebx]) \ |
1720 | | AS2( pmuludq xmm0, xmm1) \ |
1721 | | AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ |
1722 | | AS2( movdqa xmm3, xmm2) \ |
1723 | | AS2( pand xmm2, xmm0) \ |
1724 | | AS2( psrld xmm0, 16) \ |
1725 | | AS2( paddd xmm4, xmm2) \ |
1726 | | AS2( paddd xmm5, xmm0) \ |
1727 | | AS2( pand xmm3, xmm1) \ |
1728 | | AS2( psrld xmm1, 16) \ |
1729 | | AS2( paddd xmm6, xmm3) \ |
1730 | | AS2( paddd xmm7, xmm1) \ |
1731 | | |
1732 | | #define Mul_Acc1(i) |
1733 | | #define Mul_Acc2(i) ASC(call, LMul##i) |
1734 | | #define Mul_Acc3(i) Mul_Acc2(i) |
1735 | | #define Mul_Acc4(i) Mul_Acc2(i) |
1736 | | #define Mul_Acc5(i) Mul_Acc2(i) |
1737 | | #define Mul_Acc6(i) Mul_Acc2(i) |
1738 | | #define Mul_Acc7(i) Mul_Acc2(i) |
1739 | | #define Mul_Acc8(i) Mul_Acc2(i) |
1740 | | #define Mul_Acc9(i) Mul_Acc2(i) |
1741 | | #define Mul_Acc10(i) Mul_Acc2(i) |
1742 | | #define Mul_Acc11(i) Mul_Acc2(i) |
1743 | | #define Mul_Acc12(i) Mul_Acc2(i) |
1744 | | #define Mul_Acc13(i) Mul_Acc2(i) |
1745 | | #define Mul_Acc14(i) Mul_Acc2(i) |
1746 | | #define Mul_Acc15(i) Mul_Acc2(i) |
1747 | | #define Mul_Acc16(i) Mul_Acc2(i) |
1748 | | |
1749 | | #define Mul_Column1(k, i) \ |
1750 | | SSE2_SaveShift(k) \ |
1751 | | AS2( add esi, 16) \ |
1752 | | SSE2_MulAdd45\ |
1753 | | Mul_Acc##i(i) \ |
1754 | | |
1755 | | #define Mul_Column0(k, i) \ |
1756 | | SSE2_SaveShift(k) \ |
1757 | | AS2( add edi, 16) \ |
1758 | | AS2( add edx, 16) \ |
1759 | | SSE2_MulAdd45\ |
1760 | | Mul_Acc##i(i) \ |
1761 | | |
1762 | | #define Bot_Acc(i) \ |
1763 | | AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ |
1764 | | AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ |
1765 | | AS2( pmuludq xmm0, xmm1) \ |
1766 | | AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ |
1767 | | AS2( paddq xmm4, xmm0) \ |
1768 | | AS2( paddd xmm6, xmm1) |
1769 | | |
1770 | | #define Bot_SaveAcc(k) \ |
1771 | | SSE2_SaveShift(k) \ |
1772 | | AS2( add edi, 16) \ |
1773 | | AS2( add edx, 16) \ |
1774 | | AS2( movdqa xmm6, [esi]) \ |
1775 | | AS2( movdqa xmm0, [edi]) \ |
1776 | | AS2( pmuludq xmm0, xmm6) \ |
1777 | | AS2( paddq xmm4, xmm0) \ |
1778 | | AS2( psllq xmm5, 16) \ |
1779 | | AS2( paddq xmm4, xmm5) \ |
1780 | | AS2( pmuludq xmm6, [edx]) |
1781 | | |
1782 | | #define Bot_End(n) \ |
1783 | | AS2( movhlps xmm7, xmm6) \ |
1784 | | AS2( paddd xmm6, xmm7) \ |
1785 | | AS2( psllq xmm6, 32) \ |
1786 | | AS2( paddd xmm4, xmm6) \ |
1787 | | AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \ |
1788 | | AS1( pop esp)\ |
1789 | | MulEpilogue |
1790 | | |
1791 | | #define Top_Begin(n) \ |
1792 | | TopPrologue \ |
1793 | | AS2( mov edx, esp)\ |
1794 | | AS2( and esp, 0xfffffff0)\ |
1795 | | AS2( sub esp, 48*n+16)\ |
1796 | | AS1( push edx)\ |
1797 | | AS2( xor edx, edx) \ |
1798 | | ASL(1) \ |
1799 | | ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ |
1800 | | ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ |
1801 | | ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ |
1802 | | AS2( movdqa [esp+20+2*edx], xmm0) \ |
1803 | | AS2( psrlq xmm0, 32) \ |
1804 | | AS2( movdqa [esp+20+2*edx+16], xmm0) \ |
1805 | | AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ |
1806 | | AS2( psrlq xmm1, 32) \ |
1807 | | AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ |
1808 | | AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ |
1809 | | AS2( psrlq xmm2, 32) \ |
1810 | | AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ |
1811 | | AS2( add edx, 16) \ |
1812 | | AS2( cmp edx, 8*(n)) \ |
1813 | | ASJ( jne, 1, b) \ |
1814 | | AS2( mov eax, esi) \ |
1815 | | AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\ |
1816 | | AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\ |
1817 | | AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\ |
1818 | | AS2( pxor xmm4, xmm4)\ |
1819 | | AS2( pxor xmm5, xmm5) |
1820 | | |
1821 | | #define Top_Acc(i) \ |
1822 | | AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \ |
1823 | | AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ |
1824 | | AS2( psrlq xmm0, 48) \ |
1825 | | AS2( paddd xmm5, xmm0)\ |
1826 | | |
1827 | | #define Top_Column0(i) \ |
1828 | | AS2( psllq xmm5, 32) \ |
1829 | | AS2( add edi, 16) \ |
1830 | | AS2( add edx, 16) \ |
1831 | | SSE2_MulAdd45\ |
1832 | | Mul_Acc##i(i) \ |
1833 | | |
1834 | | #define Top_Column1(i) \ |
1835 | | SSE2_SaveShift(0) \ |
1836 | | AS2( add esi, 16) \ |
1837 | | SSE2_MulAdd45\ |
1838 | | Mul_Acc##i(i) \ |
1839 | | AS2( shr eax, 16) \ |
1840 | | AS2( movd xmm0, eax)\ |
1841 | | AS2( movd xmm1, [ecx+4])\ |
1842 | | AS2( psrld xmm1, 16)\ |
1843 | | AS2( pcmpgtd xmm1, xmm0)\ |
1844 | | AS2( psrld xmm1, 31)\ |
1845 | | AS2( paddd xmm4, xmm1)\ |
1846 | | |
1847 | | void SSE2_Square4(word *C, const word *A) |
1848 | | { |
1849 | | Squ_Begin(2) |
1850 | | Squ_Column0(0, 1) |
1851 | | Squ_End(2) |
1852 | | } |
1853 | | |
1854 | | void SSE2_Square8(word *C, const word *A) |
1855 | | { |
1856 | | Squ_Begin(4) |
1857 | | #ifndef __GNUC__ |
1858 | | ASJ( jmp, 0, f) |
1859 | | Squ_Acc(2) |
1860 | | AS1( ret) ASL(0) |
1861 | | #endif |
1862 | | Squ_Column0(0, 1) |
1863 | | Squ_Column1(1, 1) |
1864 | | Squ_Column0(2, 2) |
1865 | | Squ_Column1(3, 1) |
1866 | | Squ_Column0(4, 1) |
1867 | | Squ_End(4) |
1868 | | } |
1869 | | |
1870 | | void SSE2_Square16(word *C, const word *A) |
1871 | | { |
1872 | | Squ_Begin(8) |
1873 | | #ifndef __GNUC__ |
1874 | | ASJ( jmp, 0, f) |
1875 | | Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) |
1876 | | AS1( ret) ASL(0) |
1877 | | #endif |
1878 | | Squ_Column0(0, 1) |
1879 | | Squ_Column1(1, 1) |
1880 | | Squ_Column0(2, 2) |
1881 | | Squ_Column1(3, 2) |
1882 | | Squ_Column0(4, 3) |
1883 | | Squ_Column1(5, 3) |
1884 | | Squ_Column0(6, 4) |
1885 | | Squ_Column1(7, 3) |
1886 | | Squ_Column0(8, 3) |
1887 | | Squ_Column1(9, 2) |
1888 | | Squ_Column0(10, 2) |
1889 | | Squ_Column1(11, 1) |
1890 | | Squ_Column0(12, 1) |
1891 | | Squ_End(8) |
1892 | | } |
1893 | | |
1894 | | void SSE2_Square32(word *C, const word *A) |
1895 | | { |
1896 | | Squ_Begin(16) |
1897 | | ASJ( jmp, 0, f) |
1898 | | Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) |
1899 | | AS1( ret) ASL(0) |
1900 | | Squ_Column0(0, 1) |
1901 | | Squ_Column1(1, 1) |
1902 | | Squ_Column0(2, 2) |
1903 | | Squ_Column1(3, 2) |
1904 | | Squ_Column0(4, 3) |
1905 | | Squ_Column1(5, 3) |
1906 | | Squ_Column0(6, 4) |
1907 | | Squ_Column1(7, 4) |
1908 | | Squ_Column0(8, 5) |
1909 | | Squ_Column1(9, 5) |
1910 | | Squ_Column0(10, 6) |
1911 | | Squ_Column1(11, 6) |
1912 | | Squ_Column0(12, 7) |
1913 | | Squ_Column1(13, 7) |
1914 | | Squ_Column0(14, 8) |
1915 | | Squ_Column1(15, 7) |
1916 | | Squ_Column0(16, 7) |
1917 | | Squ_Column1(17, 6) |
1918 | | Squ_Column0(18, 6) |
1919 | | Squ_Column1(19, 5) |
1920 | | Squ_Column0(20, 5) |
1921 | | Squ_Column1(21, 4) |
1922 | | Squ_Column0(22, 4) |
1923 | | Squ_Column1(23, 3) |
1924 | | Squ_Column0(24, 3) |
1925 | | Squ_Column1(25, 2) |
1926 | | Squ_Column0(26, 2) |
1927 | | Squ_Column1(27, 1) |
1928 | | Squ_Column0(28, 1) |
1929 | | Squ_End(16) |
1930 | | } |
1931 | | |
1932 | | void SSE2_Multiply4(word *C, const word *A, const word *B) |
1933 | | { |
1934 | | Mul_Begin(2) |
1935 | | #ifndef __GNUC__ |
1936 | | ASJ( jmp, 0, f) |
1937 | | Mul_Acc(2) |
1938 | | AS1( ret) ASL(0) |
1939 | | #endif |
1940 | | Mul_Column0(0, 2) |
1941 | | Mul_End(2) |
1942 | | } |
1943 | | |
1944 | | void SSE2_Multiply8(word *C, const word *A, const word *B) |
1945 | | { |
1946 | | Mul_Begin(4) |
1947 | | #ifndef __GNUC__ |
1948 | | ASJ( jmp, 0, f) |
1949 | | Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
1950 | | AS1( ret) ASL(0) |
1951 | | #endif |
1952 | | Mul_Column0(0, 2) |
1953 | | Mul_Column1(1, 3) |
1954 | | Mul_Column0(2, 4) |
1955 | | Mul_Column1(3, 3) |
1956 | | Mul_Column0(4, 2) |
1957 | | Mul_End(4) |
1958 | | } |
1959 | | |
1960 | | void SSE2_Multiply16(word *C, const word *A, const word *B) |
1961 | | { |
1962 | | Mul_Begin(8) |
1963 | | #ifndef __GNUC__ |
1964 | | ASJ( jmp, 0, f) |
1965 | | Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
1966 | | AS1( ret) ASL(0) |
1967 | | #endif |
1968 | | Mul_Column0(0, 2) |
1969 | | Mul_Column1(1, 3) |
1970 | | Mul_Column0(2, 4) |
1971 | | Mul_Column1(3, 5) |
1972 | | Mul_Column0(4, 6) |
1973 | | Mul_Column1(5, 7) |
1974 | | Mul_Column0(6, 8) |
1975 | | Mul_Column1(7, 7) |
1976 | | Mul_Column0(8, 6) |
1977 | | Mul_Column1(9, 5) |
1978 | | Mul_Column0(10, 4) |
1979 | | Mul_Column1(11, 3) |
1980 | | Mul_Column0(12, 2) |
1981 | | Mul_End(8) |
1982 | | } |
1983 | | |
1984 | | void SSE2_Multiply32(word *C, const word *A, const word *B) |
1985 | | { |
1986 | | Mul_Begin(16) |
1987 | | ASJ( jmp, 0, f) |
1988 | | Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
1989 | | AS1( ret) ASL(0) |
1990 | | Mul_Column0(0, 2) |
1991 | | Mul_Column1(1, 3) |
1992 | | Mul_Column0(2, 4) |
1993 | | Mul_Column1(3, 5) |
1994 | | Mul_Column0(4, 6) |
1995 | | Mul_Column1(5, 7) |
1996 | | Mul_Column0(6, 8) |
1997 | | Mul_Column1(7, 9) |
1998 | | Mul_Column0(8, 10) |
1999 | | Mul_Column1(9, 11) |
2000 | | Mul_Column0(10, 12) |
2001 | | Mul_Column1(11, 13) |
2002 | | Mul_Column0(12, 14) |
2003 | | Mul_Column1(13, 15) |
2004 | | Mul_Column0(14, 16) |
2005 | | Mul_Column1(15, 15) |
2006 | | Mul_Column0(16, 14) |
2007 | | Mul_Column1(17, 13) |
2008 | | Mul_Column0(18, 12) |
2009 | | Mul_Column1(19, 11) |
2010 | | Mul_Column0(20, 10) |
2011 | | Mul_Column1(21, 9) |
2012 | | Mul_Column0(22, 8) |
2013 | | Mul_Column1(23, 7) |
2014 | | Mul_Column0(24, 6) |
2015 | | Mul_Column1(25, 5) |
2016 | | Mul_Column0(26, 4) |
2017 | | Mul_Column1(27, 3) |
2018 | | Mul_Column0(28, 2) |
2019 | | Mul_End(16) |
2020 | | } |
2021 | | |
2022 | | void SSE2_MultiplyBottom4(word *C, const word *A, const word *B) |
2023 | | { |
2024 | | Mul_Begin(2) |
2025 | | Bot_SaveAcc(0) Bot_Acc(2) |
2026 | | Bot_End(2) |
2027 | | } |
2028 | | |
2029 | | void SSE2_MultiplyBottom8(word *C, const word *A, const word *B) |
2030 | | { |
2031 | | Mul_Begin(4) |
2032 | | #ifndef __GNUC__ |
2033 | | ASJ( jmp, 0, f) |
2034 | | Mul_Acc(3) Mul_Acc(2) |
2035 | | AS1( ret) ASL(0) |
2036 | | #endif |
2037 | | Mul_Column0(0, 2) |
2038 | | Mul_Column1(1, 3) |
2039 | | Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
2040 | | Bot_End(4) |
2041 | | } |
2042 | | |
2043 | | void SSE2_MultiplyBottom16(word *C, const word *A, const word *B) |
2044 | | { |
2045 | | Mul_Begin(8) |
2046 | | #ifndef __GNUC__ |
2047 | | ASJ( jmp, 0, f) |
2048 | | Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
2049 | | AS1( ret) ASL(0) |
2050 | | #endif |
2051 | | Mul_Column0(0, 2) |
2052 | | Mul_Column1(1, 3) |
2053 | | Mul_Column0(2, 4) |
2054 | | Mul_Column1(3, 5) |
2055 | | Mul_Column0(4, 6) |
2056 | | Mul_Column1(5, 7) |
2057 | | Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
2058 | | Bot_End(8) |
2059 | | } |
2060 | | |
2061 | | void SSE2_MultiplyBottom32(word *C, const word *A, const word *B) |
2062 | | { |
2063 | | Mul_Begin(16) |
2064 | | #ifndef __GNUC__ |
2065 | | ASJ( jmp, 0, f) |
2066 | | Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
2067 | | AS1( ret) ASL(0) |
2068 | | #endif |
2069 | | Mul_Column0(0, 2) |
2070 | | Mul_Column1(1, 3) |
2071 | | Mul_Column0(2, 4) |
2072 | | Mul_Column1(3, 5) |
2073 | | Mul_Column0(4, 6) |
2074 | | Mul_Column1(5, 7) |
2075 | | Mul_Column0(6, 8) |
2076 | | Mul_Column1(7, 9) |
2077 | | Mul_Column0(8, 10) |
2078 | | Mul_Column1(9, 11) |
2079 | | Mul_Column0(10, 12) |
2080 | | Mul_Column1(11, 13) |
2081 | | Mul_Column0(12, 14) |
2082 | | Mul_Column1(13, 15) |
2083 | | Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) |
2084 | | Bot_End(16) |
2085 | | } |
2086 | | |
2087 | | void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L) |
2088 | | { |
2089 | | Top_Begin(4) |
2090 | | Top_Acc(3) Top_Acc(2) Top_Acc(1) |
2091 | | #ifndef __GNUC__ |
2092 | | ASJ( jmp, 0, f) |
2093 | | Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
2094 | | AS1( ret) ASL(0) |
2095 | | #endif |
2096 | | Top_Column0(4) |
2097 | | Top_Column1(3) |
2098 | | Mul_Column0(0, 2) |
2099 | | Top_End(2) |
2100 | | } |
2101 | | |
2102 | | void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L) |
2103 | | { |
2104 | | Top_Begin(8) |
2105 | | Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) |
2106 | | #ifndef __GNUC__ |
2107 | | ASJ( jmp, 0, f) |
2108 | | Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
2109 | | AS1( ret) ASL(0) |
2110 | | #endif |
2111 | | Top_Column0(8) |
2112 | | Top_Column1(7) |
2113 | | Mul_Column0(0, 6) |
2114 | | Mul_Column1(1, 5) |
2115 | | Mul_Column0(2, 4) |
2116 | | Mul_Column1(3, 3) |
2117 | | Mul_Column0(4, 2) |
2118 | | Top_End(4) |
2119 | | } |
2120 | | |
2121 | | void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L) |
2122 | | { |
2123 | | Top_Begin(16) |
2124 | | Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) |
2125 | | #ifndef __GNUC__ |
2126 | | ASJ( jmp, 0, f) |
2127 | | Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) |
2128 | | AS1( ret) ASL(0) |
2129 | | #endif |
2130 | | Top_Column0(16) |
2131 | | Top_Column1(15) |
2132 | | Mul_Column0(0, 14) |
2133 | | Mul_Column1(1, 13) |
2134 | | Mul_Column0(2, 12) |
2135 | | Mul_Column1(3, 11) |
2136 | | Mul_Column0(4, 10) |
2137 | | Mul_Column1(5, 9) |
2138 | | Mul_Column0(6, 8) |
2139 | | Mul_Column1(7, 7) |
2140 | | Mul_Column0(8, 6) |
2141 | | Mul_Column1(9, 5) |
2142 | | Mul_Column0(10, 4) |
2143 | | Mul_Column1(11, 3) |
2144 | | Mul_Column0(12, 2) |
2145 | | Top_End(8) |
2146 | | } |
2147 | | |
2148 | | #endif // #if CRYPTOPP_INTEGER_SSE2 |
2149 | | |
2150 | | // ******************************************************** |
2151 | | |
2152 | | typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B); |
2153 | | typedef void (* PMul)(word *C, const word *A, const word *B); |
2154 | | typedef void (* PSqu)(word *C, const word *A); |
2155 | | typedef void (* PMulTop)(word *C, const word *A, const word *B, word L); |
2156 | | |
2157 | | #if CRYPTOPP_INTEGER_SSE2 |
2158 | | static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub; |
2159 | | static size_t s_recursionLimit = 8; |
2160 | | #else |
2161 | | static const size_t s_recursionLimit = 16; |
2162 | | #endif // CRYPTOPP_INTEGER_SSE2 |
2163 | | |
2164 | | static PMul s_pMul[9], s_pBot[9]; |
2165 | | static PSqu s_pSqu[9]; |
2166 | | static PMulTop s_pTop[9]; |
2167 | | |
2168 | | void SetFunctionPointers() |
2169 | 20 | { |
2170 | 20 | s_pMul[0] = &Baseline_Multiply2; |
2171 | 20 | s_pBot[0] = &Baseline_MultiplyBottom2; |
2172 | 20 | s_pSqu[0] = &Baseline_Square2; |
2173 | 20 | s_pTop[0] = &Baseline_MultiplyTop2; |
2174 | 20 | s_pTop[1] = &Baseline_MultiplyTop4; |
2175 | | |
2176 | | #if CRYPTOPP_INTEGER_SSE2 |
2177 | | if (HasSSE2()) |
2178 | | { |
2179 | | if (IsP4()) |
2180 | | { |
2181 | | s_pAdd = &SSE2_Add; |
2182 | | s_pSub = &SSE2_Sub; |
2183 | | } |
2184 | | |
2185 | | s_recursionLimit = 32; |
2186 | | |
2187 | | s_pMul[1] = &SSE2_Multiply4; |
2188 | | s_pMul[2] = &SSE2_Multiply8; |
2189 | | s_pMul[4] = &SSE2_Multiply16; |
2190 | | s_pMul[8] = &SSE2_Multiply32; |
2191 | | |
2192 | | s_pBot[1] = &SSE2_MultiplyBottom4; |
2193 | | s_pBot[2] = &SSE2_MultiplyBottom8; |
2194 | | s_pBot[4] = &SSE2_MultiplyBottom16; |
2195 | | s_pBot[8] = &SSE2_MultiplyBottom32; |
2196 | | |
2197 | | s_pSqu[1] = &SSE2_Square4; |
2198 | | s_pSqu[2] = &SSE2_Square8; |
2199 | | s_pSqu[4] = &SSE2_Square16; |
2200 | | s_pSqu[8] = &SSE2_Square32; |
2201 | | |
2202 | | s_pTop[2] = &SSE2_MultiplyTop8; |
2203 | | s_pTop[4] = &SSE2_MultiplyTop16; |
2204 | | s_pTop[8] = &SSE2_MultiplyTop32; |
2205 | | } |
2206 | | else |
2207 | | #endif // CRYPTOPP_INTEGER_SSE2 |
2208 | 20 | { |
2209 | 20 | s_pMul[1] = &Baseline_Multiply4; |
2210 | 20 | s_pMul[2] = &Baseline_Multiply8; |
2211 | | |
2212 | 20 | s_pBot[1] = &Baseline_MultiplyBottom4; |
2213 | 20 | s_pBot[2] = &Baseline_MultiplyBottom8; |
2214 | | |
2215 | 20 | s_pSqu[1] = &Baseline_Square4; |
2216 | 20 | s_pSqu[2] = &Baseline_Square8; |
2217 | | |
2218 | 20 | s_pTop[2] = &Baseline_MultiplyTop8; |
2219 | | |
2220 | 20 | #if !CRYPTOPP_INTEGER_SSE2 |
2221 | 20 | s_pMul[4] = &Baseline_Multiply16; |
2222 | 20 | s_pBot[4] = &Baseline_MultiplyBottom16; |
2223 | 20 | s_pSqu[4] = &Baseline_Square16; |
2224 | 20 | s_pTop[4] = &Baseline_MultiplyTop16; |
2225 | 20 | #endif // !CRYPTOPP_INTEGER_SSE2 |
2226 | 20 | } |
2227 | 20 | } |
2228 | | |
2229 | | inline int Add(word *C, const word *A, const word *B, size_t N) |
2230 | 61.4M | { |
2231 | | #if CRYPTOPP_INTEGER_SSE2 |
2232 | | return s_pAdd(N, C, A, B); |
2233 | | #else |
2234 | 61.4M | return Baseline_Add(N, C, A, B); |
2235 | 61.4M | #endif // CRYPTOPP_INTEGER_SSE2 |
2236 | 61.4M | } |
2237 | | |
2238 | | inline int Subtract(word *C, const word *A, const word *B, size_t N) |
2239 | 37.5M | { |
2240 | | #if CRYPTOPP_INTEGER_SSE2 |
2241 | | return s_pSub(N, C, A, B); |
2242 | | #else |
2243 | 37.5M | return Baseline_Sub(N, C, A, B); |
2244 | 37.5M | #endif // CRYPTOPP_INTEGER_SSE2 |
2245 | 37.5M | } |
2246 | | |
2247 | | // ******************************************************** |
2248 | | |
2249 | | |
2250 | 17.1M | #define A0 A |
2251 | 16.1M | #define A1 (A+N2) |
2252 | 10.6M | #define B0 B |
2253 | 9.57M | #define B1 (B+N2) |
2254 | | |
2255 | 24.6M | #define T0 T |
2256 | 4.04M | #define T1 (T+N2) |
2257 | 25.9M | #define T2 (T+N) |
2258 | 0 | #define T3 (T+N+N2) |
2259 | | |
2260 | 19.8M | #define R0 R |
2261 | 42.2M | #define R1 (R+N2) |
2262 | 26.6M | #define R2 (R+N) |
2263 | 9.93M | #define R3 (R+N+N2) |
2264 | | |
2265 | | // R[2*N] - result = A*B |
2266 | | // T[2*N] - temporary work space |
2267 | | // A[N] --- multiplier |
2268 | | // B[N] --- multiplicant |
2269 | | |
2270 | | void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N) |
2271 | 419M | { |
2272 | 419M | CRYPTOPP_ASSERT(N>=2 && N%2==0); |
2273 | | |
2274 | 419M | if (N <= s_recursionLimit) |
2275 | 416M | s_pMul[N/4](R, A, B); |
2276 | 3.34M | else |
2277 | 3.34M | { |
2278 | 3.34M | const size_t N2 = N/2; |
2279 | | |
2280 | 3.34M | size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; |
2281 | 3.34M | Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); |
2282 | | |
2283 | 3.34M | size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; |
2284 | 3.34M | Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); |
2285 | | |
2286 | 3.34M | RecursiveMultiply(R2, T2, A1, B1, N2); |
2287 | 3.34M | RecursiveMultiply(T0, T2, R0, R1, N2); |
2288 | 3.34M | RecursiveMultiply(R0, T2, A0, B0, N2); |
2289 | | |
2290 | | // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1 |
2291 | | |
2292 | 3.34M | int c2 = Add(R2, R2, R1, N2); |
2293 | 3.34M | int c3 = c2; |
2294 | 3.34M | c2 += Add(R1, R2, R0, N2); |
2295 | 3.34M | c3 += Add(R2, R2, R3, N2); |
2296 | | |
2297 | 3.34M | if (AN2 == BN2) |
2298 | 1.83M | c3 -= Subtract(R1, R1, T0, N); |
2299 | 1.50M | else |
2300 | 1.50M | c3 += Add(R1, R1, T0, N); |
2301 | | |
2302 | 3.34M | c3 += Increment(R2, N2, c2); |
2303 | 3.34M | CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2); |
2304 | 3.34M | Increment(R3, N2, c3); |
2305 | 3.34M | } |
2306 | 419M | } |
2307 | | |
2308 | | // R[2*N] - result = A*A |
2309 | | // T[2*N] - temporary work space |
2310 | | // A[N] --- number to be squared |
2311 | | |
2312 | | void RecursiveSquare(word *R, word *T, const word *A, size_t N) |
2313 | 8.37M | { |
2314 | 8.37M | CRYPTOPP_ASSERT(N && N%2==0); |
2315 | | |
2316 | 8.37M | if (N <= s_recursionLimit) |
2317 | 5.12M | s_pSqu[N/4](R, A); |
2318 | 3.25M | else |
2319 | 3.25M | { |
2320 | 3.25M | const size_t N2 = N/2; |
2321 | | |
2322 | 3.25M | RecursiveSquare(R0, T2, A0, N2); |
2323 | 3.25M | RecursiveSquare(R2, T2, A1, N2); |
2324 | 3.25M | RecursiveMultiply(T0, T2, A0, A1, N2); |
2325 | | |
2326 | 3.25M | int carry = Add(R1, R1, T0, N); |
2327 | 3.25M | carry += Add(R1, R1, T0, N); |
2328 | 3.25M | Increment(R3, N2, carry); |
2329 | 3.25M | } |
2330 | 8.37M | } |
2331 | | |
2332 | | // R[N] - bottom half of A*B |
2333 | | // T[3*N/2] - temporary work space |
2334 | | // A[N] - multiplier |
2335 | | // B[N] - multiplicant |
2336 | | |
2337 | | void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) |
2338 | 4.43M | { |
2339 | 4.43M | CRYPTOPP_ASSERT(N>=2 && N%2==0); |
2340 | | |
2341 | 4.43M | if (N <= s_recursionLimit) |
2342 | 2.77M | s_pBot[N/4](R, A, B); |
2343 | 1.66M | else |
2344 | 1.66M | { |
2345 | 1.66M | const size_t N2 = N/2; |
2346 | | |
2347 | 1.66M | RecursiveMultiply(R, T, A0, B0, N2); |
2348 | 1.66M | RecursiveMultiplyBottom(T0, T1, A1, B0, N2); |
2349 | 1.66M | Add(R1, R1, T0, N2); |
2350 | 1.66M | RecursiveMultiplyBottom(T0, T1, A0, B1, N2); |
2351 | 1.66M | Add(R1, R1, T0, N2); |
2352 | 1.66M | } |
2353 | 4.43M | } |
2354 | | |
2355 | | // R[N] --- upper half of A*B |
2356 | | // T[2*N] - temporary work space |
2357 | | // L[N] --- lower half of A*B |
2358 | | // A[N] --- multiplier |
2359 | | // B[N] --- multiplicant |
2360 | | |
2361 | | void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N) |
2362 | 1.07M | { |
2363 | 1.07M | CRYPTOPP_ASSERT(N>=2 && N%2==0); |
2364 | | |
2365 | 1.07M | if (N <= s_recursionLimit) |
2366 | 465k | s_pTop[N/4](R, A, B, L[N-1]); |
2367 | 613k | else |
2368 | 613k | { |
2369 | 613k | const size_t N2 = N/2; |
2370 | | |
2371 | 613k | size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; |
2372 | 613k | Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); |
2373 | | |
2374 | 613k | size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; |
2375 | 613k | Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); |
2376 | | |
2377 | 613k | RecursiveMultiply(T0, T2, R0, R1, N2); |
2378 | 613k | RecursiveMultiply(R0, T2, A1, B1, N2); |
2379 | | |
2380 | | // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1 |
2381 | | |
2382 | 613k | int t, c3; |
2383 | 613k | int c2 = Subtract(T2, L+N2, L, N2); |
2384 | | |
2385 | 613k | if (AN2 == BN2) |
2386 | 305k | { |
2387 | 305k | c2 -= Add(T2, T2, T0, N2); |
2388 | 305k | t = (Compare(T2, R0, N2) == -1); |
2389 | 305k | c3 = t - Subtract(T2, T2, T1, N2); |
2390 | 305k | } |
2391 | 307k | else |
2392 | 307k | { |
2393 | 307k | c2 += Subtract(T2, T2, T0, N2); |
2394 | 307k | t = (Compare(T2, R0, N2) == -1); |
2395 | 307k | c3 = t + Add(T2, T2, T1, N2); |
2396 | 307k | } |
2397 | | |
2398 | 613k | c2 += t; |
2399 | 613k | if (c2 >= 0) |
2400 | 593k | c3 += Increment(T2, N2, c2); |
2401 | 19.2k | else |
2402 | 19.2k | c3 -= Decrement(T2, N2, -c2); |
2403 | 613k | c3 += Add(R0, T2, R1, N2); |
2404 | | |
2405 | 613k | CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2); |
2406 | 613k | Increment(R1, N2, c3); |
2407 | 613k | } |
2408 | 1.07M | } |
2409 | | |
2410 | | inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N) |
2411 | 403M | { |
2412 | 403M | RecursiveMultiply(R, T, A, B, N); |
2413 | 403M | } |
2414 | | |
2415 | | inline void Square(word *R, word *T, const word *A, size_t N) |
2416 | 1.87M | { |
2417 | 1.87M | RecursiveSquare(R, T, A, N); |
2418 | 1.87M | } |
2419 | | |
2420 | | inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) |
2421 | 1.11M | { |
2422 | 1.11M | RecursiveMultiplyBottom(R, T, A, B, N); |
2423 | 1.11M | } |
2424 | | |
2425 | | // R[NA+NB] - result = A*B |
2426 | | // T[NA+NB] - temporary work space |
2427 | | // A[NA] ---- multiplier |
2428 | | // B[NB] ---- multiplicant |
2429 | | |
2430 | | void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB) |
2431 | 33.5M | { |
2432 | 33.5M | if (NA == NB) |
2433 | 2.96M | { |
2434 | | // Profiling guided the flow below. |
2435 | 2.96M | if (A != B) |
2436 | 1.96M | Multiply(R, T, A, B, NA); |
2437 | 1.00M | else |
2438 | 1.00M | Square(R, T, A, NA); |
2439 | | |
2440 | 2.96M | return; |
2441 | 2.96M | } |
2442 | | |
2443 | 30.5M | if (NA > NB) |
2444 | 11.9M | { |
2445 | 11.9M | std::swap(A, B); |
2446 | 11.9M | std::swap(NA, NB); |
2447 | 11.9M | } |
2448 | | |
2449 | 30.5M | CRYPTOPP_ASSERT(NB % NA == 0); |
2450 | | |
2451 | 30.5M | if (NA==2 && !A[1]) |
2452 | 12.7M | { |
2453 | | // Profiling guided the flow below. |
2454 | 12.7M | switch (A[0]) |
2455 | 12.7M | { |
2456 | 12.7M | default: |
2457 | 12.7M | R[NB] = LinearMultiply(R, B, A[0], NB); |
2458 | 12.7M | R[NB+1] = 0; |
2459 | 12.7M | return; |
2460 | 274 | case 0: |
2461 | 274 | SetWords(R, 0, NB+2); |
2462 | 274 | return; |
2463 | 25.7k | case 1: |
2464 | 25.7k | CopyWords(R, B, NB); |
2465 | 25.7k | R[NB] = R[NB+1] = 0; |
2466 | 25.7k | return; |
2467 | 12.7M | } |
2468 | 12.7M | } |
2469 | | |
2470 | 17.7M | size_t i; |
2471 | 17.7M | if ((NB/NA)%2 == 0) |
2472 | 4.42M | { |
2473 | 4.42M | Multiply(R, T, A, B, NA); |
2474 | 4.42M | CopyWords(T+2*NA, R+NA, NA); |
2475 | | |
2476 | 45.5M | for (i=2*NA; i<NB; i+=2*NA) |
2477 | 41.1M | Multiply(T+NA+i, T, A, B+i, NA); |
2478 | 50.0M | for (i=NA; i<NB; i+=2*NA) |
2479 | 45.5M | Multiply(R+i, T, A, B+i, NA); |
2480 | 4.42M | } |
2481 | 13.3M | else |
2482 | 13.3M | { |
2483 | 175M | for (i=0; i<NB; i+=2*NA) |
2484 | 161M | Multiply(R+i, T, A, B+i, NA); |
2485 | 161M | for (i=NA; i<NB; i+=2*NA) |
2486 | 148M | Multiply(T+NA+i, T, A, B+i, NA); |
2487 | 13.3M | } |
2488 | | |
2489 | 17.7M | if (Add(R+NA, R+NA, T+2*NA, NB-NA)) |
2490 | 5.31M | Increment(R+NB, NA); |
2491 | 17.7M | } |
2492 | | |
2493 | | // R[N] ----- result = A inverse mod 2**(WORD_BITS*N) |
2494 | | // T[3*N/2] - temporary work space |
2495 | | // A[N] ----- an odd number as input |
2496 | | |
2497 | | void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N) |
2498 | 62.8k | { |
2499 | | // Profiling guided the flow below. |
2500 | 62.8k | if (N!=2) |
2501 | 33.9k | { |
2502 | 33.9k | const size_t N2 = N/2; |
2503 | 33.9k | RecursiveInverseModPower2(R0, T0, A0, N2); |
2504 | 33.9k | T0[0] = 1; |
2505 | 33.9k | SetWords(T0+1, 0, N2-1); |
2506 | 33.9k | MultiplyTop(R1, T1, T0, R0, A0, N2); |
2507 | 33.9k | MultiplyBottom(T0, T1, R0, A1, N2); |
2508 | 33.9k | Add(T0, R1, T0, N2); |
2509 | 33.9k | TwosComplement(T0, N2); |
2510 | 33.9k | MultiplyBottom(R1, T1, R0, T0, N2); |
2511 | 33.9k | } |
2512 | 28.8k | else |
2513 | 28.8k | { |
2514 | 28.8k | T[0] = AtomicInverseModPower2(A[0]); |
2515 | 28.8k | T[1] = 0; |
2516 | 28.8k | s_pBot[0](T+2, T, A); |
2517 | 28.8k | TwosComplement(T+2, 2); |
2518 | 28.8k | Increment(T+2, 2, 2); |
2519 | 28.8k | s_pBot[0](R, T, T+2); |
2520 | 28.8k | } |
2521 | 62.8k | } |
2522 | | |
2523 | | // R[N] --- result = X/(2**(WORD_BITS*N)) mod M |
2524 | | // T[3*N] - temporary work space |
2525 | | // X[2*N] - number to be reduced |
2526 | | // M[N] --- modulus |
2527 | | // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N) |
2528 | | |
2529 | | void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N) |
2530 | 1.04M | { |
2531 | 1.04M | #if 1 |
2532 | 1.04M | MultiplyBottom(R, T, X, U, N); |
2533 | 1.04M | MultiplyTop(T, T+N, X, R, M, N); |
2534 | 1.04M | word borrow = Subtract(T, X+N, T, N); |
2535 | | // defend against timing attack by doing this Add even when not needed |
2536 | 1.04M | word carry = Add(T+N, T, M, N); |
2537 | 1.04M | CRYPTOPP_ASSERT(carry | !borrow); |
2538 | 1.04M | CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow); |
2539 | 1.04M | CopyWords(R, T + ((0-borrow) & N), N); |
2540 | | #elif 0 |
2541 | | const word u = 0-U[0]; |
2542 | | Declare2Words(p) |
2543 | | for (size_t i=0; i<N; i++) |
2544 | | { |
2545 | | const word t = u * X[i]; |
2546 | | word c = 0; |
2547 | | for (size_t j=0; j<N; j+=2) |
2548 | | { |
2549 | | MultiplyWords(p, t, M[j]); |
2550 | | Acc2WordsBy1(p, X[i+j]); |
2551 | | Acc2WordsBy1(p, c); |
2552 | | X[i+j] = LowWord(p); |
2553 | | c = HighWord(p); |
2554 | | MultiplyWords(p, t, M[j+1]); |
2555 | | Acc2WordsBy1(p, X[i+j+1]); |
2556 | | Acc2WordsBy1(p, c); |
2557 | | X[i+j+1] = LowWord(p); |
2558 | | c = HighWord(p); |
2559 | | } |
2560 | | |
2561 | | if (Increment(X+N+i, N-i, c)) |
2562 | | while (!Subtract(X+N, X+N, M, N)) {} |
2563 | | } |
2564 | | |
2565 | | std::memcpy(R, X+N, N*WORD_SIZE); |
2566 | | #else |
2567 | | __m64 u = _mm_cvtsi32_si64(0-U[0]), p; |
2568 | | for (size_t i=0; i<N; i++) |
2569 | | { |
2570 | | __m64 t = _mm_cvtsi32_si64(X[i]); |
2571 | | t = _mm_mul_su32(t, u); |
2572 | | __m64 c = _mm_setzero_si64(); |
2573 | | for (size_t j=0; j<N; j+=2) |
2574 | | { |
2575 | | p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j])); |
2576 | | p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j])); |
2577 | | c = _mm_add_si64(c, p); |
2578 | | X[i+j] = _mm_cvtsi64_si32(c); |
2579 | | c = _mm_srli_si64(c, 32); |
2580 | | p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1])); |
2581 | | p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1])); |
2582 | | c = _mm_add_si64(c, p); |
2583 | | X[i+j+1] = _mm_cvtsi64_si32(c); |
2584 | | c = _mm_srli_si64(c, 32); |
2585 | | } |
2586 | | |
2587 | | if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c))) |
2588 | | while (!Subtract(X+N, X+N, M, N)) {} |
2589 | | } |
2590 | | |
2591 | | std::memcpy(R, X+N, N*WORD_SIZE); |
2592 | | _mm_empty(); |
2593 | | #endif |
2594 | 1.04M | } |
2595 | | |
2596 | | // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M |
2597 | | // T[2*N] - temporary work space |
2598 | | // X[2*N] - number to be reduced |
2599 | | // M[N] --- modulus |
2600 | | // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2) |
2601 | | // V[N] --- 2**(WORD_BITS*3*N/2) mod M |
2602 | | |
2603 | | void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N) |
2604 | 0 | { |
2605 | 0 | CRYPTOPP_ASSERT(N%2==0 && N>=4); |
2606 | |
|
2607 | 0 | #define M0 M |
2608 | 0 | #define M1 (M+N2) |
2609 | 0 | #define V0 V |
2610 | 0 | #define V1 (V+N2) |
2611 | |
|
2612 | 0 | #define X0 X |
2613 | 0 | #define X1 (X+N2) |
2614 | 0 | #define X2 (X+N) |
2615 | 0 | #define X3 (X+N+N2) |
2616 | |
|
2617 | 0 | const size_t N2 = N/2; |
2618 | 0 | Multiply(T0, T2, V0, X3, N2); |
2619 | 0 | int c2 = Add(T0, T0, X0, N); |
2620 | 0 | MultiplyBottom(T3, T2, T0, U, N2); |
2621 | 0 | MultiplyTop(T2, R, T0, T3, M0, N2); |
2622 | 0 | c2 -= Subtract(T2, T1, T2, N2); |
2623 | 0 | Multiply(T0, R, T3, M1, N2); |
2624 | 0 | c2 -= Subtract(T0, T2, T0, N2); |
2625 | 0 | int c3 = -(int)Subtract(T1, X2, T1, N2); |
2626 | 0 | Multiply(R0, T2, V1, X3, N2); |
2627 | 0 | c3 += Add(R, R, T, N); |
2628 | |
|
2629 | 0 | if (c2>0) |
2630 | 0 | c3 += Increment(R1, N2); |
2631 | 0 | else if (c2<0) |
2632 | 0 | c3 -= Decrement(R1, N2, -c2); |
2633 | |
|
2634 | 0 | CRYPTOPP_ASSERT(c3>=-1 && c3<=1); |
2635 | 0 | if (c3>0) |
2636 | 0 | Subtract(R, R, M, N); |
2637 | 0 | else if (c3<0) |
2638 | 0 | Add(R, R, M, N); |
2639 | |
|
2640 | 0 | #undef M0 |
2641 | 0 | #undef M1 |
2642 | 0 | #undef V0 |
2643 | 0 | #undef V1 |
2644 | |
|
2645 | 0 | #undef X0 |
2646 | 0 | #undef X1 |
2647 | 0 | #undef X2 |
2648 | 0 | #undef X3 |
2649 | 0 | } |
2650 | | |
2651 | | #undef A0 |
2652 | | #undef A1 |
2653 | | #undef B0 |
2654 | | #undef B1 |
2655 | | |
2656 | | #undef T0 |
2657 | | #undef T1 |
2658 | | #undef T2 |
2659 | | #undef T3 |
2660 | | |
2661 | | #undef R0 |
2662 | | #undef R1 |
2663 | | #undef R2 |
2664 | | #undef R3 |
2665 | | |
2666 | | /* |
2667 | | // do a 3 word by 2 word divide, returns quotient and leaves remainder in A |
2668 | | static word SubatomicDivide(word *A, word B0, word B1) |
2669 | | { |
2670 | | // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word |
2671 | | CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0)); |
2672 | | |
2673 | | // estimate the quotient: do a 2 word by 1 word divide |
2674 | | word Q; |
2675 | | if (B1+1 == 0) |
2676 | | Q = A[2]; |
2677 | | else |
2678 | | Q = DWord(A[1], A[2]).DividedBy(B1+1); |
2679 | | |
2680 | | // now subtract Q*B from A |
2681 | | DWord p = DWord::Multiply(B0, Q); |
2682 | | DWord u = (DWord) A[0] - p.GetLowHalf(); |
2683 | | A[0] = u.GetLowHalf(); |
2684 | | u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); |
2685 | | A[1] = u.GetLowHalf(); |
2686 | | A[2] += u.GetHighHalf(); |
2687 | | |
2688 | | // Q <= actual quotient, so fix it |
2689 | | while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) |
2690 | | { |
2691 | | u = (DWord) A[0] - B0; |
2692 | | A[0] = u.GetLowHalf(); |
2693 | | u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); |
2694 | | A[1] = u.GetLowHalf(); |
2695 | | A[2] += u.GetHighHalf(); |
2696 | | Q++; |
2697 | | CRYPTOPP_ASSERT(Q); // shouldn't overflow |
2698 | | } |
2699 | | |
2700 | | return Q; |
2701 | | } |
2702 | | |
2703 | | // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 |
2704 | | static inline void AtomicDivide(word *Q, const word *A, const word *B) |
2705 | | { |
2706 | | if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) |
2707 | | { |
2708 | | Q[0] = A[2]; |
2709 | | Q[1] = A[3]; |
2710 | | } |
2711 | | else |
2712 | | { |
2713 | | word T[4]; |
2714 | | T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3]; |
2715 | | Q[1] = SubatomicDivide(T+1, B[0], B[1]); |
2716 | | Q[0] = SubatomicDivide(T, B[0], B[1]); |
2717 | | |
2718 | | #if defined(CRYPTOPP_DEBUG) |
2719 | | // multiply quotient and divisor and add remainder, make sure it equals dividend |
2720 | | CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); |
2721 | | word P[4]; |
2722 | | LowLevel::Multiply2(P, Q, B); |
2723 | | Add(P, P, T, 4); |
2724 | | CRYPTOPP_ASSERT(std::memcmp(P, A, 4*WORD_SIZE)==0); |
2725 | | #endif |
2726 | | } |
2727 | | } |
2728 | | */ |
2729 | | |
2730 | | static inline void AtomicDivide(word *Q, const word *A, const word *B) |
2731 | 18.8M | { |
2732 | 18.8M | word T[4]; |
2733 | 18.8M | DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); |
2734 | 18.8M | Q[0] = q.GetLowHalf(); |
2735 | 18.8M | Q[1] = q.GetHighHalf(); |
2736 | | |
2737 | | #if defined(CRYPTOPP_DEBUG) |
2738 | | if (B[0] || B[1]) |
2739 | | { |
2740 | | // multiply quotient and divisor and add remainder, make sure it equals dividend |
2741 | | CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); |
2742 | | word P[4]; |
2743 | | s_pMul[0](P, Q, B); |
2744 | | Add(P, P, T, 4); |
2745 | | CRYPTOPP_ASSERT(std::memcmp(P, A, 4*WORD_SIZE)==0); |
2746 | | } |
2747 | | #endif |
2748 | 18.8M | } |
2749 | | |
2750 | | // for use by Divide(), corrects the underestimated quotient {Q1,Q0} |
2751 | | static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N) |
2752 | 18.8M | { |
2753 | 18.8M | CRYPTOPP_ASSERT(N && N%2==0); |
2754 | | |
2755 | 18.8M | AsymmetricMultiply(T, T+N+2, Q, 2, B, N); |
2756 | | |
2757 | 18.8M | word borrow = Subtract(R, R, T, N+2); |
2758 | 18.8M | CRYPTOPP_ASSERT(!borrow && !R[N+1]); |
2759 | 18.8M | CRYPTOPP_UNUSED(borrow); |
2760 | | |
2761 | 23.7M | while (R[N] || Compare(R, B, N) >= 0) |
2762 | 4.91M | { |
2763 | 4.91M | R[N] -= Subtract(R, R, B, N); |
2764 | 4.91M | Q[1] += (++Q[0]==0); |
2765 | 4.91M | CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow |
2766 | 4.91M | } |
2767 | 18.8M | } |
2768 | | |
2769 | | // R[NB] -------- remainder = A%B |
2770 | | // Q[NA-NB+2] --- quotient = A/B |
2771 | | // T[NA+3*(NB+2)] - temp work space |
2772 | | // A[NA] -------- dividend |
2773 | | // B[NB] -------- divisor |
2774 | | |
2775 | | void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB) |
2776 | 2.24M | { |
2777 | 2.24M | CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0); |
2778 | 2.24M | CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]); |
2779 | 2.24M | CRYPTOPP_ASSERT(NB <= NA); |
2780 | | |
2781 | | // set up temporary work space |
2782 | 2.24M | word *const TA=T; |
2783 | 2.24M | word *const TB=T+NA+2; |
2784 | 2.24M | word *const TP=T+NA+2+NB; |
2785 | | |
2786 | | // copy B into TB and normalize it so that TB has highest bit set to 1 |
2787 | 2.24M | unsigned shiftWords = (B[NB-1]==0); |
2788 | 2.24M | TB[0] = TB[NB-1] = 0; |
2789 | 2.24M | CopyWords(TB+shiftWords, B, NB-shiftWords); |
2790 | 2.24M | unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); |
2791 | 2.24M | CRYPTOPP_ASSERT(shiftBits < WORD_BITS); |
2792 | 2.24M | ShiftWordsLeftByBits(TB, NB, shiftBits); |
2793 | | |
2794 | | // copy A into TA and normalize it |
2795 | 2.24M | TA[0] = TA[NA] = TA[NA+1] = 0; |
2796 | 2.24M | CopyWords(TA+shiftWords, A, NA); |
2797 | 2.24M | ShiftWordsLeftByBits(TA, NA+2, shiftBits); |
2798 | | |
2799 | 2.24M | if (TA[NA+1]==0 && TA[NA] <= 1) |
2800 | 1.01M | { |
2801 | 1.01M | Q[NA-NB+1] = Q[NA-NB] = 0; |
2802 | 1.75M | while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) |
2803 | 736k | { |
2804 | 736k | TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); |
2805 | 736k | ++Q[NA-NB]; |
2806 | 736k | } |
2807 | 1.01M | } |
2808 | 1.23M | else |
2809 | 1.23M | { |
2810 | 1.23M | NA+=2; |
2811 | 1.23M | CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0); |
2812 | 1.23M | } |
2813 | | |
2814 | 2.24M | word BT[2]; |
2815 | 2.24M | BT[0] = TB[NB-2] + 1; |
2816 | 2.24M | BT[1] = TB[NB-1] + (BT[0]==0); |
2817 | | |
2818 | | // start reducing TA mod TB, 2 words at a time |
2819 | 21.0M | for (size_t i=NA-2; i>=NB; i-=2) |
2820 | 18.8M | { |
2821 | 18.8M | AtomicDivide(Q+i-NB, TA+i-2, BT); |
2822 | 18.8M | CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); |
2823 | 18.8M | } |
2824 | | |
2825 | | // copy TA into R, and denormalize it |
2826 | 2.24M | CopyWords(R, TA+shiftWords, NB); |
2827 | 2.24M | ShiftWordsRightByBits(R, NB, shiftBits); |
2828 | 2.24M | } |
2829 | | |
2830 | | static inline size_t EvenWordCount(const word *X, size_t N) |
2831 | 3.18k | { |
2832 | 11.4k | while (N && X[N-2]==0 && X[N-1]==0) |
2833 | 8.29k | N-=2; |
2834 | 3.18k | return N; |
2835 | 3.18k | } |
2836 | | |
2837 | | // return k |
2838 | | // R[N] --- result = A^(-1) * 2^k mod M |
2839 | | // T[4*N] - temporary work space |
2840 | | // A[NA] -- number to take inverse of |
2841 | | // M[N] --- modulus |
2842 | | |
2843 | | unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N) |
2844 | 1.25k | { |
2845 | 1.25k | CRYPTOPP_ASSERT(NA<=N && N && N%2==0); |
2846 | | |
2847 | 1.25k | word *b = T; |
2848 | 1.25k | word *c = T+N; |
2849 | 1.25k | word *f = T+2*N; |
2850 | 1.25k | word *g = T+3*N; |
2851 | 1.25k | size_t bcLen=2, fgLen=EvenWordCount(M, N); |
2852 | 1.25k | unsigned int k=0; |
2853 | 1.25k | bool s=false; |
2854 | | |
2855 | 1.25k | SetWords(T, 0, 3*N); |
2856 | 1.25k | b[0]=1; |
2857 | 1.25k | CopyWords(f, A, NA); |
2858 | 1.25k | CopyWords(g, M, N); |
2859 | | |
2860 | 1.04M | while (1) |
2861 | 1.04M | { |
2862 | 1.04M | word t=f[0]; |
2863 | 1.04M | while (!t) |
2864 | 499 | { |
2865 | 499 | if (EvenWordCount(f, fgLen)==0) |
2866 | 273 | { |
2867 | 273 | SetWords(R, 0, N); |
2868 | 273 | return 0; |
2869 | 273 | } |
2870 | | |
2871 | 226 | ShiftWordsRightByWords(f, fgLen, 1); |
2872 | 226 | bcLen += 2 * (c[bcLen-1] != 0); |
2873 | 226 | CRYPTOPP_ASSERT(bcLen <= N); |
2874 | 226 | ShiftWordsLeftByWords(c, bcLen, 1); |
2875 | 226 | k+=WORD_BITS; |
2876 | 226 | t=f[0]; |
2877 | 226 | } |
2878 | | |
2879 | | // t must be non-0; otherwise undefined. |
2880 | 1.04M | unsigned int i = TrailingZeros(t); |
2881 | 1.04M | t >>= i; |
2882 | 1.04M | k += i; |
2883 | | |
2884 | 1.04M | if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0) |
2885 | 985 | { |
2886 | 985 | if (s) |
2887 | 497 | Subtract(R, M, b, N); |
2888 | 488 | else |
2889 | 488 | CopyWords(R, b, N); |
2890 | 985 | return k; |
2891 | 985 | } |
2892 | | |
2893 | 1.04M | ShiftWordsRightByBits(f, fgLen, i); |
2894 | 1.04M | t = ShiftWordsLeftByBits(c, bcLen, i); |
2895 | 1.04M | c[bcLen] += t; |
2896 | 1.04M | bcLen += 2 * (t!=0); |
2897 | 1.04M | CRYPTOPP_ASSERT(bcLen <= N); |
2898 | | |
2899 | 1.04M | bool swap = Compare(f, g, fgLen)==-1; |
2900 | 1.04M | ConditionalSwapPointers(swap, f, g); |
2901 | 1.04M | ConditionalSwapPointers(swap, b, c); |
2902 | 1.04M | s ^= swap; |
2903 | | |
2904 | 1.04M | fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]); |
2905 | | |
2906 | 1.04M | Subtract(f, f, g, fgLen); |
2907 | 1.04M | t = Add(b, b, c, bcLen); |
2908 | 1.04M | b[bcLen] += t; |
2909 | 1.04M | bcLen += 2*t; |
2910 | 1.04M | CRYPTOPP_ASSERT(bcLen <= N); |
2911 | 1.04M | } |
2912 | 1.25k | } |
2913 | | |
2914 | | // R[N] - result = A/(2^k) mod M |
2915 | | // A[N] - input |
2916 | | // M[N] - modulus |
2917 | | |
2918 | | void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) |
2919 | 1.25k | { |
2920 | 1.25k | CopyWords(R, A, N); |
2921 | | |
2922 | 1.83M | while (k--) |
2923 | 1.83M | { |
2924 | 1.83M | if (R[0]%2==0) |
2925 | 918k | ShiftWordsRightByBits(R, N, 1); |
2926 | 914k | else |
2927 | 914k | { |
2928 | 914k | word carry = Add(R, R, M, N); |
2929 | 914k | ShiftWordsRightByBits(R, N, 1); |
2930 | 914k | R[N-1] += carry<<(WORD_BITS-1); |
2931 | 914k | } |
2932 | 1.83M | } |
2933 | 1.25k | } |
2934 | | |
2935 | | // R[N] - result = A*(2^k) mod M |
2936 | | // A[N] - input |
2937 | | // M[N] - modulus |
2938 | | |
2939 | | void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) |
2940 | 0 | { |
2941 | 0 | CopyWords(R, A, N); |
2942 | |
|
2943 | 0 | while (k--) |
2944 | 0 | if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) |
2945 | 0 | Subtract(R, R, M, N); |
2946 | 0 | } |
2947 | | |
2948 | | // ****************************************************************** |
2949 | | |
2950 | | static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; |
2951 | | |
2952 | | static inline size_t RoundupSize(size_t n) |
2953 | 76.5M | { |
2954 | 76.5M | if (n<=8) |
2955 | 35.6M | return RoundupSizeTable[n]; |
2956 | 40.9M | else if (n<=16) |
2957 | 5.64M | return 16; |
2958 | 35.3M | else if (n<=32) |
2959 | 8.44M | return 32; |
2960 | 26.8M | else if (n<=64) |
2961 | 12.2M | return 64; |
2962 | 14.6M | else |
2963 | 14.6M | return size_t(1) << BitPrecision(n-1); |
2964 | 76.5M | } |
2965 | | |
2966 | | Integer::Integer() |
2967 | | : reg(2), sign(POSITIVE) |
2968 | 19.7M | { |
2969 | 19.7M | reg[0] = reg[1] = 0; |
2970 | 19.7M | } |
2971 | | |
2972 | | Integer::Integer(const Integer& t) |
2973 | | : reg(RoundupSize(t.WordCount())), sign(t.sign) |
2974 | 886k | { |
2975 | 886k | CopyWords(reg, t.reg, reg.size()); |
2976 | 886k | } |
2977 | | |
2978 | | Integer::Integer(Sign s, lword value) |
2979 | | : reg(2), sign(s) |
2980 | 0 | { |
2981 | 0 | reg[0] = word(value); |
2982 | 0 | reg[1] = word(SafeRightShift<WORD_BITS>(value)); |
2983 | 0 | } |
2984 | | |
2985 | | Integer::Integer(signed long value) |
2986 | | : reg(2) |
2987 | 26.6M | { |
2988 | 26.6M | if (value >= 0) |
2989 | 26.6M | sign = POSITIVE; |
2990 | 271 | else |
2991 | 271 | { |
2992 | 271 | sign = NEGATIVE; |
2993 | 271 | value = -value; |
2994 | 271 | } |
2995 | 26.6M | reg[0] = word(value); |
2996 | 26.6M | reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); |
2997 | 26.6M | } |
2998 | | |
2999 | | Integer::Integer(Sign s, word high, word low) |
3000 | | : reg(2), sign(s) |
3001 | 0 | { |
3002 | 0 | reg[0] = low; |
3003 | 0 | reg[1] = high; |
3004 | 0 | } |
3005 | | |
3006 | | bool Integer::IsConvertableToLong() const |
3007 | 239 | { |
3008 | 239 | if (ByteCount() > sizeof(long)) |
3009 | 76 | return false; |
3010 | | |
3011 | 163 | unsigned long value = (unsigned long)reg[0]; |
3012 | 163 | value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); |
3013 | | |
3014 | 163 | if (sign==POSITIVE) |
3015 | 163 | return (signed long)value >= 0; |
3016 | 0 | else |
3017 | 0 | return -(signed long)value < 0; |
3018 | 163 | } |
3019 | | |
3020 | | signed long Integer::ConvertToLong() const |
3021 | 156 | { |
3022 | 156 | CRYPTOPP_ASSERT(IsConvertableToLong()); |
3023 | | |
3024 | 156 | unsigned long value = (unsigned long)reg[0]; |
3025 | 156 | value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); |
3026 | 156 | return sign==POSITIVE ? value : -(signed long)value; |
3027 | 156 | } |
3028 | | |
3029 | | Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o) |
3030 | 81.5k | { |
3031 | 81.5k | CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER); |
3032 | | |
3033 | 81.5k | if (o != LITTLE_ENDIAN_ORDER) |
3034 | 81.5k | { |
3035 | 81.5k | Decode(encodedInteger, byteCount, s); |
3036 | 81.5k | } |
3037 | 0 | else |
3038 | 0 | { |
3039 | 0 | SecByteBlock block(byteCount); |
3040 | 0 | encodedInteger.Get(block, block.size()); |
3041 | 0 | std::reverse(block.begin(), block.begin()+block.size()); |
3042 | |
|
3043 | 0 | Decode(block.begin(), block.size(), s); |
3044 | 0 | } |
3045 | 81.5k | } |
3046 | | |
3047 | | Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o) |
3048 | 4.82M | { |
3049 | 4.82M | CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer |
3050 | 4.82M | CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER); |
3051 | | |
3052 | 4.82M | if (o != LITTLE_ENDIAN_ORDER) |
3053 | 4.82M | { |
3054 | 4.82M | Decode(encodedInteger, byteCount, s); |
3055 | 4.82M | } |
3056 | 0 | else |
3057 | 0 | { |
3058 | 0 | SecByteBlock block(byteCount); |
3059 | | #if (CRYPTOPP_MSC_VERSION >= 1500) |
3060 | | std::reverse_copy(encodedInteger, encodedInteger+byteCount, |
3061 | | stdext::make_checked_array_iterator(block.begin(), block.size())); |
3062 | | #else |
3063 | 0 | std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin()); |
3064 | 0 | #endif |
3065 | 0 | Decode(block.begin(), block.size(), s); |
3066 | 0 | return; |
3067 | 0 | } |
3068 | 4.82M | } |
3069 | | |
3070 | | Integer::Integer(BufferedTransformation &bt) |
3071 | 0 | { |
3072 | | // Make explicit call to avoid virtual-dispatch findings in ctor |
3073 | 0 | Integer::BERDecode(bt); |
3074 | 0 | } |
3075 | | |
3076 | | Integer::Integer(RandomNumberGenerator &rng, size_t bitcount) |
3077 | 0 | { |
3078 | 0 | Randomize(rng, bitcount); |
3079 | 0 | } |
3080 | | |
3081 | | Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) |
3082 | 0 | { |
3083 | 0 | if (!Randomize(rng, min, max, rnType, equiv, mod)) |
3084 | 0 | throw Integer::RandomNumberNotFound(); |
3085 | 0 | } |
3086 | | |
3087 | | Integer Integer::Power2(size_t e) |
3088 | 1.90k | { |
3089 | 1.90k | Integer r((word)0, BitsToWords(e+1)); |
3090 | 1.90k | r.SetBit(e); |
3091 | 1.90k | return r; |
3092 | 1.90k | } |
3093 | | |
3094 | | bool Integer::operator!() const |
3095 | 3.73M | { |
3096 | 3.73M | return IsNegative() ? false : (reg[0]==0 && WordCount()==0); |
3097 | 3.73M | } |
3098 | | |
3099 | | Integer& Integer::operator=(const Integer& t) |
3100 | 18.5M | { |
3101 | 18.5M | if (this != &t) |
3102 | 18.5M | { |
3103 | 18.5M | if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0) |
3104 | 14.3M | reg.New(RoundupSize(t.WordCount())); |
3105 | 18.5M | CopyWords(reg, t.reg, reg.size()); |
3106 | 18.5M | sign = t.sign; |
3107 | 18.5M | } |
3108 | 18.5M | return *this; |
3109 | 18.5M | } |
3110 | | |
3111 | | bool Integer::GetBit(size_t n) const |
3112 | 1.89M | { |
3113 | | // Profiling guided the flow below. |
3114 | 1.89M | if (n/WORD_BITS < reg.size()) |
3115 | 1.89M | return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); |
3116 | 29 | else |
3117 | 29 | return 0; |
3118 | 1.89M | } |
3119 | | |
3120 | | void Integer::SetBit(size_t n, bool value) |
3121 | 1.99k | { |
3122 | 1.99k | if (value) |
3123 | 1.98k | { |
3124 | 1.98k | reg.CleanGrow(RoundupSize(BitsToWords(n+1))); |
3125 | 1.98k | reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); |
3126 | 1.98k | } |
3127 | 12 | else |
3128 | 12 | { |
3129 | 12 | if (n/WORD_BITS < reg.size()) |
3130 | 10 | reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); |
3131 | 12 | } |
3132 | 1.99k | } |
3133 | | |
3134 | | byte Integer::GetByte(size_t n) const |
3135 | 282M | { |
3136 | | // Profiling guided the flow below. |
3137 | 282M | if (n/WORD_SIZE < reg.size()) |
3138 | 282M | return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); |
3139 | 96 | else |
3140 | 96 | return 0; |
3141 | 282M | } |
3142 | | |
3143 | | void Integer::SetByte(size_t n, byte value) |
3144 | 0 | { |
3145 | 0 | reg.CleanGrow(RoundupSize(BytesToWords(n+1))); |
3146 | 0 | reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); |
3147 | 0 | reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); |
3148 | 0 | } |
3149 | | |
3150 | | lword Integer::GetBits(size_t i, size_t n) const |
3151 | 0 | { |
3152 | 0 | lword v = 0; |
3153 | 0 | CRYPTOPP_ASSERT(n <= sizeof(v)*8); |
3154 | 0 | for (unsigned int j=0; j<n; j++) |
3155 | 0 | v |= lword(GetBit(i+j)) << j; |
3156 | 0 | return v; |
3157 | 0 | } |
3158 | | |
3159 | | Integer Integer::operator-() const |
3160 | 115 | { |
3161 | 115 | Integer result(*this); |
3162 | 115 | result.Negate(); |
3163 | 115 | return result; |
3164 | 115 | } |
3165 | | |
3166 | | Integer Integer::AbsoluteValue() const |
3167 | 136 | { |
3168 | 136 | Integer result(*this); |
3169 | 136 | result.sign = POSITIVE; |
3170 | 136 | return result; |
3171 | 136 | } |
3172 | | |
3173 | | void Integer::swap(Integer &a) |
3174 | 3.71M | { |
3175 | 3.71M | reg.swap(a.reg); |
3176 | 3.71M | std::swap(sign, a.sign); |
3177 | 3.71M | } |
3178 | | |
3179 | | Integer::Integer(word value, size_t length) |
3180 | | : reg(RoundupSize(length)), sign(POSITIVE) |
3181 | 5.00M | { |
3182 | 5.00M | reg[0] = value; |
3183 | 5.00M | SetWords(reg+1, 0, reg.size()-1); |
3184 | 5.00M | } |
3185 | | |
3186 | | template <class T> |
3187 | | static Integer StringToInteger(const T *str, ByteOrder order) |
3188 | 70.7k | { |
3189 | 70.7k | CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER ); |
3190 | | |
3191 | 70.7k | int radix, sign = 1; |
3192 | | // GCC workaround |
3193 | | // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3 |
3194 | 70.7k | unsigned int length; |
3195 | 13.3M | for (length = 0; str[length] != 0; length++) {} |
3196 | | |
3197 | 70.7k | Integer v; |
3198 | | |
3199 | 70.7k | if (length == 0) |
3200 | 0 | return Integer::Zero(); |
3201 | | |
3202 | | // 'str' is of length 1 or more |
3203 | 70.7k | switch (str[length-1]) |
3204 | 70.7k | { |
3205 | 0 | case 'h': |
3206 | 0 | case 'H': |
3207 | 0 | radix=16; |
3208 | 0 | break; |
3209 | 0 | case 'o': |
3210 | 0 | case 'O': |
3211 | 0 | radix=8; |
3212 | 0 | break; |
3213 | 0 | case 'b': |
3214 | 0 | case 'B': |
3215 | 0 | radix=2; |
3216 | 0 | break; |
3217 | 70.7k | default: |
3218 | 70.7k | radix=10; |
3219 | 70.7k | } |
3220 | | |
3221 | | // 'str' is of length 1 or more |
3222 | 70.7k | if (str[0] == '-') |
3223 | 0 | { |
3224 | 0 | sign = -1; |
3225 | 0 | str += 1, length -= 1; |
3226 | 0 | } |
3227 | | |
3228 | | // Recognize common prefixes for hexadecimal, octal and decimal. |
3229 | | // Microsoft's MASM also recognizes 0t for octal, but not here. |
3230 | 70.7k | if (length > 2 && str[0] == '0') |
3231 | 11.6k | { |
3232 | 11.6k | if (str[1] == 'x' || str[1] == 'X') |
3233 | 0 | { |
3234 | 0 | radix = 16; |
3235 | 0 | str += 2, length -= 2; |
3236 | 0 | } |
3237 | 11.6k | else if (str[1] == 'n' || str[1] == 'N') |
3238 | 0 | { |
3239 | 0 | radix = 10; |
3240 | 0 | str += 2, length -= 2; |
3241 | 0 | } |
3242 | 11.6k | else if (str[1] == 'o' || str[1] == 'O') |
3243 | 0 | { |
3244 | 0 | radix = 8; |
3245 | 0 | str += 2, length -= 2; |
3246 | 0 | } |
3247 | 11.6k | } |
3248 | | |
3249 | 70.7k | if (order == BIG_ENDIAN_ORDER) |
3250 | 70.7k | { |
3251 | 13.3M | for (unsigned int i=0; i<length; i++) |
3252 | 13.2M | { |
3253 | 13.2M | int digit, ch = static_cast<int>(str[i]); |
3254 | | |
3255 | | // Profiling guided the flow below. |
3256 | 13.2M | if (ch >= '0' && ch <= '9') |
3257 | 13.2M | digit = ch - '0'; |
3258 | 0 | else if (ch >= 'a' && ch <= 'f') |
3259 | 0 | digit = ch - 'a' + 10; |
3260 | 0 | else if (ch >= 'A' && ch <= 'F') |
3261 | 0 | digit = ch - 'A' + 10; |
3262 | 0 | else |
3263 | 0 | digit = radix; |
3264 | | |
3265 | 13.2M | if (digit < radix) |
3266 | 13.2M | { |
3267 | 13.2M | v *= radix; |
3268 | 13.2M | v += digit; |
3269 | 13.2M | } |
3270 | 13.2M | } |
3271 | 70.7k | } |
3272 | 0 | else if (radix == 16 && order == LITTLE_ENDIAN_ORDER) |
3273 | 0 | { |
3274 | | // Nibble high, low and count |
3275 | 0 | unsigned int nh = 0, nl = 0, nc = 0; |
3276 | 0 | Integer position(Integer::One()); |
3277 | |
|
3278 | 0 | for (unsigned int i=0; i<length; i++) |
3279 | 0 | { |
3280 | 0 | int digit, ch = static_cast<int>(str[i]); |
3281 | |
|
3282 | 0 | if (ch >= '0' && ch <= '9') |
3283 | 0 | digit = ch - '0'; |
3284 | 0 | else if (ch >= 'a' && ch <= 'f') |
3285 | 0 | digit = ch - 'a' + 10; |
3286 | 0 | else if (ch >= 'A' && ch <= 'F') |
3287 | 0 | digit = ch - 'A' + 10; |
3288 | 0 | else |
3289 | 0 | digit = radix; |
3290 | |
|
3291 | 0 | if (digit < radix) |
3292 | 0 | { |
3293 | 0 | if (nc++ == 0) |
3294 | 0 | nh = digit; |
3295 | 0 | else |
3296 | 0 | nl = digit; |
3297 | |
|
3298 | 0 | if (nc == 2) |
3299 | 0 | { |
3300 | 0 | v += position * (nh << 4 | nl); |
3301 | 0 | nc = 0, position <<= 8; |
3302 | 0 | } |
3303 | 0 | } |
3304 | 0 | } |
3305 | |
|
3306 | 0 | if (nc == 1) |
3307 | 0 | v += nh * position; |
3308 | 0 | } |
3309 | 0 | else // LITTLE_ENDIAN_ORDER && radix != 16 |
3310 | 0 | { |
3311 | 0 | for (int i=static_cast<int>(length)-1; i>=0; i--) |
3312 | 0 | { |
3313 | 0 | int digit, ch = static_cast<int>(str[i]); |
3314 | |
|
3315 | 0 | if (ch >= '0' && ch <= '9') |
3316 | 0 | digit = ch - '0'; |
3317 | 0 | else if (ch >= 'a' && ch <= 'f') |
3318 | 0 | digit = ch - 'a' + 10; |
3319 | 0 | else if (ch >= 'A' && ch <= 'F') |
3320 | 0 | digit = ch - 'A' + 10; |
3321 | 0 | else |
3322 | 0 | digit = radix; |
3323 | |
|
3324 | 0 | if (digit < radix) |
3325 | 0 | { |
3326 | 0 | v *= radix; |
3327 | 0 | v += digit; |
3328 | 0 | } |
3329 | 0 | } |
3330 | 0 | } |
3331 | | |
3332 | 70.7k | if (sign == -1) |
3333 | 0 | v.Negate(); |
3334 | | |
3335 | 70.7k | return v; |
3336 | 70.7k | } integer.cpp:CryptoPP::Integer CryptoPP::StringToInteger<char>(char const*, CryptoPP::ByteOrder) Line | Count | Source | 3188 | 70.7k | { | 3189 | 70.7k | CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER ); | 3190 | | | 3191 | 70.7k | int radix, sign = 1; | 3192 | | // GCC workaround | 3193 | | // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3 | 3194 | 70.7k | unsigned int length; | 3195 | 13.3M | for (length = 0; str[length] != 0; length++) {} | 3196 | | | 3197 | 70.7k | Integer v; | 3198 | | | 3199 | 70.7k | if (length == 0) | 3200 | 0 | return Integer::Zero(); | 3201 | | | 3202 | | // 'str' is of length 1 or more | 3203 | 70.7k | switch (str[length-1]) | 3204 | 70.7k | { | 3205 | 0 | case 'h': | 3206 | 0 | case 'H': | 3207 | 0 | radix=16; | 3208 | 0 | break; | 3209 | 0 | case 'o': | 3210 | 0 | case 'O': | 3211 | 0 | radix=8; | 3212 | 0 | break; | 3213 | 0 | case 'b': | 3214 | 0 | case 'B': | 3215 | 0 | radix=2; | 3216 | 0 | break; | 3217 | 70.7k | default: | 3218 | 70.7k | radix=10; | 3219 | 70.7k | } | 3220 | | | 3221 | | // 'str' is of length 1 or more | 3222 | 70.7k | if (str[0] == '-') | 3223 | 0 | { | 3224 | 0 | sign = -1; | 3225 | 0 | str += 1, length -= 1; | 3226 | 0 | } | 3227 | | | 3228 | | // Recognize common prefixes for hexadecimal, octal and decimal. | 3229 | | // Microsoft's MASM also recognizes 0t for octal, but not here. | 3230 | 70.7k | if (length > 2 && str[0] == '0') | 3231 | 11.6k | { | 3232 | 11.6k | if (str[1] == 'x' || str[1] == 'X') | 3233 | 0 | { | 3234 | 0 | radix = 16; | 3235 | 0 | str += 2, length -= 2; | 3236 | 0 | } | 3237 | 11.6k | else if (str[1] == 'n' || str[1] == 'N') | 3238 | 0 | { | 3239 | 0 | radix = 10; | 3240 | 0 | str += 2, length -= 2; | 3241 | 0 | } | 3242 | 11.6k | else if (str[1] == 'o' || str[1] == 'O') | 3243 | 0 | { | 3244 | 0 | radix = 8; | 3245 | 0 | str += 2, length -= 2; | 3246 | 0 | } | 3247 | 11.6k | } | 3248 | | | 3249 | 70.7k | if (order == BIG_ENDIAN_ORDER) | 3250 | 70.7k | { | 3251 | 13.3M | for (unsigned int i=0; i<length; i++) | 3252 | 13.2M | { | 3253 | 13.2M | int digit, ch = static_cast<int>(str[i]); | 3254 | | | 3255 | | // Profiling guided the flow below. | 3256 | 13.2M | if (ch >= '0' && ch <= '9') | 3257 | 13.2M | digit = ch - '0'; | 3258 | 0 | else if (ch >= 'a' && ch <= 'f') | 3259 | 0 | digit = ch - 'a' + 10; | 3260 | 0 | else if (ch >= 'A' && ch <= 'F') | 3261 | 0 | digit = ch - 'A' + 10; | 3262 | 0 | else | 3263 | 0 | digit = radix; | 3264 | | | 3265 | 13.2M | if (digit < radix) | 3266 | 13.2M | { | 3267 | 13.2M | v *= radix; | 3268 | 13.2M | v += digit; | 3269 | 13.2M | } | 3270 | 13.2M | } | 3271 | 70.7k | } | 3272 | 0 | else if (radix == 16 && order == LITTLE_ENDIAN_ORDER) | 3273 | 0 | { | 3274 | | // Nibble high, low and count | 3275 | 0 | unsigned int nh = 0, nl = 0, nc = 0; | 3276 | 0 | Integer position(Integer::One()); | 3277 | |
| 3278 | 0 | for (unsigned int i=0; i<length; i++) | 3279 | 0 | { | 3280 | 0 | int digit, ch = static_cast<int>(str[i]); | 3281 | |
| 3282 | 0 | if (ch >= '0' && ch <= '9') | 3283 | 0 | digit = ch - '0'; | 3284 | 0 | else if (ch >= 'a' && ch <= 'f') | 3285 | 0 | digit = ch - 'a' + 10; | 3286 | 0 | else if (ch >= 'A' && ch <= 'F') | 3287 | 0 | digit = ch - 'A' + 10; | 3288 | 0 | else | 3289 | 0 | digit = radix; | 3290 | |
| 3291 | 0 | if (digit < radix) | 3292 | 0 | { | 3293 | 0 | if (nc++ == 0) | 3294 | 0 | nh = digit; | 3295 | 0 | else | 3296 | 0 | nl = digit; | 3297 | |
| 3298 | 0 | if (nc == 2) | 3299 | 0 | { | 3300 | 0 | v += position * (nh << 4 | nl); | 3301 | 0 | nc = 0, position <<= 8; | 3302 | 0 | } | 3303 | 0 | } | 3304 | 0 | } | 3305 | |
| 3306 | 0 | if (nc == 1) | 3307 | 0 | v += nh * position; | 3308 | 0 | } | 3309 | 0 | else // LITTLE_ENDIAN_ORDER && radix != 16 | 3310 | 0 | { | 3311 | 0 | for (int i=static_cast<int>(length)-1; i>=0; i--) | 3312 | 0 | { | 3313 | 0 | int digit, ch = static_cast<int>(str[i]); | 3314 | |
| 3315 | 0 | if (ch >= '0' && ch <= '9') | 3316 | 0 | digit = ch - '0'; | 3317 | 0 | else if (ch >= 'a' && ch <= 'f') | 3318 | 0 | digit = ch - 'a' + 10; | 3319 | 0 | else if (ch >= 'A' && ch <= 'F') | 3320 | 0 | digit = ch - 'A' + 10; | 3321 | 0 | else | 3322 | 0 | digit = radix; | 3323 | |
| 3324 | 0 | if (digit < radix) | 3325 | 0 | { | 3326 | 0 | v *= radix; | 3327 | 0 | v += digit; | 3328 | 0 | } | 3329 | 0 | } | 3330 | 0 | } | 3331 | | | 3332 | 70.7k | if (sign == -1) | 3333 | 0 | v.Negate(); | 3334 | | | 3335 | 70.7k | return v; | 3336 | 70.7k | } |
Unexecuted instantiation: integer.cpp:CryptoPP::Integer CryptoPP::StringToInteger<wchar_t>(wchar_t const*, CryptoPP::ByteOrder) |
3337 | | |
3338 | | Integer::Integer(const char *str, ByteOrder order) |
3339 | | : reg(2), sign(POSITIVE) |
3340 | 70.7k | { |
3341 | 70.7k | *this = StringToInteger(str,order); |
3342 | 70.7k | } |
3343 | | |
3344 | | Integer::Integer(const wchar_t *str, ByteOrder order) |
3345 | | : reg(2), sign(POSITIVE) |
3346 | 0 | { |
3347 | 0 | *this = StringToInteger(str,order); |
3348 | 0 | } |
3349 | | |
3350 | | unsigned int Integer::WordCount() const |
3351 | 54.5M | { |
3352 | 54.5M | return (unsigned int)CountWords(reg, reg.size()); |
3353 | 54.5M | } |
3354 | | |
3355 | | unsigned int Integer::ByteCount() const |
3356 | 54.6k | { |
3357 | 54.6k | unsigned wordCount = WordCount(); |
3358 | 54.6k | if (wordCount) |
3359 | 54.5k | return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); |
3360 | 51 | else |
3361 | 51 | return 0; |
3362 | 54.6k | } |
3363 | | |
3364 | | unsigned int Integer::BitCount() const |
3365 | 11.6k | { |
3366 | 11.6k | unsigned wordCount = WordCount(); |
3367 | 11.6k | if (wordCount) |
3368 | 11.6k | return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); |
3369 | 4 | else |
3370 | 4 | return 0; |
3371 | 11.6k | } |
3372 | | |
3373 | | void Integer::Decode(const byte *input, size_t inputLen, Signedness s) |
3374 | 4.82M | { |
3375 | 4.82M | CRYPTOPP_ASSERT(input && inputLen); // NULL buffer |
3376 | 4.82M | StringStore store(input, inputLen); |
3377 | 4.82M | Decode(store, inputLen, s); |
3378 | 4.82M | } |
3379 | | |
3380 | | void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s) |
3381 | 4.98M | { |
3382 | 4.98M | CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen); |
3383 | 4.98M | if (bt.MaxRetrievable() < inputLen) |
3384 | 0 | throw InvalidArgument("Integer: input length is too small"); |
3385 | | |
3386 | 4.98M | byte b; |
3387 | 4.98M | bt.Peek(b); |
3388 | 4.98M | sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; |
3389 | | |
3390 | 5.26M | while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) |
3391 | 282k | { |
3392 | 282k | bt.Skip(1); |
3393 | 282k | inputLen--; |
3394 | 282k | bt.Peek(b); |
3395 | 282k | } |
3396 | | |
3397 | 4.98M | reg.CleanNew(RoundupSize(BytesToWords(inputLen))); |
3398 | 294M | for (size_t i=inputLen; i > 0; i--) |
3399 | 289M | { |
3400 | 289M | (void)bt.Get(b); |
3401 | 289M | reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; |
3402 | 289M | } |
3403 | | |
3404 | 4.98M | if (sign == NEGATIVE) |
3405 | 0 | { |
3406 | 0 | for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++) |
3407 | 0 | reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; |
3408 | 0 | TwosComplement(reg, reg.size()); |
3409 | 0 | } |
3410 | 4.98M | } |
3411 | | |
3412 | | size_t Integer::MinEncodedSize(Signedness signedness) const |
3413 | 0 | { |
3414 | | // Profiling guided the flow below. |
3415 | 0 | unsigned int outputLen = STDMAX(1U, ByteCount()); |
3416 | 0 | const bool pre = (signedness == UNSIGNED); |
3417 | 0 | if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80)) |
3418 | 0 | outputLen++; |
3419 | 0 | if (pre) |
3420 | 0 | return outputLen; |
3421 | 0 | if (IsNegative() && *this < -Power2(outputLen*8-1)) |
3422 | 0 | outputLen++; |
3423 | 0 | return outputLen; |
3424 | 0 | } |
3425 | | |
3426 | | // PKCS12_PBKDF and other classes use undersized buffers |
3427 | | void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const |
3428 | 4.79M | { |
3429 | 4.79M | CRYPTOPP_ASSERT(output && outputLen); // NULL buffer |
3430 | 4.79M | ArraySink sink(output, outputLen); |
3431 | 4.79M | Encode(sink, outputLen, signedness); |
3432 | 4.79M | } |
3433 | | |
3434 | | void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const |
3435 | 4.79M | { |
3436 | 4.79M | if (signedness == UNSIGNED || NotNegative()) |
3437 | 4.79M | { |
3438 | 287M | for (size_t i=outputLen; i > 0; i--) |
3439 | 282M | bt.Put(GetByte(i-1)); |
3440 | 4.79M | } |
3441 | 0 | else |
3442 | 0 | { |
3443 | | // take two's complement of *this |
3444 | 0 | Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this; |
3445 | 0 | temp.Encode(bt, outputLen, UNSIGNED); |
3446 | 0 | } |
3447 | 4.79M | } |
3448 | | |
3449 | | void Integer::DEREncode(BufferedTransformation &bt) const |
3450 | 0 | { |
3451 | 0 | DERGeneralEncoder enc(bt, INTEGER); |
3452 | 0 | Encode(enc, MinEncodedSize(SIGNED), SIGNED); |
3453 | 0 | enc.MessageEnd(); |
3454 | 0 | } |
3455 | | |
3456 | | void Integer::BERDecode(const byte *input, size_t len) |
3457 | 0 | { |
3458 | 0 | CRYPTOPP_ASSERT(input && len); // NULL buffer |
3459 | 0 | StringStore store(input, len); |
3460 | 0 | BERDecode(store); |
3461 | 0 | } |
3462 | | |
3463 | | void Integer::BERDecode(BufferedTransformation &bt) |
3464 | 0 | { |
3465 | 0 | BERGeneralDecoder dec(bt, INTEGER); |
3466 | 0 | if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) |
3467 | 0 | BERDecodeError(); |
3468 | 0 | Decode(dec, (size_t)dec.RemainingLength(), SIGNED); |
3469 | 0 | dec.MessageEnd(); |
3470 | 0 | } |
3471 | | |
3472 | | void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const |
3473 | 0 | { |
3474 | 0 | DERGeneralEncoder enc(bt, OCTET_STRING); |
3475 | 0 | Encode(enc, length); |
3476 | 0 | enc.MessageEnd(); |
3477 | 0 | } |
3478 | | |
3479 | | void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length) |
3480 | 0 | { |
3481 | 0 | BERGeneralDecoder dec(bt, OCTET_STRING); |
3482 | 0 | if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) |
3483 | 0 | BERDecodeError(); |
3484 | 0 | Decode(dec, length); |
3485 | 0 | dec.MessageEnd(); |
3486 | 0 | } |
3487 | | |
3488 | | size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const |
3489 | 0 | { |
3490 | 0 | CRYPTOPP_ASSERT(output && bufferSize); // NULL buffer |
3491 | 0 | CRYPTOPP_ASSERT(bufferSize >= 2+ByteCount()); // Undersized buffer |
3492 | 0 | ArraySink sink(output, bufferSize); |
3493 | 0 | return OpenPGPEncode(sink); |
3494 | 0 | } |
3495 | | |
3496 | | size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const |
3497 | 0 | { |
3498 | 0 | word16 bitCount = word16(BitCount()); |
3499 | 0 | bt.PutWord16(bitCount); |
3500 | 0 | size_t byteCount = BitsToBytes(bitCount); |
3501 | 0 | Encode(bt, byteCount); |
3502 | 0 | return 2 + byteCount; |
3503 | 0 | } |
3504 | | |
3505 | | void Integer::OpenPGPDecode(const byte *input, size_t len) |
3506 | 0 | { |
3507 | 0 | CRYPTOPP_ASSERT(input && len); // NULL buffer |
3508 | 0 | StringStore store(input, len); |
3509 | 0 | OpenPGPDecode(store); |
3510 | 0 | } |
3511 | | |
3512 | | void Integer::OpenPGPDecode(BufferedTransformation &bt) |
3513 | 0 | { |
3514 | 0 | word16 bitCount; |
3515 | 0 | if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) |
3516 | 0 | throw OpenPGPDecodeErr(); |
3517 | 0 | Decode(bt, BitsToBytes(bitCount)); |
3518 | 0 | } |
3519 | | |
3520 | | void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits) |
3521 | 0 | { |
3522 | 0 | const size_t nbytes = nbits/8 + 1; |
3523 | 0 | SecByteBlock buf(nbytes); |
3524 | 0 | rng.GenerateBlock(buf, nbytes); |
3525 | | |
3526 | | // https://github.com/weidai11/cryptopp/issues/1206 |
3527 | | // if (nbytes) |
3528 | | // buf[0] = (byte)Crop(buf[0], nbits % 8); |
3529 | |
|
3530 | 0 | buf[0] = (byte)Crop(buf[0], nbits % 8); |
3531 | 0 | Decode(buf, nbytes, UNSIGNED); |
3532 | 0 | } |
3533 | | |
3534 | | void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) |
3535 | 0 | { |
3536 | 0 | if (min > max) |
3537 | 0 | throw InvalidArgument("Integer: Min must be no greater than Max"); |
3538 | | |
3539 | 0 | Integer range = max - min; |
3540 | 0 | const unsigned int nbits = range.BitCount(); |
3541 | |
|
3542 | 0 | do |
3543 | 0 | { |
3544 | 0 | Randomize(rng, nbits); |
3545 | 0 | } |
3546 | 0 | while (*this > range); |
3547 | |
|
3548 | 0 | *this += min; |
3549 | 0 | } |
3550 | | |
3551 | | bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) |
3552 | 0 | { |
3553 | 0 | return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max) |
3554 | 0 | ("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); |
3555 | 0 | } |
3556 | | |
3557 | | class KDF2_RNG : public RandomNumberGenerator |
3558 | | { |
3559 | | public: |
3560 | | KDF2_RNG(const byte *seed, size_t seedSize) |
3561 | | : m_counter(0), m_counterAndSeed(ClampSize(seedSize) + 4) |
3562 | 0 | { |
3563 | 0 | std::memcpy(m_counterAndSeed + 4, seed, ClampSize(seedSize)); |
3564 | 0 | } |
3565 | | |
3566 | | void GenerateBlock(byte *output, size_t size) |
3567 | 0 | { |
3568 | 0 | CRYPTOPP_ASSERT(output && size); // NULL buffer |
3569 | 0 | PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); |
3570 | 0 | ++m_counter; |
3571 | 0 | P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0); |
3572 | 0 | } |
3573 | | |
3574 | | // UBsan finding, -Wstringop-overflow |
3575 | | inline size_t ClampSize(size_t req) const |
3576 | 0 | { |
3577 | | // Clamp at 16 MB |
3578 | 0 | if (req > 16U*1024*1024) |
3579 | 0 | return 16U*1024*1024; |
3580 | 0 | return req; |
3581 | 0 | } |
3582 | | |
3583 | | private: |
3584 | | word32 m_counter; |
3585 | | SecByteBlock m_counterAndSeed; |
3586 | | }; |
3587 | | |
3588 | | bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms) |
3589 | 0 | { |
3590 | 0 | Integer min = params.GetValueWithDefault("Min", Integer::Zero()); |
3591 | 0 | Integer max; |
3592 | 0 | if (!params.GetValue("Max", max)) |
3593 | 0 | { |
3594 | 0 | int bitLength; |
3595 | 0 | if (params.GetIntValue("BitLength", bitLength)) |
3596 | 0 | max = Integer::Power2(bitLength); |
3597 | 0 | else |
3598 | 0 | throw InvalidArgument("Integer: missing Max argument"); |
3599 | 0 | } |
3600 | 0 | if (min > max) |
3601 | 0 | throw InvalidArgument("Integer: Min must be no greater than Max"); |
3602 | | |
3603 | 0 | Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); |
3604 | 0 | Integer mod = params.GetValueWithDefault("Mod", Integer::One()); |
3605 | |
|
3606 | 0 | if (equiv.IsNegative() || equiv >= mod) |
3607 | 0 | throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument"); |
3608 | | |
3609 | 0 | Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY); |
3610 | |
|
3611 | 0 | member_ptr<KDF2_RNG> kdf2Rng; |
3612 | 0 | ConstByteArrayParameter seed; |
3613 | 0 | if (params.GetValue(Name::Seed(), seed)) |
3614 | 0 | { |
3615 | 0 | ByteQueue bq; |
3616 | 0 | DERSequenceEncoder seq(bq); |
3617 | 0 | min.DEREncode(seq); |
3618 | 0 | max.DEREncode(seq); |
3619 | 0 | equiv.DEREncode(seq); |
3620 | 0 | mod.DEREncode(seq); |
3621 | 0 | DEREncodeUnsigned(seq, rnType); |
3622 | 0 | DEREncodeOctetString(seq, seed.begin(), seed.size()); |
3623 | 0 | seq.MessageEnd(); |
3624 | |
|
3625 | 0 | SecByteBlock finalSeed((size_t)bq.MaxRetrievable()); |
3626 | 0 | bq.Get(finalSeed, finalSeed.size()); |
3627 | 0 | kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); |
3628 | 0 | } |
3629 | 0 | RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng; |
3630 | |
|
3631 | 0 | switch (rnType) |
3632 | 0 | { |
3633 | 0 | case ANY: |
3634 | 0 | if (mod == One()) |
3635 | 0 | Randomize(rng, min, max); |
3636 | 0 | else |
3637 | 0 | { |
3638 | 0 | Integer min1 = min + (equiv-min)%mod; |
3639 | 0 | if (max < min1) |
3640 | 0 | return false; |
3641 | 0 | Randomize(rng, Zero(), (max - min1) / mod); |
3642 | 0 | *this *= mod; |
3643 | 0 | *this += min1; |
3644 | 0 | } |
3645 | 0 | return true; |
3646 | | |
3647 | 0 | case PRIME: |
3648 | 0 | { |
3649 | 0 | const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR); |
3650 | |
|
3651 | 0 | int i; |
3652 | 0 | i = 0; |
3653 | 0 | while (1) |
3654 | 0 | { |
3655 | 0 | if (++i==16) |
3656 | 0 | { |
3657 | | // check if there are any suitable primes in [min, max] |
3658 | 0 | Integer first = min; |
3659 | 0 | if (FirstPrime(first, max, equiv, mod, pSelector)) |
3660 | 0 | { |
3661 | | // if there is only one suitable prime, we're done |
3662 | 0 | *this = first; |
3663 | 0 | if (!FirstPrime(first, max, equiv, mod, pSelector)) |
3664 | 0 | return true; |
3665 | 0 | } |
3666 | 0 | else |
3667 | 0 | return false; |
3668 | 0 | } |
3669 | | |
3670 | 0 | Randomize(rng, min, max); |
3671 | 0 | if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) |
3672 | 0 | return true; |
3673 | 0 | } |
3674 | 0 | } |
3675 | | |
3676 | 0 | default: |
3677 | 0 | throw InvalidArgument("Integer: invalid RandomNumberType argument"); |
3678 | 0 | } |
3679 | 0 | } |
3680 | | |
3681 | | std::istream& operator>>(std::istream& in, Integer &a) |
3682 | 0 | { |
3683 | 0 | char c; |
3684 | 0 | unsigned int length = 0; |
3685 | 0 | SecBlock<char> str(length + 16); |
3686 | |
|
3687 | 0 | std::ws(in); |
3688 | |
|
3689 | 0 | do |
3690 | 0 | { |
3691 | 0 | in.read(&c, 1); |
3692 | 0 | str[length++] = c; |
3693 | 0 | if (length >= str.size()) |
3694 | 0 | str.Grow(length + 16); |
3695 | 0 | } |
3696 | 0 | while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.')); |
3697 | |
|
3698 | 0 | if (in.gcount()) |
3699 | 0 | in.putback(c); |
3700 | 0 | str[length-1] = '\0'; |
3701 | 0 | a = Integer(str); |
3702 | |
|
3703 | 0 | return in; |
3704 | 0 | } |
3705 | | |
3706 | | // Ensure base 10 is default |
3707 | 0 | inline int FlagToBase(long f) { |
3708 | 0 | return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10); |
3709 | 0 | } |
3710 | | |
3711 | 0 | inline char FlagToSuffix(long f) { |
3712 | 0 | return f == std::ios::hex ? 'h' : (f == std::ios::oct ? 'o' : '.'); |
3713 | 0 | } |
3714 | | |
3715 | | // Ensure base 10 is default |
3716 | | std::ostream& operator<<(std::ostream& out, const Integer &a) |
3717 | 0 | { |
3718 | | // Get relevant conversion specifications from ostream. |
3719 | 0 | const long f = out.flags() & std::ios::basefield; |
3720 | 0 | const int base = FlagToBase(f); |
3721 | 0 | const char suffix = FlagToSuffix(f); |
3722 | |
|
3723 | 0 | Integer temp1=a, temp2; |
3724 | 0 | if (a.IsNegative()) |
3725 | 0 | { |
3726 | 0 | out << '-'; |
3727 | 0 | temp1.Negate(); |
3728 | 0 | } |
3729 | |
|
3730 | 0 | if (!a) |
3731 | 0 | out << '0'; |
3732 | |
|
3733 | 0 | static const char upper[]="0123456789ABCDEF"; |
3734 | 0 | static const char lower[]="0123456789abcdef"; |
3735 | |
|
3736 | 0 | const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower; |
3737 | 0 | unsigned int i=0; |
3738 | 0 | SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1); |
3739 | |
|
3740 | 0 | while (!!temp1) |
3741 | 0 | { |
3742 | 0 | word digit; |
3743 | 0 | Integer::Divide(digit, temp2, temp1, base); |
3744 | 0 | s[i++]=vec[digit]; |
3745 | 0 | temp1.swap(temp2); |
3746 | 0 | } |
3747 | |
|
3748 | 0 | while (i--) |
3749 | 0 | { |
3750 | 0 | out << s[i]; |
3751 | 0 | } |
3752 | |
|
3753 | | #ifdef CRYPTOPP_USE_STD_SHOWBASE |
3754 | | if (out.flags() & std::ios_base::showbase) |
3755 | | out << suffix; |
3756 | | |
3757 | | return out; |
3758 | | #else |
3759 | 0 | return out << suffix; |
3760 | 0 | #endif |
3761 | 0 | } |
3762 | | |
3763 | | Integer& Integer::operator++() |
3764 | 24.9k | { |
3765 | 24.9k | if (NotNegative()) |
3766 | 24.9k | { |
3767 | 24.9k | if (Increment(reg, reg.size())) |
3768 | 0 | { |
3769 | 0 | reg.CleanGrow(2*reg.size()); |
3770 | 0 | reg[reg.size()/2]=1; |
3771 | 0 | } |
3772 | 24.9k | } |
3773 | 0 | else |
3774 | 0 | { |
3775 | 0 | word borrow = Decrement(reg, reg.size()); |
3776 | 0 | CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow); |
3777 | |
|
3778 | 0 | if (WordCount()==0) |
3779 | 0 | *this = Zero(); |
3780 | 0 | } |
3781 | 24.9k | return *this; |
3782 | 24.9k | } |
3783 | | |
3784 | | Integer& Integer::operator--() |
3785 | 0 | { |
3786 | 0 | if (IsNegative()) |
3787 | 0 | { |
3788 | 0 | if (Increment(reg, reg.size())) |
3789 | 0 | { |
3790 | 0 | reg.CleanGrow(2*reg.size()); |
3791 | 0 | reg[reg.size()/2]=1; |
3792 | 0 | } |
3793 | 0 | } |
3794 | 0 | else |
3795 | 0 | { |
3796 | 0 | if (Decrement(reg, reg.size())) |
3797 | 0 | *this = -One(); |
3798 | 0 | } |
3799 | 0 | return *this; |
3800 | 0 | } |
3801 | | |
3802 | | // This is a bit operation. We set sign to POSITIVE, so there's no need to |
3803 | | // worry about negative zero. Also see http://stackoverflow.com/q/11644362. |
3804 | | Integer Integer::And(const Integer& t) const |
3805 | 99 | { |
3806 | | // Grow due to https://github.com/weidai11/cryptopp/issues/1072 |
3807 | | // The temporary Integer 'result' may have fewer blocks than |
3808 | | // 'this' or 't', if leading 0-blocks are trimmed in copy ctor. |
3809 | | |
3810 | 99 | if (this == &t) |
3811 | 0 | { |
3812 | 0 | return AbsoluteValue(); |
3813 | 0 | } |
3814 | 99 | else if (reg.size() >= t.reg.size()) |
3815 | 58 | { |
3816 | 58 | IntegerSecBlock temp(t.reg.size()); |
3817 | | // AndWords(temp, reg, t.reg, t.reg.size()); |
3818 | 940 | for (size_t i=0; i<t.reg.size(); ++i) |
3819 | 882 | temp[i] = reg[i] & t.reg[i]; |
3820 | | |
3821 | 58 | Integer result; |
3822 | 58 | std::swap(result.reg, temp); |
3823 | | |
3824 | 58 | return result; |
3825 | 58 | } |
3826 | 41 | else // reg.size() < t.reg.size() |
3827 | 41 | { |
3828 | 41 | IntegerSecBlock temp(reg.size()); |
3829 | | // AndWords(temp, reg, t.reg, reg.size()); |
3830 | 397 | for (size_t i=0; i<reg.size(); ++i) |
3831 | 356 | temp[i] = reg[i] & t.reg[i]; |
3832 | | |
3833 | 41 | Integer result; |
3834 | 41 | std::swap(result.reg, temp); |
3835 | | |
3836 | 41 | return result; |
3837 | 41 | } |
3838 | 99 | } |
3839 | | |
3840 | | // This is a bit operation. We set sign to POSITIVE, so there's no need to |
3841 | | // worry about negative zero. Also see http://stackoverflow.com/q/11644362. |
3842 | | Integer Integer::Or(const Integer& t) const |
3843 | 104 | { |
3844 | | // Grow due to https://github.com/weidai11/cryptopp/issues/1072 |
3845 | | // The temporary Integer 'result' may have fewer blocks than |
3846 | | // 'this' or 't', if leading 0-blocks are trimmed in copy ctor. |
3847 | | |
3848 | 104 | if (this == &t) |
3849 | 0 | { |
3850 | 0 | return AbsoluteValue(); |
3851 | 0 | } |
3852 | 104 | else if (reg.size() >= t.reg.size()) |
3853 | 59 | { |
3854 | 59 | IntegerSecBlock temp(reg, reg.size()); |
3855 | | // OrWords(temp, t.reg, t.reg.size()); |
3856 | 969 | for (size_t i=0; i<t.reg.size(); ++i) |
3857 | 910 | temp[i] |= t.reg[i]; |
3858 | | |
3859 | 59 | Integer result; |
3860 | 59 | std::swap(result.reg, temp); |
3861 | | |
3862 | 59 | return result; |
3863 | 59 | } |
3864 | 45 | else // reg.size() < t.reg.size() |
3865 | 45 | { |
3866 | 45 | IntegerSecBlock temp(t.reg, t.reg.size()); |
3867 | | // OrWords(temp, reg, reg.size()); |
3868 | 291 | for (size_t i=0; i<reg.size(); ++i) |
3869 | 246 | temp[i] |= reg[i]; |
3870 | | |
3871 | 45 | Integer result; |
3872 | 45 | std::swap(result.reg, temp); |
3873 | | |
3874 | 45 | return result; |
3875 | 45 | } |
3876 | 104 | } |
3877 | | |
3878 | | // This is a bit operation. We set sign to POSITIVE, so there's no need to |
3879 | | // worry about negative zero. Also see http://stackoverflow.com/q/11644362. |
3880 | | Integer Integer::Xor(const Integer& t) const |
3881 | 122 | { |
3882 | | // Grow due to https://github.com/weidai11/cryptopp/issues/1072 |
3883 | | // The temporary Integer 'result' may have fewer blocks than |
3884 | | // 'this' or 't', if leading 0-blocks are trimmed in copy ctor. |
3885 | | |
3886 | 122 | if (this == &t) |
3887 | 0 | { |
3888 | 0 | return Integer::Zero(); |
3889 | 0 | } |
3890 | 122 | else if (reg.size() >= t.reg.size()) |
3891 | 82 | { |
3892 | 82 | IntegerSecBlock temp(reg, reg.size()); |
3893 | | // OrWords(temp, t.reg, t.reg.size()); |
3894 | 2.31k | for (size_t i=0; i<t.reg.size(); ++i) |
3895 | 2.23k | temp[i] ^= t.reg[i]; |
3896 | | |
3897 | 82 | Integer result; |
3898 | 82 | std::swap(result.reg, temp); |
3899 | | |
3900 | 82 | return result; |
3901 | 82 | } |
3902 | 40 | else // reg.size() < t.reg.size() |
3903 | 40 | { |
3904 | 40 | IntegerSecBlock temp(t.reg, t.reg.size()); |
3905 | | // OrWords(temp, reg, reg.size()); |
3906 | 370 | for (size_t i=0; i<reg.size(); ++i) |
3907 | 330 | temp[i] ^= reg[i]; |
3908 | | |
3909 | 40 | Integer result; |
3910 | 40 | std::swap(result.reg, temp); |
3911 | | |
3912 | 40 | return result; |
3913 | 40 | } |
3914 | 122 | } |
3915 | | |
3916 | | void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) |
3917 | 18.0M | { |
3918 | | // Profiling guided the flow below. |
3919 | 18.0M | int carry; const bool pre = (a.reg.size() == b.reg.size()); |
3920 | 18.0M | if (!pre && a.reg.size() > b.reg.size()) |
3921 | 11.9M | { |
3922 | 11.9M | carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); |
3923 | 11.9M | CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); |
3924 | 11.9M | carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); |
3925 | 11.9M | } |
3926 | 6.10M | else if (pre) |
3927 | 6.10M | { |
3928 | 6.10M | carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); |
3929 | 6.10M | } |
3930 | 585 | else |
3931 | 585 | { |
3932 | 585 | carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); |
3933 | 585 | CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); |
3934 | 585 | carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); |
3935 | 585 | } |
3936 | | |
3937 | 18.0M | if (carry) |
3938 | 2.24M | { |
3939 | 2.24M | sum.reg.CleanGrow(2*sum.reg.size()); |
3940 | 2.24M | sum.reg[sum.reg.size()/2] = 1; |
3941 | 2.24M | } |
3942 | 18.0M | sum.sign = Integer::POSITIVE; |
3943 | 18.0M | } |
3944 | | |
3945 | | void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) |
3946 | 55.2k | { |
3947 | 55.2k | unsigned aSize = a.WordCount(); |
3948 | 55.2k | aSize += aSize%2; |
3949 | 55.2k | unsigned bSize = b.WordCount(); |
3950 | 55.2k | bSize += bSize%2; |
3951 | | |
3952 | | // Profiling guided the flow below. |
3953 | 55.2k | if (aSize > bSize) |
3954 | 44.5k | { |
3955 | 44.5k | word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); |
3956 | 44.5k | CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); |
3957 | 44.5k | borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); |
3958 | 44.5k | CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow); |
3959 | 44.5k | diff.sign = Integer::POSITIVE; |
3960 | 44.5k | } |
3961 | 10.6k | else if (aSize == bSize) |
3962 | 10.5k | { |
3963 | 10.5k | if (Compare(a.reg, b.reg, aSize) >= 0) |
3964 | 10.4k | { |
3965 | 10.4k | Subtract(diff.reg, a.reg, b.reg, aSize); |
3966 | 10.4k | diff.sign = Integer::POSITIVE; |
3967 | 10.4k | } |
3968 | 47 | else |
3969 | 47 | { |
3970 | 47 | Subtract(diff.reg, b.reg, a.reg, aSize); |
3971 | 47 | diff.sign = Integer::NEGATIVE; |
3972 | 47 | } |
3973 | 10.5k | } |
3974 | 132 | else |
3975 | 132 | { |
3976 | 132 | word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); |
3977 | 132 | CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); |
3978 | 132 | borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); |
3979 | 132 | CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow); |
3980 | 132 | diff.sign = Integer::NEGATIVE; |
3981 | 132 | } |
3982 | 55.2k | } |
3983 | | |
3984 | | // MSVC .NET 2003 workaround |
3985 | | template <class T> inline const T& STDMAX2(const T& a, const T& b) |
3986 | 4.85M | { |
3987 | 4.85M | return a < b ? b : a; |
3988 | 4.85M | } |
3989 | | |
3990 | | Integer Integer::Plus(const Integer& b) const |
3991 | 4.80M | { |
3992 | 4.80M | Integer sum((word)0, STDMAX2(reg.size(), b.reg.size())); |
3993 | 4.80M | if (NotNegative()) |
3994 | 4.80M | { |
3995 | 4.80M | if (b.NotNegative()) |
3996 | 4.80M | PositiveAdd(sum, *this, b); |
3997 | 0 | else |
3998 | 0 | PositiveSubtract(sum, *this, b); |
3999 | 4.80M | } |
4000 | 0 | else |
4001 | 0 | { |
4002 | 0 | if (b.NotNegative()) |
4003 | 0 | PositiveSubtract(sum, b, *this); |
4004 | 0 | else |
4005 | 0 | { |
4006 | 0 | PositiveAdd(sum, *this, b); |
4007 | 0 | sum.sign = Integer::NEGATIVE; |
4008 | 0 | } |
4009 | 0 | } |
4010 | 4.80M | return sum; |
4011 | 4.80M | } |
4012 | | |
4013 | | Integer& Integer::operator+=(const Integer& t) |
4014 | 13.2M | { |
4015 | 13.2M | reg.CleanGrow(t.reg.size()); |
4016 | 13.2M | if (NotNegative()) |
4017 | 13.2M | { |
4018 | 13.2M | if (t.NotNegative()) |
4019 | 13.2M | PositiveAdd(*this, *this, t); |
4020 | 0 | else |
4021 | 0 | PositiveSubtract(*this, *this, t); |
4022 | 13.2M | } |
4023 | 0 | else |
4024 | 0 | { |
4025 | 0 | if (t.NotNegative()) |
4026 | 0 | PositiveSubtract(*this, t, *this); |
4027 | 0 | else |
4028 | 0 | { |
4029 | 0 | PositiveAdd(*this, *this, t); |
4030 | 0 | sign = Integer::NEGATIVE; |
4031 | 0 | } |
4032 | 0 | } |
4033 | 13.2M | return *this; |
4034 | 13.2M | } |
4035 | | |
4036 | | Integer Integer::Minus(const Integer& b) const |
4037 | 55.2k | { |
4038 | 55.2k | Integer diff((word)0, STDMAX2(reg.size(), b.reg.size())); |
4039 | 55.2k | if (NotNegative()) |
4040 | 55.2k | { |
4041 | 55.2k | if (b.NotNegative()) |
4042 | 55.2k | PositiveSubtract(diff, *this, b); |
4043 | 0 | else |
4044 | 0 | PositiveAdd(diff, *this, b); |
4045 | 55.2k | } |
4046 | 0 | else |
4047 | 0 | { |
4048 | 0 | if (b.NotNegative()) |
4049 | 0 | { |
4050 | 0 | PositiveAdd(diff, *this, b); |
4051 | 0 | diff.sign = Integer::NEGATIVE; |
4052 | 0 | } |
4053 | 0 | else |
4054 | 0 | PositiveSubtract(diff, b, *this); |
4055 | 0 | } |
4056 | 55.2k | return diff; |
4057 | 55.2k | } |
4058 | | |
4059 | | Integer& Integer::operator-=(const Integer& t) |
4060 | 0 | { |
4061 | 0 | reg.CleanGrow(t.reg.size()); |
4062 | 0 | if (NotNegative()) |
4063 | 0 | { |
4064 | 0 | if (t.NotNegative()) |
4065 | 0 | PositiveSubtract(*this, *this, t); |
4066 | 0 | else |
4067 | 0 | PositiveAdd(*this, *this, t); |
4068 | 0 | } |
4069 | 0 | else |
4070 | 0 | { |
4071 | 0 | if (t.NotNegative()) |
4072 | 0 | { |
4073 | 0 | PositiveAdd(*this, *this, t); |
4074 | 0 | sign = Integer::NEGATIVE; |
4075 | 0 | } |
4076 | 0 | else |
4077 | 0 | PositiveSubtract(*this, t, *this); |
4078 | 0 | } |
4079 | 0 | return *this; |
4080 | 0 | } |
4081 | | |
4082 | | Integer& Integer::operator<<=(size_t n) |
4083 | 114k | { |
4084 | 114k | const size_t wordCount = WordCount(); |
4085 | 114k | const size_t shiftWords = n / WORD_BITS; |
4086 | 114k | const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); |
4087 | | |
4088 | 114k | reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); |
4089 | 114k | ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); |
4090 | 114k | ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); |
4091 | 114k | return *this; |
4092 | 114k | } |
4093 | | |
4094 | | Integer& Integer::operator>>=(size_t n) |
4095 | 784k | { |
4096 | 784k | const size_t wordCount = WordCount(); |
4097 | 784k | const size_t shiftWords = n / WORD_BITS; |
4098 | 784k | const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); |
4099 | | |
4100 | 784k | ShiftWordsRightByWords(reg, wordCount, shiftWords); |
4101 | 784k | if (wordCount > shiftWords) |
4102 | 784k | ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); |
4103 | 784k | if (IsNegative() && WordCount()==0) // avoid -0 |
4104 | 0 | *this = Zero(); |
4105 | 784k | return *this; |
4106 | 784k | } |
4107 | | |
4108 | | Integer& Integer::operator&=(const Integer& t) |
4109 | 0 | { |
4110 | 0 | if (this != &t) |
4111 | 0 | { |
4112 | 0 | const size_t size = STDMIN(reg.size(), t.reg.size()); |
4113 | 0 | reg.resize(size); |
4114 | 0 | AndWords(reg, t.reg, size); |
4115 | 0 | } |
4116 | 0 | sign = POSITIVE; |
4117 | 0 | return *this; |
4118 | 0 | } |
4119 | | |
4120 | | Integer& Integer::operator|=(const Integer& t) |
4121 | 0 | { |
4122 | 0 | if (this != &t) |
4123 | 0 | { |
4124 | 0 | if (reg.size() >= t.reg.size()) |
4125 | 0 | { |
4126 | 0 | OrWords(reg, t.reg, t.reg.size()); |
4127 | 0 | } |
4128 | 0 | else // reg.size() < t.reg.size() |
4129 | 0 | { |
4130 | 0 | const size_t head = reg.size(); |
4131 | 0 | const size_t tail = t.reg.size() - reg.size(); |
4132 | 0 | reg.resize(head+tail); |
4133 | 0 | OrWords(reg, t.reg, head); |
4134 | 0 | CopyWords(reg+head,t.reg+head,tail); |
4135 | 0 | } |
4136 | 0 | } |
4137 | 0 | sign = POSITIVE; |
4138 | 0 | return *this; |
4139 | 0 | } |
4140 | | |
4141 | | Integer& Integer::operator^=(const Integer& t) |
4142 | 0 | { |
4143 | 0 | if (this == &t) |
4144 | 0 | { |
4145 | 0 | *this = Zero(); |
4146 | 0 | } |
4147 | 0 | else |
4148 | 0 | { |
4149 | 0 | if (reg.size() >= t.reg.size()) |
4150 | 0 | { |
4151 | 0 | XorWords(reg, t.reg, t.reg.size()); |
4152 | 0 | } |
4153 | 0 | else // reg.size() < t.reg.size() |
4154 | 0 | { |
4155 | 0 | const size_t head = reg.size(); |
4156 | 0 | const size_t tail = t.reg.size() - reg.size(); |
4157 | 0 | reg.resize(head+tail); |
4158 | 0 | XorWords(reg, t.reg, head); |
4159 | 0 | CopyWords(reg+head,t.reg+head,tail); |
4160 | 0 | } |
4161 | 0 | } |
4162 | 0 | sign = POSITIVE; |
4163 | 0 | return *this; |
4164 | 0 | } |
4165 | | |
4166 | | void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) |
4167 | 14.5M | { |
4168 | 14.5M | size_t aSize = RoundupSize(a.WordCount()); |
4169 | 14.5M | size_t bSize = RoundupSize(b.WordCount()); |
4170 | | |
4171 | 14.5M | product.reg.CleanNew(RoundupSize(aSize+bSize)); |
4172 | 14.5M | product.sign = Integer::POSITIVE; |
4173 | | |
4174 | 14.5M | IntegerSecBlock workspace(aSize + bSize); |
4175 | 14.5M | AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); |
4176 | 14.5M | } |
4177 | | |
4178 | | void Multiply(Integer &product, const Integer &a, const Integer &b) |
4179 | 14.5M | { |
4180 | 14.5M | PositiveMultiply(product, a, b); |
4181 | | |
4182 | 14.5M | if (a.NotNegative() != b.NotNegative()) |
4183 | 0 | product.Negate(); |
4184 | 14.5M | } |
4185 | | |
4186 | | Integer Integer::Times(const Integer &b) const |
4187 | 14.5M | { |
4188 | 14.5M | Integer product; |
4189 | 14.5M | Multiply(product, *this, b); |
4190 | 14.5M | return product; |
4191 | 14.5M | } |
4192 | | |
4193 | | /* |
4194 | | void PositiveDivide(Integer &remainder, Integer "ient, |
4195 | | const Integer ÷nd, const Integer &divisor) |
4196 | | { |
4197 | | remainder.reg.CleanNew(divisor.reg.size()); |
4198 | | remainder.sign = Integer::POSITIVE; |
4199 | | quotient.reg.New(0); |
4200 | | quotient.sign = Integer::POSITIVE; |
4201 | | unsigned i=dividend.BitCount(); |
4202 | | while (i--) |
4203 | | { |
4204 | | word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1); |
4205 | | remainder.reg[0] |= dividend[i]; |
4206 | | if (overflow || remainder >= divisor) |
4207 | | { |
4208 | | Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size()); |
4209 | | quotient.SetBit(i); |
4210 | | } |
4211 | | } |
4212 | | } |
4213 | | */ |
4214 | | |
4215 | | void PositiveDivide(Integer &remainder, Integer "ient, |
4216 | | const Integer &a, const Integer &b) |
4217 | 2.29M | { |
4218 | 2.29M | unsigned aSize = a.WordCount(); |
4219 | 2.29M | unsigned bSize = b.WordCount(); |
4220 | | |
4221 | 2.29M | if (!bSize) |
4222 | 89 | throw Integer::DivideByZero(); |
4223 | | |
4224 | 2.29M | if (aSize < bSize) |
4225 | 44.8k | { |
4226 | 44.8k | remainder = a; |
4227 | 44.8k | remainder.sign = Integer::POSITIVE; |
4228 | 44.8k | quotient = Integer::Zero(); |
4229 | 44.8k | return; |
4230 | 44.8k | } |
4231 | | |
4232 | 2.24M | aSize += aSize%2; // round up to next even number |
4233 | 2.24M | bSize += bSize%2; |
4234 | | |
4235 | 2.24M | remainder.reg.CleanNew(RoundupSize(bSize)); |
4236 | 2.24M | remainder.sign = Integer::POSITIVE; |
4237 | 2.24M | quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); |
4238 | 2.24M | quotient.sign = Integer::POSITIVE; |
4239 | | |
4240 | 2.24M | IntegerSecBlock T(aSize+3*(bSize+2)); |
4241 | 2.24M | Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); |
4242 | 2.24M | } |
4243 | | |
4244 | | void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) |
4245 | 2.29M | { |
4246 | 2.29M | PositiveDivide(remainder, quotient, dividend, divisor); |
4247 | | |
4248 | 2.29M | if (dividend.IsNegative()) |
4249 | 0 | { |
4250 | 0 | quotient.Negate(); |
4251 | 0 | if (remainder.NotZero()) |
4252 | 0 | { |
4253 | 0 | --quotient; |
4254 | 0 | remainder = divisor.AbsoluteValue() - remainder; |
4255 | 0 | } |
4256 | 0 | } |
4257 | | |
4258 | 2.29M | if (divisor.IsNegative()) |
4259 | 0 | quotient.Negate(); |
4260 | 2.29M | } |
4261 | | |
4262 | | void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) |
4263 | 0 | { |
4264 | 0 | q = a; |
4265 | 0 | q >>= n; |
4266 | |
|
4267 | 0 | const size_t wordCount = BitsToWords(n); |
4268 | 0 | if (wordCount <= a.WordCount()) |
4269 | 0 | { |
4270 | 0 | r.reg.resize(RoundupSize(wordCount)); |
4271 | 0 | CopyWords(r.reg, a.reg, wordCount); |
4272 | 0 | SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); |
4273 | 0 | if (n % WORD_BITS != 0) |
4274 | 0 | r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS)); |
4275 | 0 | } |
4276 | 0 | else |
4277 | 0 | { |
4278 | 0 | r.reg.resize(RoundupSize(a.WordCount())); |
4279 | 0 | CopyWords(r.reg, a.reg, r.reg.size()); |
4280 | 0 | } |
4281 | 0 | r.sign = POSITIVE; |
4282 | |
|
4283 | 0 | if (a.IsNegative() && r.NotZero()) |
4284 | 0 | { |
4285 | 0 | --q; |
4286 | 0 | r = Power2(n) - r; |
4287 | 0 | } |
4288 | 0 | } |
4289 | | |
4290 | | Integer Integer::DividedBy(const Integer &b) const |
4291 | 2.91k | { |
4292 | 2.91k | Integer remainder, quotient; |
4293 | 2.91k | Integer::Divide(remainder, quotient, *this, b); |
4294 | 2.91k | return quotient; |
4295 | 2.91k | } |
4296 | | |
4297 | | Integer Integer::Modulo(const Integer &b) const |
4298 | 2.29M | { |
4299 | 2.29M | Integer remainder, quotient; |
4300 | 2.29M | Integer::Divide(remainder, quotient, *this, b); |
4301 | 2.29M | return remainder; |
4302 | 2.29M | } |
4303 | | |
4304 | | void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor) |
4305 | 3.19M | { |
4306 | 3.19M | if (!divisor) |
4307 | 0 | throw Integer::DivideByZero(); |
4308 | | |
4309 | | // IsPowerOf2 uses BMI on x86 if available. There is a small |
4310 | | // but measurable improvement during decryption and signing. |
4311 | 3.19M | if (IsPowerOf2(divisor)) |
4312 | 0 | { |
4313 | 0 | quotient = dividend >> (BitPrecision(divisor)-1); |
4314 | 0 | remainder = dividend.reg[0] & (divisor-1); |
4315 | 0 | return; |
4316 | 0 | } |
4317 | | |
4318 | 3.19M | unsigned int i = dividend.WordCount(); |
4319 | 3.19M | quotient.reg.CleanNew(RoundupSize(i)); |
4320 | 3.19M | remainder = 0; |
4321 | 144M | while (i--) |
4322 | 141M | { |
4323 | 141M | quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; |
4324 | 141M | remainder = DWord(dividend.reg[i], remainder) % divisor; |
4325 | 141M | } |
4326 | | |
4327 | 3.19M | if (dividend.NotNegative()) |
4328 | 3.19M | quotient.sign = POSITIVE; |
4329 | 0 | else |
4330 | 0 | { |
4331 | 0 | quotient.sign = NEGATIVE; |
4332 | 0 | if (remainder) |
4333 | 0 | { |
4334 | 0 | --quotient; |
4335 | 0 | remainder = divisor - remainder; |
4336 | 0 | } |
4337 | 0 | } |
4338 | 3.19M | } |
4339 | | |
4340 | | Integer Integer::DividedBy(word b) const |
4341 | 0 | { |
4342 | 0 | word remainder; |
4343 | 0 | Integer quotient; |
4344 | 0 | Integer::Divide(remainder, quotient, *this, b); |
4345 | 0 | return quotient; |
4346 | 0 | } |
4347 | | |
4348 | | word Integer::Modulo(word divisor) const |
4349 | 1.37M | { |
4350 | 1.37M | if (!divisor) |
4351 | 0 | throw Integer::DivideByZero(); |
4352 | | |
4353 | 1.37M | word remainder; |
4354 | | |
4355 | | // Profiling guided the flow below. |
4356 | 1.37M | if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2 |
4357 | 0 | { |
4358 | | // Profiling guided the flow below. |
4359 | 0 | unsigned int i = WordCount(); |
4360 | 0 | if (divisor > 5) |
4361 | 0 | { |
4362 | 0 | remainder = 0; |
4363 | 0 | while (i--) |
4364 | 0 | remainder = DWord(reg[i], remainder) % divisor; |
4365 | 0 | } |
4366 | 0 | else |
4367 | 0 | { |
4368 | 0 | DWord sum(0, 0); |
4369 | 0 | while (i--) |
4370 | 0 | sum += reg[i]; |
4371 | 0 | remainder = sum % divisor; |
4372 | 0 | } |
4373 | 0 | } |
4374 | 1.37M | else // divisor is a power of 2 |
4375 | 1.37M | { |
4376 | 1.37M | remainder = reg[0] & (divisor-1); |
4377 | 1.37M | } |
4378 | | |
4379 | 1.37M | if (IsNegative() && remainder) |
4380 | 0 | remainder = divisor - remainder; |
4381 | | |
4382 | 1.37M | return remainder; |
4383 | 1.37M | } |
4384 | | |
4385 | | void Integer::Negate() |
4386 | 651 | { |
4387 | 651 | if (!!(*this)) // don't flip sign if *this==0 |
4388 | 622 | sign = Sign(1-sign); |
4389 | 651 | } |
4390 | | |
4391 | | int Integer::PositiveCompare(const Integer& t) const |
4392 | 474k | { |
4393 | | // Profiling guided the flow below. |
4394 | 474k | const unsigned size = WordCount(), tSize = t.WordCount(); |
4395 | 474k | if (size != tSize) |
4396 | 468k | return size > tSize ? 1 : -1; |
4397 | 6.00k | else |
4398 | 6.00k | return CryptoPP::Compare(reg, t.reg, size); |
4399 | 474k | } |
4400 | | |
4401 | | int Integer::Compare(const Integer& t) const |
4402 | 475k | { |
4403 | 475k | if (NotNegative()) |
4404 | 474k | { |
4405 | 474k | if (t.NotNegative()) |
4406 | 474k | return PositiveCompare(t); |
4407 | 0 | else |
4408 | 0 | return 1; |
4409 | 474k | } |
4410 | 536 | else |
4411 | 536 | { |
4412 | 536 | if (t.NotNegative()) |
4413 | 536 | return -1; |
4414 | 0 | else |
4415 | 0 | return -PositiveCompare(t); |
4416 | 536 | } |
4417 | 475k | } |
4418 | | |
4419 | | Integer Integer::SquareRoot() const |
4420 | 229 | { |
4421 | 229 | if (!IsPositive()) |
4422 | 9 | return Zero(); |
4423 | | |
4424 | | // overestimate square root |
4425 | 220 | Integer x, y = Power2((BitCount()+1)/2); |
4426 | 220 | CRYPTOPP_ASSERT(y*y >= *this); |
4427 | | |
4428 | 220 | do |
4429 | 1.80k | { |
4430 | 1.80k | x = y; |
4431 | 1.80k | y = (x + *this/x) >> 1; |
4432 | 1.80k | } while (y<x); |
4433 | | |
4434 | 220 | return x; |
4435 | 229 | } |
4436 | | |
4437 | | bool Integer::IsSquare() const |
4438 | 41 | { |
4439 | 41 | Integer r = SquareRoot(); |
4440 | 41 | return *this == r.Squared(); |
4441 | 41 | } |
4442 | | |
4443 | | bool Integer::IsUnit() const |
4444 | 0 | { |
4445 | 0 | return (WordCount() == 1) && (reg[0] == 1); |
4446 | 0 | } |
4447 | | |
4448 | | Integer Integer::MultiplicativeInverse() const |
4449 | 0 | { |
4450 | 0 | return IsUnit() ? *this : Zero(); |
4451 | 0 | } |
4452 | | |
4453 | | Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) |
4454 | 128 | { |
4455 | 128 | CRYPTOPP_ASSERT(m.NotZero()); |
4456 | 128 | if (m.IsZero()) |
4457 | 0 | throw Integer::DivideByZero(); |
4458 | | |
4459 | 128 | return x*y%m; |
4460 | 128 | } |
4461 | | |
4462 | | Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) |
4463 | 3.69k | { |
4464 | 3.69k | CRYPTOPP_ASSERT(m.NotZero()); |
4465 | 3.69k | if (m.IsZero()) |
4466 | 0 | throw Integer::DivideByZero(); |
4467 | | |
4468 | 3.69k | ModularArithmetic mr(m); |
4469 | 3.69k | return mr.Exponentiate(x, e); |
4470 | 3.69k | } |
4471 | | |
4472 | | Integer Integer::Gcd(const Integer &a, const Integer &b) |
4473 | 765 | { |
4474 | 765 | return EuclideanDomainOf<Integer>().Gcd(a, b); |
4475 | 765 | } |
4476 | | |
4477 | | Integer Integer::InverseMod(const Integer &m) const |
4478 | 1.50k | { |
4479 | 1.50k | CRYPTOPP_ASSERT(m.NotNegative()); |
4480 | 1.50k | CRYPTOPP_ASSERT(m.NotZero()); |
4481 | | |
4482 | 1.50k | if (IsNegative()) |
4483 | 0 | return Modulo(m).InverseModNext(m); |
4484 | | |
4485 | | // http://github.com/weidai11/cryptopp/issues/602 |
4486 | 1.50k | if (*this >= m) |
4487 | 0 | return Modulo(m).InverseModNext(m); |
4488 | | |
4489 | 1.50k | return InverseModNext(m); |
4490 | 1.50k | } |
4491 | | |
4492 | | Integer Integer::InverseModNext(const Integer &m) const |
4493 | 2.20k | { |
4494 | 2.20k | CRYPTOPP_ASSERT(m.NotNegative()); |
4495 | 2.20k | CRYPTOPP_ASSERT(m.NotZero()); |
4496 | | |
4497 | 2.20k | if (m.IsEven()) |
4498 | 942 | { |
4499 | 942 | if (!m || IsEven()) |
4500 | 221 | return Zero(); // no inverse |
4501 | 721 | if (*this == One()) |
4502 | 25 | return One(); |
4503 | | |
4504 | 696 | Integer u = m.Modulo(*this).InverseModNext(*this); |
4505 | 696 | return !u ? Zero() : (m*(*this-u)+1)/(*this); |
4506 | 721 | } |
4507 | | |
4508 | | // AlmostInverse requires a 4x workspace |
4509 | 1.25k | IntegerSecBlock T(m.reg.size() * 4); |
4510 | 1.25k | Integer r((word)0, m.reg.size()); |
4511 | 1.25k | unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); |
4512 | 1.25k | DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); |
4513 | 1.25k | return r; |
4514 | 2.20k | } |
4515 | | |
4516 | | word Integer::InverseMod(word mod) const |
4517 | 0 | { |
4518 | 0 | CRYPTOPP_ASSERT(mod != 0); |
4519 | |
|
4520 | 0 | word g0 = mod, g1 = *this % mod; |
4521 | 0 | word v0 = 0, v1 = 1; |
4522 | 0 | word y; |
4523 | |
|
4524 | 0 | while (g1) |
4525 | 0 | { |
4526 | 0 | if (g1 == 1) |
4527 | 0 | return v1; |
4528 | 0 | y = g0 / g1; |
4529 | 0 | g0 = g0 % g1; |
4530 | 0 | v0 += y * v1; |
4531 | |
|
4532 | 0 | if (!g0) |
4533 | 0 | break; |
4534 | 0 | if (g0 == 1) |
4535 | 0 | return mod-v0; |
4536 | 0 | y = g1 / g0; |
4537 | 0 | g1 = g1 % g0; |
4538 | 0 | v1 += y * v0; |
4539 | 0 | } |
4540 | 0 | return 0; |
4541 | 0 | } |
4542 | | |
4543 | | // ******************************************************** |
4544 | | |
4545 | | ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) |
4546 | 0 | { |
4547 | 0 | BERSequenceDecoder seq(bt); |
4548 | 0 | OID oid(seq); |
4549 | 0 | if (oid != ASN1::prime_field()) |
4550 | 0 | BERDecodeError(); |
4551 | 0 | m_modulus.BERDecode(seq); |
4552 | 0 | seq.MessageEnd(); |
4553 | 0 | m_result.reg.resize(m_modulus.reg.size()); |
4554 | 0 | } |
4555 | | |
4556 | | void ModularArithmetic::DEREncode(BufferedTransformation &bt) const |
4557 | 0 | { |
4558 | 0 | DERSequenceEncoder seq(bt); |
4559 | 0 | ASN1::prime_field().DEREncode(seq); |
4560 | 0 | m_modulus.DEREncode(seq); |
4561 | 0 | seq.MessageEnd(); |
4562 | 0 | } |
4563 | | |
4564 | | void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const |
4565 | 0 | { |
4566 | 0 | a.DEREncodeAsOctetString(out, MaxElementByteLength()); |
4567 | 0 | } |
4568 | | |
4569 | | void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const |
4570 | 0 | { |
4571 | 0 | a.BERDecodeAsOctetString(in, MaxElementByteLength()); |
4572 | 0 | } |
4573 | | |
4574 | | const Integer& ModularArithmetic::Half(const Integer &a) const |
4575 | 0 | { |
4576 | 0 | if (a.reg.size()==m_modulus.reg.size()) |
4577 | 0 | { |
4578 | 0 | CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size()); |
4579 | 0 | return m_result; |
4580 | 0 | } |
4581 | 0 | else |
4582 | 0 | return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1)); |
4583 | 0 | } |
4584 | | |
4585 | | const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const |
4586 | 0 | { |
4587 | 0 | if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
4588 | 0 | { |
4589 | 0 | if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size()) |
4590 | 0 | || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0) |
4591 | 0 | { |
4592 | 0 | CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); |
4593 | 0 | } |
4594 | 0 | return m_result; |
4595 | 0 | } |
4596 | 0 | else |
4597 | 0 | { |
4598 | 0 | m_result1 = a+b; |
4599 | 0 | if (m_result1 >= m_modulus) |
4600 | 0 | m_result1 -= m_modulus; |
4601 | 0 | return m_result1; |
4602 | 0 | } |
4603 | 0 | } |
4604 | | |
4605 | | Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const |
4606 | 0 | { |
4607 | 0 | if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
4608 | 0 | { |
4609 | 0 | if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) |
4610 | 0 | || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0) |
4611 | 0 | { |
4612 | 0 | CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size()); |
4613 | 0 | } |
4614 | 0 | } |
4615 | 0 | else |
4616 | 0 | { |
4617 | 0 | a+=b; |
4618 | 0 | if (a>=m_modulus) |
4619 | 0 | a-=m_modulus; |
4620 | 0 | } |
4621 | |
|
4622 | 0 | return a; |
4623 | 0 | } |
4624 | | |
4625 | | const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const |
4626 | 0 | { |
4627 | 0 | if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
4628 | 0 | { |
4629 | 0 | if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size())) |
4630 | 0 | CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); |
4631 | 0 | return m_result; |
4632 | 0 | } |
4633 | 0 | else |
4634 | 0 | { |
4635 | 0 | m_result1 = a-b; |
4636 | 0 | if (m_result1.IsNegative()) |
4637 | 0 | m_result1 += m_modulus; |
4638 | 0 | return m_result1; |
4639 | 0 | } |
4640 | 0 | } |
4641 | | |
4642 | | Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const |
4643 | 0 | { |
4644 | 0 | if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) |
4645 | 0 | { |
4646 | 0 | if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) |
4647 | 0 | CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size()); |
4648 | 0 | } |
4649 | 0 | else |
4650 | 0 | { |
4651 | 0 | a-=b; |
4652 | 0 | if (a.IsNegative()) |
4653 | 0 | a+=m_modulus; |
4654 | 0 | } |
4655 | |
|
4656 | 0 | return a; |
4657 | 0 | } |
4658 | | |
4659 | | const Integer& ModularArithmetic::Inverse(const Integer &a) const |
4660 | 0 | { |
4661 | 0 | if (!a) |
4662 | 0 | return a; |
4663 | | |
4664 | 0 | CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size()); |
4665 | 0 | if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size())) |
4666 | 0 | Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size()); |
4667 | |
|
4668 | 0 | return m_result; |
4669 | 0 | } |
4670 | | |
4671 | | Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const |
4672 | 0 | { |
4673 | 0 | if (m_modulus.IsOdd()) |
4674 | 0 | { |
4675 | 0 | MontgomeryRepresentation dr(m_modulus); |
4676 | 0 | return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); |
4677 | 0 | } |
4678 | 0 | else |
4679 | 0 | return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); |
4680 | 0 | } |
4681 | | |
4682 | | void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const |
4683 | 3.69k | { |
4684 | 3.69k | if (m_modulus.IsOdd()) |
4685 | 1.68k | { |
4686 | 1.68k | MontgomeryRepresentation dr(m_modulus); |
4687 | 1.68k | dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); |
4688 | 3.36k | for (unsigned int i=0; i<exponentsCount; i++) |
4689 | 1.68k | results[i] = dr.ConvertOut(results[i]); |
4690 | 1.68k | } |
4691 | 2.01k | else |
4692 | 2.01k | AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); |
4693 | 3.69k | } |
4694 | | |
4695 | | MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd |
4696 | | : ModularArithmetic(m), |
4697 | | m_u((word)0, m_modulus.reg.size()), |
4698 | | m_workspace(5*m_modulus.reg.size()) |
4699 | 28.8k | { |
4700 | 28.8k | if (!m_modulus.IsOdd()) |
4701 | 0 | throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus"); |
4702 | | |
4703 | 28.8k | RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size()); |
4704 | 28.8k | } |
4705 | | |
4706 | | const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const |
4707 | 175k | { |
4708 | 175k | word *const T = m_workspace.begin(); |
4709 | 175k | word *const R = m_result.reg.begin(); |
4710 | 175k | const size_t N = m_modulus.reg.size(); |
4711 | 175k | CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N); |
4712 | | |
4713 | 175k | AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); |
4714 | 175k | SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); |
4715 | 175k | MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4716 | 175k | return m_result; |
4717 | 175k | } |
4718 | | |
4719 | | const Integer& MontgomeryRepresentation::Square(const Integer &a) const |
4720 | 867k | { |
4721 | 867k | word *const T = m_workspace.begin(); |
4722 | 867k | word *const R = m_result.reg.begin(); |
4723 | 867k | const size_t N = m_modulus.reg.size(); |
4724 | 867k | CRYPTOPP_ASSERT(a.reg.size()<=N); |
4725 | | |
4726 | 867k | CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); |
4727 | 867k | SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); |
4728 | 867k | MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4729 | 867k | return m_result; |
4730 | 867k | } |
4731 | | |
4732 | | Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const |
4733 | 1.68k | { |
4734 | 1.68k | word *const T = m_workspace.begin(); |
4735 | 1.68k | word *const R = m_result.reg.begin(); |
4736 | 1.68k | const size_t N = m_modulus.reg.size(); |
4737 | 1.68k | CRYPTOPP_ASSERT(a.reg.size()<=N); |
4738 | | |
4739 | 1.68k | CopyWords(T, a.reg, a.reg.size()); |
4740 | 1.68k | SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); |
4741 | 1.68k | MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4742 | 1.68k | return m_result; |
4743 | 1.68k | } |
4744 | | |
4745 | | const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const |
4746 | 0 | { |
4747 | | // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus; |
4748 | 0 | word *const T = m_workspace.begin(); |
4749 | 0 | word *const R = m_result.reg.begin(); |
4750 | 0 | const size_t N = m_modulus.reg.size(); |
4751 | 0 | CRYPTOPP_ASSERT(a.reg.size()<=N); |
4752 | |
|
4753 | 0 | CopyWords(T, a.reg, a.reg.size()); |
4754 | 0 | SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); |
4755 | 0 | MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); |
4756 | 0 | unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N); |
4757 | | |
4758 | | // cout << "k=" << k << " N*32=" << 32*N << endl; |
4759 | |
|
4760 | 0 | if (k>N*WORD_BITS) |
4761 | 0 | DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N); |
4762 | 0 | else |
4763 | 0 | MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N); |
4764 | |
|
4765 | 0 | return m_result; |
4766 | 0 | } |
4767 | | |
4768 | | // Specialization declared in misc.h to allow us to print integers |
4769 | | // with additional control options, like arbitrary bases and uppercase. |
4770 | | template <> CRYPTOPP_DLL |
4771 | | std::string IntToString<Integer>(Integer value, unsigned int base) |
4772 | 9.06k | { |
4773 | | // Hack... set the high bit for uppercase. Set the next bit fo a suffix. |
4774 | 9.06k | static const unsigned int BIT_32 = (1U << 31); |
4775 | 9.06k | const bool UPPER = !!(base & BIT_32); |
4776 | 9.06k | static const unsigned int BIT_31 = (1U << 30); |
4777 | 9.06k | const bool BASE = !!(base & BIT_31); |
4778 | | |
4779 | 9.06k | const char CH = UPPER ? 'A' : 'a'; |
4780 | 9.06k | base &= ~(BIT_32|BIT_31); |
4781 | 9.06k | CRYPTOPP_ASSERT(base >= 2 && base <= 32); |
4782 | | |
4783 | 9.06k | if (value == 0) |
4784 | 1.38k | return "0"; |
4785 | | |
4786 | 7.68k | bool negative = false, zero = false; |
4787 | 7.68k | if (value.IsNegative()) |
4788 | 536 | { |
4789 | 536 | negative = true; |
4790 | 536 | value.Negate(); |
4791 | 536 | } |
4792 | | |
4793 | 7.68k | if (!value) |
4794 | 0 | zero = true; |
4795 | | |
4796 | 7.68k | SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1); |
4797 | 7.68k | Integer temp; |
4798 | | |
4799 | 7.68k | unsigned int i=0; |
4800 | 3.20M | while (!!value) |
4801 | 3.19M | { |
4802 | 3.19M | word digit; |
4803 | 3.19M | Integer::Divide(digit, temp, value, word(base)); |
4804 | 3.19M | s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit); |
4805 | 3.19M | value.swap(temp); |
4806 | 3.19M | } |
4807 | | |
4808 | 7.68k | std::string result; |
4809 | 7.68k | result.reserve(i+2); |
4810 | | |
4811 | 7.68k | if (negative) |
4812 | 536 | result += '-'; |
4813 | | |
4814 | 7.68k | if (zero) |
4815 | 0 | result += '0'; |
4816 | | |
4817 | 3.20M | while (i--) |
4818 | 3.19M | result += s[i]; |
4819 | | |
4820 | 7.68k | if (BASE) |
4821 | 0 | { |
4822 | 0 | if (base == 10) |
4823 | 0 | result += '.'; |
4824 | 0 | else if (base == 16) |
4825 | 0 | result += 'h'; |
4826 | 0 | else if (base == 8) |
4827 | 0 | result += 'o'; |
4828 | 0 | else if (base == 2) |
4829 | 0 | result += 'b'; |
4830 | 0 | } |
4831 | | |
4832 | 7.68k | return result; |
4833 | 9.06k | } |
4834 | | |
4835 | | // Specialization declared in misc.h to avoid Coverity findings. |
4836 | | template <> CRYPTOPP_DLL |
4837 | | std::string IntToString<word64>(word64 value, unsigned int base) |
4838 | 645 | { |
4839 | | // Hack... set the high bit for uppercase. |
4840 | 645 | static const unsigned int HIGH_BIT = (1U << 31); |
4841 | 645 | const char CH = !!(base & HIGH_BIT) ? 'A' : 'a'; |
4842 | 645 | base &= ~HIGH_BIT; |
4843 | | |
4844 | 645 | CRYPTOPP_ASSERT(base >= 2); |
4845 | 645 | if (value == 0) |
4846 | 61 | return "0"; |
4847 | | |
4848 | 584 | std::string result; |
4849 | 2.63k | while (value > 0) |
4850 | 2.05k | { |
4851 | 2.05k | word64 digit = value % base; |
4852 | 2.05k | result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result; |
4853 | 2.05k | value /= base; |
4854 | 2.05k | } |
4855 | 584 | return result; |
4856 | 645 | } |
4857 | | |
4858 | | #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER |
4859 | | // Allow the linker to discard Integer code if not needed. |
4860 | | // Also see http://github.com/weidai11/cryptopp/issues/389. |
4861 | | bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) |
4862 | 136k | { |
4863 | 136k | if (valueType != typeid(Integer)) |
4864 | 136k | return false; |
4865 | 0 | *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); |
4866 | 0 | return true; |
4867 | 136k | } |
4868 | | #endif // CRYPTOPP_NO_ASSIGN_TO_INTEGER |
4869 | | |
4870 | | // *************************** C++ Static Initialization *************************** |
4871 | | |
4872 | | class InitInteger |
4873 | | { |
4874 | | public: |
4875 | | InitInteger() |
4876 | 10 | { |
4877 | 10 | SetFunctionPointers(); |
4878 | 10 | } |
4879 | | }; |
4880 | | |
4881 | | // This is not really needed because each Integer can dynamically initialize |
4882 | | // itself, but we take a peephole optimization and initialize the class once |
4883 | | // if init priorities are available. Dynamic initialization will be used if |
4884 | | // init priorities are not available. |
4885 | | |
4886 | | #if defined(HAVE_GCC_INIT_PRIORITY) |
4887 | | const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger(); |
4888 | | const Integer g_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 11))) = Integer(0L); |
4889 | | const Integer g_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 12))) = Integer(1L); |
4890 | | const Integer g_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 13))) = Integer(2L); |
4891 | | #elif defined(HAVE_MSC_INIT_PRIORITY) |
4892 | | #pragma warning(disable: 4075) |
4893 | | #pragma init_seg(".CRT$XCU") |
4894 | | const InitInteger s_init; |
4895 | | const Integer g_zero(0L); |
4896 | | const Integer g_one(1L); |
4897 | | const Integer g_two(2L); |
4898 | | #pragma warning(default: 4075) |
4899 | | #elif HAVE_XLC_INIT_PRIORITY |
4900 | | // XLC needs constant, not a define |
4901 | | #pragma priority(280) |
4902 | | const InitInteger s_init; |
4903 | | const Integer g_zero(0L); |
4904 | | const Integer g_one(1L); |
4905 | | const Integer g_two(2L); |
4906 | | #else |
4907 | | const InitInteger s_init; |
4908 | | #endif |
4909 | | |
4910 | | // ***************** Library code ******************** |
4911 | | |
4912 | | const Integer &Integer::Zero() |
4913 | 494k | { |
4914 | 494k | #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) |
4915 | 494k | return g_zero; |
4916 | | #elif defined(CRYPTOPP_CXX11_STATIC_INIT) |
4917 | | static const Integer s_zero(0L); |
4918 | | return s_zero; |
4919 | | #else // Potential memory leak. Avoid if possible. |
4920 | | return Singleton<Integer, NewInteger<0L> >().Ref(); |
4921 | | #endif |
4922 | 494k | } |
4923 | | |
4924 | | const Integer &Integer::One() |
4925 | 6.44k | { |
4926 | 6.44k | #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) |
4927 | 6.44k | return g_one; |
4928 | | #elif defined(CRYPTOPP_CXX11_STATIC_INIT) |
4929 | | static const Integer s_one(1L); |
4930 | | return s_one; |
4931 | | #else // Potential memory leak. Avoid if possible. |
4932 | | return Singleton<Integer, NewInteger<1L> >().Ref(); |
4933 | | #endif |
4934 | 6.44k | } |
4935 | | |
4936 | | const Integer &Integer::Two() |
4937 | 0 | { |
4938 | 0 | #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) |
4939 | 0 | return g_two; |
4940 | | #elif defined(CRYPTOPP_CXX11_STATIC_INIT) |
4941 | | static const Integer s_two(2L); |
4942 | | return s_two; |
4943 | | #else // Potential memory leak. Avoid if possible. |
4944 | | return Singleton<Integer, NewInteger<2L> >().Ref(); |
4945 | | #endif |
4946 | 0 | } |
4947 | | |
4948 | | NAMESPACE_END |
4949 | | |
4950 | | #endif // CRYPTOPP_IMPORTS |