Coverage Report

Created: 2024-11-21 07:03

/src/cryptopp/integer.cpp
Line
Count
Source (jump to first uncovered line)
1
// integer.cpp - originally written and placed in the public domain by Wei Dai
2
// contains public domain code contributed by Alister Lee and Leonard Janke
3
4
// Notes by JW: The Integer class needs to do two things. First, it needs
5
//  to set function pointers on some platforms, like X86 and X64. The
6
//  function pointers select a fast multiply and addition based on the cpu.
7
//  Second, it wants to create Integer::Zero(), Integer::One() and
8
//  Integer::Two().
9
// The function pointers are initialized in the InitializeInteger class by
10
//  calling SetFunctionPointers(). The call to SetFunctionPointers() is
11
//  guarded to run once using a double-checked pattern. We don't use C++
12
//  std::call_once due to bad interactions between libstdc++, glibc and
13
//  pthreads. The bad interactions were causing crashes for us on platforms
14
//  like Sparc and PowerPC. Since we are only setting function pointers we
15
//  don't have to worry about leaking memory. The worst case seems to be the
16
//  pointers gets written twice until the init flag is set and visible to
17
//  all threads.
18
// For Integer::Zero(), Integer::One() and Integer::Two(), we use one of three
19
//  strategies. First, if initialization priorities are available then we use
20
//  them. Initialization priorities are init_priority() on Linux and init_seg()
21
//  on Windows. OS X and several other platforms lack them. Initialization
22
//  priorities are platform specific but they are also the most trouble free
23
//  with deterministic destruction.
24
// Second, if C++11 dynamic initialization is available, then we use it. After
25
//  the std::call_once fiasco we moved to dynamic initialization to avoid
26
//  unknown troubles platforms that are tested less frequently. In addition
27
//  Microsoft platforms mostly do not provide dynamic initialization.
28
//  The MSDN docs claim they do but they don't in practice because we need
29
//  Visual Studio 2017 and Windows 10 or above.
30
// Third, we fall back to Wei's original code of a Singleton. Wei's original
31
//  code was much simpler. It simply used the Singleton pattern, but it always
32
//  produced memory findings on some platforms. The Singleton generates memory
33
//  findings because it uses a Create On First Use pattern (a dumb Nifty
34
//  Counter) and the compiler had to be smart enough to fold them to return
35
//  the same object. Unix and Linux compilers do a good job of folding objects,
36
//  but Microsoft compilers do a rather poor job for some versions of the
37
//  compiler.
38
// Another problem with the Singleton is resource destruction requires running
39
//  resource acquisition in reverse. For resources provided through the
40
//  Singletons, there is no way to express the dependency order to safely
41
//  destroy resources. (That's one of the problems C++11 dynamic
42
//  initialization with concurrent execution is supposed to solve).
43
// The final problem with Singletons is resource/memory exhaustion in languages
44
//  like Java and .Net. Java and .Net load and unload a native DLL hundreds or
45
//  thousands of times during the life of a program. Each load produces a
46
//  memory leak and they can add up quickly. If they library is being used in
47
//  Java or .Net then Singleton must be avoided at all costs.
48
//
49
// The code below has a path cut-in for BMI2 using mulx and adcx instructions.
50
//  There was a modest speedup of approximately 0.03 ms in public key Integer
51
//  operations. We had to disable BMI2 for the moment because some OS X machines
52
//  were advertising BMI/BMI2 support but caused SIGILL's at runtime. Also see
53
//  https://github.com/weidai11/cryptopp/issues/850.
54
55
#include "pch.h"
56
#include "config.h"
57
58
#ifndef CRYPTOPP_IMPORTS
59
60
#include "integer.h"
61
#include "secblock.h"
62
#include "modarith.h"
63
#include "nbtheory.h"
64
#include "smartptr.h"
65
#include "algparam.h"
66
#include "filters.h"
67
#include "stdcpp.h"
68
#include "asn.h"
69
#include "oids.h"
70
#include "words.h"
71
#include "pubkey.h"   // for P1363_KDF2
72
#include "sha.h"
73
#include "cpu.h"
74
#include "misc.h"
75
76
#include <iostream>
77
78
#if (CRYPTOPP_MSC_VERSION >= 1400) && !defined(_M_ARM)
79
  #include <intrin.h>
80
#endif
81
82
#ifdef __DECCXX
83
  #include <c_asm.h>
84
#endif
85
86
// "Error: The operand ___LKDB cannot be assigned to",
87
// http://github.com/weidai11/cryptopp/issues/188
88
#if (__SUNPRO_CC >= 0x5130)
89
# define MAYBE_CONST
90
# define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
91
#else
92
856M
# define MAYBE_CONST const
93
856M
# define MAYBE_UNCONST_CAST(x) x
94
#endif
95
96
// "Inline assembly operands don't work with .intel_syntax",
97
//  http://llvm.org/bugs/show_bug.cgi?id=24232
98
#if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM)
99
# undef CRYPTOPP_X86_ASM_AVAILABLE
100
# undef CRYPTOPP_X32_ASM_AVAILABLE
101
# undef CRYPTOPP_X64_ASM_AVAILABLE
102
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
103
# undef CRYPTOPP_SSSE3_ASM_AVAILABLE
104
#else
105
# define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86))
106
#endif
107
108
// ***************** C++ Static Initialization ********************
109
110
NAMESPACE_BEGIN(CryptoPP)
111
112
// Function body near the middle of the file
113
static void SetFunctionPointers();
114
115
// Use a double-checked pattern. We are not leaking anything so it
116
// does not matter if a pointer is written twice during a race.
117
// Avoid std::call_once due to too many problems on platforms like
118
// Solaris and Sparc. Also see
119
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=66146 and
120
// http://github.com/weidai11/cryptopp/issues/707.
121
InitializeInteger::InitializeInteger()
122
57.2M
{
123
57.2M
  static bool s_flag;
124
57.2M
  MEMORY_BARRIER();
125
57.2M
  if (s_flag == false)
126
10
  {
127
10
    SetFunctionPointers();
128
10
    s_flag = true;
129
10
    MEMORY_BARRIER();
130
10
  }
131
57.2M
}
132
133
template <long i>
134
struct NewInteger
135
{
136
  Integer * operator()() const
137
  {
138
    return new Integer(i);
139
  }
140
};
141
142
// ***************** Library code ********************
143
144
inline static int Compare(const word *A, const word *B, size_t N)
145
33.2M
{
146
40.5M
  while (N--)
147
40.5M
    if (A[N] > B[N])
148
10.2M
      return 1;
149
30.2M
    else if (A[N] < B[N])
150
22.9M
      return -1;
151
152
6.57k
  return 0;
153
33.2M
}
154
155
inline static int Increment(word *A, size_t N, word B=1)
156
28.5M
{
157
28.5M
  CRYPTOPP_ASSERT(N);
158
28.5M
  word t = A[0];
159
28.5M
  A[0] = t+B;
160
28.5M
  if (A[0] >= t)
161
28.4M
    return 0;
162
96.2k
  for (unsigned i=1; i<N; i++)
163
90.9k
    if (++A[i])
164
27.6k
      return 0;
165
5.29k
  return 1;
166
32.9k
}
167
168
inline static int Decrement(word *A, size_t N, word B=1)
169
126k
{
170
126k
  CRYPTOPP_ASSERT(N);
171
126k
  word t = A[0];
172
126k
  A[0] = t-B;
173
126k
  if (A[0] <= t)
174
125k
    return 0;
175
1.43k
  for (unsigned i=1; i<N; i++)
176
1.43k
    if (A[i]--)
177
1.27k
      return 0;
178
4
  return 1;
179
1.27k
}
180
181
static void TwosComplement(word *A, size_t N)
182
62.8k
{
183
62.8k
  Decrement(A, N);
184
238k
  for (unsigned i=0; i<N; i++)
185
176k
    A[i] = ~A[i];
186
62.8k
}
187
188
static word AtomicInverseModPower2(word A)
189
28.8k
{
190
28.8k
  CRYPTOPP_ASSERT(A%2==1);
191
192
28.8k
  word R=A%8;
193
194
173k
  for (unsigned i=3; i<WORD_BITS; i*=2)
195
144k
    R = R*(2-R*A);
196
197
28.8k
  CRYPTOPP_ASSERT(R*A==1);
198
28.8k
  return R;
199
28.8k
}
200
201
// ********************************************************
202
203
#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || ((defined(__aarch64__) || defined(__x86_64__)) && defined(CRYPTOPP_WORD128_AVAILABLE))
204
  #define TWO_64_BIT_WORDS 1
205
1.84G
  #define Declare2Words(x)      word x##0, x##1;
206
1.47G
  #define AssignWord(a, b)      a##0 = b; a##1 = 0;
207
  #define Add2WordsBy1(a, b, c)   a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
208
5.78G
  #define LowWord(a)          a##0
209
1.31G
  #define HighWord(a)         a##1
210
  #ifdef CRYPTOPP_MSC_VERSION
211
    #define MultiplyWordsLoHi(p0, p1, a, b)   p0 = _umul128(a, b, &p1);
212
    #ifndef __INTEL_COMPILER
213
      #define Double3Words(c, d)    d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
214
    #endif
215
  #elif defined(__aarch32__) || defined(__aarch64__)
216
    #define MultiplyWordsLoHi(p0, p1, a, b)   p0 = a*b; asm ("umulh %0,%1,%2" : "=r"(p1) : "r"(a), "r"(b));
217
  #elif defined(__DECCXX)
218
    #define MultiplyWordsLoHi(p0, p1, a, b)   p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
219
  #elif defined(__x86_64__)
220
    #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
221
      // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
222
      #define MultiplyWordsLoHi(p0, p1, a, b)   asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
223
    #elif defined(__BMI2__) && 0
224
      #define MultiplyWordsLoHi(p0, p1, a, b)   asm ("mulxq %3, %0, %1" : "=r"(p0), "=r"(p1) : "d"(a), "r"(b));
225
      #define MulAcc(c, d, a, b)    asm ("mulxq %6, %3, %4; addq %3, %0; adcxq %4, %1; adcxq %7, %2;" : "+&r"(c), "+&r"(d##0), "+&r"(d##1), "=&r"(p0), "=&r"(p1) : "d"(a), "r"(b), "r"(W64LIT(0)) : "cc");
226
      #define Double3Words(c, d)    asm ("addq %0, %0; adcxq %1, %1; adcxq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
227
      #define Acc2WordsBy1(a, b)    asm ("addq %2, %0; adcxq %3, %1;" : "+&r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc");
228
      #define Acc2WordsBy2(a, b)    asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
229
      #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=&r"(e##0), "=&r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc");
230
    #else
231
1.87G
      #define MultiplyWordsLoHi(p0, p1, a, b)   asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
232
4.87G
      #define MulAcc(c, d, a, b)    asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
233
131M
      #define Double3Words(c, d)    asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
234
892M
      #define Acc2WordsBy1(a, b)    asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
235
421M
      #define Acc2WordsBy2(a, b)    asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
236
131M
      #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
237
    #endif
238
  #endif
239
1.87G
  #define MultiplyWords(p, a, b)    MultiplyWordsLoHi(p##0, p##1, a, b)
240
  #ifndef Double3Words
241
    #define Double3Words(c, d)    d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
242
  #endif
243
  #ifndef Acc2WordsBy2
244
    #define Acc2WordsBy2(a, b)    a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
245
  #endif
246
1.62G
  #define AddWithCarry(u, a, b)   {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
247
1.47G
  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
248
61.4M
  #define GetCarry(u)         u##1
249
37.5M
  #define GetBorrow(u)        u##1
250
#else
251
  #define Declare2Words(x)      dword x;
252
  #if CRYPTOPP_MSC_VERSION >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
253
    #define MultiplyWords(p, a, b)    p = __emulu(a, b);
254
  #else
255
    #define MultiplyWords(p, a, b)    p = (dword)a*b;
256
  #endif
257
  #define AssignWord(a, b)      a = b;
258
  #define Add2WordsBy1(a, b, c)   a = b + c;
259
  #define Acc2WordsBy2(a, b)      a += b;
260
  #define LowWord(a)          word(a)
261
  #define HighWord(a)         word(a>>WORD_BITS)
262
  #define Double3Words(c, d)      d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
263
  #define AddWithCarry(u, a, b)   u = dword(a) + b + GetCarry(u);
264
  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
265
  #define GetCarry(u)         HighWord(u)
266
  #define GetBorrow(u)        word(u>>(WORD_BITS*2-1))
267
#endif
268
#ifndef MulAcc
269
  #define MulAcc(c, d, a, b)      MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
270
#endif
271
#ifndef Acc2WordsBy1
272
  #define Acc2WordsBy1(a, b)      Add2WordsBy1(a, a, b)
273
#endif
274
#ifndef Acc3WordsBy2
275
  #define Acc3WordsBy2(c, d, e)   Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
276
#endif
277
278
class DWord
279
{
280
public:
281
#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
282
270M
  DWord() {std::memset(&m_whole, 0x00, sizeof(m_whole));}
283
#else
284
  DWord() {std::memset(&m_halfs, 0x00, sizeof(m_halfs));}
285
#endif
286
287
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
288
105M
  explicit DWord(word low) : m_whole(low) { }
289
#else
290
  explicit DWord(word low)
291
  {
292
    m_halfs.high = 0;
293
    m_halfs.low = low;
294
  }
295
#endif
296
297
#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
298
  DWord(word low, word high) : m_whole()
299
#else
300
  DWord(word low, word high) : m_halfs()
301
#endif
302
395M
  {
303
395M
#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
304
395M
#  if (CRYPTOPP_LITTLE_ENDIAN)
305
395M
    const word t[2] = {low,high};
306
395M
    std::memcpy(&m_whole, t, sizeof(m_whole));
307
#  else
308
    const word t[2] = {high,low};
309
    std::memcpy(&m_whole, t, sizeof(m_whole));
310
#  endif
311
#else
312
    m_halfs.low = low;
313
    m_halfs.high = high;
314
#endif
315
395M
  }
316
317
  static DWord Multiply(word a, word b)
318
74.8M
  {
319
74.8M
    DWord r;
320
74.8M
    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
321
74.8M
      r.m_whole = (dword)a * b;
322
    #elif defined(MultiplyWordsLoHi)
323
      MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
324
    #else
325
      CRYPTOPP_ASSERT(0);
326
    #endif
327
74.8M
    return r;
328
74.8M
  }
329
330
  static DWord MultiplyAndAdd(word a, word b, word c)
331
0
  {
332
0
    DWord r = Multiply(a, b);
333
0
    return r += c;
334
0
  }
335
336
  DWord & operator+=(word a)
337
0
  {
338
0
    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
339
0
      m_whole = m_whole + a;
340
    #else
341
      m_halfs.low += a;
342
      m_halfs.high += (m_halfs.low < a);
343
    #endif
344
0
    return *this;
345
0
  }
346
347
  DWord operator+(word a)
348
0
  {
349
0
    DWord r;
350
0
    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
351
0
      r.m_whole = m_whole + a;
352
0
    #else
353
0
      r.m_halfs.low = m_halfs.low + a;
354
0
      r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
355
0
    #endif
356
0
    return r;
357
0
  }
358
359
  DWord operator-(DWord a)
360
37.4M
  {
361
37.4M
    DWord r;
362
37.4M
    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
363
37.4M
      r.m_whole = m_whole - a.m_whole;
364
    #else
365
      r.m_halfs.low = m_halfs.low - a.m_halfs.low;
366
      r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
367
    #endif
368
37.4M
    return r;
369
37.4M
  }
370
371
  DWord operator-(word a)
372
157M
  {
373
157M
    DWord r;
374
157M
    #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
375
157M
      r.m_whole = m_whole - a;
376
    #else
377
      r.m_halfs.low = m_halfs.low - a;
378
      r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
379
    #endif
380
157M
    return r;
381
157M
  }
382
383
  // returns quotient, which must fit in a word
384
  word operator/(word divisor);
385
386
  word operator%(word a);
387
388
  bool operator!() const
389
18.8M
  {
390
18.8M
  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
391
18.8M
    return !m_whole;
392
  #else
393
    return !m_halfs.high && !m_halfs.low;
394
  #endif
395
18.8M
  }
396
397
  // TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
398
  //  union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
399
236M
  word GetLowHalf() const {return m_halfs.low;}
400
183M
  word GetHighHalf() const {return m_halfs.high;}
401
52.5M
  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
402
403
private:
404
  // Issue 274, "Types cannot be declared in anonymous union",
405
  //   http://github.com/weidai11/cryptopp/issues/274
406
  //   Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
407
    struct half_words
408
    {
409
    #if (CRYPTOPP_LITTLE_ENDIAN)
410
        word low;
411
        word high;
412
    #else
413
        word high;
414
        word low;
415
    #endif
416
   };
417
   union
418
   {
419
   #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
420
       dword m_whole;
421
   #endif
422
       half_words m_halfs;
423
   };
424
};
425
426
class Word
427
{
428
public:
429
0
  Word() : m_whole(0) {}
430
0
  Word(word value) : m_whole(value) {}
431
0
  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
432
433
  static Word Multiply(hword a, hword b)
434
0
  {
435
0
    Word r;
436
0
    r.m_whole = (word)a * b;
437
0
    return r;
438
0
  }
439
440
  Word operator-(Word a)
441
0
  {
442
0
    Word r;
443
0
    r.m_whole = m_whole - a.m_whole;
444
0
    return r;
445
0
  }
446
447
  Word operator-(hword a)
448
0
  {
449
0
    Word r;
450
0
    r.m_whole = m_whole - a;
451
0
    return r;
452
0
  }
453
454
  // returns quotient, which must fit in a word
455
  hword operator/(hword divisor)
456
0
  {
457
0
    return hword(m_whole / divisor);
458
0
  }
459
460
  bool operator!() const
461
0
  {
462
0
    return !m_whole;
463
0
  }
464
465
0
  word GetWhole() const {return m_whole;}
466
0
  hword GetLowHalf() const {return hword(m_whole);}
467
0
  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
468
0
  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
469
470
private:
471
  word m_whole;
472
};
473
474
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
475
template <class S, class D>
476
S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
477
37.4M
{
478
37.4M
  CRYPTOPP_UNUSED(dummy);
479
480
  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
481
37.4M
  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
482
483
  // Profiling guided the flow below.
484
485
  // estimate the quotient: do a 2 S by 1 S divide.
486
37.4M
  S Q; bool pre = (S(B1+1) == 0);
487
37.4M
  if (B1 > 0 && !pre)
488
37.3M
    Q = D(A[1], A[2]) / S(B1+1);
489
55.2k
  else if (pre)
490
55.2k
    Q = A[2];
491
0
  else
492
0
    Q = D(A[0], A[1]) / B0;
493
494
  // now subtract Q*B from A
495
37.4M
  D p = D::Multiply(B0, Q);
496
37.4M
  D u = (D) A[0] - p.GetLowHalf();
497
37.4M
  A[0] = u.GetLowHalf();
498
37.4M
  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
499
37.4M
  A[1] = u.GetLowHalf();
500
37.4M
  A[2] += u.GetHighHalf();
501
502
  // Q <= actual quotient, so fix it
503
52.5M
  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
504
15.1M
  {
505
15.1M
    u = (D) A[0] - B0;
506
15.1M
    A[0] = u.GetLowHalf();
507
15.1M
    u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
508
15.1M
    A[1] = u.GetLowHalf();
509
15.1M
    A[2] += u.GetHighHalf();
510
15.1M
    Q++;
511
15.1M
    CRYPTOPP_ASSERT(Q);  // shouldn't overflow
512
15.1M
  }
513
514
37.4M
  return Q;
515
37.4M
}
516
517
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
518
template <class S, class D>
519
inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
520
18.8M
{
521
  // Profiling guided the flow below.
522
523
18.8M
  if (!!B)
524
18.7M
  {
525
18.7M
    S Q[2];
526
18.7M
    T[0] = Al.GetLowHalf();
527
18.7M
    T[1] = Al.GetHighHalf();
528
18.7M
    T[2] = Ah.GetLowHalf();
529
18.7M
    T[3] = Ah.GetHighHalf();
530
18.7M
    Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
531
18.7M
    Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
532
18.7M
    return D(Q[0], Q[1]);
533
18.7M
  }
534
98.2k
  else  // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
535
98.2k
  {
536
98.2k
    return D(Ah.GetLowHalf(), Ah.GetHighHalf());
537
98.2k
  }
538
18.8M
}
539
540
// returns quotient, which must fit in a word
541
inline word DWord::operator/(word a)
542
178M
{
543
178M
  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
544
178M
    return word(m_whole / a);
545
  #else
546
    hword r[4];
547
    return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
548
  #endif
549
178M
}
550
551
inline word DWord::operator%(word a)
552
141M
{
553
141M
  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
554
141M
    return word(m_whole % a);
555
  #else
556
    if (a < (word(1) << (WORD_BITS/2)))
557
    {
558
      hword h = hword(a);
559
      word r = m_halfs.high % h;
560
      r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
561
      return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
562
    }
563
    else
564
    {
565
      hword r[4];
566
      DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
567
      return Word(r[0], r[1]).GetWhole();
568
    }
569
  #endif
570
141M
}
571
572
// ********************************************************
573
574
// Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
575
#if defined(__GNUC__)
576
  #define AddPrologue \
577
    int result; \
578
    __asm__ __volatile__ \
579
    ( \
580
      INTEL_NOPREFIX
581
  #define AddEpilogue \
582
      ATT_PREFIX \
583
          : "=a" (result)\
584
          : "d" (C), "a" (A), "D" (B), "c" (N) \
585
          : "%esi", "memory", "cc" \
586
    );\
587
    return result;
588
  #define MulPrologue \
589
    __asm__ __volatile__ \
590
    ( \
591
      INTEL_NOPREFIX \
592
      AS1(  push  ebx) \
593
      AS2(  mov   ebx, edx)
594
  #define MulEpilogue \
595
      AS1(  pop   ebx) \
596
      ATT_PREFIX \
597
      : \
598
      : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
599
      : "%esi", "memory", "cc" \
600
    );
601
  #define SquPrologue   MulPrologue
602
  #define SquEpilogue \
603
      AS1(  pop   ebx) \
604
      ATT_PREFIX \
605
      : \
606
      : "d" (s_maskLow16), "c" (C), "a" (A) \
607
      : "%esi", "%edi", "memory", "cc" \
608
    );
609
  #define TopPrologue   MulPrologue
610
  #define TopEpilogue \
611
      AS1(  pop   ebx) \
612
      ATT_PREFIX \
613
      : \
614
      : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
615
      : "memory", "cc" \
616
    );
617
#else
618
  #define AddPrologue \
619
    __asm push edi \
620
    __asm push esi \
621
    __asm mov   eax, [esp+12] \
622
    __asm mov   edi, [esp+16]
623
  #define AddEpilogue \
624
    __asm pop esi \
625
    __asm pop edi \
626
    __asm ret 8
627
  #define SaveEBX
628
  #define RestoreEBX
629
  #define SquPrologue         \
630
    AS2(  mov   eax, A)     \
631
    AS2(  mov   ecx, C)     \
632
    SaveEBX             \
633
    AS2(  lea   ebx, s_maskLow16)
634
  #define MulPrologue         \
635
    AS2(  mov   eax, A)     \
636
    AS2(  mov   edi, B)     \
637
    AS2(  mov   ecx, C)     \
638
    SaveEBX             \
639
    AS2(  lea   ebx, s_maskLow16)
640
  #define TopPrologue         \
641
    AS2(  mov   eax, A)     \
642
    AS2(  mov   edi, B)     \
643
    AS2(  mov   ecx, C)     \
644
    AS2(  mov   esi, L)     \
645
    SaveEBX             \
646
    AS2(  lea   ebx, s_maskLow16)
647
  #define SquEpilogue   RestoreEBX
648
  #define MulEpilogue   RestoreEBX
649
  #define TopEpilogue   RestoreEBX
650
#endif
651
652
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
653
extern "C" {
654
int Baseline_Add(size_t N, word *C, const word *A, const word *B);
655
int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
656
}
657
#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
658
int Baseline_Add(size_t N, word *C, const word *A, const word *B)
659
{
660
  word result;
661
  __asm__ __volatile__
662
  (
663
  INTEL_NOPREFIX
664
  AS1(  neg   %1)
665
  ASJ(  jz,   1, f)
666
  AS2(  mov   %0,[%3+8*%1])
667
  AS2(  add   %0,[%4+8*%1])
668
  AS2(  mov   [%2+8*%1],%0)
669
  ASL(0)
670
  AS2(  mov   %0,[%3+8*%1+8])
671
  AS2(  adc   %0,[%4+8*%1+8])
672
  AS2(  mov   [%2+8*%1+8],%0)
673
  AS2(  lea   %1,[%1+2])
674
  ASJ(  jrcxz,  1, f)
675
  AS2(  mov   %0,[%3+8*%1])
676
  AS2(  adc   %0,[%4+8*%1])
677
  AS2(  mov   [%2+8*%1],%0)
678
  ASJ(  jmp,  0, b)
679
  ASL(1)
680
  AS2(  mov   %0, 0)
681
  AS2(  adc   %0, %0)
682
  ATT_NOPREFIX
683
  : "=&r" (result), "+c" (N)
684
  : "r" (C+N), "r" (A+N), "r" (B+N)
685
  : "memory", "cc"
686
  );
687
  return (int)result;
688
}
689
690
int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
691
{
692
  word result;
693
  __asm__ __volatile__
694
  (
695
  INTEL_NOPREFIX
696
  AS1(  neg   %1)
697
  ASJ(  jz,   1, f)
698
  AS2(  mov   %0,[%3+8*%1])
699
  AS2(  sub   %0,[%4+8*%1])
700
  AS2(  mov   [%2+8*%1],%0)
701
  ASL(0)
702
  AS2(  mov   %0,[%3+8*%1+8])
703
  AS2(  sbb   %0,[%4+8*%1+8])
704
  AS2(  mov   [%2+8*%1+8],%0)
705
  AS2(  lea   %1,[%1+2])
706
  ASJ(  jrcxz,  1, f)
707
  AS2(  mov   %0,[%3+8*%1])
708
  AS2(  sbb   %0,[%4+8*%1])
709
  AS2(  mov   [%2+8*%1],%0)
710
  ASJ(  jmp,  0, b)
711
  ASL(1)
712
  AS2(  mov   %0, 0)
713
  AS2(  adc   %0, %0)
714
  ATT_NOPREFIX
715
  : "=&r" (result), "+c" (N)
716
  : "r" (C+N), "r" (A+N), "r" (B+N)
717
  : "memory", "cc"
718
  );
719
  return (int)result;
720
}
721
#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
722
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
723
{
724
  AddPrologue
725
726
  // now: eax = A, edi = B, edx = C, ecx = N
727
  AS2(  lea   eax, [eax+4*ecx])
728
  AS2(  lea   edi, [edi+4*ecx])
729
  AS2(  lea   edx, [edx+4*ecx])
730
731
  AS1(  neg   ecx)        // ecx is negative index
732
  AS2(  test  ecx, 2)       // this clears carry flag
733
  ASJ(  jz,   0, f)
734
  AS2(  sub   ecx, 2)
735
  ASJ(  jmp,  1, f)
736
737
  ASL(0)
738
  ASJ(  jecxz,  2, f)       // loop until ecx overflows and becomes zero
739
  AS2(  mov   esi,[eax+4*ecx])
740
  AS2(  adc   esi,[edi+4*ecx])
741
  AS2(  mov   [edx+4*ecx],esi)
742
  AS2(  mov   esi,[eax+4*ecx+4])
743
  AS2(  adc   esi,[edi+4*ecx+4])
744
  AS2(  mov   [edx+4*ecx+4],esi)
745
  ASL(1)
746
  AS2(  mov   esi,[eax+4*ecx+8])
747
  AS2(  adc   esi,[edi+4*ecx+8])
748
  AS2(  mov   [edx+4*ecx+8],esi)
749
  AS2(  mov   esi,[eax+4*ecx+12])
750
  AS2(  adc   esi,[edi+4*ecx+12])
751
  AS2(  mov   [edx+4*ecx+12],esi)
752
753
  AS2(  lea   ecx,[ecx+4])    // advance index, avoid inc which causes slowdown on Intel Core 2
754
  ASJ(  jmp,  0, b)
755
756
  ASL(2)
757
  AS2(  mov   eax, 0)
758
  AS1(  setc  al)         // store carry into eax (return result register)
759
760
  AddEpilogue
761
762
  // http://github.com/weidai11/cryptopp/issues/340
763
  CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
764
  CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
765
}
766
767
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
768
{
769
  AddPrologue
770
771
  // now: eax = A, edi = B, edx = C, ecx = N
772
  AS2(  lea   eax, [eax+4*ecx])
773
  AS2(  lea   edi, [edi+4*ecx])
774
  AS2(  lea   edx, [edx+4*ecx])
775
776
  AS1(  neg   ecx)        // ecx is negative index
777
  AS2(  test  ecx, 2)       // this clears carry flag
778
  ASJ(  jz,   0, f)
779
  AS2(  sub   ecx, 2)
780
  ASJ(  jmp,  1, f)
781
782
  ASL(0)
783
  ASJ(  jecxz,  2, f)       // loop until ecx overflows and becomes zero
784
  AS2(  mov   esi,[eax+4*ecx])
785
  AS2(  sbb   esi,[edi+4*ecx])
786
  AS2(  mov   [edx+4*ecx],esi)
787
  AS2(  mov   esi,[eax+4*ecx+4])
788
  AS2(  sbb   esi,[edi+4*ecx+4])
789
  AS2(  mov   [edx+4*ecx+4],esi)
790
  ASL(1)
791
  AS2(  mov   esi,[eax+4*ecx+8])
792
  AS2(  sbb   esi,[edi+4*ecx+8])
793
  AS2(  mov   [edx+4*ecx+8],esi)
794
  AS2(  mov   esi,[eax+4*ecx+12])
795
  AS2(  sbb   esi,[edi+4*ecx+12])
796
  AS2(  mov   [edx+4*ecx+12],esi)
797
798
  AS2(  lea   ecx,[ecx+4])    // advance index, avoid inc which causes slowdown on Intel Core 2
799
  ASJ(  jmp,  0, b)
800
801
  ASL(2)
802
  AS2(  mov   eax, 0)
803
  AS1(  setc  al)         // store carry into eax (return result register)
804
805
  AddEpilogue
806
807
  // http://github.com/weidai11/cryptopp/issues/340
808
  CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
809
  CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
810
}
811
812
#if CRYPTOPP_INTEGER_SSE2
813
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
814
{
815
  AddPrologue
816
817
  // now: eax = A, edi = B, edx = C, ecx = N
818
  AS2(  lea   eax, [eax+4*ecx])
819
  AS2(  lea   edi, [edi+4*ecx])
820
  AS2(  lea   edx, [edx+4*ecx])
821
822
  AS1(  neg   ecx)        // ecx is negative index
823
  AS2(  pxor    mm2, mm2)
824
  ASJ(  jz,   2, f)
825
  AS2(  test  ecx, 2)       // this clears carry flag
826
  ASJ(  jz,   0, f)
827
  AS2(  sub   ecx, 2)
828
  ASJ(  jmp,  1, f)
829
830
  ASL(0)
831
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx])
832
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx])
833
  AS2(  paddq    mm0, mm1)
834
  AS2(  paddq  mm2, mm0)
835
  AS2(  movd   DWORD PTR [edx+4*ecx], mm2)
836
  AS2(  psrlq    mm2, 32)
837
838
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx+4])
839
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+4])
840
  AS2(  paddq    mm0, mm1)
841
  AS2(  paddq  mm2, mm0)
842
  AS2(  movd   DWORD PTR [edx+4*ecx+4], mm2)
843
  AS2(  psrlq    mm2, 32)
844
845
  ASL(1)
846
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx+8])
847
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+8])
848
  AS2(  paddq    mm0, mm1)
849
  AS2(  paddq  mm2, mm0)
850
  AS2(  movd   DWORD PTR [edx+4*ecx+8], mm2)
851
  AS2(  psrlq    mm2, 32)
852
853
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx+12])
854
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+12])
855
  AS2(  paddq    mm0, mm1)
856
  AS2(  paddq  mm2, mm0)
857
  AS2(  movd   DWORD PTR [edx+4*ecx+12], mm2)
858
  AS2(  psrlq    mm2, 32)
859
860
  AS2(  add   ecx, 4)
861
  ASJ(  jnz,  0, b)
862
863
  ASL(2)
864
  AS2(  movd  eax, mm2)
865
  AS1(  emms)
866
867
  AddEpilogue
868
869
  // http://github.com/weidai11/cryptopp/issues/340
870
  CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
871
  CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
872
}
873
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
874
{
875
  AddPrologue
876
877
  // now: eax = A, edi = B, edx = C, ecx = N
878
  AS2(  lea   eax, [eax+4*ecx])
879
  AS2(  lea   edi, [edi+4*ecx])
880
  AS2(  lea   edx, [edx+4*ecx])
881
882
  AS1(  neg   ecx)        // ecx is negative index
883
  AS2(  pxor    mm2, mm2)
884
  ASJ(  jz,   2, f)
885
  AS2(  test  ecx, 2)       // this clears carry flag
886
  ASJ(  jz,   0, f)
887
  AS2(  sub   ecx, 2)
888
  ASJ(  jmp,  1, f)
889
890
  ASL(0)
891
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx])
892
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx])
893
  AS2(  psubq    mm0, mm1)
894
  AS2(  psubq  mm0, mm2)
895
  AS2(  movd   DWORD PTR [edx+4*ecx], mm0)
896
  AS2(  psrlq    mm0, 63)
897
898
  AS2(  movd     mm2, DWORD PTR [eax+4*ecx+4])
899
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+4])
900
  AS2(  psubq    mm2, mm1)
901
  AS2(  psubq  mm2, mm0)
902
  AS2(  movd   DWORD PTR [edx+4*ecx+4], mm2)
903
  AS2(  psrlq    mm2, 63)
904
905
  ASL(1)
906
  AS2(  movd     mm0, DWORD PTR [eax+4*ecx+8])
907
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+8])
908
  AS2(  psubq    mm0, mm1)
909
  AS2(  psubq  mm0, mm2)
910
  AS2(  movd   DWORD PTR [edx+4*ecx+8], mm0)
911
  AS2(  psrlq    mm0, 63)
912
913
  AS2(  movd     mm2, DWORD PTR [eax+4*ecx+12])
914
  AS2(  movd     mm1, DWORD PTR [edi+4*ecx+12])
915
  AS2(  psubq    mm2, mm1)
916
  AS2(  psubq  mm2, mm0)
917
  AS2(  movd   DWORD PTR [edx+4*ecx+12], mm2)
918
  AS2(  psrlq    mm2, 63)
919
920
  AS2(  add   ecx, 4)
921
  ASJ(  jnz,  0, b)
922
923
  ASL(2)
924
  AS2(  movd  eax, mm2)
925
  AS1(  emms)
926
927
  AddEpilogue
928
929
  // http://github.com/weidai11/cryptopp/issues/340
930
  CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
931
  CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
932
}
933
#endif  // CRYPTOPP_INTEGER_SSE2
934
#else   // CRYPTOPP_SSE2_ASM_AVAILABLE
935
int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
936
61.4M
{
937
61.4M
  CRYPTOPP_ASSERT (N%2 == 0);
938
939
61.4M
  Declare2Words(u);
940
61.4M
  AssignWord(u, 0);
941
874M
  for (size_t i=0; i<N; i+=2)
942
812M
  {
943
812M
    AddWithCarry(u, A[i], B[i]);
944
812M
    C[i] = LowWord(u);
945
812M
    AddWithCarry(u, A[i+1], B[i+1]);
946
812M
    C[i+1] = LowWord(u);
947
812M
  }
948
61.4M
  return int(GetCarry(u));
949
61.4M
}
950
951
int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
952
37.5M
{
953
37.5M
  CRYPTOPP_ASSERT (N%2 == 0);
954
955
37.5M
  Declare2Words(u);
956
37.5M
  AssignWord(u, 0);
957
772M
  for (size_t i=0; i<N; i+=2)
958
735M
  {
959
735M
    SubtractWithBorrow(u, A[i], B[i]);
960
735M
    C[i] = LowWord(u);
961
735M
    SubtractWithBorrow(u, A[i+1], B[i+1]);
962
735M
    C[i+1] = LowWord(u);
963
735M
  }
964
37.5M
  return int(GetBorrow(u));
965
37.5M
}
966
#endif
967
968
static word LinearMultiply(word *C, const word *AA, word B, size_t N)
969
12.7M
{
970
  // http://github.com/weidai11/cryptopp/issues/188
971
12.7M
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
972
973
12.7M
  word carry=0;
974
904M
  for(unsigned i=0; i<N; i++)
975
891M
  {
976
891M
    Declare2Words(p);
977
891M
    MultiplyWords(p, A[i], B);
978
891M
    Acc2WordsBy1(p, carry);
979
891M
    C[i] = LowWord(p);
980
891M
    carry = HighWord(p);
981
891M
  }
982
12.7M
  return carry;
983
12.7M
}
984
985
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
986
987
#define Mul_2 \
988
403M
  Mul_Begin(2) \
989
403M
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
990
403M
  Mul_End(1, 1)
991
992
#define Mul_4 \
993
33.7k
  Mul_Begin(4) \
994
33.7k
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
995
33.7k
  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
996
33.7k
  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
997
33.7k
  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
998
33.7k
  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
999
33.7k
  Mul_End(5, 3)
1000
1001
#define Mul_8 \
1002
23.9k
  Mul_Begin(8) \
1003
23.9k
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1004
23.9k
  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
1005
23.9k
  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
1006
23.9k
  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1007
23.9k
  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1008
23.9k
  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1009
23.9k
  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1010
23.9k
  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1011
23.9k
  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1012
23.9k
  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1013
23.9k
  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1014
23.9k
  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1015
23.9k
  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
1016
23.9k
  Mul_End(13, 7)
1017
1018
#define Mul_16 \
1019
13.0M
  Mul_Begin(16) \
1020
13.0M
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1021
13.0M
  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1022
13.0M
  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1023
13.0M
  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1024
13.0M
  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1025
13.0M
  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1026
13.0M
  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1027
13.0M
  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1028
13.0M
  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1029
13.0M
  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1030
13.0M
  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1031
13.0M
  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1032
13.0M
  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1033
13.0M
  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1034
13.0M
  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1035
13.0M
  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1036
13.0M
  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1037
13.0M
  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1038
13.0M
  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1039
13.0M
  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1040
13.0M
  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1041
13.0M
  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1042
13.0M
  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1043
13.0M
  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1044
13.0M
  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1045
13.0M
  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1046
13.0M
  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1047
13.0M
  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1048
13.0M
  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
1049
13.0M
  Mul_End(29, 15)
1050
1051
#define Squ_2 \
1052
435k
  Squ_Begin(2) \
1053
435k
  Squ_End(2)
1054
1055
#define Squ_4 \
1056
132k
  Squ_Begin(4) \
1057
132k
  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1058
132k
  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1059
132k
  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
1060
132k
  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
1061
132k
  Squ_End(4)
1062
1063
#define Squ_8 \
1064
107k
  Squ_Begin(8) \
1065
107k
  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1066
107k
  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1067
107k
  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1068
107k
  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1069
107k
  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1070
107k
  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1071
107k
  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1072
107k
  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5)  Squ_NonDiag \
1073
107k
  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1074
107k
  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1075
107k
  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1076
107k
  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1077
107k
  Squ_End(8)
1078
1079
#define Squ_16 \
1080
4.45M
  Squ_Begin(16) \
1081
4.45M
  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1082
4.45M
  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1083
4.45M
  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1084
4.45M
  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1085
4.45M
  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1086
4.45M
  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1087
4.45M
  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1088
4.45M
  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1089
4.45M
  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1090
4.45M
  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1091
4.45M
  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1092
4.45M
  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1093
4.45M
  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1094
4.45M
  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1095
4.45M
  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1096
4.45M
  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1097
4.45M
  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1098
4.45M
  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1099
4.45M
  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1100
4.45M
  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1101
4.45M
  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1102
4.45M
  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1103
4.45M
  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1104
4.45M
  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1105
4.45M
  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1106
4.45M
  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1107
4.45M
  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1108
4.45M
  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1109
4.45M
  Squ_End(16)
1110
1111
#define Bot_2 \
1112
360k
  Mul_Begin(2) \
1113
360k
  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1114
360k
  Bot_End(2)
1115
1116
#define Bot_4 \
1117
111k
  Mul_Begin(4) \
1118
111k
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1119
111k
  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2)  \
1120
111k
  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0)  \
1121
111k
  Bot_End(4)
1122
1123
#define Bot_8 \
1124
80.2k
  Mul_Begin(8) \
1125
80.2k
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1126
80.2k
  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0)  \
1127
80.2k
  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
1128
80.2k
  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1129
80.2k
  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1130
80.2k
  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1131
80.2k
  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1132
80.2k
  Bot_End(8)
1133
1134
#define Bot_16 \
1135
2.28M
  Mul_Begin(16) \
1136
2.28M
  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1137
2.28M
  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1138
2.28M
  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1139
2.28M
  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1140
2.28M
  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1141
2.28M
  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1142
2.28M
  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1143
2.28M
  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1144
2.28M
  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1145
2.28M
  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1146
2.28M
  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1147
2.28M
  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1148
2.28M
  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1149
2.28M
  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1150
2.28M
  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1151
2.28M
  Bot_End(16)
1152
1153
#endif
1154
1155
#if 0
1156
#define Mul_Begin(n)        \
1157
  Declare2Words(p)        \
1158
  Declare2Words(c)        \
1159
  Declare2Words(d)        \
1160
  MultiplyWords(p, A[0], B[0])  \
1161
  AssignWord(c, LowWord(p))   \
1162
  AssignWord(d, HighWord(p))
1163
1164
#define Mul_Acc(i, j)       \
1165
  MultiplyWords(p, A[i], B[j])  \
1166
  Acc2WordsBy1(c, LowWord(p))   \
1167
  Acc2WordsBy1(d, HighWord(p))
1168
1169
#define Mul_SaveAcc(k, i, j)    \
1170
  R[k] = LowWord(c);        \
1171
  Add2WordsBy1(c, d, HighWord(c)) \
1172
  MultiplyWords(p, A[i], B[j])  \
1173
  AssignWord(d, HighWord(p))    \
1174
  Acc2WordsBy1(c, LowWord(p))
1175
1176
#define Mul_End(n)          \
1177
  R[2*n-3] = LowWord(c);      \
1178
  Acc2WordsBy1(d, HighWord(c))  \
1179
  MultiplyWords(p, A[n-1], B[n-1])\
1180
  Acc2WordsBy2(d, p)        \
1181
  R[2*n-2] = LowWord(d);      \
1182
  R[2*n-1] = HighWord(d);
1183
1184
#define Bot_SaveAcc(k, i, j)    \
1185
  R[k] = LowWord(c);        \
1186
  word e = LowWord(d) + HighWord(c);  \
1187
  e += A[i] * B[j];
1188
1189
#define Bot_Acc(i, j) \
1190
  e += A[i] * B[j];
1191
1192
#define Bot_End(n)    \
1193
  R[n-1] = e;
1194
#else
1195
#define Mul_Begin(n)        \
1196
419M
  Declare2Words(p)       \
1197
419M
  word c; \
1198
419M
  Declare2Words(d)       \
1199
419M
  MultiplyWords(p, A[0], B[0]) \
1200
419M
  c = LowWord(p);    \
1201
419M
  AssignWord(d, HighWord(p))
1202
1203
#define Mul_Acc(i, j)       \
1204
3.58G
  MulAcc(c, d, A[i], B[j])
1205
1206
#define Mul_SaveAcc(k, i, j)    \
1207
815M
  R[k] = c;       \
1208
815M
  c = LowWord(d);  \
1209
815M
  AssignWord(d, HighWord(d))  \
1210
815M
  MulAcc(c, d, A[i], B[j])
1211
1212
#define Mul_End(k, i)         \
1213
416M
  R[k] = c;     \
1214
416M
  MultiplyWords(p, A[i], B[i]) \
1215
416M
  Acc2WordsBy2(p, d)        \
1216
416M
  R[k+1] = LowWord(p);      \
1217
416M
  R[k+2] = HighWord(p);
1218
1219
#define Bot_SaveAcc(k, i, j)    \
1220
2.83M
  R[k] = c;       \
1221
2.83M
  c = LowWord(d);  \
1222
2.83M
  c += A[i] * B[j];
1223
1224
#define Bot_Acc(i, j) \
1225
35.4M
  c += A[i] * B[j];
1226
1227
#define Bot_End(n)    \
1228
2.83M
  R[n-1] = c;
1229
#endif
1230
1231
#define Squ_Begin(n)        \
1232
5.12M
  Declare2Words(p)       \
1233
5.12M
  word c;       \
1234
5.12M
  Declare2Words(d)       \
1235
5.12M
  Declare2Words(e)       \
1236
5.12M
  MultiplyWords(p, A[0], A[0]) \
1237
5.12M
  R[0] = LowWord(p);        \
1238
5.12M
  AssignWord(e, HighWord(p))    \
1239
5.12M
  MultiplyWords(p, A[0], A[1]) \
1240
5.12M
  c = LowWord(p);    \
1241
5.12M
  AssignWord(d, HighWord(p))    \
1242
5.12M
  Squ_NonDiag            \
1243
1244
#define Squ_NonDiag       \
1245
131M
  Double3Words(c, d)
1246
1247
#define Squ_SaveAcc(k, i, j)    \
1248
126M
  Acc3WordsBy2(c, d, e)     \
1249
126M
  R[k] = c;       \
1250
126M
  MultiplyWords(p, A[i], A[j]) \
1251
126M
  c = LowWord(p);    \
1252
126M
  AssignWord(d, HighWord(p))    \
1253
1254
#define Squ_Acc(i, j)       \
1255
406M
  MulAcc(c, d, A[i], A[j])
1256
1257
#define Squ_Diag(i)         \
1258
63.2M
  Squ_NonDiag            \
1259
63.2M
  MulAcc(c, d, A[i], A[i])
1260
1261
#define Squ_End(n)          \
1262
5.12M
  Acc3WordsBy2(c, d, e)     \
1263
5.12M
  R[2*n-3] = c;     \
1264
5.12M
  MultiplyWords(p, A[n-1], A[n-1])\
1265
5.12M
  Acc2WordsBy2(p, e)        \
1266
5.12M
  R[2*n-2] = LowWord(p);      \
1267
5.12M
  R[2*n-1] = HighWord(p);
1268
1269
1270
void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1271
403M
{
1272
  // http://github.com/weidai11/cryptopp/issues/188
1273
403M
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1274
403M
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1275
1276
403M
  Mul_2
1277
403M
}
1278
1279
void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1280
33.7k
{
1281
  // http://github.com/weidai11/cryptopp/issues/188
1282
33.7k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1283
33.7k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1284
1285
33.7k
  Mul_4
1286
33.7k
}
1287
1288
void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1289
23.9k
{
1290
  // http://github.com/weidai11/cryptopp/issues/188
1291
23.9k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1292
23.9k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1293
1294
23.9k
  Mul_8
1295
23.9k
}
1296
1297
void Baseline_Square2(word *R, const word *AA)
1298
435k
{
1299
  // http://github.com/weidai11/cryptopp/issues/188
1300
435k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1301
1302
435k
  Squ_2
1303
435k
}
1304
1305
void Baseline_Square4(word *R, const word *AA)
1306
132k
{
1307
  // http://github.com/weidai11/cryptopp/issues/188
1308
132k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1309
1310
132k
  Squ_4
1311
132k
}
1312
1313
void Baseline_Square8(word *R, const word *AA)
1314
107k
{
1315
  // http://github.com/weidai11/cryptopp/issues/188
1316
107k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1317
1318
107k
  Squ_8
1319
107k
}
1320
1321
void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1322
360k
{
1323
  // http://github.com/weidai11/cryptopp/issues/188
1324
360k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1325
360k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1326
1327
360k
  Bot_2
1328
1329
// http://github.com/weidai11/cryptopp/issues/340
1330
360k
#if defined(TWO_64_BIT_WORDS)
1331
360k
  CRYPTOPP_UNUSED(d0); CRYPTOPP_UNUSED(d1);
1332
360k
#endif
1333
360k
}
1334
1335
void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1336
111k
{
1337
  // http://github.com/weidai11/cryptopp/issues/188
1338
111k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1339
111k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1340
1341
111k
  Bot_4
1342
111k
}
1343
1344
void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1345
80.2k
{
1346
  // http://github.com/weidai11/cryptopp/issues/188
1347
80.2k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1348
80.2k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1349
1350
80.2k
  Bot_8
1351
80.2k
}
1352
1353
#define Top_Begin(n)        \
1354
186k
  Declare2Words(p)       \
1355
186k
  word c; \
1356
186k
  Declare2Words(d)       \
1357
186k
  MultiplyWords(p, A[0], B[n-2]);\
1358
186k
  AssignWord(d, HighWord(p));
1359
1360
#define Top_Acc(i, j) \
1361
735k
  MultiplyWords(p, A[i], B[j]);\
1362
735k
  Acc2WordsBy1(d, HighWord(p));
1363
1364
#define Top_SaveAcc0(i, j)    \
1365
186k
  c = LowWord(d);  \
1366
186k
  AssignWord(d, HighWord(d))  \
1367
186k
  MulAcc(c, d, A[i], B[j])
1368
1369
#define Top_SaveAcc1(i, j)    \
1370
186k
  c = L<c; \
1371
186k
  Acc2WordsBy1(d, c); \
1372
186k
  c = LowWord(d);  \
1373
186k
  AssignWord(d, HighWord(d))  \
1374
186k
  MulAcc(c, d, A[i], B[j])
1375
1376
void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1377
279k
{
1378
279k
  CRYPTOPP_UNUSED(L);
1379
279k
  word T[4];
1380
279k
  Baseline_Multiply2(T, A, B);
1381
279k
  R[0] = T[2];
1382
279k
  R[1] = T[3];
1383
279k
}
1384
1385
void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1386
104k
{
1387
  // http://github.com/weidai11/cryptopp/issues/188
1388
104k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1389
104k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1390
1391
104k
  Top_Begin(4)
1392
104k
  Top_Acc(1, 1) Top_Acc(2, 0)  \
1393
104k
  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0)  \
1394
104k
  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1)  \
1395
104k
  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1396
104k
  Mul_End(1, 3)
1397
104k
}
1398
1399
void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1400
78.1k
{
1401
  // http://github.com/weidai11/cryptopp/issues/188
1402
78.1k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1403
78.1k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1404
1405
78.1k
  Top_Begin(8)
1406
78.1k
  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1407
78.1k
  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1408
78.1k
  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1409
78.1k
  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1410
78.1k
  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1411
78.1k
  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1412
78.1k
  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1413
78.1k
  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1414
78.1k
  Mul_End(5, 7)
1415
78.1k
}
1416
1417
#if !CRYPTOPP_INTEGER_SSE2  // save memory by not compiling these functions when SSE2 is available
1418
void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1419
13.0M
{
1420
  // http://github.com/weidai11/cryptopp/issues/188
1421
13.0M
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1422
13.0M
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1423
1424
13.0M
  Mul_16
1425
13.0M
}
1426
1427
void Baseline_Square16(word *R, const word *AA)
1428
4.45M
{
1429
  // http://github.com/weidai11/cryptopp/issues/188
1430
4.45M
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1431
1432
4.45M
  Squ_16
1433
4.45M
}
1434
1435
void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1436
2.28M
{
1437
  // http://github.com/weidai11/cryptopp/issues/188
1438
2.28M
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1439
2.28M
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1440
1441
2.28M
  Bot_16
1442
2.28M
}
1443
1444
void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1445
4.20k
{
1446
  // http://github.com/weidai11/cryptopp/issues/188
1447
4.20k
  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1448
4.20k
  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1449
1450
4.20k
  Top_Begin(16)
1451
4.20k
  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1452
4.20k
  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1453
4.20k
  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1454
4.20k
  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1455
4.20k
  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1456
4.20k
  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1457
4.20k
  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1458
4.20k
  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1459
4.20k
  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1460
4.20k
  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1461
4.20k
  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1462
4.20k
  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1463
4.20k
  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1464
4.20k
  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1465
4.20k
  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1466
4.20k
  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1467
4.20k
  Mul_End(13, 15)
1468
4.20k
}
1469
#endif
1470
1471
// ********************************************************
1472
1473
#if CRYPTOPP_INTEGER_SSE2
1474
1475
CRYPTOPP_ALIGN_DATA(16)
1476
CRYPTOPP_TABLE
1477
const word32 s_maskLow16[4] = {
1478
  0xffff,0xffff,0xffff,0xffff
1479
};
1480
1481
#undef Mul_Begin
1482
#undef Mul_Acc
1483
#undef Top_Begin
1484
#undef Top_Acc
1485
#undef Squ_Acc
1486
#undef Squ_NonDiag
1487
#undef Squ_Diag
1488
#undef Squ_SaveAcc
1489
#undef Squ_Begin
1490
#undef Mul_SaveAcc
1491
#undef Bot_Acc
1492
#undef Bot_SaveAcc
1493
#undef Bot_End
1494
#undef Squ_End
1495
#undef Mul_End
1496
1497
#define SSE2_FinalSave(k)     \
1498
  AS2(  psllq   xmm5, 16) \
1499
  AS2(  paddq   xmm4, xmm5) \
1500
  AS2(  movq    QWORD PTR [ecx+8*(k)], xmm4)
1501
1502
#define SSE2_SaveShift(k)     \
1503
  AS2(  movq    xmm0, xmm6) \
1504
  AS2(  punpckhqdq  xmm6, xmm0) \
1505
  AS2(  movq    xmm1, xmm7) \
1506
  AS2(  punpckhqdq  xmm7, xmm1) \
1507
  AS2(  paddd   xmm6, xmm0) \
1508
  AS2(  pslldq    xmm6, 4)  \
1509
  AS2(  paddd   xmm7, xmm1) \
1510
  AS2(  paddd   xmm4, xmm6) \
1511
  AS2(  pslldq    xmm7, 4)  \
1512
  AS2(  movq    xmm6, xmm4) \
1513
  AS2(  paddd   xmm5, xmm7) \
1514
  AS2(  movq    xmm7, xmm5) \
1515
  AS2(  movd    DWORD PTR [ecx+8*(k)], xmm4)  \
1516
  AS2(  psrlq   xmm6, 16) \
1517
  AS2(  paddq   xmm6, xmm7) \
1518
  AS2(  punpckhqdq  xmm4, xmm0) \
1519
  AS2(  punpckhqdq  xmm5, xmm0) \
1520
  AS2(  movq    QWORD PTR [ecx+8*(k)+2], xmm6)  \
1521
  AS2(  psrlq   xmm6, 3*16) \
1522
  AS2(  paddd   xmm4, xmm6) \
1523
1524
#define Squ_SSE2_SaveShift(k)     \
1525
  AS2(  movq    xmm0, xmm6) \
1526
  AS2(  punpckhqdq  xmm6, xmm0) \
1527
  AS2(  movq    xmm1, xmm7) \
1528
  AS2(  punpckhqdq  xmm7, xmm1) \
1529
  AS2(  paddd   xmm6, xmm0) \
1530
  AS2(  pslldq    xmm6, 4)  \
1531
  AS2(  paddd   xmm7, xmm1) \
1532
  AS2(  paddd   xmm4, xmm6) \
1533
  AS2(  pslldq    xmm7, 4)  \
1534
  AS2(  movhlps   xmm6, xmm4) \
1535
  AS2(  movd    DWORD PTR [ecx+8*(k)], xmm4)  \
1536
  AS2(  paddd   xmm5, xmm7) \
1537
  AS2(  movhps    QWORD PTR [esp+12], xmm5)\
1538
  AS2(  psrlq   xmm4, 16) \
1539
  AS2(  paddq   xmm4, xmm5) \
1540
  AS2(  movq    QWORD PTR [ecx+8*(k)+2], xmm4)  \
1541
  AS2(  psrlq   xmm4, 3*16) \
1542
  AS2(  paddd   xmm4, xmm6) \
1543
  AS2(  movq    QWORD PTR [esp+4], xmm4)\
1544
1545
#define SSE2_FirstMultiply(i)       \
1546
  AS2(  movdqa    xmm7, [esi+(i)*16])\
1547
  AS2(  movdqa    xmm5, [edi-(i)*16])\
1548
  AS2(  pmuludq   xmm5, xmm7)   \
1549
  AS2(  movdqa    xmm4, [ebx])\
1550
  AS2(  movdqa    xmm6, xmm4)   \
1551
  AS2(  pand    xmm4, xmm5)   \
1552
  AS2(  psrld   xmm5, 16)   \
1553
  AS2(  pmuludq   xmm7, [edx-(i)*16])\
1554
  AS2(  pand    xmm6, xmm7)   \
1555
  AS2(  psrld   xmm7, 16)
1556
1557
#define Squ_Begin(n)              \
1558
  SquPrologue                 \
1559
  AS2(  mov   esi, esp)\
1560
  AS2(  and   esp, 0xfffffff0)\
1561
  AS2(  lea   edi, [esp-32*n])\
1562
  AS2(  sub   esp, 32*n+16)\
1563
  AS1(  push  esi)\
1564
  AS2(  mov   esi, edi)         \
1565
  AS2(  xor   edx, edx)         \
1566
  ASL(1)                    \
1567
  ASS(  pshufd  xmm0, [eax+edx], 3,1,2,0) \
1568
  ASS(  pshufd  xmm1, [eax+edx], 2,0,3,1) \
1569
  AS2(  movdqa  [edi+2*edx], xmm0)    \
1570
  AS2(  psrlq xmm0, 32)         \
1571
  AS2(  movdqa  [edi+2*edx+16], xmm0) \
1572
  AS2(  movdqa  [edi+16*n+2*edx], xmm1)   \
1573
  AS2(  psrlq xmm1, 32)         \
1574
  AS2(  movdqa  [edi+16*n+2*edx+16], xmm1)  \
1575
  AS2(  add   edx, 16)          \
1576
  AS2(  cmp   edx, 8*(n))         \
1577
  ASJ(  jne,  1, b)           \
1578
  AS2(  lea   edx, [edi+16*n])\
1579
  SSE2_FirstMultiply(0)             \
1580
1581
#define Squ_Acc(i)                \
1582
  ASL(LSqu##i)                \
1583
  AS2(  movdqa    xmm1, [esi+(i)*16]) \
1584
  AS2(  movdqa    xmm0, [edi-(i)*16]) \
1585
  AS2(  movdqa    xmm2, [ebx])  \
1586
  AS2(  pmuludq   xmm0, xmm1)       \
1587
  AS2(  pmuludq   xmm1, [edx-(i)*16]) \
1588
  AS2(  movdqa    xmm3, xmm2)     \
1589
  AS2(  pand    xmm2, xmm0)     \
1590
  AS2(  psrld   xmm0, 16)     \
1591
  AS2(  paddd   xmm4, xmm2)     \
1592
  AS2(  paddd   xmm5, xmm0)     \
1593
  AS2(  pand    xmm3, xmm1)     \
1594
  AS2(  psrld   xmm1, 16)     \
1595
  AS2(  paddd   xmm6, xmm3)     \
1596
  AS2(  paddd   xmm7, xmm1)   \
1597
1598
#define Squ_Acc1(i)
1599
#define Squ_Acc2(i)   ASC(call, LSqu##i)
1600
#define Squ_Acc3(i)   Squ_Acc2(i)
1601
#define Squ_Acc4(i)   Squ_Acc2(i)
1602
#define Squ_Acc5(i)   Squ_Acc2(i)
1603
#define Squ_Acc6(i)   Squ_Acc2(i)
1604
#define Squ_Acc7(i)   Squ_Acc2(i)
1605
#define Squ_Acc8(i)   Squ_Acc2(i)
1606
1607
#define SSE2_End(E, n)          \
1608
  SSE2_SaveShift(2*(n)-3)     \
1609
  AS2(  movdqa    xmm7, [esi+16]) \
1610
  AS2(  movdqa    xmm0, [edi])  \
1611
  AS2(  pmuludq   xmm0, xmm7)       \
1612
  AS2(  movdqa    xmm2, [ebx])    \
1613
  AS2(  pmuludq   xmm7, [edx])  \
1614
  AS2(  movdqa    xmm6, xmm2)       \
1615
  AS2(  pand    xmm2, xmm0)       \
1616
  AS2(  psrld   xmm0, 16)       \
1617
  AS2(  paddd   xmm4, xmm2)       \
1618
  AS2(  paddd   xmm5, xmm0)       \
1619
  AS2(  pand    xmm6, xmm7)       \
1620
  AS2(  psrld   xmm7, 16) \
1621
  SSE2_SaveShift(2*(n)-2)     \
1622
  SSE2_FinalSave(2*(n)-1)     \
1623
  AS1(  pop   esp)\
1624
  E
1625
1626
#define Squ_End(n)    SSE2_End(SquEpilogue, n)
1627
#define Mul_End(n)    SSE2_End(MulEpilogue, n)
1628
#define Top_End(n)    SSE2_End(TopEpilogue, n)
1629
1630
#define Squ_Column1(k, i) \
1631
  Squ_SSE2_SaveShift(k)         \
1632
  AS2(  add     esi, 16)  \
1633
  SSE2_FirstMultiply(1)\
1634
  Squ_Acc##i(i) \
1635
  AS2(  paddd   xmm4, xmm4)   \
1636
  AS2(  paddd   xmm5, xmm5)   \
1637
  AS2(  movdqa    xmm3, [esi])        \
1638
  AS2(  movq    xmm1, QWORD PTR [esi+8])  \
1639
  AS2(  pmuludq   xmm1, xmm3)   \
1640
  AS2(  pmuludq   xmm3, xmm3)   \
1641
  AS2(  movdqa    xmm0, [ebx])\
1642
  AS2(  movdqa    xmm2, xmm0)   \
1643
  AS2(  pand    xmm0, xmm1)   \
1644
  AS2(  psrld   xmm1, 16)   \
1645
  AS2(  paddd   xmm6, xmm0)   \
1646
  AS2(  paddd   xmm7, xmm1)   \
1647
  AS2(  pand    xmm2, xmm3)   \
1648
  AS2(  psrld   xmm3, 16)   \
1649
  AS2(  paddd   xmm6, xmm6)   \
1650
  AS2(  paddd   xmm7, xmm7)   \
1651
  AS2(  paddd   xmm4, xmm2)   \
1652
  AS2(  paddd   xmm5, xmm3)   \
1653
  AS2(  movq    xmm0, QWORD PTR [esp+4])\
1654
  AS2(  movq    xmm1, QWORD PTR [esp+12])\
1655
  AS2(  paddd   xmm4, xmm0)\
1656
  AS2(  paddd   xmm5, xmm1)\
1657
1658
#define Squ_Column0(k, i) \
1659
  Squ_SSE2_SaveShift(k)         \
1660
  AS2(  add     edi, 16)  \
1661
  AS2(  add     edx, 16)  \
1662
  SSE2_FirstMultiply(1)\
1663
  Squ_Acc##i(i) \
1664
  AS2(  paddd   xmm6, xmm6)   \
1665
  AS2(  paddd   xmm7, xmm7)   \
1666
  AS2(  paddd   xmm4, xmm4)   \
1667
  AS2(  paddd   xmm5, xmm5)   \
1668
  AS2(  movq    xmm0, QWORD PTR [esp+4])\
1669
  AS2(  movq    xmm1, QWORD PTR [esp+12])\
1670
  AS2(  paddd   xmm4, xmm0)\
1671
  AS2(  paddd   xmm5, xmm1)\
1672
1673
#define SSE2_MulAdd45           \
1674
  AS2(  movdqa    xmm7, [esi])  \
1675
  AS2(  movdqa    xmm0, [edi])  \
1676
  AS2(  pmuludq   xmm0, xmm7)       \
1677
  AS2(  movdqa    xmm2, [ebx])    \
1678
  AS2(  pmuludq   xmm7, [edx])  \
1679
  AS2(  movdqa    xmm6, xmm2)       \
1680
  AS2(  pand    xmm2, xmm0)       \
1681
  AS2(  psrld   xmm0, 16)       \
1682
  AS2(  paddd   xmm4, xmm2)       \
1683
  AS2(  paddd   xmm5, xmm0)       \
1684
  AS2(  pand    xmm6, xmm7)       \
1685
  AS2(  psrld   xmm7, 16)
1686
1687
#define Mul_Begin(n)              \
1688
  MulPrologue                 \
1689
  AS2(  mov   esi, esp)\
1690
  AS2(  and   esp, 0xfffffff0)\
1691
  AS2(  sub   esp, 48*n+16)\
1692
  AS1(  push  esi)\
1693
  AS2(  xor   edx, edx)         \
1694
  ASL(1)                    \
1695
  ASS(  pshufd  xmm0, [eax+edx], 3,1,2,0) \
1696
  ASS(  pshufd  xmm1, [eax+edx], 2,0,3,1) \
1697
  ASS(  pshufd  xmm2, [edi+edx], 3,1,2,0) \
1698
  AS2(  movdqa  [esp+20+2*edx], xmm0)   \
1699
  AS2(  psrlq xmm0, 32)         \
1700
  AS2(  movdqa  [esp+20+2*edx+16], xmm0)  \
1701
  AS2(  movdqa  [esp+20+16*n+2*edx], xmm1)    \
1702
  AS2(  psrlq xmm1, 32)         \
1703
  AS2(  movdqa  [esp+20+16*n+2*edx+16], xmm1) \
1704
  AS2(  movdqa  [esp+20+32*n+2*edx], xmm2)    \
1705
  AS2(  psrlq xmm2, 32)         \
1706
  AS2(  movdqa  [esp+20+32*n+2*edx+16], xmm2) \
1707
  AS2(  add   edx, 16)          \
1708
  AS2(  cmp   edx, 8*(n))         \
1709
  ASJ(  jne,  1, b)           \
1710
  AS2(  lea   edi, [esp+20])\
1711
  AS2(  lea   edx, [esp+20+16*n])\
1712
  AS2(  lea   esi, [esp+20+32*n])\
1713
  SSE2_FirstMultiply(0)             \
1714
1715
#define Mul_Acc(i)                \
1716
  ASL(LMul##i)                    \
1717
  AS2(  movdqa    xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1718
  AS2(  movdqa    xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1719
  AS2(  movdqa    xmm2, [ebx])  \
1720
  AS2(  pmuludq   xmm0, xmm1)       \
1721
  AS2(  pmuludq   xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1722
  AS2(  movdqa    xmm3, xmm2)     \
1723
  AS2(  pand    xmm2, xmm0)     \
1724
  AS2(  psrld   xmm0, 16)     \
1725
  AS2(  paddd   xmm4, xmm2)     \
1726
  AS2(  paddd   xmm5, xmm0)     \
1727
  AS2(  pand    xmm3, xmm1)     \
1728
  AS2(  psrld   xmm1, 16)     \
1729
  AS2(  paddd   xmm6, xmm3)     \
1730
  AS2(  paddd   xmm7, xmm1)   \
1731
1732
#define Mul_Acc1(i)
1733
#define Mul_Acc2(i)   ASC(call, LMul##i)
1734
#define Mul_Acc3(i)   Mul_Acc2(i)
1735
#define Mul_Acc4(i)   Mul_Acc2(i)
1736
#define Mul_Acc5(i)   Mul_Acc2(i)
1737
#define Mul_Acc6(i)   Mul_Acc2(i)
1738
#define Mul_Acc7(i)   Mul_Acc2(i)
1739
#define Mul_Acc8(i)   Mul_Acc2(i)
1740
#define Mul_Acc9(i)   Mul_Acc2(i)
1741
#define Mul_Acc10(i)  Mul_Acc2(i)
1742
#define Mul_Acc11(i)  Mul_Acc2(i)
1743
#define Mul_Acc12(i)  Mul_Acc2(i)
1744
#define Mul_Acc13(i)  Mul_Acc2(i)
1745
#define Mul_Acc14(i)  Mul_Acc2(i)
1746
#define Mul_Acc15(i)  Mul_Acc2(i)
1747
#define Mul_Acc16(i)  Mul_Acc2(i)
1748
1749
#define Mul_Column1(k, i) \
1750
  SSE2_SaveShift(k)         \
1751
  AS2(  add     esi, 16)  \
1752
  SSE2_MulAdd45\
1753
  Mul_Acc##i(i) \
1754
1755
#define Mul_Column0(k, i) \
1756
  SSE2_SaveShift(k)         \
1757
  AS2(  add     edi, 16)  \
1758
  AS2(  add     edx, 16)  \
1759
  SSE2_MulAdd45\
1760
  Mul_Acc##i(i) \
1761
1762
#define Bot_Acc(i)              \
1763
  AS2(  movdqa    xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1764
  AS2(  movdqa    xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1765
  AS2(  pmuludq   xmm0, xmm1)       \
1766
  AS2(  pmuludq   xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])   \
1767
  AS2(  paddq   xmm4, xmm0)       \
1768
  AS2(  paddd   xmm6, xmm1)
1769
1770
#define Bot_SaveAcc(k)          \
1771
  SSE2_SaveShift(k)             \
1772
  AS2(  add     edi, 16)  \
1773
  AS2(  add     edx, 16)  \
1774
  AS2(  movdqa    xmm6, [esi])  \
1775
  AS2(  movdqa    xmm0, [edi])  \
1776
  AS2(  pmuludq   xmm0, xmm6)       \
1777
  AS2(  paddq   xmm4, xmm0)       \
1778
  AS2(  psllq   xmm5, 16)       \
1779
  AS2(  paddq   xmm4, xmm5)       \
1780
  AS2(  pmuludq   xmm6, [edx])
1781
1782
#define Bot_End(n)              \
1783
  AS2(  movhlps   xmm7, xmm6)     \
1784
  AS2(  paddd   xmm6, xmm7)     \
1785
  AS2(  psllq   xmm6, 32)     \
1786
  AS2(  paddd   xmm4, xmm6)     \
1787
  AS2(  movq    QWORD PTR [ecx+8*((n)-1)], xmm4)  \
1788
  AS1(  pop   esp)\
1789
  MulEpilogue
1790
1791
#define Top_Begin(n)              \
1792
  TopPrologue                 \
1793
  AS2(  mov   edx, esp)\
1794
  AS2(  and   esp, 0xfffffff0)\
1795
  AS2(  sub   esp, 48*n+16)\
1796
  AS1(  push  edx)\
1797
  AS2(  xor   edx, edx)         \
1798
  ASL(1)                    \
1799
  ASS(  pshufd  xmm0, [eax+edx], 3,1,2,0) \
1800
  ASS(  pshufd  xmm1, [eax+edx], 2,0,3,1) \
1801
  ASS(  pshufd  xmm2, [edi+edx], 3,1,2,0) \
1802
  AS2(  movdqa  [esp+20+2*edx], xmm0)   \
1803
  AS2(  psrlq xmm0, 32)         \
1804
  AS2(  movdqa  [esp+20+2*edx+16], xmm0)  \
1805
  AS2(  movdqa  [esp+20+16*n+2*edx], xmm1)    \
1806
  AS2(  psrlq xmm1, 32)         \
1807
  AS2(  movdqa  [esp+20+16*n+2*edx+16], xmm1) \
1808
  AS2(  movdqa  [esp+20+32*n+2*edx], xmm2)    \
1809
  AS2(  psrlq xmm2, 32)         \
1810
  AS2(  movdqa  [esp+20+32*n+2*edx+16], xmm2) \
1811
  AS2(  add   edx, 16)          \
1812
  AS2(  cmp   edx, 8*(n))         \
1813
  ASJ(  jne,  1, b)           \
1814
  AS2(  mov   eax, esi)         \
1815
  AS2(  lea   edi, [esp+20+00*n+16*(n/2-1)])\
1816
  AS2(  lea   edx, [esp+20+16*n+16*(n/2-1)])\
1817
  AS2(  lea   esi, [esp+20+32*n+16*(n/2-1)])\
1818
  AS2(  pxor  xmm4, xmm4)\
1819
  AS2(  pxor  xmm5, xmm5)
1820
1821
#define Top_Acc(i)              \
1822
  AS2(  movq    xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1823
  AS2(  pmuludq   xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1824
  AS2(  psrlq   xmm0, 48)       \
1825
  AS2(  paddd   xmm5, xmm0)\
1826
1827
#define Top_Column0(i)  \
1828
  AS2(  psllq   xmm5, 32)       \
1829
  AS2(  add     edi, 16)  \
1830
  AS2(  add     edx, 16)  \
1831
  SSE2_MulAdd45\
1832
  Mul_Acc##i(i) \
1833
1834
#define Top_Column1(i)  \
1835
  SSE2_SaveShift(0)         \
1836
  AS2(  add     esi, 16)  \
1837
  SSE2_MulAdd45\
1838
  Mul_Acc##i(i) \
1839
  AS2(  shr     eax, 16)  \
1840
  AS2(  movd    xmm0, eax)\
1841
  AS2(  movd    xmm1, [ecx+4])\
1842
  AS2(  psrld   xmm1, 16)\
1843
  AS2(  pcmpgtd   xmm1, xmm0)\
1844
  AS2(  psrld   xmm1, 31)\
1845
  AS2(  paddd   xmm4, xmm1)\
1846
1847
void SSE2_Square4(word *C, const word *A)
1848
{
1849
  Squ_Begin(2)
1850
  Squ_Column0(0, 1)
1851
  Squ_End(2)
1852
}
1853
1854
void SSE2_Square8(word *C, const word *A)
1855
{
1856
  Squ_Begin(4)
1857
#ifndef __GNUC__
1858
  ASJ(  jmp,  0, f)
1859
  Squ_Acc(2)
1860
  AS1(  ret) ASL(0)
1861
#endif
1862
  Squ_Column0(0, 1)
1863
  Squ_Column1(1, 1)
1864
  Squ_Column0(2, 2)
1865
  Squ_Column1(3, 1)
1866
  Squ_Column0(4, 1)
1867
  Squ_End(4)
1868
}
1869
1870
void SSE2_Square16(word *C, const word *A)
1871
{
1872
  Squ_Begin(8)
1873
#ifndef __GNUC__
1874
  ASJ(  jmp,  0, f)
1875
  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1876
  AS1(  ret) ASL(0)
1877
#endif
1878
  Squ_Column0(0, 1)
1879
  Squ_Column1(1, 1)
1880
  Squ_Column0(2, 2)
1881
  Squ_Column1(3, 2)
1882
  Squ_Column0(4, 3)
1883
  Squ_Column1(5, 3)
1884
  Squ_Column0(6, 4)
1885
  Squ_Column1(7, 3)
1886
  Squ_Column0(8, 3)
1887
  Squ_Column1(9, 2)
1888
  Squ_Column0(10, 2)
1889
  Squ_Column1(11, 1)
1890
  Squ_Column0(12, 1)
1891
  Squ_End(8)
1892
}
1893
1894
void SSE2_Square32(word *C, const word *A)
1895
{
1896
  Squ_Begin(16)
1897
  ASJ(  jmp,  0, f)
1898
  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1899
  AS1(  ret) ASL(0)
1900
  Squ_Column0(0, 1)
1901
  Squ_Column1(1, 1)
1902
  Squ_Column0(2, 2)
1903
  Squ_Column1(3, 2)
1904
  Squ_Column0(4, 3)
1905
  Squ_Column1(5, 3)
1906
  Squ_Column0(6, 4)
1907
  Squ_Column1(7, 4)
1908
  Squ_Column0(8, 5)
1909
  Squ_Column1(9, 5)
1910
  Squ_Column0(10, 6)
1911
  Squ_Column1(11, 6)
1912
  Squ_Column0(12, 7)
1913
  Squ_Column1(13, 7)
1914
  Squ_Column0(14, 8)
1915
  Squ_Column1(15, 7)
1916
  Squ_Column0(16, 7)
1917
  Squ_Column1(17, 6)
1918
  Squ_Column0(18, 6)
1919
  Squ_Column1(19, 5)
1920
  Squ_Column0(20, 5)
1921
  Squ_Column1(21, 4)
1922
  Squ_Column0(22, 4)
1923
  Squ_Column1(23, 3)
1924
  Squ_Column0(24, 3)
1925
  Squ_Column1(25, 2)
1926
  Squ_Column0(26, 2)
1927
  Squ_Column1(27, 1)
1928
  Squ_Column0(28, 1)
1929
  Squ_End(16)
1930
}
1931
1932
void SSE2_Multiply4(word *C, const word *A, const word *B)
1933
{
1934
  Mul_Begin(2)
1935
#ifndef __GNUC__
1936
  ASJ(  jmp,  0, f)
1937
  Mul_Acc(2)
1938
  AS1(  ret) ASL(0)
1939
#endif
1940
  Mul_Column0(0, 2)
1941
  Mul_End(2)
1942
}
1943
1944
void SSE2_Multiply8(word *C, const word *A, const word *B)
1945
{
1946
  Mul_Begin(4)
1947
#ifndef __GNUC__
1948
  ASJ(  jmp,  0, f)
1949
  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1950
  AS1(  ret) ASL(0)
1951
#endif
1952
  Mul_Column0(0, 2)
1953
  Mul_Column1(1, 3)
1954
  Mul_Column0(2, 4)
1955
  Mul_Column1(3, 3)
1956
  Mul_Column0(4, 2)
1957
  Mul_End(4)
1958
}
1959
1960
void SSE2_Multiply16(word *C, const word *A, const word *B)
1961
{
1962
  Mul_Begin(8)
1963
#ifndef __GNUC__
1964
  ASJ(  jmp,  0, f)
1965
  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966
  AS1(  ret) ASL(0)
1967
#endif
1968
  Mul_Column0(0, 2)
1969
  Mul_Column1(1, 3)
1970
  Mul_Column0(2, 4)
1971
  Mul_Column1(3, 5)
1972
  Mul_Column0(4, 6)
1973
  Mul_Column1(5, 7)
1974
  Mul_Column0(6, 8)
1975
  Mul_Column1(7, 7)
1976
  Mul_Column0(8, 6)
1977
  Mul_Column1(9, 5)
1978
  Mul_Column0(10, 4)
1979
  Mul_Column1(11, 3)
1980
  Mul_Column0(12, 2)
1981
  Mul_End(8)
1982
}
1983
1984
void SSE2_Multiply32(word *C, const word *A, const word *B)
1985
{
1986
  Mul_Begin(16)
1987
  ASJ(  jmp,  0, f)
1988
  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1989
  AS1(  ret) ASL(0)
1990
  Mul_Column0(0, 2)
1991
  Mul_Column1(1, 3)
1992
  Mul_Column0(2, 4)
1993
  Mul_Column1(3, 5)
1994
  Mul_Column0(4, 6)
1995
  Mul_Column1(5, 7)
1996
  Mul_Column0(6, 8)
1997
  Mul_Column1(7, 9)
1998
  Mul_Column0(8, 10)
1999
  Mul_Column1(9, 11)
2000
  Mul_Column0(10, 12)
2001
  Mul_Column1(11, 13)
2002
  Mul_Column0(12, 14)
2003
  Mul_Column1(13, 15)
2004
  Mul_Column0(14, 16)
2005
  Mul_Column1(15, 15)
2006
  Mul_Column0(16, 14)
2007
  Mul_Column1(17, 13)
2008
  Mul_Column0(18, 12)
2009
  Mul_Column1(19, 11)
2010
  Mul_Column0(20, 10)
2011
  Mul_Column1(21, 9)
2012
  Mul_Column0(22, 8)
2013
  Mul_Column1(23, 7)
2014
  Mul_Column0(24, 6)
2015
  Mul_Column1(25, 5)
2016
  Mul_Column0(26, 4)
2017
  Mul_Column1(27, 3)
2018
  Mul_Column0(28, 2)
2019
  Mul_End(16)
2020
}
2021
2022
void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
2023
{
2024
  Mul_Begin(2)
2025
  Bot_SaveAcc(0) Bot_Acc(2)
2026
  Bot_End(2)
2027
}
2028
2029
void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
2030
{
2031
  Mul_Begin(4)
2032
#ifndef __GNUC__
2033
  ASJ(  jmp,  0, f)
2034
  Mul_Acc(3) Mul_Acc(2)
2035
  AS1(  ret) ASL(0)
2036
#endif
2037
  Mul_Column0(0, 2)
2038
  Mul_Column1(1, 3)
2039
  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2040
  Bot_End(4)
2041
}
2042
2043
void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2044
{
2045
  Mul_Begin(8)
2046
#ifndef __GNUC__
2047
  ASJ(  jmp,  0, f)
2048
  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2049
  AS1(  ret) ASL(0)
2050
#endif
2051
  Mul_Column0(0, 2)
2052
  Mul_Column1(1, 3)
2053
  Mul_Column0(2, 4)
2054
  Mul_Column1(3, 5)
2055
  Mul_Column0(4, 6)
2056
  Mul_Column1(5, 7)
2057
  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2058
  Bot_End(8)
2059
}
2060
2061
void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2062
{
2063
  Mul_Begin(16)
2064
#ifndef __GNUC__
2065
  ASJ(  jmp,  0, f)
2066
  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2067
  AS1(  ret) ASL(0)
2068
#endif
2069
  Mul_Column0(0, 2)
2070
  Mul_Column1(1, 3)
2071
  Mul_Column0(2, 4)
2072
  Mul_Column1(3, 5)
2073
  Mul_Column0(4, 6)
2074
  Mul_Column1(5, 7)
2075
  Mul_Column0(6, 8)
2076
  Mul_Column1(7, 9)
2077
  Mul_Column0(8, 10)
2078
  Mul_Column1(9, 11)
2079
  Mul_Column0(10, 12)
2080
  Mul_Column1(11, 13)
2081
  Mul_Column0(12, 14)
2082
  Mul_Column1(13, 15)
2083
  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2084
  Bot_End(16)
2085
}
2086
2087
void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2088
{
2089
  Top_Begin(4)
2090
  Top_Acc(3) Top_Acc(2) Top_Acc(1)
2091
#ifndef __GNUC__
2092
  ASJ(  jmp,  0, f)
2093
  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2094
  AS1(  ret) ASL(0)
2095
#endif
2096
  Top_Column0(4)
2097
  Top_Column1(3)
2098
  Mul_Column0(0, 2)
2099
  Top_End(2)
2100
}
2101
2102
void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2103
{
2104
  Top_Begin(8)
2105
  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2106
#ifndef __GNUC__
2107
  ASJ(  jmp,  0, f)
2108
  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2109
  AS1(  ret) ASL(0)
2110
#endif
2111
  Top_Column0(8)
2112
  Top_Column1(7)
2113
  Mul_Column0(0, 6)
2114
  Mul_Column1(1, 5)
2115
  Mul_Column0(2, 4)
2116
  Mul_Column1(3, 3)
2117
  Mul_Column0(4, 2)
2118
  Top_End(4)
2119
}
2120
2121
void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2122
{
2123
  Top_Begin(16)
2124
  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2125
#ifndef __GNUC__
2126
  ASJ(  jmp,  0, f)
2127
  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2128
  AS1(  ret) ASL(0)
2129
#endif
2130
  Top_Column0(16)
2131
  Top_Column1(15)
2132
  Mul_Column0(0, 14)
2133
  Mul_Column1(1, 13)
2134
  Mul_Column0(2, 12)
2135
  Mul_Column1(3, 11)
2136
  Mul_Column0(4, 10)
2137
  Mul_Column1(5, 9)
2138
  Mul_Column0(6, 8)
2139
  Mul_Column1(7, 7)
2140
  Mul_Column0(8, 6)
2141
  Mul_Column1(9, 5)
2142
  Mul_Column0(10, 4)
2143
  Mul_Column1(11, 3)
2144
  Mul_Column0(12, 2)
2145
  Top_End(8)
2146
}
2147
2148
#endif  // #if CRYPTOPP_INTEGER_SSE2
2149
2150
// ********************************************************
2151
2152
typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2153
typedef void (* PMul)(word *C, const word *A, const word *B);
2154
typedef void (* PSqu)(word *C, const word *A);
2155
typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2156
2157
#if CRYPTOPP_INTEGER_SSE2
2158
static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2159
static size_t s_recursionLimit = 8;
2160
#else
2161
static const size_t s_recursionLimit = 16;
2162
#endif  // CRYPTOPP_INTEGER_SSE2
2163
2164
static PMul s_pMul[9], s_pBot[9];
2165
static PSqu s_pSqu[9];
2166
static PMulTop s_pTop[9];
2167
2168
void SetFunctionPointers()
2169
20
{
2170
20
  s_pMul[0] = &Baseline_Multiply2;
2171
20
  s_pBot[0] = &Baseline_MultiplyBottom2;
2172
20
  s_pSqu[0] = &Baseline_Square2;
2173
20
  s_pTop[0] = &Baseline_MultiplyTop2;
2174
20
  s_pTop[1] = &Baseline_MultiplyTop4;
2175
2176
#if CRYPTOPP_INTEGER_SSE2
2177
  if (HasSSE2())
2178
  {
2179
    if (IsP4())
2180
    {
2181
      s_pAdd = &SSE2_Add;
2182
      s_pSub = &SSE2_Sub;
2183
    }
2184
2185
    s_recursionLimit = 32;
2186
2187
    s_pMul[1] = &SSE2_Multiply4;
2188
    s_pMul[2] = &SSE2_Multiply8;
2189
    s_pMul[4] = &SSE2_Multiply16;
2190
    s_pMul[8] = &SSE2_Multiply32;
2191
2192
    s_pBot[1] = &SSE2_MultiplyBottom4;
2193
    s_pBot[2] = &SSE2_MultiplyBottom8;
2194
    s_pBot[4] = &SSE2_MultiplyBottom16;
2195
    s_pBot[8] = &SSE2_MultiplyBottom32;
2196
2197
    s_pSqu[1] = &SSE2_Square4;
2198
    s_pSqu[2] = &SSE2_Square8;
2199
    s_pSqu[4] = &SSE2_Square16;
2200
    s_pSqu[8] = &SSE2_Square32;
2201
2202
    s_pTop[2] = &SSE2_MultiplyTop8;
2203
    s_pTop[4] = &SSE2_MultiplyTop16;
2204
    s_pTop[8] = &SSE2_MultiplyTop32;
2205
  }
2206
  else
2207
#endif  // CRYPTOPP_INTEGER_SSE2
2208
20
  {
2209
20
    s_pMul[1] = &Baseline_Multiply4;
2210
20
    s_pMul[2] = &Baseline_Multiply8;
2211
2212
20
    s_pBot[1] = &Baseline_MultiplyBottom4;
2213
20
    s_pBot[2] = &Baseline_MultiplyBottom8;
2214
2215
20
    s_pSqu[1] = &Baseline_Square4;
2216
20
    s_pSqu[2] = &Baseline_Square8;
2217
2218
20
    s_pTop[2] = &Baseline_MultiplyTop8;
2219
2220
20
#if !CRYPTOPP_INTEGER_SSE2
2221
20
    s_pMul[4] = &Baseline_Multiply16;
2222
20
    s_pBot[4] = &Baseline_MultiplyBottom16;
2223
20
    s_pSqu[4] = &Baseline_Square16;
2224
20
    s_pTop[4] = &Baseline_MultiplyTop16;
2225
20
#endif  // !CRYPTOPP_INTEGER_SSE2
2226
20
  }
2227
20
}
2228
2229
inline int Add(word *C, const word *A, const word *B, size_t N)
2230
61.4M
{
2231
#if CRYPTOPP_INTEGER_SSE2
2232
  return s_pAdd(N, C, A, B);
2233
#else
2234
61.4M
  return Baseline_Add(N, C, A, B);
2235
61.4M
#endif  // CRYPTOPP_INTEGER_SSE2
2236
61.4M
}
2237
2238
inline int Subtract(word *C, const word *A, const word *B, size_t N)
2239
37.5M
{
2240
#if CRYPTOPP_INTEGER_SSE2
2241
  return s_pSub(N, C, A, B);
2242
#else
2243
37.5M
  return Baseline_Sub(N, C, A, B);
2244
37.5M
#endif  // CRYPTOPP_INTEGER_SSE2
2245
37.5M
}
2246
2247
// ********************************************************
2248
2249
2250
17.1M
#define A0    A
2251
16.1M
#define A1    (A+N2)
2252
10.6M
#define B0    B
2253
9.57M
#define B1    (B+N2)
2254
2255
24.6M
#define T0    T
2256
4.04M
#define T1    (T+N2)
2257
25.9M
#define T2    (T+N)
2258
0
#define T3    (T+N+N2)
2259
2260
19.8M
#define R0    R
2261
42.2M
#define R1    (R+N2)
2262
26.6M
#define R2    (R+N)
2263
9.93M
#define R3    (R+N+N2)
2264
2265
// R[2*N] - result = A*B
2266
// T[2*N] - temporary work space
2267
// A[N] --- multiplier
2268
// B[N] --- multiplicant
2269
2270
void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2271
419M
{
2272
419M
  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2273
2274
419M
  if (N <= s_recursionLimit)
2275
416M
    s_pMul[N/4](R, A, B);
2276
3.34M
  else
2277
3.34M
  {
2278
3.34M
    const size_t N2 = N/2;
2279
2280
3.34M
    size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
2281
3.34M
    Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2282
2283
3.34M
    size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
2284
3.34M
    Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2285
2286
3.34M
    RecursiveMultiply(R2, T2, A1, B1, N2);
2287
3.34M
    RecursiveMultiply(T0, T2, R0, R1, N2);
2288
3.34M
    RecursiveMultiply(R0, T2, A0, B0, N2);
2289
2290
    // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2291
2292
3.34M
    int c2 = Add(R2, R2, R1, N2);
2293
3.34M
    int c3 = c2;
2294
3.34M
    c2 += Add(R1, R2, R0, N2);
2295
3.34M
    c3 += Add(R2, R2, R3, N2);
2296
2297
3.34M
    if (AN2 == BN2)
2298
1.83M
      c3 -= Subtract(R1, R1, T0, N);
2299
1.50M
    else
2300
1.50M
      c3 += Add(R1, R1, T0, N);
2301
2302
3.34M
    c3 += Increment(R2, N2, c2);
2303
3.34M
    CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2304
3.34M
    Increment(R3, N2, c3);
2305
3.34M
  }
2306
419M
}
2307
2308
// R[2*N] - result = A*A
2309
// T[2*N] - temporary work space
2310
// A[N] --- number to be squared
2311
2312
void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2313
8.37M
{
2314
8.37M
  CRYPTOPP_ASSERT(N && N%2==0);
2315
2316
8.37M
  if (N <= s_recursionLimit)
2317
5.12M
    s_pSqu[N/4](R, A);
2318
3.25M
  else
2319
3.25M
  {
2320
3.25M
    const size_t N2 = N/2;
2321
2322
3.25M
    RecursiveSquare(R0, T2, A0, N2);
2323
3.25M
    RecursiveSquare(R2, T2, A1, N2);
2324
3.25M
    RecursiveMultiply(T0, T2, A0, A1, N2);
2325
2326
3.25M
    int carry = Add(R1, R1, T0, N);
2327
3.25M
    carry += Add(R1, R1, T0, N);
2328
3.25M
    Increment(R3, N2, carry);
2329
3.25M
  }
2330
8.37M
}
2331
2332
// R[N] - bottom half of A*B
2333
// T[3*N/2] - temporary work space
2334
// A[N] - multiplier
2335
// B[N] - multiplicant
2336
2337
void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2338
4.43M
{
2339
4.43M
  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2340
2341
4.43M
  if (N <= s_recursionLimit)
2342
2.77M
    s_pBot[N/4](R, A, B);
2343
1.66M
  else
2344
1.66M
  {
2345
1.66M
    const size_t N2 = N/2;
2346
2347
1.66M
    RecursiveMultiply(R, T, A0, B0, N2);
2348
1.66M
    RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2349
1.66M
    Add(R1, R1, T0, N2);
2350
1.66M
    RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2351
1.66M
    Add(R1, R1, T0, N2);
2352
1.66M
  }
2353
4.43M
}
2354
2355
// R[N] --- upper half of A*B
2356
// T[2*N] - temporary work space
2357
// L[N] --- lower half of A*B
2358
// A[N] --- multiplier
2359
// B[N] --- multiplicant
2360
2361
void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2362
1.07M
{
2363
1.07M
  CRYPTOPP_ASSERT(N>=2 && N%2==0);
2364
2365
1.07M
  if (N <= s_recursionLimit)
2366
465k
    s_pTop[N/4](R, A, B, L[N-1]);
2367
613k
  else
2368
613k
  {
2369
613k
    const size_t N2 = N/2;
2370
2371
613k
    size_t AN2 = Compare(A0, A1, N2) > 0 ?  0 : N2;
2372
613k
    Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2373
2374
613k
    size_t BN2 = Compare(B0, B1, N2) > 0 ?  0 : N2;
2375
613k
    Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2376
2377
613k
    RecursiveMultiply(T0, T2, R0, R1, N2);
2378
613k
    RecursiveMultiply(R0, T2, A1, B1, N2);
2379
2380
    // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2381
2382
613k
    int t, c3;
2383
613k
    int c2 = Subtract(T2, L+N2, L, N2);
2384
2385
613k
    if (AN2 == BN2)
2386
305k
    {
2387
305k
      c2 -= Add(T2, T2, T0, N2);
2388
305k
      t = (Compare(T2, R0, N2) == -1);
2389
305k
      c3 = t - Subtract(T2, T2, T1, N2);
2390
305k
    }
2391
307k
    else
2392
307k
    {
2393
307k
      c2 += Subtract(T2, T2, T0, N2);
2394
307k
      t = (Compare(T2, R0, N2) == -1);
2395
307k
      c3 = t + Add(T2, T2, T1, N2);
2396
307k
    }
2397
2398
613k
    c2 += t;
2399
613k
    if (c2 >= 0)
2400
593k
      c3 += Increment(T2, N2, c2);
2401
19.2k
    else
2402
19.2k
      c3 -= Decrement(T2, N2, -c2);
2403
613k
    c3 += Add(R0, T2, R1, N2);
2404
2405
613k
    CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2406
613k
    Increment(R1, N2, c3);
2407
613k
  }
2408
1.07M
}
2409
2410
inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2411
403M
{
2412
403M
  RecursiveMultiply(R, T, A, B, N);
2413
403M
}
2414
2415
inline void Square(word *R, word *T, const word *A, size_t N)
2416
1.87M
{
2417
1.87M
  RecursiveSquare(R, T, A, N);
2418
1.87M
}
2419
2420
inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2421
1.11M
{
2422
1.11M
  RecursiveMultiplyBottom(R, T, A, B, N);
2423
1.11M
}
2424
2425
// R[NA+NB] - result = A*B
2426
// T[NA+NB] - temporary work space
2427
// A[NA] ---- multiplier
2428
// B[NB] ---- multiplicant
2429
2430
void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2431
33.5M
{
2432
33.5M
  if (NA == NB)
2433
2.96M
  {
2434
    // Profiling guided the flow below.
2435
2.96M
    if (A != B)
2436
1.96M
      Multiply(R, T, A, B, NA);
2437
1.00M
    else
2438
1.00M
      Square(R, T, A, NA);
2439
2440
2.96M
    return;
2441
2.96M
  }
2442
2443
30.5M
  if (NA > NB)
2444
11.9M
  {
2445
11.9M
    std::swap(A, B);
2446
11.9M
    std::swap(NA, NB);
2447
11.9M
  }
2448
2449
30.5M
  CRYPTOPP_ASSERT(NB % NA == 0);
2450
2451
30.5M
  if (NA==2 && !A[1])
2452
12.7M
  {
2453
    // Profiling guided the flow below.
2454
12.7M
    switch (A[0])
2455
12.7M
    {
2456
12.7M
    default:
2457
12.7M
      R[NB] = LinearMultiply(R, B, A[0], NB);
2458
12.7M
      R[NB+1] = 0;
2459
12.7M
      return;
2460
274
    case 0:
2461
274
      SetWords(R, 0, NB+2);
2462
274
      return;
2463
25.7k
    case 1:
2464
25.7k
      CopyWords(R, B, NB);
2465
25.7k
      R[NB] = R[NB+1] = 0;
2466
25.7k
      return;
2467
12.7M
    }
2468
12.7M
  }
2469
2470
17.7M
  size_t i;
2471
17.7M
  if ((NB/NA)%2 == 0)
2472
4.42M
  {
2473
4.42M
    Multiply(R, T, A, B, NA);
2474
4.42M
    CopyWords(T+2*NA, R+NA, NA);
2475
2476
45.5M
    for (i=2*NA; i<NB; i+=2*NA)
2477
41.1M
      Multiply(T+NA+i, T, A, B+i, NA);
2478
50.0M
    for (i=NA; i<NB; i+=2*NA)
2479
45.5M
      Multiply(R+i, T, A, B+i, NA);
2480
4.42M
  }
2481
13.3M
  else
2482
13.3M
  {
2483
175M
    for (i=0; i<NB; i+=2*NA)
2484
161M
      Multiply(R+i, T, A, B+i, NA);
2485
161M
    for (i=NA; i<NB; i+=2*NA)
2486
148M
      Multiply(T+NA+i, T, A, B+i, NA);
2487
13.3M
  }
2488
2489
17.7M
  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2490
5.31M
    Increment(R+NB, NA);
2491
17.7M
}
2492
2493
// R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2494
// T[3*N/2] - temporary work space
2495
// A[N] ----- an odd number as input
2496
2497
void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2498
62.8k
{
2499
  // Profiling guided the flow below.
2500
62.8k
  if (N!=2)
2501
33.9k
  {
2502
33.9k
    const size_t N2 = N/2;
2503
33.9k
    RecursiveInverseModPower2(R0, T0, A0, N2);
2504
33.9k
    T0[0] = 1;
2505
33.9k
    SetWords(T0+1, 0, N2-1);
2506
33.9k
    MultiplyTop(R1, T1, T0, R0, A0, N2);
2507
33.9k
    MultiplyBottom(T0, T1, R0, A1, N2);
2508
33.9k
    Add(T0, R1, T0, N2);
2509
33.9k
    TwosComplement(T0, N2);
2510
33.9k
    MultiplyBottom(R1, T1, R0, T0, N2);
2511
33.9k
  }
2512
28.8k
  else
2513
28.8k
  {
2514
28.8k
    T[0] = AtomicInverseModPower2(A[0]);
2515
28.8k
    T[1] = 0;
2516
28.8k
    s_pBot[0](T+2, T, A);
2517
28.8k
    TwosComplement(T+2, 2);
2518
28.8k
    Increment(T+2, 2, 2);
2519
28.8k
    s_pBot[0](R, T, T+2);
2520
28.8k
  }
2521
62.8k
}
2522
2523
// R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2524
// T[3*N] - temporary work space
2525
// X[2*N] - number to be reduced
2526
// M[N] --- modulus
2527
// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2528
2529
void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2530
1.04M
{
2531
1.04M
#if 1
2532
1.04M
  MultiplyBottom(R, T, X, U, N);
2533
1.04M
  MultiplyTop(T, T+N, X, R, M, N);
2534
1.04M
  word borrow = Subtract(T, X+N, T, N);
2535
  // defend against timing attack by doing this Add even when not needed
2536
1.04M
  word carry = Add(T+N, T, M, N);
2537
1.04M
  CRYPTOPP_ASSERT(carry | !borrow);
2538
1.04M
  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2539
1.04M
  CopyWords(R, T + ((0-borrow) & N), N);
2540
#elif 0
2541
  const word u = 0-U[0];
2542
  Declare2Words(p)
2543
  for (size_t i=0; i<N; i++)
2544
  {
2545
    const word t = u * X[i];
2546
    word c = 0;
2547
    for (size_t j=0; j<N; j+=2)
2548
    {
2549
      MultiplyWords(p, t, M[j]);
2550
      Acc2WordsBy1(p, X[i+j]);
2551
      Acc2WordsBy1(p, c);
2552
      X[i+j] = LowWord(p);
2553
      c = HighWord(p);
2554
      MultiplyWords(p, t, M[j+1]);
2555
      Acc2WordsBy1(p, X[i+j+1]);
2556
      Acc2WordsBy1(p, c);
2557
      X[i+j+1] = LowWord(p);
2558
      c = HighWord(p);
2559
    }
2560
2561
    if (Increment(X+N+i, N-i, c))
2562
      while (!Subtract(X+N, X+N, M, N)) {}
2563
  }
2564
2565
  std::memcpy(R, X+N, N*WORD_SIZE);
2566
#else
2567
  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2568
  for (size_t i=0; i<N; i++)
2569
  {
2570
    __m64 t = _mm_cvtsi32_si64(X[i]);
2571
    t = _mm_mul_su32(t, u);
2572
    __m64 c = _mm_setzero_si64();
2573
    for (size_t j=0; j<N; j+=2)
2574
    {
2575
      p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2576
      p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2577
      c = _mm_add_si64(c, p);
2578
      X[i+j] = _mm_cvtsi64_si32(c);
2579
      c = _mm_srli_si64(c, 32);
2580
      p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2581
      p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2582
      c = _mm_add_si64(c, p);
2583
      X[i+j+1] = _mm_cvtsi64_si32(c);
2584
      c = _mm_srli_si64(c, 32);
2585
    }
2586
2587
    if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2588
      while (!Subtract(X+N, X+N, M, N)) {}
2589
  }
2590
2591
  std::memcpy(R, X+N, N*WORD_SIZE);
2592
  _mm_empty();
2593
#endif
2594
1.04M
}
2595
2596
// R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2597
// T[2*N] - temporary work space
2598
// X[2*N] - number to be reduced
2599
// M[N] --- modulus
2600
// U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2601
// V[N] --- 2**(WORD_BITS*3*N/2) mod M
2602
2603
void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2604
0
{
2605
0
  CRYPTOPP_ASSERT(N%2==0 && N>=4);
2606
2607
0
#define M0    M
2608
0
#define M1    (M+N2)
2609
0
#define V0    V
2610
0
#define V1    (V+N2)
2611
2612
0
#define X0    X
2613
0
#define X1    (X+N2)
2614
0
#define X2    (X+N)
2615
0
#define X3    (X+N+N2)
2616
2617
0
  const size_t N2 = N/2;
2618
0
  Multiply(T0, T2, V0, X3, N2);
2619
0
  int c2 = Add(T0, T0, X0, N);
2620
0
  MultiplyBottom(T3, T2, T0, U, N2);
2621
0
  MultiplyTop(T2, R, T0, T3, M0, N2);
2622
0
  c2 -= Subtract(T2, T1, T2, N2);
2623
0
  Multiply(T0, R, T3, M1, N2);
2624
0
  c2 -= Subtract(T0, T2, T0, N2);
2625
0
  int c3 = -(int)Subtract(T1, X2, T1, N2);
2626
0
  Multiply(R0, T2, V1, X3, N2);
2627
0
  c3 += Add(R, R, T, N);
2628
2629
0
  if (c2>0)
2630
0
    c3 += Increment(R1, N2);
2631
0
  else if (c2<0)
2632
0
    c3 -= Decrement(R1, N2, -c2);
2633
2634
0
  CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2635
0
  if (c3>0)
2636
0
    Subtract(R, R, M, N);
2637
0
  else if (c3<0)
2638
0
    Add(R, R, M, N);
2639
2640
0
#undef M0
2641
0
#undef M1
2642
0
#undef V0
2643
0
#undef V1
2644
2645
0
#undef X0
2646
0
#undef X1
2647
0
#undef X2
2648
0
#undef X3
2649
0
}
2650
2651
#undef A0
2652
#undef A1
2653
#undef B0
2654
#undef B1
2655
2656
#undef T0
2657
#undef T1
2658
#undef T2
2659
#undef T3
2660
2661
#undef R0
2662
#undef R1
2663
#undef R2
2664
#undef R3
2665
2666
/*
2667
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2668
static word SubatomicDivide(word *A, word B0, word B1)
2669
{
2670
  // Assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2671
  CRYPTOPP_ASSERT(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2672
2673
  // estimate the quotient: do a 2 word by 1 word divide
2674
  word Q;
2675
  if (B1+1 == 0)
2676
    Q = A[2];
2677
  else
2678
    Q = DWord(A[1], A[2]).DividedBy(B1+1);
2679
2680
  // now subtract Q*B from A
2681
  DWord p = DWord::Multiply(B0, Q);
2682
  DWord u = (DWord) A[0] - p.GetLowHalf();
2683
  A[0] = u.GetLowHalf();
2684
  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2685
  A[1] = u.GetLowHalf();
2686
  A[2] += u.GetHighHalf();
2687
2688
  // Q <= actual quotient, so fix it
2689
  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2690
  {
2691
    u = (DWord) A[0] - B0;
2692
    A[0] = u.GetLowHalf();
2693
    u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2694
    A[1] = u.GetLowHalf();
2695
    A[2] += u.GetHighHalf();
2696
    Q++;
2697
    CRYPTOPP_ASSERT(Q); // shouldn't overflow
2698
  }
2699
2700
  return Q;
2701
}
2702
2703
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2704
static inline void AtomicDivide(word *Q, const word *A, const word *B)
2705
{
2706
  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2707
  {
2708
    Q[0] = A[2];
2709
    Q[1] = A[3];
2710
  }
2711
  else
2712
  {
2713
    word T[4];
2714
    T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2715
    Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2716
    Q[0] = SubatomicDivide(T, B[0], B[1]);
2717
2718
#if defined(CRYPTOPP_DEBUG)
2719
    // multiply quotient and divisor and add remainder, make sure it equals dividend
2720
    CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2721
    word P[4];
2722
    LowLevel::Multiply2(P, Q, B);
2723
    Add(P, P, T, 4);
2724
    CRYPTOPP_ASSERT(std::memcmp(P, A, 4*WORD_SIZE)==0);
2725
#endif
2726
  }
2727
}
2728
*/
2729
2730
static inline void AtomicDivide(word *Q, const word *A, const word *B)
2731
18.8M
{
2732
18.8M
  word T[4];
2733
18.8M
  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2734
18.8M
  Q[0] = q.GetLowHalf();
2735
18.8M
  Q[1] = q.GetHighHalf();
2736
2737
#if defined(CRYPTOPP_DEBUG)
2738
  if (B[0] || B[1])
2739
  {
2740
    // multiply quotient and divisor and add remainder, make sure it equals dividend
2741
    CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2742
    word P[4];
2743
    s_pMul[0](P, Q, B);
2744
    Add(P, P, T, 4);
2745
    CRYPTOPP_ASSERT(std::memcmp(P, A, 4*WORD_SIZE)==0);
2746
  }
2747
#endif
2748
18.8M
}
2749
2750
// for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2751
static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2752
18.8M
{
2753
18.8M
  CRYPTOPP_ASSERT(N && N%2==0);
2754
2755
18.8M
  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2756
2757
18.8M
  word borrow = Subtract(R, R, T, N+2);
2758
18.8M
  CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2759
18.8M
  CRYPTOPP_UNUSED(borrow);
2760
2761
23.7M
  while (R[N] || Compare(R, B, N) >= 0)
2762
4.91M
  {
2763
4.91M
    R[N] -= Subtract(R, R, B, N);
2764
4.91M
    Q[1] += (++Q[0]==0);
2765
4.91M
    CRYPTOPP_ASSERT(Q[0] || Q[1]); // no overflow
2766
4.91M
  }
2767
18.8M
}
2768
2769
// R[NB] -------- remainder = A%B
2770
// Q[NA-NB+2] --- quotient  = A/B
2771
// T[NA+3*(NB+2)] - temp work space
2772
// A[NA] -------- dividend
2773
// B[NB] -------- divisor
2774
2775
void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2776
2.24M
{
2777
2.24M
  CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2778
2.24M
  CRYPTOPP_ASSERT(B[NB-1] || B[NB-2]);
2779
2.24M
  CRYPTOPP_ASSERT(NB <= NA);
2780
2781
  // set up temporary work space
2782
2.24M
  word *const TA=T;
2783
2.24M
  word *const TB=T+NA+2;
2784
2.24M
  word *const TP=T+NA+2+NB;
2785
2786
  // copy B into TB and normalize it so that TB has highest bit set to 1
2787
2.24M
  unsigned shiftWords = (B[NB-1]==0);
2788
2.24M
  TB[0] = TB[NB-1] = 0;
2789
2.24M
  CopyWords(TB+shiftWords, B, NB-shiftWords);
2790
2.24M
  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2791
2.24M
  CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2792
2.24M
  ShiftWordsLeftByBits(TB, NB, shiftBits);
2793
2794
  // copy A into TA and normalize it
2795
2.24M
  TA[0] = TA[NA] = TA[NA+1] = 0;
2796
2.24M
  CopyWords(TA+shiftWords, A, NA);
2797
2.24M
  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2798
2799
2.24M
  if (TA[NA+1]==0 && TA[NA] <= 1)
2800
1.01M
  {
2801
1.01M
    Q[NA-NB+1] = Q[NA-NB] = 0;
2802
1.75M
    while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2803
736k
    {
2804
736k
      TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2805
736k
      ++Q[NA-NB];
2806
736k
    }
2807
1.01M
  }
2808
1.23M
  else
2809
1.23M
  {
2810
1.23M
    NA+=2;
2811
1.23M
    CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2812
1.23M
  }
2813
2814
2.24M
  word BT[2];
2815
2.24M
  BT[0] = TB[NB-2] + 1;
2816
2.24M
  BT[1] = TB[NB-1] + (BT[0]==0);
2817
2818
  // start reducing TA mod TB, 2 words at a time
2819
21.0M
  for (size_t i=NA-2; i>=NB; i-=2)
2820
18.8M
  {
2821
18.8M
    AtomicDivide(Q+i-NB, TA+i-2, BT);
2822
18.8M
    CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2823
18.8M
  }
2824
2825
  // copy TA into R, and denormalize it
2826
2.24M
  CopyWords(R, TA+shiftWords, NB);
2827
2.24M
  ShiftWordsRightByBits(R, NB, shiftBits);
2828
2.24M
}
2829
2830
static inline size_t EvenWordCount(const word *X, size_t N)
2831
3.18k
{
2832
11.4k
  while (N && X[N-2]==0 && X[N-1]==0)
2833
8.29k
    N-=2;
2834
3.18k
  return N;
2835
3.18k
}
2836
2837
// return k
2838
// R[N] --- result = A^(-1) * 2^k mod M
2839
// T[4*N] - temporary work space
2840
// A[NA] -- number to take inverse of
2841
// M[N] --- modulus
2842
2843
unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2844
1.25k
{
2845
1.25k
  CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2846
2847
1.25k
  word *b = T;
2848
1.25k
  word *c = T+N;
2849
1.25k
  word *f = T+2*N;
2850
1.25k
  word *g = T+3*N;
2851
1.25k
  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2852
1.25k
  unsigned int k=0;
2853
1.25k
  bool s=false;
2854
2855
1.25k
  SetWords(T, 0, 3*N);
2856
1.25k
  b[0]=1;
2857
1.25k
  CopyWords(f, A, NA);
2858
1.25k
  CopyWords(g, M, N);
2859
2860
1.04M
  while (1)
2861
1.04M
  {
2862
1.04M
    word t=f[0];
2863
1.04M
    while (!t)
2864
499
    {
2865
499
      if (EvenWordCount(f, fgLen)==0)
2866
273
      {
2867
273
        SetWords(R, 0, N);
2868
273
        return 0;
2869
273
      }
2870
2871
226
      ShiftWordsRightByWords(f, fgLen, 1);
2872
226
      bcLen += 2 * (c[bcLen-1] != 0);
2873
226
      CRYPTOPP_ASSERT(bcLen <= N);
2874
226
      ShiftWordsLeftByWords(c, bcLen, 1);
2875
226
      k+=WORD_BITS;
2876
226
      t=f[0];
2877
226
    }
2878
2879
    // t must be non-0; otherwise undefined.
2880
1.04M
    unsigned int i = TrailingZeros(t);
2881
1.04M
    t >>= i;
2882
1.04M
    k += i;
2883
2884
1.04M
    if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2885
985
    {
2886
985
      if (s)
2887
497
        Subtract(R, M, b, N);
2888
488
      else
2889
488
        CopyWords(R, b, N);
2890
985
      return k;
2891
985
    }
2892
2893
1.04M
    ShiftWordsRightByBits(f, fgLen, i);
2894
1.04M
    t = ShiftWordsLeftByBits(c, bcLen, i);
2895
1.04M
    c[bcLen] += t;
2896
1.04M
    bcLen += 2 * (t!=0);
2897
1.04M
    CRYPTOPP_ASSERT(bcLen <= N);
2898
2899
1.04M
    bool swap = Compare(f, g, fgLen)==-1;
2900
1.04M
    ConditionalSwapPointers(swap, f, g);
2901
1.04M
    ConditionalSwapPointers(swap, b, c);
2902
1.04M
    s ^= swap;
2903
2904
1.04M
    fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2905
2906
1.04M
    Subtract(f, f, g, fgLen);
2907
1.04M
    t = Add(b, b, c, bcLen);
2908
1.04M
    b[bcLen] += t;
2909
1.04M
    bcLen += 2*t;
2910
1.04M
    CRYPTOPP_ASSERT(bcLen <= N);
2911
1.04M
  }
2912
1.25k
}
2913
2914
// R[N] - result = A/(2^k) mod M
2915
// A[N] - input
2916
// M[N] - modulus
2917
2918
void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2919
1.25k
{
2920
1.25k
  CopyWords(R, A, N);
2921
2922
1.83M
  while (k--)
2923
1.83M
  {
2924
1.83M
    if (R[0]%2==0)
2925
918k
      ShiftWordsRightByBits(R, N, 1);
2926
914k
    else
2927
914k
    {
2928
914k
      word carry = Add(R, R, M, N);
2929
914k
      ShiftWordsRightByBits(R, N, 1);
2930
914k
      R[N-1] += carry<<(WORD_BITS-1);
2931
914k
    }
2932
1.83M
  }
2933
1.25k
}
2934
2935
// R[N] - result = A*(2^k) mod M
2936
// A[N] - input
2937
// M[N] - modulus
2938
2939
void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2940
0
{
2941
0
  CopyWords(R, A, N);
2942
2943
0
  while (k--)
2944
0
    if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2945
0
      Subtract(R, R, M, N);
2946
0
}
2947
2948
// ******************************************************************
2949
2950
static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2951
2952
static inline size_t RoundupSize(size_t n)
2953
76.5M
{
2954
76.5M
  if (n<=8)
2955
35.6M
    return RoundupSizeTable[n];
2956
40.9M
  else if (n<=16)
2957
5.64M
    return 16;
2958
35.3M
  else if (n<=32)
2959
8.44M
    return 32;
2960
26.8M
  else if (n<=64)
2961
12.2M
    return 64;
2962
14.6M
  else
2963
14.6M
    return size_t(1) << BitPrecision(n-1);
2964
76.5M
}
2965
2966
Integer::Integer()
2967
  : reg(2), sign(POSITIVE)
2968
19.7M
{
2969
19.7M
  reg[0] = reg[1] = 0;
2970
19.7M
}
2971
2972
Integer::Integer(const Integer& t)
2973
  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2974
886k
{
2975
886k
  CopyWords(reg, t.reg, reg.size());
2976
886k
}
2977
2978
Integer::Integer(Sign s, lword value)
2979
  : reg(2), sign(s)
2980
0
{
2981
0
  reg[0] = word(value);
2982
0
  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2983
0
}
2984
2985
Integer::Integer(signed long value)
2986
  : reg(2)
2987
26.6M
{
2988
26.6M
  if (value >= 0)
2989
26.6M
    sign = POSITIVE;
2990
271
  else
2991
271
  {
2992
271
    sign = NEGATIVE;
2993
271
    value = -value;
2994
271
  }
2995
26.6M
  reg[0] = word(value);
2996
26.6M
  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2997
26.6M
}
2998
2999
Integer::Integer(Sign s, word high, word low)
3000
  : reg(2), sign(s)
3001
0
{
3002
0
  reg[0] = low;
3003
0
  reg[1] = high;
3004
0
}
3005
3006
bool Integer::IsConvertableToLong() const
3007
239
{
3008
239
  if (ByteCount() > sizeof(long))
3009
76
    return false;
3010
3011
163
  unsigned long value = (unsigned long)reg[0];
3012
163
  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3013
3014
163
  if (sign==POSITIVE)
3015
163
    return (signed long)value >= 0;
3016
0
  else
3017
0
    return -(signed long)value < 0;
3018
163
}
3019
3020
signed long Integer::ConvertToLong() const
3021
156
{
3022
156
  CRYPTOPP_ASSERT(IsConvertableToLong());
3023
3024
156
  unsigned long value = (unsigned long)reg[0];
3025
156
  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
3026
156
  return sign==POSITIVE ? value : -(signed long)value;
3027
156
}
3028
3029
Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3030
81.5k
{
3031
81.5k
  CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
3032
3033
81.5k
  if (o != LITTLE_ENDIAN_ORDER)
3034
81.5k
  {
3035
81.5k
    Decode(encodedInteger, byteCount, s);
3036
81.5k
  }
3037
0
  else
3038
0
  {
3039
0
    SecByteBlock block(byteCount);
3040
0
    encodedInteger.Get(block, block.size());
3041
0
    std::reverse(block.begin(), block.begin()+block.size());
3042
3043
0
    Decode(block.begin(), block.size(), s);
3044
0
  }
3045
81.5k
}
3046
3047
Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
3048
4.82M
{
3049
4.82M
  CRYPTOPP_ASSERT(encodedInteger && byteCount); // NULL buffer
3050
4.82M
  CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
3051
3052
4.82M
  if (o != LITTLE_ENDIAN_ORDER)
3053
4.82M
  {
3054
4.82M
    Decode(encodedInteger, byteCount, s);
3055
4.82M
  }
3056
0
  else
3057
0
  {
3058
0
    SecByteBlock block(byteCount);
3059
#if (CRYPTOPP_MSC_VERSION >= 1500)
3060
    std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3061
      stdext::make_checked_array_iterator(block.begin(), block.size()));
3062
#else
3063
0
    std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
3064
0
#endif
3065
0
    Decode(block.begin(), block.size(), s);
3066
0
    return;
3067
0
  }
3068
4.82M
}
3069
3070
Integer::Integer(BufferedTransformation &bt)
3071
0
{
3072
  // Make explicit call to avoid virtual-dispatch findings in ctor
3073
0
  Integer::BERDecode(bt);
3074
0
}
3075
3076
Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
3077
0
{
3078
0
  Randomize(rng, bitcount);
3079
0
}
3080
3081
Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3082
0
{
3083
0
  if (!Randomize(rng, min, max, rnType, equiv, mod))
3084
0
    throw Integer::RandomNumberNotFound();
3085
0
}
3086
3087
Integer Integer::Power2(size_t e)
3088
1.90k
{
3089
1.90k
  Integer r((word)0, BitsToWords(e+1));
3090
1.90k
  r.SetBit(e);
3091
1.90k
  return r;
3092
1.90k
}
3093
3094
bool Integer::operator!() const
3095
3.73M
{
3096
3.73M
  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3097
3.73M
}
3098
3099
Integer& Integer::operator=(const Integer& t)
3100
18.5M
{
3101
18.5M
  if (this != &t)
3102
18.5M
  {
3103
18.5M
    if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3104
14.3M
      reg.New(RoundupSize(t.WordCount()));
3105
18.5M
    CopyWords(reg, t.reg, reg.size());
3106
18.5M
    sign = t.sign;
3107
18.5M
  }
3108
18.5M
  return *this;
3109
18.5M
}
3110
3111
bool Integer::GetBit(size_t n) const
3112
1.89M
{
3113
  // Profiling guided the flow below.
3114
1.89M
  if (n/WORD_BITS < reg.size())
3115
1.89M
    return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3116
29
  else
3117
29
    return 0;
3118
1.89M
}
3119
3120
void Integer::SetBit(size_t n, bool value)
3121
1.99k
{
3122
1.99k
  if (value)
3123
1.98k
  {
3124
1.98k
    reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3125
1.98k
    reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3126
1.98k
  }
3127
12
  else
3128
12
  {
3129
12
    if (n/WORD_BITS < reg.size())
3130
10
      reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3131
12
  }
3132
1.99k
}
3133
3134
byte Integer::GetByte(size_t n) const
3135
282M
{
3136
  // Profiling guided the flow below.
3137
282M
  if (n/WORD_SIZE < reg.size())
3138
282M
    return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3139
96
  else
3140
96
    return 0;
3141
282M
}
3142
3143
void Integer::SetByte(size_t n, byte value)
3144
0
{
3145
0
  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3146
0
  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3147
0
  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3148
0
}
3149
3150
lword Integer::GetBits(size_t i, size_t n) const
3151
0
{
3152
0
  lword v = 0;
3153
0
  CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3154
0
  for (unsigned int j=0; j<n; j++)
3155
0
    v |= lword(GetBit(i+j)) << j;
3156
0
  return v;
3157
0
}
3158
3159
Integer Integer::operator-() const
3160
115
{
3161
115
  Integer result(*this);
3162
115
  result.Negate();
3163
115
  return result;
3164
115
}
3165
3166
Integer Integer::AbsoluteValue() const
3167
136
{
3168
136
  Integer result(*this);
3169
136
  result.sign = POSITIVE;
3170
136
  return result;
3171
136
}
3172
3173
void Integer::swap(Integer &a)
3174
3.71M
{
3175
3.71M
  reg.swap(a.reg);
3176
3.71M
  std::swap(sign, a.sign);
3177
3.71M
}
3178
3179
Integer::Integer(word value, size_t length)
3180
  : reg(RoundupSize(length)), sign(POSITIVE)
3181
5.00M
{
3182
5.00M
  reg[0] = value;
3183
5.00M
  SetWords(reg+1, 0, reg.size()-1);
3184
5.00M
}
3185
3186
template <class T>
3187
static Integer StringToInteger(const T *str, ByteOrder order)
3188
70.7k
{
3189
70.7k
  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3190
3191
70.7k
  int radix, sign = 1;
3192
  // GCC workaround
3193
  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3194
70.7k
  unsigned int length;
3195
13.3M
  for (length = 0; str[length] != 0; length++) {}
3196
3197
70.7k
  Integer v;
3198
3199
70.7k
  if (length == 0)
3200
0
    return Integer::Zero();
3201
3202
  // 'str' is of length 1 or more
3203
70.7k
  switch (str[length-1])
3204
70.7k
  {
3205
0
  case 'h':
3206
0
  case 'H':
3207
0
    radix=16;
3208
0
    break;
3209
0
  case 'o':
3210
0
  case 'O':
3211
0
    radix=8;
3212
0
    break;
3213
0
  case 'b':
3214
0
  case 'B':
3215
0
    radix=2;
3216
0
    break;
3217
70.7k
  default:
3218
70.7k
    radix=10;
3219
70.7k
  }
3220
3221
  // 'str' is of length 1 or more
3222
70.7k
  if (str[0] == '-')
3223
0
  {
3224
0
    sign = -1;
3225
0
    str += 1, length -= 1;
3226
0
  }
3227
3228
  // Recognize common prefixes for hexadecimal, octal and decimal.
3229
  // Microsoft's MASM also recognizes 0t for octal, but not here.
3230
70.7k
  if (length > 2 && str[0] == '0')
3231
11.6k
  {
3232
11.6k
    if (str[1] == 'x' || str[1] == 'X')
3233
0
    {
3234
0
      radix = 16;
3235
0
      str += 2, length -= 2;
3236
0
    }
3237
11.6k
    else if (str[1] == 'n' || str[1] == 'N')
3238
0
    {
3239
0
      radix = 10;
3240
0
      str += 2, length -= 2;
3241
0
    }
3242
11.6k
    else if (str[1] == 'o' || str[1] == 'O')
3243
0
    {
3244
0
      radix = 8;
3245
0
      str += 2, length -= 2;
3246
0
    }
3247
11.6k
  }
3248
3249
70.7k
  if (order == BIG_ENDIAN_ORDER)
3250
70.7k
  {
3251
13.3M
    for (unsigned int i=0; i<length; i++)
3252
13.2M
    {
3253
13.2M
      int digit, ch = static_cast<int>(str[i]);
3254
3255
      // Profiling guided the flow below.
3256
13.2M
      if (ch >= '0' && ch <= '9')
3257
13.2M
        digit = ch - '0';
3258
0
      else if (ch >= 'a' && ch <= 'f')
3259
0
        digit = ch - 'a' + 10;
3260
0
      else if (ch >= 'A' && ch <= 'F')
3261
0
        digit = ch - 'A' + 10;
3262
0
      else
3263
0
        digit = radix;
3264
3265
13.2M
      if (digit < radix)
3266
13.2M
      {
3267
13.2M
        v *= radix;
3268
13.2M
        v += digit;
3269
13.2M
      }
3270
13.2M
    }
3271
70.7k
  }
3272
0
  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3273
0
  {
3274
    // Nibble high, low and count
3275
0
    unsigned int nh = 0, nl = 0, nc = 0;
3276
0
    Integer position(Integer::One());
3277
3278
0
    for (unsigned int i=0; i<length; i++)
3279
0
    {
3280
0
      int digit, ch = static_cast<int>(str[i]);
3281
3282
0
      if (ch >= '0' && ch <= '9')
3283
0
        digit = ch - '0';
3284
0
      else if (ch >= 'a' && ch <= 'f')
3285
0
        digit = ch - 'a' + 10;
3286
0
      else if (ch >= 'A' && ch <= 'F')
3287
0
        digit = ch - 'A' + 10;
3288
0
      else
3289
0
        digit = radix;
3290
3291
0
      if (digit < radix)
3292
0
      {
3293
0
        if (nc++ == 0)
3294
0
          nh = digit;
3295
0
        else
3296
0
          nl = digit;
3297
3298
0
        if (nc == 2)
3299
0
        {
3300
0
          v += position * (nh << 4 | nl);
3301
0
          nc = 0, position <<= 8;
3302
0
        }
3303
0
      }
3304
0
    }
3305
3306
0
    if (nc == 1)
3307
0
      v += nh * position;
3308
0
  }
3309
0
  else // LITTLE_ENDIAN_ORDER && radix != 16
3310
0
  {
3311
0
    for (int i=static_cast<int>(length)-1; i>=0; i--)
3312
0
    {
3313
0
      int digit, ch = static_cast<int>(str[i]);
3314
3315
0
      if (ch >= '0' && ch <= '9')
3316
0
        digit = ch - '0';
3317
0
      else if (ch >= 'a' && ch <= 'f')
3318
0
        digit = ch - 'a' + 10;
3319
0
      else if (ch >= 'A' && ch <= 'F')
3320
0
        digit = ch - 'A' + 10;
3321
0
      else
3322
0
        digit = radix;
3323
3324
0
      if (digit < radix)
3325
0
      {
3326
0
        v *= radix;
3327
0
        v += digit;
3328
0
      }
3329
0
    }
3330
0
  }
3331
3332
70.7k
  if (sign == -1)
3333
0
    v.Negate();
3334
3335
70.7k
  return v;
3336
70.7k
}
integer.cpp:CryptoPP::Integer CryptoPP::StringToInteger<char>(char const*, CryptoPP::ByteOrder)
Line
Count
Source
3188
70.7k
{
3189
70.7k
  CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3190
3191
70.7k
  int radix, sign = 1;
3192
  // GCC workaround
3193
  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3194
70.7k
  unsigned int length;
3195
13.3M
  for (length = 0; str[length] != 0; length++) {}
3196
3197
70.7k
  Integer v;
3198
3199
70.7k
  if (length == 0)
3200
0
    return Integer::Zero();
3201
3202
  // 'str' is of length 1 or more
3203
70.7k
  switch (str[length-1])
3204
70.7k
  {
3205
0
  case 'h':
3206
0
  case 'H':
3207
0
    radix=16;
3208
0
    break;
3209
0
  case 'o':
3210
0
  case 'O':
3211
0
    radix=8;
3212
0
    break;
3213
0
  case 'b':
3214
0
  case 'B':
3215
0
    radix=2;
3216
0
    break;
3217
70.7k
  default:
3218
70.7k
    radix=10;
3219
70.7k
  }
3220
3221
  // 'str' is of length 1 or more
3222
70.7k
  if (str[0] == '-')
3223
0
  {
3224
0
    sign = -1;
3225
0
    str += 1, length -= 1;
3226
0
  }
3227
3228
  // Recognize common prefixes for hexadecimal, octal and decimal.
3229
  // Microsoft's MASM also recognizes 0t for octal, but not here.
3230
70.7k
  if (length > 2 && str[0] == '0')
3231
11.6k
  {
3232
11.6k
    if (str[1] == 'x' || str[1] == 'X')
3233
0
    {
3234
0
      radix = 16;
3235
0
      str += 2, length -= 2;
3236
0
    }
3237
11.6k
    else if (str[1] == 'n' || str[1] == 'N')
3238
0
    {
3239
0
      radix = 10;
3240
0
      str += 2, length -= 2;
3241
0
    }
3242
11.6k
    else if (str[1] == 'o' || str[1] == 'O')
3243
0
    {
3244
0
      radix = 8;
3245
0
      str += 2, length -= 2;
3246
0
    }
3247
11.6k
  }
3248
3249
70.7k
  if (order == BIG_ENDIAN_ORDER)
3250
70.7k
  {
3251
13.3M
    for (unsigned int i=0; i<length; i++)
3252
13.2M
    {
3253
13.2M
      int digit, ch = static_cast<int>(str[i]);
3254
3255
      // Profiling guided the flow below.
3256
13.2M
      if (ch >= '0' && ch <= '9')
3257
13.2M
        digit = ch - '0';
3258
0
      else if (ch >= 'a' && ch <= 'f')
3259
0
        digit = ch - 'a' + 10;
3260
0
      else if (ch >= 'A' && ch <= 'F')
3261
0
        digit = ch - 'A' + 10;
3262
0
      else
3263
0
        digit = radix;
3264
3265
13.2M
      if (digit < radix)
3266
13.2M
      {
3267
13.2M
        v *= radix;
3268
13.2M
        v += digit;
3269
13.2M
      }
3270
13.2M
    }
3271
70.7k
  }
3272
0
  else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3273
0
  {
3274
    // Nibble high, low and count
3275
0
    unsigned int nh = 0, nl = 0, nc = 0;
3276
0
    Integer position(Integer::One());
3277
3278
0
    for (unsigned int i=0; i<length; i++)
3279
0
    {
3280
0
      int digit, ch = static_cast<int>(str[i]);
3281
3282
0
      if (ch >= '0' && ch <= '9')
3283
0
        digit = ch - '0';
3284
0
      else if (ch >= 'a' && ch <= 'f')
3285
0
        digit = ch - 'a' + 10;
3286
0
      else if (ch >= 'A' && ch <= 'F')
3287
0
        digit = ch - 'A' + 10;
3288
0
      else
3289
0
        digit = radix;
3290
3291
0
      if (digit < radix)
3292
0
      {
3293
0
        if (nc++ == 0)
3294
0
          nh = digit;
3295
0
        else
3296
0
          nl = digit;
3297
3298
0
        if (nc == 2)
3299
0
        {
3300
0
          v += position * (nh << 4 | nl);
3301
0
          nc = 0, position <<= 8;
3302
0
        }
3303
0
      }
3304
0
    }
3305
3306
0
    if (nc == 1)
3307
0
      v += nh * position;
3308
0
  }
3309
0
  else // LITTLE_ENDIAN_ORDER && radix != 16
3310
0
  {
3311
0
    for (int i=static_cast<int>(length)-1; i>=0; i--)
3312
0
    {
3313
0
      int digit, ch = static_cast<int>(str[i]);
3314
3315
0
      if (ch >= '0' && ch <= '9')
3316
0
        digit = ch - '0';
3317
0
      else if (ch >= 'a' && ch <= 'f')
3318
0
        digit = ch - 'a' + 10;
3319
0
      else if (ch >= 'A' && ch <= 'F')
3320
0
        digit = ch - 'A' + 10;
3321
0
      else
3322
0
        digit = radix;
3323
3324
0
      if (digit < radix)
3325
0
      {
3326
0
        v *= radix;
3327
0
        v += digit;
3328
0
      }
3329
0
    }
3330
0
  }
3331
3332
70.7k
  if (sign == -1)
3333
0
    v.Negate();
3334
3335
70.7k
  return v;
3336
70.7k
}
Unexecuted instantiation: integer.cpp:CryptoPP::Integer CryptoPP::StringToInteger<wchar_t>(wchar_t const*, CryptoPP::ByteOrder)
3337
3338
Integer::Integer(const char *str, ByteOrder order)
3339
  : reg(2), sign(POSITIVE)
3340
70.7k
{
3341
70.7k
  *this = StringToInteger(str,order);
3342
70.7k
}
3343
3344
Integer::Integer(const wchar_t *str, ByteOrder order)
3345
  : reg(2), sign(POSITIVE)
3346
0
{
3347
0
  *this = StringToInteger(str,order);
3348
0
}
3349
3350
unsigned int Integer::WordCount() const
3351
54.5M
{
3352
54.5M
  return (unsigned int)CountWords(reg, reg.size());
3353
54.5M
}
3354
3355
unsigned int Integer::ByteCount() const
3356
54.6k
{
3357
54.6k
  unsigned wordCount = WordCount();
3358
54.6k
  if (wordCount)
3359
54.5k
    return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3360
51
  else
3361
51
    return 0;
3362
54.6k
}
3363
3364
unsigned int Integer::BitCount() const
3365
11.6k
{
3366
11.6k
  unsigned wordCount = WordCount();
3367
11.6k
  if (wordCount)
3368
11.6k
    return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3369
4
  else
3370
4
    return 0;
3371
11.6k
}
3372
3373
void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3374
4.82M
{
3375
4.82M
  CRYPTOPP_ASSERT(input && inputLen); // NULL buffer
3376
4.82M
  StringStore store(input, inputLen);
3377
4.82M
  Decode(store, inputLen, s);
3378
4.82M
}
3379
3380
void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
3381
4.98M
{
3382
4.98M
  CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3383
4.98M
  if (bt.MaxRetrievable() < inputLen)
3384
0
    throw InvalidArgument("Integer: input length is too small");
3385
3386
4.98M
  byte b;
3387
4.98M
  bt.Peek(b);
3388
4.98M
  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3389
3390
5.26M
  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3391
282k
  {
3392
282k
    bt.Skip(1);
3393
282k
    inputLen--;
3394
282k
    bt.Peek(b);
3395
282k
  }
3396
3397
4.98M
  reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
3398
294M
  for (size_t i=inputLen; i > 0; i--)
3399
289M
  {
3400
289M
    (void)bt.Get(b);
3401
289M
    reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3402
289M
  }
3403
3404
4.98M
  if (sign == NEGATIVE)
3405
0
  {
3406
0
    for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3407
0
      reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3408
0
    TwosComplement(reg, reg.size());
3409
0
  }
3410
4.98M
}
3411
3412
size_t Integer::MinEncodedSize(Signedness signedness) const
3413
0
{
3414
  // Profiling guided the flow below.
3415
0
  unsigned int outputLen = STDMAX(1U, ByteCount());
3416
0
  const bool pre = (signedness == UNSIGNED);
3417
0
  if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3418
0
    outputLen++;
3419
0
  if (pre)
3420
0
    return outputLen;
3421
0
  if (IsNegative() && *this < -Power2(outputLen*8-1))
3422
0
    outputLen++;
3423
0
  return outputLen;
3424
0
}
3425
3426
// PKCS12_PBKDF and other classes use undersized buffers
3427
void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3428
4.79M
{
3429
4.79M
  CRYPTOPP_ASSERT(output && outputLen);            // NULL buffer
3430
4.79M
  ArraySink sink(output, outputLen);
3431
4.79M
  Encode(sink, outputLen, signedness);
3432
4.79M
}
3433
3434
void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3435
4.79M
{
3436
4.79M
  if (signedness == UNSIGNED || NotNegative())
3437
4.79M
  {
3438
287M
    for (size_t i=outputLen; i > 0; i--)
3439
282M
      bt.Put(GetByte(i-1));
3440
4.79M
  }
3441
0
  else
3442
0
  {
3443
    // take two's complement of *this
3444
0
    Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3445
0
    temp.Encode(bt, outputLen, UNSIGNED);
3446
0
  }
3447
4.79M
}
3448
3449
void Integer::DEREncode(BufferedTransformation &bt) const
3450
0
{
3451
0
  DERGeneralEncoder enc(bt, INTEGER);
3452
0
  Encode(enc, MinEncodedSize(SIGNED), SIGNED);
3453
0
  enc.MessageEnd();
3454
0
}
3455
3456
void Integer::BERDecode(const byte *input, size_t len)
3457
0
{
3458
0
  CRYPTOPP_ASSERT(input && len); // NULL buffer
3459
0
  StringStore store(input, len);
3460
0
  BERDecode(store);
3461
0
}
3462
3463
void Integer::BERDecode(BufferedTransformation &bt)
3464
0
{
3465
0
  BERGeneralDecoder dec(bt, INTEGER);
3466
0
  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3467
0
    BERDecodeError();
3468
0
  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3469
0
  dec.MessageEnd();
3470
0
}
3471
3472
void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
3473
0
{
3474
0
  DERGeneralEncoder enc(bt, OCTET_STRING);
3475
0
  Encode(enc, length);
3476
0
  enc.MessageEnd();
3477
0
}
3478
3479
void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
3480
0
{
3481
0
  BERGeneralDecoder dec(bt, OCTET_STRING);
3482
0
  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3483
0
    BERDecodeError();
3484
0
  Decode(dec, length);
3485
0
  dec.MessageEnd();
3486
0
}
3487
3488
size_t Integer::OpenPGPEncode(byte *output, size_t bufferSize) const
3489
0
{
3490
0
  CRYPTOPP_ASSERT(output && bufferSize);         // NULL buffer
3491
0
  CRYPTOPP_ASSERT(bufferSize >= 2+ByteCount());  // Undersized buffer
3492
0
  ArraySink sink(output, bufferSize);
3493
0
  return OpenPGPEncode(sink);
3494
0
}
3495
3496
size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
3497
0
{
3498
0
  word16 bitCount = word16(BitCount());
3499
0
  bt.PutWord16(bitCount);
3500
0
  size_t byteCount = BitsToBytes(bitCount);
3501
0
  Encode(bt, byteCount);
3502
0
  return 2 + byteCount;
3503
0
}
3504
3505
void Integer::OpenPGPDecode(const byte *input, size_t len)
3506
0
{
3507
0
  CRYPTOPP_ASSERT(input && len);  // NULL buffer
3508
0
  StringStore store(input, len);
3509
0
  OpenPGPDecode(store);
3510
0
}
3511
3512
void Integer::OpenPGPDecode(BufferedTransformation &bt)
3513
0
{
3514
0
  word16 bitCount;
3515
0
  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3516
0
    throw OpenPGPDecodeErr();
3517
0
  Decode(bt, BitsToBytes(bitCount));
3518
0
}
3519
3520
void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
3521
0
{
3522
0
  const size_t nbytes = nbits/8 + 1;
3523
0
  SecByteBlock buf(nbytes);
3524
0
  rng.GenerateBlock(buf, nbytes);
3525
3526
  // https://github.com/weidai11/cryptopp/issues/1206
3527
  // if (nbytes)
3528
  //     buf[0] = (byte)Crop(buf[0], nbits % 8);
3529
3530
0
  buf[0] = (byte)Crop(buf[0], nbits % 8);
3531
0
  Decode(buf, nbytes, UNSIGNED);
3532
0
}
3533
3534
void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3535
0
{
3536
0
  if (min > max)
3537
0
    throw InvalidArgument("Integer: Min must be no greater than Max");
3538
3539
0
  Integer range = max - min;
3540
0
  const unsigned int nbits = range.BitCount();
3541
3542
0
  do
3543
0
  {
3544
0
    Randomize(rng, nbits);
3545
0
  }
3546
0
  while (*this > range);
3547
3548
0
  *this += min;
3549
0
}
3550
3551
bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3552
0
{
3553
0
  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)
3554
0
    ("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3555
0
}
3556
3557
class KDF2_RNG : public RandomNumberGenerator
3558
{
3559
public:
3560
  KDF2_RNG(const byte *seed, size_t seedSize)
3561
    : m_counter(0), m_counterAndSeed(ClampSize(seedSize) + 4)
3562
0
  {
3563
0
    std::memcpy(m_counterAndSeed + 4, seed, ClampSize(seedSize));
3564
0
  }
3565
3566
  void GenerateBlock(byte *output, size_t size)
3567
0
  {
3568
0
    CRYPTOPP_ASSERT(output && size); // NULL buffer
3569
0
    PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3570
0
    ++m_counter;
3571
0
    P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULLPTR, 0);
3572
0
  }
3573
3574
  // UBsan finding, -Wstringop-overflow
3575
  inline size_t ClampSize(size_t req) const
3576
0
  {
3577
    // Clamp at 16 MB
3578
0
    if (req > 16U*1024*1024)
3579
0
      return 16U*1024*1024;
3580
0
    return req;
3581
0
  }
3582
3583
private:
3584
  word32 m_counter;
3585
  SecByteBlock m_counterAndSeed;
3586
};
3587
3588
bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3589
0
{
3590
0
  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3591
0
  Integer max;
3592
0
  if (!params.GetValue("Max", max))
3593
0
  {
3594
0
    int bitLength;
3595
0
    if (params.GetIntValue("BitLength", bitLength))
3596
0
      max = Integer::Power2(bitLength);
3597
0
    else
3598
0
      throw InvalidArgument("Integer: missing Max argument");
3599
0
  }
3600
0
  if (min > max)
3601
0
    throw InvalidArgument("Integer: Min must be no greater than Max");
3602
3603
0
  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3604
0
  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3605
3606
0
  if (equiv.IsNegative() || equiv >= mod)
3607
0
    throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3608
3609
0
  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3610
3611
0
  member_ptr<KDF2_RNG> kdf2Rng;
3612
0
  ConstByteArrayParameter seed;
3613
0
  if (params.GetValue(Name::Seed(), seed))
3614
0
  {
3615
0
    ByteQueue bq;
3616
0
    DERSequenceEncoder seq(bq);
3617
0
    min.DEREncode(seq);
3618
0
    max.DEREncode(seq);
3619
0
    equiv.DEREncode(seq);
3620
0
    mod.DEREncode(seq);
3621
0
    DEREncodeUnsigned(seq, rnType);
3622
0
    DEREncodeOctetString(seq, seed.begin(), seed.size());
3623
0
    seq.MessageEnd();
3624
3625
0
    SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3626
0
    bq.Get(finalSeed, finalSeed.size());
3627
0
    kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3628
0
  }
3629
0
  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3630
3631
0
  switch (rnType)
3632
0
  {
3633
0
    case ANY:
3634
0
      if (mod == One())
3635
0
        Randomize(rng, min, max);
3636
0
      else
3637
0
      {
3638
0
        Integer min1 = min + (equiv-min)%mod;
3639
0
        if (max < min1)
3640
0
          return false;
3641
0
        Randomize(rng, Zero(), (max - min1) / mod);
3642
0
        *this *= mod;
3643
0
        *this += min1;
3644
0
      }
3645
0
      return true;
3646
3647
0
    case PRIME:
3648
0
    {
3649
0
      const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULLPTR);
3650
3651
0
      int i;
3652
0
      i = 0;
3653
0
      while (1)
3654
0
      {
3655
0
        if (++i==16)
3656
0
        {
3657
          // check if there are any suitable primes in [min, max]
3658
0
          Integer first = min;
3659
0
          if (FirstPrime(first, max, equiv, mod, pSelector))
3660
0
          {
3661
            // if there is only one suitable prime, we're done
3662
0
            *this = first;
3663
0
            if (!FirstPrime(first, max, equiv, mod, pSelector))
3664
0
              return true;
3665
0
          }
3666
0
          else
3667
0
            return false;
3668
0
        }
3669
3670
0
        Randomize(rng, min, max);
3671
0
        if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3672
0
          return true;
3673
0
      }
3674
0
    }
3675
3676
0
    default:
3677
0
      throw InvalidArgument("Integer: invalid RandomNumberType argument");
3678
0
  }
3679
0
}
3680
3681
std::istream& operator>>(std::istream& in, Integer &a)
3682
0
{
3683
0
  char c;
3684
0
  unsigned int length = 0;
3685
0
  SecBlock<char> str(length + 16);
3686
3687
0
  std::ws(in);
3688
3689
0
  do
3690
0
  {
3691
0
    in.read(&c, 1);
3692
0
    str[length++] = c;
3693
0
    if (length >= str.size())
3694
0
      str.Grow(length + 16);
3695
0
  }
3696
0
  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3697
3698
0
  if (in.gcount())
3699
0
    in.putback(c);
3700
0
  str[length-1] = '\0';
3701
0
  a = Integer(str);
3702
3703
0
  return in;
3704
0
}
3705
3706
// Ensure base 10 is default
3707
0
inline int FlagToBase(long f) {
3708
0
  return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10);
3709
0
}
3710
3711
0
inline char FlagToSuffix(long f) {
3712
0
  return f == std::ios::hex ? 'h' : (f == std::ios::oct ? 'o' : '.');
3713
0
}
3714
3715
// Ensure base 10 is default
3716
std::ostream& operator<<(std::ostream& out, const Integer &a)
3717
0
{
3718
  // Get relevant conversion specifications from ostream.
3719
0
  const long f = out.flags() & std::ios::basefield;
3720
0
  const int base = FlagToBase(f);
3721
0
  const char suffix = FlagToSuffix(f);
3722
3723
0
  Integer temp1=a, temp2;
3724
0
  if (a.IsNegative())
3725
0
  {
3726
0
    out << '-';
3727
0
    temp1.Negate();
3728
0
  }
3729
3730
0
  if (!a)
3731
0
    out << '0';
3732
3733
0
  static const char upper[]="0123456789ABCDEF";
3734
0
  static const char lower[]="0123456789abcdef";
3735
3736
0
  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3737
0
  unsigned int i=0;
3738
0
  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3739
3740
0
  while (!!temp1)
3741
0
  {
3742
0
    word digit;
3743
0
    Integer::Divide(digit, temp2, temp1, base);
3744
0
    s[i++]=vec[digit];
3745
0
    temp1.swap(temp2);
3746
0
  }
3747
3748
0
  while (i--)
3749
0
  {
3750
0
    out << s[i];
3751
0
  }
3752
3753
#ifdef CRYPTOPP_USE_STD_SHOWBASE
3754
  if (out.flags() & std::ios_base::showbase)
3755
    out << suffix;
3756
3757
  return out;
3758
#else
3759
0
  return out << suffix;
3760
0
#endif
3761
0
}
3762
3763
Integer& Integer::operator++()
3764
24.9k
{
3765
24.9k
  if (NotNegative())
3766
24.9k
  {
3767
24.9k
    if (Increment(reg, reg.size()))
3768
0
    {
3769
0
      reg.CleanGrow(2*reg.size());
3770
0
      reg[reg.size()/2]=1;
3771
0
    }
3772
24.9k
  }
3773
0
  else
3774
0
  {
3775
0
    word borrow = Decrement(reg, reg.size());
3776
0
    CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3777
3778
0
    if (WordCount()==0)
3779
0
      *this = Zero();
3780
0
  }
3781
24.9k
  return *this;
3782
24.9k
}
3783
3784
Integer& Integer::operator--()
3785
0
{
3786
0
  if (IsNegative())
3787
0
  {
3788
0
    if (Increment(reg, reg.size()))
3789
0
    {
3790
0
      reg.CleanGrow(2*reg.size());
3791
0
      reg[reg.size()/2]=1;
3792
0
    }
3793
0
  }
3794
0
  else
3795
0
  {
3796
0
    if (Decrement(reg, reg.size()))
3797
0
      *this = -One();
3798
0
  }
3799
0
  return *this;
3800
0
}
3801
3802
// This is a bit operation. We set sign to POSITIVE, so there's no need to
3803
//  worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3804
Integer Integer::And(const Integer& t) const
3805
99
{
3806
  // Grow due to https://github.com/weidai11/cryptopp/issues/1072
3807
  // The temporary Integer 'result' may have fewer blocks than
3808
  // 'this' or 't', if leading 0-blocks are trimmed in copy ctor.
3809
3810
99
  if (this == &t)
3811
0
  {
3812
0
    return AbsoluteValue();
3813
0
  }
3814
99
  else if (reg.size() >= t.reg.size())
3815
58
  {
3816
58
    IntegerSecBlock temp(t.reg.size());
3817
    // AndWords(temp, reg, t.reg, t.reg.size());
3818
940
    for (size_t i=0; i<t.reg.size(); ++i)
3819
882
      temp[i] = reg[i] & t.reg[i];
3820
3821
58
    Integer result;
3822
58
    std::swap(result.reg, temp);
3823
3824
58
    return result;
3825
58
  }
3826
41
  else // reg.size() < t.reg.size()
3827
41
  {
3828
41
    IntegerSecBlock temp(reg.size());
3829
    // AndWords(temp, reg, t.reg, reg.size());
3830
397
    for (size_t i=0; i<reg.size(); ++i)
3831
356
      temp[i] = reg[i] & t.reg[i];
3832
3833
41
    Integer result;
3834
41
    std::swap(result.reg, temp);
3835
3836
41
    return result;
3837
41
  }
3838
99
}
3839
3840
// This is a bit operation. We set sign to POSITIVE, so there's no need to
3841
//  worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3842
Integer Integer::Or(const Integer& t) const
3843
104
{
3844
  // Grow due to https://github.com/weidai11/cryptopp/issues/1072
3845
  // The temporary Integer 'result' may have fewer blocks than
3846
  // 'this' or 't', if leading 0-blocks are trimmed in copy ctor.
3847
3848
104
  if (this == &t)
3849
0
  {
3850
0
    return AbsoluteValue();
3851
0
  }
3852
104
  else if (reg.size() >= t.reg.size())
3853
59
  {
3854
59
    IntegerSecBlock temp(reg, reg.size());
3855
    // OrWords(temp, t.reg, t.reg.size());
3856
969
    for (size_t i=0; i<t.reg.size(); ++i)
3857
910
      temp[i] |= t.reg[i];
3858
3859
59
    Integer result;
3860
59
    std::swap(result.reg, temp);
3861
3862
59
    return result;
3863
59
  }
3864
45
  else // reg.size() < t.reg.size()
3865
45
  {
3866
45
    IntegerSecBlock temp(t.reg, t.reg.size());
3867
    // OrWords(temp, reg, reg.size());
3868
291
    for (size_t i=0; i<reg.size(); ++i)
3869
246
      temp[i] |= reg[i];
3870
3871
45
    Integer result;
3872
45
    std::swap(result.reg, temp);
3873
3874
45
    return result;
3875
45
  }
3876
104
}
3877
3878
// This is a bit operation. We set sign to POSITIVE, so there's no need to
3879
//  worry about negative zero. Also see http://stackoverflow.com/q/11644362.
3880
Integer Integer::Xor(const Integer& t) const
3881
122
{
3882
  // Grow due to https://github.com/weidai11/cryptopp/issues/1072
3883
  // The temporary Integer 'result' may have fewer blocks than
3884
  // 'this' or 't', if leading 0-blocks are trimmed in copy ctor.
3885
3886
122
  if (this == &t)
3887
0
  {
3888
0
    return Integer::Zero();
3889
0
  }
3890
122
  else if (reg.size() >= t.reg.size())
3891
82
  {
3892
82
    IntegerSecBlock temp(reg, reg.size());
3893
    // OrWords(temp, t.reg, t.reg.size());
3894
2.31k
    for (size_t i=0; i<t.reg.size(); ++i)
3895
2.23k
      temp[i] ^= t.reg[i];
3896
3897
82
    Integer result;
3898
82
    std::swap(result.reg, temp);
3899
3900
82
    return result;
3901
82
  }
3902
40
  else // reg.size() < t.reg.size()
3903
40
  {
3904
40
    IntegerSecBlock temp(t.reg, t.reg.size());
3905
    // OrWords(temp, reg, reg.size());
3906
370
    for (size_t i=0; i<reg.size(); ++i)
3907
330
      temp[i] ^= reg[i];
3908
3909
40
    Integer result;
3910
40
    std::swap(result.reg, temp);
3911
3912
40
    return result;
3913
40
  }
3914
122
}
3915
3916
void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3917
18.0M
{
3918
  // Profiling guided the flow below.
3919
18.0M
  int carry; const bool pre = (a.reg.size() == b.reg.size());
3920
18.0M
  if (!pre && a.reg.size() > b.reg.size())
3921
11.9M
  {
3922
11.9M
    carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3923
11.9M
    CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3924
11.9M
    carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3925
11.9M
  }
3926
6.10M
  else if (pre)
3927
6.10M
  {
3928
6.10M
    carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3929
6.10M
  }
3930
585
  else
3931
585
  {
3932
585
    carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3933
585
    CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3934
585
    carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3935
585
  }
3936
3937
18.0M
  if (carry)
3938
2.24M
  {
3939
2.24M
    sum.reg.CleanGrow(2*sum.reg.size());
3940
2.24M
    sum.reg[sum.reg.size()/2] = 1;
3941
2.24M
  }
3942
18.0M
  sum.sign = Integer::POSITIVE;
3943
18.0M
}
3944
3945
void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3946
55.2k
{
3947
55.2k
  unsigned aSize = a.WordCount();
3948
55.2k
  aSize += aSize%2;
3949
55.2k
  unsigned bSize = b.WordCount();
3950
55.2k
  bSize += bSize%2;
3951
3952
  // Profiling guided the flow below.
3953
55.2k
  if (aSize > bSize)
3954
44.5k
  {
3955
44.5k
    word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3956
44.5k
    CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3957
44.5k
    borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3958
44.5k
    CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3959
44.5k
    diff.sign = Integer::POSITIVE;
3960
44.5k
  }
3961
10.6k
  else if (aSize == bSize)
3962
10.5k
  {
3963
10.5k
    if (Compare(a.reg, b.reg, aSize) >= 0)
3964
10.4k
    {
3965
10.4k
      Subtract(diff.reg, a.reg, b.reg, aSize);
3966
10.4k
      diff.sign = Integer::POSITIVE;
3967
10.4k
    }
3968
47
    else
3969
47
    {
3970
47
      Subtract(diff.reg, b.reg, a.reg, aSize);
3971
47
      diff.sign = Integer::NEGATIVE;
3972
47
    }
3973
10.5k
  }
3974
132
  else
3975
132
  {
3976
132
    word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3977
132
    CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3978
132
    borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3979
132
    CRYPTOPP_ASSERT(!borrow); CRYPTOPP_UNUSED(borrow);
3980
132
    diff.sign = Integer::NEGATIVE;
3981
132
  }
3982
55.2k
}
3983
3984
// MSVC .NET 2003 workaround
3985
template <class T> inline const T& STDMAX2(const T& a, const T& b)
3986
4.85M
{
3987
4.85M
  return a < b ? b : a;
3988
4.85M
}
3989
3990
Integer Integer::Plus(const Integer& b) const
3991
4.80M
{
3992
4.80M
  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3993
4.80M
  if (NotNegative())
3994
4.80M
  {
3995
4.80M
    if (b.NotNegative())
3996
4.80M
      PositiveAdd(sum, *this, b);
3997
0
    else
3998
0
      PositiveSubtract(sum, *this, b);
3999
4.80M
  }
4000
0
  else
4001
0
  {
4002
0
    if (b.NotNegative())
4003
0
      PositiveSubtract(sum, b, *this);
4004
0
    else
4005
0
    {
4006
0
      PositiveAdd(sum, *this, b);
4007
0
      sum.sign = Integer::NEGATIVE;
4008
0
    }
4009
0
  }
4010
4.80M
  return sum;
4011
4.80M
}
4012
4013
Integer& Integer::operator+=(const Integer& t)
4014
13.2M
{
4015
13.2M
  reg.CleanGrow(t.reg.size());
4016
13.2M
  if (NotNegative())
4017
13.2M
  {
4018
13.2M
    if (t.NotNegative())
4019
13.2M
      PositiveAdd(*this, *this, t);
4020
0
    else
4021
0
      PositiveSubtract(*this, *this, t);
4022
13.2M
  }
4023
0
  else
4024
0
  {
4025
0
    if (t.NotNegative())
4026
0
      PositiveSubtract(*this, t, *this);
4027
0
    else
4028
0
    {
4029
0
      PositiveAdd(*this, *this, t);
4030
0
      sign = Integer::NEGATIVE;
4031
0
    }
4032
0
  }
4033
13.2M
  return *this;
4034
13.2M
}
4035
4036
Integer Integer::Minus(const Integer& b) const
4037
55.2k
{
4038
55.2k
  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
4039
55.2k
  if (NotNegative())
4040
55.2k
  {
4041
55.2k
    if (b.NotNegative())
4042
55.2k
      PositiveSubtract(diff, *this, b);
4043
0
    else
4044
0
      PositiveAdd(diff, *this, b);
4045
55.2k
  }
4046
0
  else
4047
0
  {
4048
0
    if (b.NotNegative())
4049
0
    {
4050
0
      PositiveAdd(diff, *this, b);
4051
0
      diff.sign = Integer::NEGATIVE;
4052
0
    }
4053
0
    else
4054
0
      PositiveSubtract(diff, b, *this);
4055
0
  }
4056
55.2k
  return diff;
4057
55.2k
}
4058
4059
Integer& Integer::operator-=(const Integer& t)
4060
0
{
4061
0
  reg.CleanGrow(t.reg.size());
4062
0
  if (NotNegative())
4063
0
  {
4064
0
    if (t.NotNegative())
4065
0
      PositiveSubtract(*this, *this, t);
4066
0
    else
4067
0
      PositiveAdd(*this, *this, t);
4068
0
  }
4069
0
  else
4070
0
  {
4071
0
    if (t.NotNegative())
4072
0
    {
4073
0
      PositiveAdd(*this, *this, t);
4074
0
      sign = Integer::NEGATIVE;
4075
0
    }
4076
0
    else
4077
0
      PositiveSubtract(*this, t, *this);
4078
0
  }
4079
0
  return *this;
4080
0
}
4081
4082
Integer& Integer::operator<<=(size_t n)
4083
114k
{
4084
114k
  const size_t wordCount = WordCount();
4085
114k
  const size_t shiftWords = n / WORD_BITS;
4086
114k
  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4087
4088
114k
  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
4089
114k
  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
4090
114k
  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
4091
114k
  return *this;
4092
114k
}
4093
4094
Integer& Integer::operator>>=(size_t n)
4095
784k
{
4096
784k
  const size_t wordCount = WordCount();
4097
784k
  const size_t shiftWords = n / WORD_BITS;
4098
784k
  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
4099
4100
784k
  ShiftWordsRightByWords(reg, wordCount, shiftWords);
4101
784k
  if (wordCount > shiftWords)
4102
784k
    ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4103
784k
  if (IsNegative() && WordCount()==0)   // avoid -0
4104
0
    *this = Zero();
4105
784k
  return *this;
4106
784k
}
4107
4108
Integer& Integer::operator&=(const Integer& t)
4109
0
{
4110
0
  if (this != &t)
4111
0
  {
4112
0
    const size_t size = STDMIN(reg.size(), t.reg.size());
4113
0
    reg.resize(size);
4114
0
    AndWords(reg, t.reg, size);
4115
0
  }
4116
0
  sign = POSITIVE;
4117
0
  return *this;
4118
0
}
4119
4120
Integer& Integer::operator|=(const Integer& t)
4121
0
{
4122
0
  if (this != &t)
4123
0
  {
4124
0
    if (reg.size() >= t.reg.size())
4125
0
    {
4126
0
      OrWords(reg, t.reg, t.reg.size());
4127
0
    }
4128
0
    else  // reg.size() < t.reg.size()
4129
0
    {
4130
0
      const size_t head = reg.size();
4131
0
      const size_t tail = t.reg.size() - reg.size();
4132
0
      reg.resize(head+tail);
4133
0
      OrWords(reg, t.reg, head);
4134
0
      CopyWords(reg+head,t.reg+head,tail);
4135
0
    }
4136
0
  }
4137
0
  sign = POSITIVE;
4138
0
  return *this;
4139
0
}
4140
4141
Integer& Integer::operator^=(const Integer& t)
4142
0
{
4143
0
  if (this == &t)
4144
0
  {
4145
0
    *this = Zero();
4146
0
  }
4147
0
  else
4148
0
  {
4149
0
    if (reg.size() >= t.reg.size())
4150
0
    {
4151
0
      XorWords(reg, t.reg, t.reg.size());
4152
0
    }
4153
0
    else  // reg.size() < t.reg.size()
4154
0
    {
4155
0
      const size_t head = reg.size();
4156
0
      const size_t tail = t.reg.size() - reg.size();
4157
0
      reg.resize(head+tail);
4158
0
      XorWords(reg, t.reg, head);
4159
0
      CopyWords(reg+head,t.reg+head,tail);
4160
0
    }
4161
0
  }
4162
0
  sign = POSITIVE;
4163
0
  return *this;
4164
0
}
4165
4166
void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
4167
14.5M
{
4168
14.5M
  size_t aSize = RoundupSize(a.WordCount());
4169
14.5M
  size_t bSize = RoundupSize(b.WordCount());
4170
4171
14.5M
  product.reg.CleanNew(RoundupSize(aSize+bSize));
4172
14.5M
  product.sign = Integer::POSITIVE;
4173
4174
14.5M
  IntegerSecBlock workspace(aSize + bSize);
4175
14.5M
  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4176
14.5M
}
4177
4178
void Multiply(Integer &product, const Integer &a, const Integer &b)
4179
14.5M
{
4180
14.5M
  PositiveMultiply(product, a, b);
4181
4182
14.5M
  if (a.NotNegative() != b.NotNegative())
4183
0
    product.Negate();
4184
14.5M
}
4185
4186
Integer Integer::Times(const Integer &b) const
4187
14.5M
{
4188
14.5M
  Integer product;
4189
14.5M
  Multiply(product, *this, b);
4190
14.5M
  return product;
4191
14.5M
}
4192
4193
/*
4194
void PositiveDivide(Integer &remainder, Integer &quotient,
4195
           const Integer &dividend, const Integer &divisor)
4196
{
4197
  remainder.reg.CleanNew(divisor.reg.size());
4198
  remainder.sign = Integer::POSITIVE;
4199
  quotient.reg.New(0);
4200
  quotient.sign = Integer::POSITIVE;
4201
  unsigned i=dividend.BitCount();
4202
  while (i--)
4203
  {
4204
    word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
4205
    remainder.reg[0] |= dividend[i];
4206
    if (overflow || remainder >= divisor)
4207
    {
4208
      Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
4209
      quotient.SetBit(i);
4210
    }
4211
  }
4212
}
4213
*/
4214
4215
void PositiveDivide(Integer &remainder, Integer &quotient,
4216
           const Integer &a, const Integer &b)
4217
2.29M
{
4218
2.29M
  unsigned aSize = a.WordCount();
4219
2.29M
  unsigned bSize = b.WordCount();
4220
4221
2.29M
  if (!bSize)
4222
89
    throw Integer::DivideByZero();
4223
4224
2.29M
  if (aSize < bSize)
4225
44.8k
  {
4226
44.8k
    remainder = a;
4227
44.8k
    remainder.sign = Integer::POSITIVE;
4228
44.8k
    quotient = Integer::Zero();
4229
44.8k
    return;
4230
44.8k
  }
4231
4232
2.24M
  aSize += aSize%2; // round up to next even number
4233
2.24M
  bSize += bSize%2;
4234
4235
2.24M
  remainder.reg.CleanNew(RoundupSize(bSize));
4236
2.24M
  remainder.sign = Integer::POSITIVE;
4237
2.24M
  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4238
2.24M
  quotient.sign = Integer::POSITIVE;
4239
4240
2.24M
  IntegerSecBlock T(aSize+3*(bSize+2));
4241
2.24M
  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4242
2.24M
}
4243
4244
void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4245
2.29M
{
4246
2.29M
  PositiveDivide(remainder, quotient, dividend, divisor);
4247
4248
2.29M
  if (dividend.IsNegative())
4249
0
  {
4250
0
    quotient.Negate();
4251
0
    if (remainder.NotZero())
4252
0
    {
4253
0
      --quotient;
4254
0
      remainder = divisor.AbsoluteValue() - remainder;
4255
0
    }
4256
0
  }
4257
4258
2.29M
  if (divisor.IsNegative())
4259
0
    quotient.Negate();
4260
2.29M
}
4261
4262
void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4263
0
{
4264
0
  q = a;
4265
0
  q >>= n;
4266
4267
0
  const size_t wordCount = BitsToWords(n);
4268
0
  if (wordCount <= a.WordCount())
4269
0
  {
4270
0
    r.reg.resize(RoundupSize(wordCount));
4271
0
    CopyWords(r.reg, a.reg, wordCount);
4272
0
    SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4273
0
    if (n % WORD_BITS != 0)
4274
0
      r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4275
0
  }
4276
0
  else
4277
0
  {
4278
0
    r.reg.resize(RoundupSize(a.WordCount()));
4279
0
    CopyWords(r.reg, a.reg, r.reg.size());
4280
0
  }
4281
0
  r.sign = POSITIVE;
4282
4283
0
  if (a.IsNegative() && r.NotZero())
4284
0
  {
4285
0
    --q;
4286
0
    r = Power2(n) - r;
4287
0
  }
4288
0
}
4289
4290
Integer Integer::DividedBy(const Integer &b) const
4291
2.91k
{
4292
2.91k
  Integer remainder, quotient;
4293
2.91k
  Integer::Divide(remainder, quotient, *this, b);
4294
2.91k
  return quotient;
4295
2.91k
}
4296
4297
Integer Integer::Modulo(const Integer &b) const
4298
2.29M
{
4299
2.29M
  Integer remainder, quotient;
4300
2.29M
  Integer::Divide(remainder, quotient, *this, b);
4301
2.29M
  return remainder;
4302
2.29M
}
4303
4304
void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4305
3.19M
{
4306
3.19M
  if (!divisor)
4307
0
    throw Integer::DivideByZero();
4308
4309
  // IsPowerOf2 uses BMI on x86 if available. There is a small
4310
  // but measurable improvement during decryption and signing.
4311
3.19M
  if (IsPowerOf2(divisor))
4312
0
  {
4313
0
    quotient = dividend >> (BitPrecision(divisor)-1);
4314
0
    remainder = dividend.reg[0] & (divisor-1);
4315
0
    return;
4316
0
  }
4317
4318
3.19M
  unsigned int i = dividend.WordCount();
4319
3.19M
  quotient.reg.CleanNew(RoundupSize(i));
4320
3.19M
  remainder = 0;
4321
144M
  while (i--)
4322
141M
  {
4323
141M
    quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4324
141M
    remainder = DWord(dividend.reg[i], remainder) % divisor;
4325
141M
  }
4326
4327
3.19M
  if (dividend.NotNegative())
4328
3.19M
    quotient.sign = POSITIVE;
4329
0
  else
4330
0
  {
4331
0
    quotient.sign = NEGATIVE;
4332
0
    if (remainder)
4333
0
    {
4334
0
      --quotient;
4335
0
      remainder = divisor - remainder;
4336
0
    }
4337
0
  }
4338
3.19M
}
4339
4340
Integer Integer::DividedBy(word b) const
4341
0
{
4342
0
  word remainder;
4343
0
  Integer quotient;
4344
0
  Integer::Divide(remainder, quotient, *this, b);
4345
0
  return quotient;
4346
0
}
4347
4348
word Integer::Modulo(word divisor) const
4349
1.37M
{
4350
1.37M
  if (!divisor)
4351
0
    throw Integer::DivideByZero();
4352
4353
1.37M
  word remainder;
4354
4355
  // Profiling guided the flow below.
4356
1.37M
  if ((divisor & (divisor-1)) != 0)  // divisor is not a power of 2
4357
0
  {
4358
    // Profiling guided the flow below.
4359
0
    unsigned int i = WordCount();
4360
0
    if (divisor > 5)
4361
0
    {
4362
0
      remainder = 0;
4363
0
      while (i--)
4364
0
        remainder = DWord(reg[i], remainder) % divisor;
4365
0
    }
4366
0
    else
4367
0
    {
4368
0
      DWord sum(0, 0);
4369
0
      while (i--)
4370
0
        sum += reg[i];
4371
0
      remainder = sum % divisor;
4372
0
    }
4373
0
  }
4374
1.37M
  else  // divisor is a power of 2
4375
1.37M
  {
4376
1.37M
    remainder = reg[0] & (divisor-1);
4377
1.37M
  }
4378
4379
1.37M
  if (IsNegative() && remainder)
4380
0
    remainder = divisor - remainder;
4381
4382
1.37M
  return remainder;
4383
1.37M
}
4384
4385
void Integer::Negate()
4386
651
{
4387
651
  if (!!(*this))  // don't flip sign if *this==0
4388
622
    sign = Sign(1-sign);
4389
651
}
4390
4391
int Integer::PositiveCompare(const Integer& t) const
4392
474k
{
4393
  // Profiling guided the flow below.
4394
474k
  const unsigned size = WordCount(), tSize = t.WordCount();
4395
474k
  if (size != tSize)
4396
468k
    return size > tSize ? 1 : -1;
4397
6.00k
  else
4398
6.00k
    return CryptoPP::Compare(reg, t.reg, size);
4399
474k
}
4400
4401
int Integer::Compare(const Integer& t) const
4402
475k
{
4403
475k
  if (NotNegative())
4404
474k
  {
4405
474k
    if (t.NotNegative())
4406
474k
      return PositiveCompare(t);
4407
0
    else
4408
0
      return 1;
4409
474k
  }
4410
536
  else
4411
536
  {
4412
536
    if (t.NotNegative())
4413
536
      return -1;
4414
0
    else
4415
0
      return -PositiveCompare(t);
4416
536
  }
4417
475k
}
4418
4419
Integer Integer::SquareRoot() const
4420
229
{
4421
229
  if (!IsPositive())
4422
9
    return Zero();
4423
4424
  // overestimate square root
4425
220
  Integer x, y = Power2((BitCount()+1)/2);
4426
220
  CRYPTOPP_ASSERT(y*y >= *this);
4427
4428
220
  do
4429
1.80k
  {
4430
1.80k
    x = y;
4431
1.80k
    y = (x + *this/x) >> 1;
4432
1.80k
  } while (y<x);
4433
4434
220
  return x;
4435
229
}
4436
4437
bool Integer::IsSquare() const
4438
41
{
4439
41
  Integer r = SquareRoot();
4440
41
  return *this == r.Squared();
4441
41
}
4442
4443
bool Integer::IsUnit() const
4444
0
{
4445
0
  return (WordCount() == 1) && (reg[0] == 1);
4446
0
}
4447
4448
Integer Integer::MultiplicativeInverse() const
4449
0
{
4450
0
  return IsUnit() ? *this : Zero();
4451
0
}
4452
4453
Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4454
128
{
4455
128
  CRYPTOPP_ASSERT(m.NotZero());
4456
128
  if (m.IsZero())
4457
0
    throw Integer::DivideByZero();
4458
4459
128
  return x*y%m;
4460
128
}
4461
4462
Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4463
3.69k
{
4464
3.69k
  CRYPTOPP_ASSERT(m.NotZero());
4465
3.69k
  if (m.IsZero())
4466
0
    throw Integer::DivideByZero();
4467
4468
3.69k
  ModularArithmetic mr(m);
4469
3.69k
  return mr.Exponentiate(x, e);
4470
3.69k
}
4471
4472
Integer Integer::Gcd(const Integer &a, const Integer &b)
4473
765
{
4474
765
  return EuclideanDomainOf<Integer>().Gcd(a, b);
4475
765
}
4476
4477
Integer Integer::InverseMod(const Integer &m) const
4478
1.50k
{
4479
1.50k
  CRYPTOPP_ASSERT(m.NotNegative());
4480
1.50k
  CRYPTOPP_ASSERT(m.NotZero());
4481
4482
1.50k
  if (IsNegative())
4483
0
    return Modulo(m).InverseModNext(m);
4484
4485
  // http://github.com/weidai11/cryptopp/issues/602
4486
1.50k
  if (*this >= m)
4487
0
    return Modulo(m).InverseModNext(m);
4488
4489
1.50k
  return InverseModNext(m);
4490
1.50k
}
4491
4492
Integer Integer::InverseModNext(const Integer &m) const
4493
2.20k
{
4494
2.20k
  CRYPTOPP_ASSERT(m.NotNegative());
4495
2.20k
  CRYPTOPP_ASSERT(m.NotZero());
4496
4497
2.20k
  if (m.IsEven())
4498
942
  {
4499
942
    if (!m || IsEven())
4500
221
      return Zero();  // no inverse
4501
721
    if (*this == One())
4502
25
      return One();
4503
4504
696
    Integer u = m.Modulo(*this).InverseModNext(*this);
4505
696
    return !u ? Zero() : (m*(*this-u)+1)/(*this);
4506
721
  }
4507
4508
  // AlmostInverse requires a 4x workspace
4509
1.25k
  IntegerSecBlock T(m.reg.size() * 4);
4510
1.25k
  Integer r((word)0, m.reg.size());
4511
1.25k
  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4512
1.25k
  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4513
1.25k
  return r;
4514
2.20k
}
4515
4516
word Integer::InverseMod(word mod) const
4517
0
{
4518
0
  CRYPTOPP_ASSERT(mod != 0);
4519
4520
0
  word g0 = mod, g1 = *this % mod;
4521
0
  word v0 = 0, v1 = 1;
4522
0
  word y;
4523
4524
0
  while (g1)
4525
0
  {
4526
0
    if (g1 == 1)
4527
0
      return v1;
4528
0
    y = g0 / g1;
4529
0
    g0 = g0 % g1;
4530
0
    v0 += y * v1;
4531
4532
0
    if (!g0)
4533
0
      break;
4534
0
    if (g0 == 1)
4535
0
      return mod-v0;
4536
0
    y = g1 / g0;
4537
0
    g1 = g1 % g0;
4538
0
    v1 += y * v0;
4539
0
  }
4540
0
  return 0;
4541
0
}
4542
4543
// ********************************************************
4544
4545
ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
4546
0
{
4547
0
  BERSequenceDecoder seq(bt);
4548
0
  OID oid(seq);
4549
0
  if (oid != ASN1::prime_field())
4550
0
    BERDecodeError();
4551
0
  m_modulus.BERDecode(seq);
4552
0
  seq.MessageEnd();
4553
0
  m_result.reg.resize(m_modulus.reg.size());
4554
0
}
4555
4556
void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
4557
0
{
4558
0
  DERSequenceEncoder seq(bt);
4559
0
  ASN1::prime_field().DEREncode(seq);
4560
0
  m_modulus.DEREncode(seq);
4561
0
  seq.MessageEnd();
4562
0
}
4563
4564
void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
4565
0
{
4566
0
  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4567
0
}
4568
4569
void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
4570
0
{
4571
0
  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4572
0
}
4573
4574
const Integer& ModularArithmetic::Half(const Integer &a) const
4575
0
{
4576
0
  if (a.reg.size()==m_modulus.reg.size())
4577
0
  {
4578
0
    CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4579
0
    return m_result;
4580
0
  }
4581
0
  else
4582
0
    return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4583
0
}
4584
4585
const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4586
0
{
4587
0
  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4588
0
  {
4589
0
    if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4590
0
      || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4591
0
    {
4592
0
      CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4593
0
    }
4594
0
    return m_result;
4595
0
  }
4596
0
  else
4597
0
  {
4598
0
    m_result1 = a+b;
4599
0
    if (m_result1 >= m_modulus)
4600
0
      m_result1 -= m_modulus;
4601
0
    return m_result1;
4602
0
  }
4603
0
}
4604
4605
Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
4606
0
{
4607
0
  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4608
0
  {
4609
0
    if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4610
0
      || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4611
0
    {
4612
0
      CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4613
0
    }
4614
0
  }
4615
0
  else
4616
0
  {
4617
0
    a+=b;
4618
0
    if (a>=m_modulus)
4619
0
      a-=m_modulus;
4620
0
  }
4621
4622
0
  return a;
4623
0
}
4624
4625
const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4626
0
{
4627
0
  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4628
0
  {
4629
0
    if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4630
0
      CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4631
0
    return m_result;
4632
0
  }
4633
0
  else
4634
0
  {
4635
0
    m_result1 = a-b;
4636
0
    if (m_result1.IsNegative())
4637
0
      m_result1 += m_modulus;
4638
0
    return m_result1;
4639
0
  }
4640
0
}
4641
4642
Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
4643
0
{
4644
0
  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4645
0
  {
4646
0
    if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4647
0
      CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4648
0
  }
4649
0
  else
4650
0
  {
4651
0
    a-=b;
4652
0
    if (a.IsNegative())
4653
0
      a+=m_modulus;
4654
0
  }
4655
4656
0
  return a;
4657
0
}
4658
4659
const Integer& ModularArithmetic::Inverse(const Integer &a) const
4660
0
{
4661
0
  if (!a)
4662
0
    return a;
4663
4664
0
  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4665
0
  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4666
0
    Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4667
4668
0
  return m_result;
4669
0
}
4670
4671
Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4672
0
{
4673
0
  if (m_modulus.IsOdd())
4674
0
  {
4675
0
    MontgomeryRepresentation dr(m_modulus);
4676
0
    return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4677
0
  }
4678
0
  else
4679
0
    return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4680
0
}
4681
4682
void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4683
3.69k
{
4684
3.69k
  if (m_modulus.IsOdd())
4685
1.68k
  {
4686
1.68k
    MontgomeryRepresentation dr(m_modulus);
4687
1.68k
    dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4688
3.36k
    for (unsigned int i=0; i<exponentsCount; i++)
4689
1.68k
      results[i] = dr.ConvertOut(results[i]);
4690
1.68k
  }
4691
2.01k
  else
4692
2.01k
    AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4693
3.69k
}
4694
4695
MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)  // modulus must be odd
4696
  : ModularArithmetic(m),
4697
    m_u((word)0, m_modulus.reg.size()),
4698
    m_workspace(5*m_modulus.reg.size())
4699
28.8k
{
4700
28.8k
  if (!m_modulus.IsOdd())
4701
0
    throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4702
4703
28.8k
  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4704
28.8k
}
4705
4706
const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
4707
175k
{
4708
175k
  word *const T = m_workspace.begin();
4709
175k
  word *const R = m_result.reg.begin();
4710
175k
  const size_t N = m_modulus.reg.size();
4711
175k
  CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4712
4713
175k
  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4714
175k
  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4715
175k
  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4716
175k
  return m_result;
4717
175k
}
4718
4719
const Integer& MontgomeryRepresentation::Square(const Integer &a) const
4720
867k
{
4721
867k
  word *const T = m_workspace.begin();
4722
867k
  word *const R = m_result.reg.begin();
4723
867k
  const size_t N = m_modulus.reg.size();
4724
867k
  CRYPTOPP_ASSERT(a.reg.size()<=N);
4725
4726
867k
  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4727
867k
  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4728
867k
  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4729
867k
  return m_result;
4730
867k
}
4731
4732
Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
4733
1.68k
{
4734
1.68k
  word *const T = m_workspace.begin();
4735
1.68k
  word *const R = m_result.reg.begin();
4736
1.68k
  const size_t N = m_modulus.reg.size();
4737
1.68k
  CRYPTOPP_ASSERT(a.reg.size()<=N);
4738
4739
1.68k
  CopyWords(T, a.reg, a.reg.size());
4740
1.68k
  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4741
1.68k
  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4742
1.68k
  return m_result;
4743
1.68k
}
4744
4745
const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
4746
0
{
4747
//    return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4748
0
  word *const T = m_workspace.begin();
4749
0
  word *const R = m_result.reg.begin();
4750
0
  const size_t N = m_modulus.reg.size();
4751
0
  CRYPTOPP_ASSERT(a.reg.size()<=N);
4752
4753
0
  CopyWords(T, a.reg, a.reg.size());
4754
0
  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4755
0
  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4756
0
  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4757
4758
//  cout << "k=" << k << " N*32=" << 32*N << endl;
4759
4760
0
  if (k>N*WORD_BITS)
4761
0
    DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4762
0
  else
4763
0
    MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4764
4765
0
  return m_result;
4766
0
}
4767
4768
// Specialization declared in misc.h to allow us to print integers
4769
//  with additional control options, like arbitrary bases and uppercase.
4770
template <> CRYPTOPP_DLL
4771
std::string IntToString<Integer>(Integer value, unsigned int base)
4772
9.06k
{
4773
  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4774
9.06k
  static const unsigned int BIT_32 = (1U << 31);
4775
9.06k
  const bool UPPER = !!(base & BIT_32);
4776
9.06k
  static const unsigned int BIT_31 = (1U << 30);
4777
9.06k
  const bool BASE = !!(base & BIT_31);
4778
4779
9.06k
  const char CH = UPPER ? 'A' : 'a';
4780
9.06k
  base &= ~(BIT_32|BIT_31);
4781
9.06k
  CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4782
4783
9.06k
  if (value == 0)
4784
1.38k
    return "0";
4785
4786
7.68k
  bool negative = false, zero = false;
4787
7.68k
  if (value.IsNegative())
4788
536
  {
4789
536
    negative = true;
4790
536
    value.Negate();
4791
536
  }
4792
4793
7.68k
  if (!value)
4794
0
    zero = true;
4795
4796
7.68k
  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4797
7.68k
  Integer temp;
4798
4799
7.68k
  unsigned int i=0;
4800
3.20M
  while (!!value)
4801
3.19M
  {
4802
3.19M
    word digit;
4803
3.19M
    Integer::Divide(digit, temp, value, word(base));
4804
3.19M
    s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4805
3.19M
    value.swap(temp);
4806
3.19M
  }
4807
4808
7.68k
  std::string result;
4809
7.68k
  result.reserve(i+2);
4810
4811
7.68k
  if (negative)
4812
536
    result += '-';
4813
4814
7.68k
  if (zero)
4815
0
    result += '0';
4816
4817
3.20M
  while (i--)
4818
3.19M
    result += s[i];
4819
4820
7.68k
  if (BASE)
4821
0
  {
4822
0
    if (base == 10)
4823
0
      result += '.';
4824
0
    else if (base == 16)
4825
0
      result += 'h';
4826
0
    else if (base == 8)
4827
0
      result += 'o';
4828
0
    else if (base == 2)
4829
0
      result += 'b';
4830
0
  }
4831
4832
7.68k
  return result;
4833
9.06k
}
4834
4835
// Specialization declared in misc.h to avoid Coverity findings.
4836
template <> CRYPTOPP_DLL
4837
std::string IntToString<word64>(word64 value, unsigned int base)
4838
645
{
4839
  // Hack... set the high bit for uppercase.
4840
645
  static const unsigned int HIGH_BIT = (1U << 31);
4841
645
  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4842
645
  base &= ~HIGH_BIT;
4843
4844
645
  CRYPTOPP_ASSERT(base >= 2);
4845
645
  if (value == 0)
4846
61
    return "0";
4847
4848
584
  std::string result;
4849
2.63k
  while (value > 0)
4850
2.05k
  {
4851
2.05k
    word64 digit = value % base;
4852
2.05k
    result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4853
2.05k
    value /= base;
4854
2.05k
  }
4855
584
  return result;
4856
645
}
4857
4858
#ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER
4859
// Allow the linker to discard Integer code if not needed.
4860
// Also see http://github.com/weidai11/cryptopp/issues/389.
4861
bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
4862
136k
{
4863
136k
  if (valueType != typeid(Integer))
4864
136k
    return false;
4865
0
  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
4866
0
  return true;
4867
136k
}
4868
#endif  // CRYPTOPP_NO_ASSIGN_TO_INTEGER
4869
4870
// *************************** C++ Static Initialization ***************************
4871
4872
class InitInteger
4873
{
4874
public:
4875
  InitInteger()
4876
10
  {
4877
10
    SetFunctionPointers();
4878
10
  }
4879
};
4880
4881
// This is not really needed because each Integer can dynamically initialize
4882
// itself, but we take a peephole optimization and initialize the class once
4883
// if init priorities are available. Dynamic initialization will be used if
4884
// init priorities are not available.
4885
4886
#if defined(HAVE_GCC_INIT_PRIORITY)
4887
  const InitInteger s_init __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 10))) = InitInteger();
4888
  const Integer g_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 11))) = Integer(0L);
4889
  const Integer g_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 12))) = Integer(1L);
4890
  const Integer g_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 13))) = Integer(2L);
4891
#elif defined(HAVE_MSC_INIT_PRIORITY)
4892
  #pragma warning(disable: 4075)
4893
  #pragma init_seg(".CRT$XCU")
4894
  const InitInteger s_init;
4895
  const Integer g_zero(0L);
4896
  const Integer g_one(1L);
4897
  const Integer g_two(2L);
4898
  #pragma warning(default: 4075)
4899
#elif HAVE_XLC_INIT_PRIORITY
4900
  // XLC needs constant, not a define
4901
  #pragma priority(280)
4902
  const InitInteger s_init;
4903
  const Integer g_zero(0L);
4904
  const Integer g_one(1L);
4905
  const Integer g_two(2L);
4906
#else
4907
  const InitInteger s_init;
4908
#endif
4909
4910
// ***************** Library code ********************
4911
4912
const Integer &Integer::Zero()
4913
494k
{
4914
494k
#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4915
494k
  return g_zero;
4916
#elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4917
  static const Integer s_zero(0L);
4918
  return s_zero;
4919
#else  // Potential memory leak. Avoid if possible.
4920
  return Singleton<Integer, NewInteger<0L> >().Ref();
4921
#endif
4922
494k
}
4923
4924
const Integer &Integer::One()
4925
6.44k
{
4926
6.44k
#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4927
6.44k
  return g_one;
4928
#elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4929
  static const Integer s_one(1L);
4930
  return s_one;
4931
#else  // Potential memory leak. Avoid if possible.
4932
  return Singleton<Integer, NewInteger<1L> >().Ref();
4933
#endif
4934
6.44k
}
4935
4936
const Integer &Integer::Two()
4937
0
{
4938
0
#if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY)
4939
0
  return g_two;
4940
#elif defined(CRYPTOPP_CXX11_STATIC_INIT)
4941
  static const Integer s_two(2L);
4942
  return s_two;
4943
#else  // Potential memory leak. Avoid if possible.
4944
  return Singleton<Integer, NewInteger<2L> >().Ref();
4945
#endif
4946
0
}
4947
4948
NAMESPACE_END
4949
4950
#endif  // CRYPTOPP_IMPORTS