Coverage Report

Created: 2025-04-11 06:34

/src/botan/build/include/internal/botan/internal/mp_asmi.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Lowest Level MPI Algorithms
3
* (C) 1999-2010,2025 Jack Lloyd
4
*     2006 Luca Piccarreta
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#ifndef BOTAN_MP_ASM_INTERNAL_H_
10
#define BOTAN_MP_ASM_INTERNAL_H_
11
12
#include <botan/compiler.h>
13
#include <botan/types.h>
14
#include <botan/internal/target_info.h>
15
#include <concepts>
16
17
#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
18
   #include <botan/internal/donna128.h>
19
#endif
20
21
namespace Botan {
22
23
// Enable the x86-64 inline assembly code paths used by the word operations
// in this header. They are selected only when both GCC-style inline asm
// (BOTAN_USE_GCC_INLINE_ASM) and an x86-64 target are configured; otherwise
// the portable C++ fallbacks are compiled instead.
#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
24
   #define BOTAN_MP_USE_X86_64_ASM
25
#endif
26
27
/*
28
* Expressing an add with carry is sadly quite difficult in standard C/C++.
29
*
30
* Compilers will recognize various idioms and generate a reasonable carry
31
* chain. Unfortunately which idioms the compiler will understand vary, so we
32
* have to decide what to do based on the compiler. This is fragile; what will
33
* work varies not just based on compiler but also version, target architecture,
34
* and optimization flags.
35
*/
36
// Selects the carry idiom used by word_add when no add-with-carry builtin
// applies: true means widen to the double-width type and add there, false
// means use the compare-based carry detection. Clang gets the compare-based
// form, every other compiler the widening form.
static constexpr bool use_dword_for_word_add =
#if defined(__clang__)
   false;
#else
   true;
#endif
41
42
/*
43
* Concept for allowed multiprecision word types
44
*/
45
template <typename T>
46
concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
47
48
template <WordType W>
49
struct WordInfo {};
50
51
template <>
52
struct WordInfo<uint32_t> {
53
   public:
54
      static const constexpr size_t bytes = 4;
55
      static const constexpr size_t bits = 32;
56
      static const constexpr uint32_t max = 0xFFFFFFFF;
57
      static const constexpr uint32_t top_bit = 0x80000000;
58
59
      typedef uint64_t dword;
60
      static const constexpr bool dword_is_native = true;
61
};
62
63
template <>
64
struct WordInfo<uint64_t> {
65
   public:
66
      static const constexpr size_t bytes = 8;
67
      static const constexpr size_t bits = 64;
68
      static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF;
69
      static const constexpr uint64_t top_bit = 0x8000000000000000;
70
71
#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
72
      typedef uint128_t dword;
73
      static const constexpr bool dword_is_native = true;
74
#else
75
      typedef donna128 dword;
76
      static const constexpr bool dword_is_native = false;
77
#endif
78
};
79
80
/*
81
* Word Multiply/Add
82
*/
83
template <WordType W>
84
30.9M
inline constexpr auto word_madd2(W a, W b, W* c) -> W {
85
30.9M
#if defined(BOTAN_MP_USE_X86_64_ASM)
86
30.9M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
87
30.9M
      asm(R"(
88
30.9M
         mulq %[b]
89
30.9M
         addq %[c],%[a]
90
30.9M
         adcq $0,%[carry]
91
30.9M
         )"
92
30.9M
          : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*c)
93
30.9M
          : "0"(a), "1"(b), [c] "g"(*c)
94
30.9M
          : "cc");
95
96
30.9M
      return a;
97
30.9M
   }
98
0
#endif
99
100
0
   typedef typename WordInfo<W>::dword dword;
101
0
   const dword s = dword(a) * b + *c;
102
0
   *c = static_cast<W>(s >> WordInfo<W>::bits);
103
0
   return static_cast<W>(s);
104
30.9M
}
105
106
/*
107
* Word Multiply/Add
108
*/
109
template <WordType W>
110
400M
inline constexpr auto word_madd3(W a, W b, W c, W* d) -> W {
111
400M
#if defined(BOTAN_MP_USE_X86_64_ASM)
112
400M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
113
400M
      asm(R"(
114
400M
         mulq %[b]
115
400M
116
400M
         addq %[c],%[a]
117
400M
         adcq $0,%[carry]
118
400M
119
400M
         addq %[d],%[a]
120
400M
         adcq $0,%[carry]
121
400M
         )"
122
400M
          : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*d)
123
400M
          : "0"(a), "1"(b), [c] "g"(c), [d] "g"(*d)
124
400M
          : "cc");
125
126
400M
      return a;
127
400M
   }
128
0
#endif
129
130
0
   typedef typename WordInfo<W>::dword dword;
131
0
   const dword s = dword(a) * b + c + *d;
132
0
   *d = static_cast<W>(s >> WordInfo<W>::bits);
133
0
   return static_cast<W>(s);
134
400M
}
135
136
#if defined(BOTAN_MP_USE_X86_64_ASM)

   /* One assembler instruction: the string literal followed by newline and tab */
   #define ASM(INSN) INSN "\n\t"

   /* Expands STEP(ARG, i) for i = 0 through 7, in that order */
   #define DO_8_TIMES(STEP, ARG) \
      STEP(ARG, 0)               \
      STEP(ARG, 1)               \
      STEP(ARG, 2)               \
      STEP(ARG, 3)               \
      STEP(ARG, 4)               \
      STEP(ARG, 5)               \
      STEP(ARG, 6)               \
      STEP(ARG, 7)

   /*
   * Two operand step: x[IDX] = x[IDX] OPCODE y[IDX], where OPCODE is an
   * instruction string such as "adcq" or "sbbq". The carry register is
   * used as scratch for the loaded word.
   */
   #define ADDSUB2_OP(OPCODE, IDX)           \
      ASM("movq 8*" #IDX "(%[y]), %[carry]") \
      ASM(OPCODE " %[carry], 8*" #IDX "(%[x])")

   /*
   * Three operand step: z[IDX] = x[IDX] OPCODE y[IDX]. The carry register
   * is used as scratch for the intermediate word.
   */
   #define ADDSUB3_OP(OPCODE, IDX)              \
      ASM("movq 8*" #IDX "(%[x]), %[carry]")    \
      ASM(OPCODE " 8*" #IDX "(%[y]), %[carry]") \
      ASM("movq %[carry], 8*" #IDX "(%[z])")

   /*
   * Linear multiply step: rdx:rax = x[IDX] * y + carry; the low word is
   * stored to DEST[IDX] (DEST is the operand name as a string, "x" or "z")
   * and the high word becomes the new carry.
   */
   #define LINMUL_OP(DEST, IDX)          \
      ASM("movq 8*" #IDX "(%[x]),%%rax") \
      ASM("mulq %[y]")                   \
      ASM("addq %[carry],%%rax")         \
      ASM("adcq $0,%%rdx")               \
      ASM("movq %%rdx,%[carry]")         \
      ASM("movq %%rax, 8*" #IDX "(%[" DEST "])")

   /*
   * Multiply-add step: rdx:rax = x[IDX] * y + carry + z[IDX]; the low word
   * is stored back to z[IDX] and the high word becomes the new carry. The
   * first macro argument is not used.
   */
   #define MULADD_OP(UNUSED, IDX)        \
      ASM("movq 8*" #IDX "(%[x]),%%rax") \
      ASM("mulq %[y]")                   \
      ASM("addq %[carry],%%rax")         \
      ASM("adcq $0,%%rdx")               \
      ASM("addq 8*" #IDX "(%[z]),%%rax") \
      ASM("adcq $0,%%rdx")               \
      ASM("movq %%rdx,%[carry]")         \
      ASM("movq %%rax, 8*" #IDX " (%[z])")

   /*
   * Wraps an adc/sbb chain: rotates the low bit of the carry operand into
   * the CPU carry flag, runs BODY, then converts the resulting carry flag
   * back into the value 0 or 1 in the carry operand.
   */
   #define ADD_OR_SUBTRACT(BODY)    \
      ASM("rorq %[carry]")          \
      BODY                          \
      ASM("sbbq %[carry],%[carry]") \
      ASM("negq %[carry]")

#endif
184
185
/*
186
* Word Addition
187
*/
188
template <WordType W>
189
456M
inline constexpr auto word_add(W x, W y, W* carry) -> W {
190
456M
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
191
456M
   if(!std::is_constant_evaluated()) {
192
      if constexpr(std::same_as<W, unsigned int>) {
193
         return __builtin_addc(x, y, *carry & 1, carry);
194
456M
      } else if constexpr(std::same_as<W, unsigned long>) {
195
456M
         return __builtin_addcl(x, y, *carry & 1, carry);
196
      } else if constexpr(std::same_as<W, unsigned long long>) {
197
         return __builtin_addcll(x, y, *carry & 1, carry);
198
      }
199
456M
   }
200
0
#endif
201
202
   if constexpr(WordInfo<W>::dword_is_native && use_dword_for_word_add) {
203
      /*
204
      TODO(Botan4) this is largely a performance hack for GCCs that don't
205
      support __builtin_addc, if we increase the minimum supported version of
206
      GCC to GCC 14 then we can remove this and not worry about it
207
      */
208
      const W cb = *carry & 1;
209
      const auto s = typename WordInfo<W>::dword(x) + y + cb;
210
      *carry = static_cast<W>(s >> WordInfo<W>::bits);
211
      return static_cast<W>(s);
212
456M
   } else {
213
456M
      const W cb = *carry & 1;
214
456M
      W z = x + y;
215
456M
      W c1 = (z < x);
216
456M
      z += cb;
217
456M
      *carry = c1 | (z < cb);
218
456M
      return z;
219
456M
   }
220
456M
}
221
222
/*
223
* Eight Word Block Addition, Two Argument
224
*/
225
template <WordType W>
226
13.6M
inline constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W {
227
13.6M
#if defined(BOTAN_MP_USE_X86_64_ASM)
228
13.6M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
229
13.6M
      asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
230
13.6M
                   : [carry] "=r"(carry)
231
13.6M
                   : [x] "r"(x), [y] "r"(y), "0"(carry)
232
13.6M
                   : "cc", "memory");
233
13.6M
      return carry;
234
13.6M
   }
235
0
#endif
236
237
0
   x[0] = word_add(x[0], y[0], &carry);
238
0
   x[1] = word_add(x[1], y[1], &carry);
239
0
   x[2] = word_add(x[2], y[2], &carry);
240
0
   x[3] = word_add(x[3], y[3], &carry);
241
0
   x[4] = word_add(x[4], y[4], &carry);
242
0
   x[5] = word_add(x[5], y[5], &carry);
243
0
   x[6] = word_add(x[6], y[6], &carry);
244
0
   x[7] = word_add(x[7], y[7], &carry);
245
0
   return carry;
246
13.6M
}
247
248
/*
249
* Eight Word Block Addition, Three Argument
250
*/
251
template <WordType W>
252
5.41M
inline constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W {
253
5.41M
#if defined(BOTAN_MP_USE_X86_64_ASM)
254
5.41M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
255
5.41M
      asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
256
5.41M
                   : [carry] "=r"(carry)
257
5.41M
                   : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
258
5.41M
                   : "cc", "memory");
259
5.41M
      return carry;
260
5.41M
   }
261
0
#endif
262
263
0
   z[0] = word_add(x[0], y[0], &carry);
264
0
   z[1] = word_add(x[1], y[1], &carry);
265
0
   z[2] = word_add(x[2], y[2], &carry);
266
0
   z[3] = word_add(x[3], y[3], &carry);
267
0
   z[4] = word_add(x[4], y[4], &carry);
268
0
   z[5] = word_add(x[5], y[5], &carry);
269
0
   z[6] = word_add(x[6], y[6], &carry);
270
0
   z[7] = word_add(x[7], y[7], &carry);
271
0
   return carry;
272
5.41M
}
273
274
/*
275
* Word Subtraction
276
*/
277
template <WordType W>
278
990M
inline constexpr auto word_sub(W x, W y, W* carry) -> W {
279
990M
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
280
990M
   if(!std::is_constant_evaluated()) {
281
      if constexpr(std::same_as<W, unsigned int>) {
282
         return __builtin_subc(x, y, *carry & 1, carry);
283
990M
      } else if constexpr(std::same_as<W, unsigned long>) {
284
990M
         return __builtin_subcl(x, y, *carry & 1, carry);
285
      } else if constexpr(std::same_as<W, unsigned long long>) {
286
         return __builtin_subcll(x, y, *carry & 1, carry);
287
      }
288
990M
   }
289
0
#endif
290
291
0
   const W cb = *carry & 1;
292
990M
   W t0 = x - y;
293
990M
   W c1 = (t0 > x);
294
990M
   W z = t0 - cb;
295
990M
   *carry = c1 | (z > t0);
296
990M
   return z;
297
990M
}
298
299
/*
300
* Eight Word Block Subtraction, Two Argument
301
*/
302
template <WordType W>
303
5.78M
inline constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W {
304
5.78M
#if defined(BOTAN_MP_USE_X86_64_ASM)
305
5.78M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
306
5.78M
      asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
307
5.78M
          : [carry] "=r"(carry)
308
5.78M
          : [x] "r"(x), [y] "r"(y), "0"(carry)
309
5.78M
          : "cc", "memory");
310
5.78M
      return carry;
311
5.78M
   }
312
0
#endif
313
314
0
   x[0] = word_sub(x[0], y[0], &carry);
315
0
   x[1] = word_sub(x[1], y[1], &carry);
316
0
   x[2] = word_sub(x[2], y[2], &carry);
317
0
   x[3] = word_sub(x[3], y[3], &carry);
318
0
   x[4] = word_sub(x[4], y[4], &carry);
319
0
   x[5] = word_sub(x[5], y[5], &carry);
320
0
   x[6] = word_sub(x[6], y[6], &carry);
321
0
   x[7] = word_sub(x[7], y[7], &carry);
322
0
   return carry;
323
5.78M
}
324
325
/*
326
* Eight Word Block Subtraction, Two Argument
327
*/
328
template <WordType W>
329
2.46M
inline constexpr auto word8_sub2_rev(W x[8], const W y[8], W carry) -> W {
330
2.46M
#if defined(BOTAN_MP_USE_X86_64_ASM)
331
2.46M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
332
2.46M
      asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
333
2.46M
          : [carry] "=r"(carry)
334
2.46M
          : [x] "r"(y), [y] "r"(x), [z] "r"(x), "0"(carry)
335
2.46M
          : "cc", "memory");
336
2.46M
      return carry;
337
2.46M
   }
338
0
#endif
339
340
0
   x[0] = word_sub(y[0], x[0], &carry);
341
0
   x[1] = word_sub(y[1], x[1], &carry);
342
0
   x[2] = word_sub(y[2], x[2], &carry);
343
0
   x[3] = word_sub(y[3], x[3], &carry);
344
0
   x[4] = word_sub(y[4], x[4], &carry);
345
0
   x[5] = word_sub(y[5], x[5], &carry);
346
0
   x[6] = word_sub(y[6], x[6], &carry);
347
0
   x[7] = word_sub(y[7], x[7], &carry);
348
0
   return carry;
349
2.46M
}
350
351
/*
352
* Eight Word Block Subtraction, Three Argument
353
*/
354
template <WordType W>
355
335M
inline constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W {
356
335M
#if defined(BOTAN_MP_USE_X86_64_ASM)
357
335M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
358
335M
      asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
359
335M
                   : [carry] "=r"(carry)
360
335M
                   : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
361
335M
                   : "cc", "memory");
362
335M
      return carry;
363
335M
   }
364
0
#endif
365
366
0
   z[0] = word_sub(x[0], y[0], &carry);
367
0
   z[1] = word_sub(x[1], y[1], &carry);
368
0
   z[2] = word_sub(x[2], y[2], &carry);
369
0
   z[3] = word_sub(x[3], y[3], &carry);
370
0
   z[4] = word_sub(x[4], y[4], &carry);
371
0
   z[5] = word_sub(x[5], y[5], &carry);
372
0
   z[6] = word_sub(x[6], y[6], &carry);
373
0
   z[7] = word_sub(x[7], y[7], &carry);
374
0
   return carry;
375
335M
}
376
377
/*
378
* Eight Word Block Linear Multiplication
379
*/
380
template <WordType W>
381
1.25M
inline constexpr auto word8_linmul2(W x[8], W y, W carry) -> W {
382
1.25M
#if defined(BOTAN_MP_USE_X86_64_ASM)
383
1.25M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
384
1.25M
      asm(DO_8_TIMES(LINMUL_OP, "x")
385
1.25M
          : [carry] "=r"(carry)
386
1.25M
          : [x] "r"(x), [y] "rm"(y), "0"(carry)
387
1.25M
          : "cc", "%rax", "%rdx");
388
1.25M
      return carry;
389
1.25M
   }
390
0
#endif
391
392
0
   x[0] = word_madd2(x[0], y, &carry);
393
0
   x[1] = word_madd2(x[1], y, &carry);
394
0
   x[2] = word_madd2(x[2], y, &carry);
395
0
   x[3] = word_madd2(x[3], y, &carry);
396
0
   x[4] = word_madd2(x[4], y, &carry);
397
0
   x[5] = word_madd2(x[5], y, &carry);
398
0
   x[6] = word_madd2(x[6], y, &carry);
399
0
   x[7] = word_madd2(x[7], y, &carry);
400
0
   return carry;
401
1.25M
}
402
403
/*
404
* Eight Word Block Linear Multiplication
405
*/
406
template <WordType W>
407
3.68M
inline constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W {
408
3.68M
#if defined(BOTAN_MP_USE_X86_64_ASM)
409
3.68M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
410
3.68M
      asm(DO_8_TIMES(LINMUL_OP, "z")
411
3.68M
          : [carry] "=r"(carry)
412
3.68M
          : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
413
3.68M
          : "cc", "%rax", "%rdx");
414
3.68M
      return carry;
415
3.68M
   }
416
0
#endif
417
418
0
   z[0] = word_madd2(x[0], y, &carry);
419
0
   z[1] = word_madd2(x[1], y, &carry);
420
0
   z[2] = word_madd2(x[2], y, &carry);
421
0
   z[3] = word_madd2(x[3], y, &carry);
422
0
   z[4] = word_madd2(x[4], y, &carry);
423
0
   z[5] = word_madd2(x[5], y, &carry);
424
0
   z[6] = word_madd2(x[6], y, &carry);
425
0
   z[7] = word_madd2(x[7], y, &carry);
426
0
   return carry;
427
3.68M
}
428
429
/*
430
* Eight Word Block Multiply/Add
431
*/
432
template <WordType W>
433
145M
inline constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W {
434
145M
#if defined(BOTAN_MP_USE_X86_64_ASM)
435
145M
   if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
436
145M
      asm(DO_8_TIMES(MULADD_OP, "")
437
145M
          : [carry] "=r"(carry)
438
145M
          : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
439
145M
          : "cc", "%rax", "%rdx");
440
145M
      return carry;
441
145M
   }
442
0
#endif
443
444
0
   z[0] = word_madd3(x[0], y, z[0], &carry);
445
0
   z[1] = word_madd3(x[1], y, z[1], &carry);
446
0
   z[2] = word_madd3(x[2], y, z[2], &carry);
447
0
   z[3] = word_madd3(x[3], y, z[3], &carry);
448
0
   z[4] = word_madd3(x[4], y, z[4], &carry);
449
0
   z[5] = word_madd3(x[5], y, z[5], &carry);
450
0
   z[6] = word_madd3(x[6], y, z[6], &carry);
451
0
   z[7] = word_madd3(x[7], y, z[7], &carry);
452
0
   return carry;
453
145M
}
454
455
/**
456
* Helper for 3-word accumulators
457
*
458
* A number of algorithms especially Comba multiplication and
459
* Montgomery reduction can take advantage of wide accumulators, which
460
* consume inputs via addition with outputs extracted from the low
461
* bits.
462
*/
463
template <WordType W>
464
class word3 final {
465
#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
466
467
   public:
468
140M
      constexpr word3() { m_w = 0; }
469
470
4.02G
      inline constexpr void mul(W x, W y) { m_w += static_cast<W3>(x) * y; }
471
472
630M
      inline constexpr void mul_x2(W x, W y) { m_w += static_cast<W3>(x) * y * 2; }
473
474
531M
      inline constexpr void add(W x) { m_w += x; }
475
476
1.36G
      inline constexpr W extract() {
477
1.36G
         W r = static_cast<W>(m_w);
478
1.36G
         m_w >>= WordInfo<W>::bits;
479
1.36G
         return r;
480
1.36G
      }
481
482
265M
      inline constexpr W monty_step(W p0, W p_dash) {
483
265M
         const W w0 = static_cast<W>(m_w);
484
265M
         const W r = w0 * p_dash;
485
265M
         mul(r, p0);
486
265M
         m_w >>= WordInfo<W>::bits;
487
265M
         return r;
488
265M
      }
489
490
      inline constexpr W monty_step_pdash1() {
491
         const W r = static_cast<W>(m_w);
492
         m_w >>= WordInfo<W>::bits;
493
         m_w += static_cast<W3>(r);
494
         return r;
495
      }
496
497
   private:
498
      __extension__ typedef unsigned _BitInt(WordInfo<W>::bits * 3) W3;
499
      W3 m_w;
500
#else
501
502
   public:
503
      constexpr word3() {
504
         m_w2 = 0;
505
         m_w1 = 0;
506
         m_w0 = 0;
507
      }
508
509
      inline constexpr void mul(W x, W y) {
510
   #if defined(BOTAN_MP_USE_X86_64_ASM)
511
         if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
512
            W z0 = 0, z1 = 0;
513
514
            asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
515
516
            asm(R"(
517
                 addq %[z0],%[w0]
518
                 adcq %[z1],%[w1]
519
                 adcq $0,%[w2]
520
                )"
521
                : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
522
                : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
523
                : "cc");
524
            return;
525
         }
526
   #endif
527
528
         typedef typename WordInfo<W>::dword dword;
529
         const dword s = dword(x) * y + m_w0;
530
         W carry = static_cast<W>(s >> WordInfo<W>::bits);
531
         m_w0 = static_cast<W>(s);
532
         m_w1 += carry;
533
         m_w2 += (m_w1 < carry);
534
      }
535
536
      inline constexpr void mul_x2(W x, W y) {
537
   #if defined(BOTAN_MP_USE_X86_64_ASM)
538
         if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
539
            W z0 = 0, z1 = 0;
540
541
            asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
542
543
            asm(R"(
544
                 addq %[z0],%[w0]
545
                 adcq %[z1],%[w1]
546
                 adcq $0,%[w2]
547
548
                 addq %[z0],%[w0]
549
                 adcq %[z1],%[w1]
550
                 adcq $0,%[w2]
551
                   )"
552
                : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
553
                : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
554
                : "cc");
555
            return;
556
         }
557
   #endif
558
559
         W carry = 0;
560
         x = word_madd2(x, y, &carry);
561
         y = carry;
562
563
         carry = 0;
564
         m_w0 = word_add(m_w0, x, &carry);
565
         m_w1 = word_add(m_w1, y, &carry);
566
         m_w2 += carry;
567
568
         carry = 0;
569
         m_w0 = word_add(m_w0, x, &carry);
570
         m_w1 = word_add(m_w1, y, &carry);
571
         m_w2 += carry;
572
      }
573
574
      inline constexpr void add(W x) {
575
         constexpr W z = 0;
576
577
         W carry = 0;
578
         m_w0 = word_add(m_w0, x, &carry);
579
         m_w1 = word_add(m_w1, z, &carry);
580
         m_w2 += carry;
581
      }
582
583
      inline constexpr W extract() {
584
         W r = m_w0;
585
         m_w0 = m_w1;
586
         m_w1 = m_w2;
587
         m_w2 = 0;
588
         return r;
589
      }
590
591
      inline constexpr W monty_step(W p0, W p_dash) {
592
         W r = m_w0 * p_dash;
593
         mul(r, p0);
594
         m_w0 = m_w1;
595
         m_w1 = m_w2;
596
         m_w2 = 0;
597
         return r;
598
      }
599
600
      inline constexpr W monty_step_pdash1() {
601
         // If p_dash == 1 then p[0] = -1 and everything simplifies
602
         const W r = m_w0;
603
         m_w0 += m_w1;
604
         m_w1 = m_w2 + (m_w0 < m_w1);
605
         m_w2 = 0;
606
         return r;
607
      }
608
609
   private:
610
      W m_w0, m_w1, m_w2;
611
#endif
612
};
613
614
#if defined(ASM)
615
   #undef ASM
616
   #undef DO_8_TIMES
617
   #undef ADD_OR_SUBTRACT
618
   #undef ADDSUB2_OP
619
   #undef ADDSUB3_OP
620
   #undef LINMUL_OP
621
   #undef MULADD_OP
622
#endif
623
624
}  // namespace Botan
625
626
#endif