Coverage Report

Created: 2021-05-04 09:02

/src/botan/build/include/botan/internal/mp_asmi.h
Line
Count
Source
1
/*
2
* Lowest Level MPI Algorithms
3
* (C) 1999-2010 Jack Lloyd
4
*     2006 Luca Piccarreta
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#ifndef BOTAN_MP_ASM_INTERNAL_H_
10
#define BOTAN_MP_ASM_INTERNAL_H_
11
12
#include <botan/types.h>
13
#include <botan/internal/mul128.h>
14
15
namespace Botan {
16
17
/*
* Pick a double-width integer type ("dword") able to hold the full product
* of two words. BOTAN_HAS_MP_DWORD is defined only when such a type exists;
* otherwise the functions below fall back to mul64x64_128.
*/
#if BOTAN_MP_WORD_BITS == 64

  #ifdef BOTAN_TARGET_HAS_NATIVE_UINT128
    using dword = uint128_t;
    #define BOTAN_HAS_MP_DWORD
  #endif
  // Without a native 128 bit integer type, mul64x64_128 is used instead

#elif BOTAN_MP_WORD_BITS == 32

  using dword = uint64_t;
  #define BOTAN_HAS_MP_DWORD

#else
  #error BOTAN_MP_WORD_BITS must be 32 or 64
#endif
32
33
/*
* Choose which inline assembly variant (if any) the functions below use.
* A variant is enabled only when GCC-style inline asm is available and the
* configured word size matches the target architecture macro.
*/
#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32)
  #define BOTAN_MP_USE_X86_32_ASM
#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64)
  #define BOTAN_MP_USE_X86_64_ASM
#endif
42
43
/*
44
* Word Multiply/Add
45
*/
46
inline word word_madd2(word a, word b, word* c)
47
132M
   {
48
#if defined(BOTAN_MP_USE_X86_32_ASM)
49
   asm(R"(
50
      mull %[b]
51
      addl %[c],%[a]
52
      adcl $0,%[carry]
53
      )"
54
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
55
      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
56
57
   return a;
58
59
#elif defined(BOTAN_MP_USE_X86_64_ASM)
60
132M
      asm(R"(
61
132M
         mulq %[b]
62
132M
         addq %[c],%[a]
63
132M
         adcq $0,%[carry]
64
132M
      )"
65
132M
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
66
132M
      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
67
68
132M
   return a;
69
70
#elif defined(BOTAN_HAS_MP_DWORD)
71
   const dword s = static_cast<dword>(a) * b + *c;
72
   *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
73
   return static_cast<word>(s);
74
#else
75
   static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");
76
77
   word hi = 0, lo = 0;
78
79
   mul64x64_128(a, b, &lo, &hi);
80
81
   lo += *c;
82
   hi += (lo < *c); // carry?
83
84
   *c = hi;
85
   return lo;
86
#endif
87
132M
   }
88
89
/*
90
* Word Multiply/Add
91
*/
92
inline word word_madd3(word a, word b, word c, word* d)
93
306M
   {
94
#if defined(BOTAN_MP_USE_X86_32_ASM)
95
   asm(R"(
96
      mull %[b]
97
98
      addl %[c],%[a]
99
      adcl $0,%[carry]
100
101
      addl %[d],%[a]
102
      adcl $0,%[carry]
103
      )"
104
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
105
      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
106
107
   return a;
108
109
#elif defined(BOTAN_MP_USE_X86_64_ASM)
110
306M
   asm(R"(
111
306M
      mulq %[b]
112
306M
      addq %[c],%[a]
113
306M
      adcq $0,%[carry]
114
306M
      addq %[d],%[a]
115
306M
      adcq $0,%[carry]
116
306M
      )"
117
306M
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
118
306M
      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
119
120
306M
   return a;
121
122
#elif defined(BOTAN_HAS_MP_DWORD)
123
   const dword s = static_cast<dword>(a) * b + c + *d;
124
   *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
125
   return static_cast<word>(s);
126
#else
127
   static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");
128
129
   word hi = 0, lo = 0;
130
131
   mul64x64_128(a, b, &lo, &hi);
132
133
   lo += c;
134
   hi += (lo < c); // carry?
135
136
   lo += *d;
137
   hi += (lo < *d); // carry?
138
139
   *d = hi;
140
   return lo;
141
#endif
142
306M
   }
143
144
/*
* Building blocks for the inline assembly kernels below.
*
* Each *_OP macro expands to the instruction text handling the word at
* position INDEX of the arrays named by the asm operands [x], [y] and [z].
* They use ASM() and are driven by DO_8_TIMES(), both defined after the
* per-architecture section; that ordering works because a macro body is
* only expanded where the macro is finally used.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

// x[INDEX] = x[INDEX] OPERATION y[INDEX], using [carry] as scratch
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

// z[INDEX] = x[INDEX] OPERATION y[INDEX], using [carry] as scratch
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
        ASM("movl %[carry], 4*" #INDEX "(%[z])")

// WRITE_TO[INDEX] = low(x[INDEX] * y + carry); carry = high part
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

// z[INDEX] = low(x[INDEX] * y + carry + z[INDEX]); carry = high part
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX " (%[z])")

// Rotate the low bit of [carry] into the CPU carry flag, run CORE_CODE, then
// turn the resulting carry flag back into a 0/1 value in [carry]
#define ADD_OR_SUBTRACT(CORE_CODE)     \
        ASM("rorl %[carry]")           \
        CORE_CODE                      \
        ASM("sbbl %[carry],%[carry]")  \
        ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

// x[INDEX] = x[INDEX] OPERATION y[INDEX], using [carry] as scratch
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

// z[INDEX] = x[INDEX] OPERATION y[INDEX], using [carry] as scratch
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
        ASM("movq %[carry], 8*" #INDEX "(%[z])")

// WRITE_TO[INDEX] = low(x[INDEX] * y + carry); carry = high part
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

// z[INDEX] = low(x[INDEX] * y + carry + z[INDEX]); carry = high part
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX " (%[z])")

// Rotate the low bit of [carry] into the CPU carry flag, run CORE_CODE, then
// turn the resulting carry flag back into a 0/1 value in [carry]
#define ADD_OR_SUBTRACT(CORE_CODE)     \
        ASM("rorq %[carry]")           \
        CORE_CODE                      \
        ASM("sbbq %[carry],%[carry]")  \
        ASM("negq %[carry]")

#endif

#if defined(ADD_OR_SUBTRACT)

// Terminates one instruction line of the asm template
#define ASM(x) x "\n\t"

// Expands MACRO(ARG, i) for the eight word positions i = 0..7
#define DO_8_TIMES(MACRO, ARG) \
        MACRO(ARG, 0) \
        MACRO(ARG, 1) \
        MACRO(ARG, 2) \
        MACRO(ARG, 3) \
        MACRO(ARG, 4) \
        MACRO(ARG, 5) \
        MACRO(ARG, 6) \
        MACRO(ARG, 7)

#endif
231
232
/*
233
* Word Addition
234
*/
235
inline word word_add(word x, word y, word* carry)
236
2.11G
   {
237
#if defined(BOTAN_MP_USE_X86_32_ASM)
238
   asm(
239
      ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
240
      : [x]"=r"(x), [carry]"=r"(*carry)
241
      : "0"(x), [y]"rm"(y), "1"(*carry)
242
      : "cc");
243
   return x;
244
245
#elif defined(BOTAN_MP_USE_X86_64_ASM)
246
247
2.11G
   asm(
248
2.11G
      ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
249
2.11G
      : [x]"=r"(x), [carry]"=r"(*carry)
250
2.11G
      : "0"(x), [y]"rm"(y), "1"(*carry)
251
2.11G
      : "cc");
252
2.11G
   return x;
253
254
#else
255
   word z = x + y;
256
   word c1 = (z < x);
257
   z += *carry;
258
   *carry = c1 | (z < *carry);
259
   return z;
260
#endif
261
2.11G
   }
262
263
/*
264
* Eight Word Block Addition, Two Argument
265
*/
266
inline word word8_add2(word x[8], const word y[8], word carry)
267
9.92M
   {
268
#if defined(BOTAN_MP_USE_X86_32_ASM)
269
   asm(
270
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
271
      : [carry]"=r"(carry)
272
      : [x]"r"(x), [y]"r"(y), "0"(carry)
273
      : "cc", "memory");
274
   return carry;
275
276
#elif defined(BOTAN_MP_USE_X86_64_ASM)
277
278
9.92M
   asm(
279
9.92M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
280
9.92M
      : [carry]"=r"(carry)
281
9.92M
      : [x]"r"(x), [y]"r"(y), "0"(carry)
282
9.92M
      : "cc", "memory");
283
9.92M
   return carry;
284
285
#else
286
   x[0] = word_add(x[0], y[0], &carry);
287
   x[1] = word_add(x[1], y[1], &carry);
288
   x[2] = word_add(x[2], y[2], &carry);
289
   x[3] = word_add(x[3], y[3], &carry);
290
   x[4] = word_add(x[4], y[4], &carry);
291
   x[5] = word_add(x[5], y[5], &carry);
292
   x[6] = word_add(x[6], y[6], &carry);
293
   x[7] = word_add(x[7], y[7], &carry);
294
   return carry;
295
#endif
296
9.92M
   }
297
298
/*
299
* Eight Word Block Addition, Three Argument
300
*/
301
inline word word8_add3(word z[8], const word x[8],
302
                       const word y[8], word carry)
303
286M
   {
304
#if defined(BOTAN_MP_USE_X86_32_ASM)
305
   asm(
306
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
307
      : [carry]"=r"(carry)
308
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
309
      : "cc", "memory");
310
   return carry;
311
312
#elif defined(BOTAN_MP_USE_X86_64_ASM)
313
314
286M
   asm(
315
286M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
316
286M
      : [carry]"=r"(carry)
317
286M
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
318
286M
      : "cc", "memory");
319
286M
   return carry;
320
321
#else
322
   z[0] = word_add(x[0], y[0], &carry);
323
   z[1] = word_add(x[1], y[1], &carry);
324
   z[2] = word_add(x[2], y[2], &carry);
325
   z[3] = word_add(x[3], y[3], &carry);
326
   z[4] = word_add(x[4], y[4], &carry);
327
   z[5] = word_add(x[5], y[5], &carry);
328
   z[6] = word_add(x[6], y[6], &carry);
329
   z[7] = word_add(x[7], y[7], &carry);
330
   return carry;
331
#endif
332
286M
   }
333
334
/*
335
* Word Subtraction
336
*/
337
inline word word_sub(word x, word y, word* carry)
338
2.89G
   {
339
#if defined(BOTAN_MP_USE_X86_32_ASM)
340
   asm(
341
      ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
342
      : [x]"=r"(x), [carry]"=r"(*carry)
343
      : "0"(x), [y]"rm"(y), "1"(*carry)
344
      : "cc");
345
   return x;
346
347
#elif defined(BOTAN_MP_USE_X86_64_ASM)
348
349
2.89G
   asm(
350
2.89G
      ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
351
2.89G
      : [x]"=r"(x), [carry]"=r"(*carry)
352
2.89G
      : "0"(x), [y]"rm"(y), "1"(*carry)
353
2.89G
      : "cc");
354
2.89G
   return x;
355
356
#else
357
   word t0 = x - y;
358
   word c1 = (t0 > x);
359
   word z = t0 - *carry;
360
   *carry = c1 | (z > t0);
361
   return z;
362
#endif
363
2.89G
   }
364
365
/*
366
* Eight Word Block Subtraction, Two Argument
367
*/
368
inline word word8_sub2(word x[8], const word y[8], word carry)
369
26.1M
   {
370
#if defined(BOTAN_MP_USE_X86_32_ASM)
371
   asm(
372
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
373
      : [carry]"=r"(carry)
374
      : [x]"r"(x), [y]"r"(y), "0"(carry)
375
      : "cc", "memory");
376
   return carry;
377
378
#elif defined(BOTAN_MP_USE_X86_64_ASM)
379
380
26.1M
   asm(
381
26.1M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
382
26.1M
      : [carry]"=r"(carry)
383
26.1M
      : [x]"r"(x), [y]"r"(y), "0"(carry)
384
26.1M
      : "cc", "memory");
385
26.1M
   return carry;
386
387
#else
388
   x[0] = word_sub(x[0], y[0], &carry);
389
   x[1] = word_sub(x[1], y[1], &carry);
390
   x[2] = word_sub(x[2], y[2], &carry);
391
   x[3] = word_sub(x[3], y[3], &carry);
392
   x[4] = word_sub(x[4], y[4], &carry);
393
   x[5] = word_sub(x[5], y[5], &carry);
394
   x[6] = word_sub(x[6], y[6], &carry);
395
   x[7] = word_sub(x[7], y[7], &carry);
396
   return carry;
397
#endif
398
26.1M
   }
399
400
/*
401
* Eight Word Block Subtraction, Two Argument
402
*/
403
inline word word8_sub2_rev(word x[8], const word y[8], word carry)
404
2.51M
   {
405
#if defined(BOTAN_MP_USE_X86_32_ASM)
406
   asm(
407
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
408
      : [carry]"=r"(carry)
409
      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
410
      : "cc", "memory");
411
   return carry;
412
413
#elif defined(BOTAN_MP_USE_X86_64_ASM)
414
415
2.51M
   asm(
416
2.51M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
417
2.51M
      : [carry]"=r"(carry)
418
2.51M
      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
419
2.51M
      : "cc", "memory");
420
2.51M
   return carry;
421
422
#else
423
   x[0] = word_sub(y[0], x[0], &carry);
424
   x[1] = word_sub(y[1], x[1], &carry);
425
   x[2] = word_sub(y[2], x[2], &carry);
426
   x[3] = word_sub(y[3], x[3], &carry);
427
   x[4] = word_sub(y[4], x[4], &carry);
428
   x[5] = word_sub(y[5], x[5], &carry);
429
   x[6] = word_sub(y[6], x[6], &carry);
430
   x[7] = word_sub(y[7], x[7], &carry);
431
   return carry;
432
#endif
433
2.51M
   }
434
435
/*
436
* Eight Word Block Subtraction, Three Argument
437
*/
438
inline word word8_sub3(word z[8], const word x[8],
439
                       const word y[8], word carry)
440
603M
   {
441
#if defined(BOTAN_MP_USE_X86_32_ASM)
442
   asm(
443
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
444
      : [carry]"=r"(carry)
445
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
446
      : "cc", "memory");
447
   return carry;
448
449
#elif defined(BOTAN_MP_USE_X86_64_ASM)
450
451
603M
   asm(
452
603M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
453
603M
      : [carry]"=r"(carry)
454
603M
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
455
603M
      : "cc", "memory");
456
603M
   return carry;
457
458
#else
459
   z[0] = word_sub(x[0], y[0], &carry);
460
   z[1] = word_sub(x[1], y[1], &carry);
461
   z[2] = word_sub(x[2], y[2], &carry);
462
   z[3] = word_sub(x[3], y[3], &carry);
463
   z[4] = word_sub(x[4], y[4], &carry);
464
   z[5] = word_sub(x[5], y[5], &carry);
465
   z[6] = word_sub(x[6], y[6], &carry);
466
   z[7] = word_sub(x[7], y[7], &carry);
467
   return carry;
468
#endif
469
603M
   }
470
471
/*
472
* Eight Word Block Linear Multiplication
473
*/
474
inline word word8_linmul2(word x[8], word y, word carry)
475
281M
   {
476
#if defined(BOTAN_MP_USE_X86_32_ASM)
477
   asm(
478
      DO_8_TIMES(LINMUL_OP, "x")
479
      : [carry]"=r"(carry)
480
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
481
      : "cc", "%eax", "%edx");
482
   return carry;
483
484
#elif defined(BOTAN_MP_USE_X86_64_ASM)
485
486
281M
   asm(
487
281M
      DO_8_TIMES(LINMUL_OP, "x")
488
281M
      : [carry]"=r"(carry)
489
281M
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
490
281M
      : "cc", "%rax", "%rdx");
491
281M
   return carry;
492
493
#else
494
   x[0] = word_madd2(x[0], y, &carry);
495
   x[1] = word_madd2(x[1], y, &carry);
496
   x[2] = word_madd2(x[2], y, &carry);
497
   x[3] = word_madd2(x[3], y, &carry);
498
   x[4] = word_madd2(x[4], y, &carry);
499
   x[5] = word_madd2(x[5], y, &carry);
500
   x[6] = word_madd2(x[6], y, &carry);
501
   x[7] = word_madd2(x[7], y, &carry);
502
   return carry;
503
#endif
504
281M
   }
505
506
/*
507
* Eight Word Block Linear Multiplication
508
*/
509
inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
510
23.5M
   {
511
#if defined(BOTAN_MP_USE_X86_32_ASM)
512
   asm(
513
      DO_8_TIMES(LINMUL_OP, "z")
514
      : [carry]"=r"(carry)
515
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
516
      : "cc", "%eax", "%edx");
517
   return carry;
518
519
#elif defined(BOTAN_MP_USE_X86_64_ASM)
520
23.5M
   asm(
521
23.5M
      DO_8_TIMES(LINMUL_OP, "z")
522
23.5M
      : [carry]"=r"(carry)
523
23.5M
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
524
23.5M
      : "cc", "%rax", "%rdx");
525
23.5M
   return carry;
526
527
#else
528
   z[0] = word_madd2(x[0], y, &carry);
529
   z[1] = word_madd2(x[1], y, &carry);
530
   z[2] = word_madd2(x[2], y, &carry);
531
   z[3] = word_madd2(x[3], y, &carry);
532
   z[4] = word_madd2(x[4], y, &carry);
533
   z[5] = word_madd2(x[5], y, &carry);
534
   z[6] = word_madd2(x[6], y, &carry);
535
   z[7] = word_madd2(x[7], y, &carry);
536
   return carry;
537
#endif
538
23.5M
   }
539
540
/*
541
* Eight Word Block Multiply/Add
542
*/
543
inline word word8_madd3(word z[8], const word x[8], word y, word carry)
544
134M
   {
545
#if defined(BOTAN_MP_USE_X86_32_ASM)
546
   asm(
547
      DO_8_TIMES(MULADD_OP, "")
548
      : [carry]"=r"(carry)
549
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
550
      : "cc", "%eax", "%edx");
551
   return carry;
552
553
#elif defined(BOTAN_MP_USE_X86_64_ASM)
554
555
134M
   asm(
556
134M
      DO_8_TIMES(MULADD_OP, "")
557
134M
      : [carry]"=r"(carry)
558
134M
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
559
134M
      : "cc", "%rax", "%rdx");
560
134M
   return carry;
561
562
#else
563
   z[0] = word_madd3(x[0], y, z[0], &carry);
564
   z[1] = word_madd3(x[1], y, z[1], &carry);
565
   z[2] = word_madd3(x[2], y, z[2], &carry);
566
   z[3] = word_madd3(x[3], y, z[3], &carry);
567
   z[4] = word_madd3(x[4], y, z[4], &carry);
568
   z[5] = word_madd3(x[5], y, z[5], &carry);
569
   z[6] = word_madd3(x[6], y, z[6], &carry);
570
   z[7] = word_madd3(x[7], y, z[7], &carry);
571
   return carry;
572
#endif
573
134M
   }
574
575
/*
576
* Multiply-Add Accumulator
577
* (w2,w1,w0) += x * y
578
*/
579
inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
580
14.4G
   {
581
#if defined(BOTAN_MP_USE_X86_32_ASM)
582
   word z0 = 0, z1 = 0;
583
584
   asm("mull %[y]"
585
        : "=a"(z0),"=d"(z1)
586
        : "a"(x), [y]"rm"(y)
587
        : "cc");
588
589
   asm(R"(
590
       addl %[z0],%[w0]
591
       adcl %[z1],%[w1]
592
       adcl $0,%[w2]
593
       )"
594
       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
595
       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
596
       : "cc");
597
598
#elif defined(BOTAN_MP_USE_X86_64_ASM)
599
600
14.4G
   word z0 = 0, z1 = 0;
601
602
14.4G
   asm("mulq %[y]"
603
14.4G
        : "=a"(z0),"=d"(z1)
604
14.4G
        : "a"(x), [y]"rm"(y)
605
14.4G
        : "cc");
606
607
14.4G
   asm(R"(
608
14.4G
       addq %[z0],%[w0]
609
14.4G
       adcq %[z1],%[w1]
610
14.4G
       adcq $0,%[w2]
611
14.4G
       )"
612
14.4G
       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
613
14.4G
       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
614
14.4G
       : "cc");
615
616
#else
617
   word carry = *w0;
618
   *w0 = word_madd2(x, y, &carry);
619
   *w1 += carry;
620
   *w2 += (*w1 < carry);
621
#endif
622
14.4G
   }
623
624
/*
625
* 3-word addition
626
* (w2,w1,w0) += x
627
*/
628
inline void word3_add(word* w2, word* w1, word* w0, word x)
629
1.18G
   {
630
#if defined(BOTAN_MP_USE_X86_32_ASM)
631
   asm(R"(
632
      addl %[x],%[w0]
633
      adcl $0,%[w1]
634
      adcl $0,%[w2]
635
      )"
636
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
637
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
638
      : "cc");
639
640
#elif defined(BOTAN_MP_USE_X86_64_ASM)
641
642
1.18G
   asm(R"(
643
1.18G
      addq %[x],%[w0]
644
1.18G
      adcq $0,%[w1]
645
1.18G
      adcq $0,%[w2]
646
1.18G
      )"
647
1.18G
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
648
1.18G
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
649
1.18G
      : "cc");
650
651
#else
652
   *w0 += x;
653
   word c1 = (*w0 < x);
654
   *w1 += c1;
655
   word c2 = (*w1 < c1);
656
   *w2 += c2;
657
#endif
658
1.18G
   }
659
660
/*
661
* Multiply-Add Accumulator
662
* (w2,w1,w0) += 2 * x * y
663
*/
664
inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
665
3.71G
   {
666
#if defined(BOTAN_MP_USE_X86_32_ASM)
667
668
   word z0 = 0, z1 = 0;
669
670
   asm("mull %[y]"
671
        : "=a"(z0),"=d"(z1)
672
        : "a"(x), [y]"rm"(y)
673
        : "cc");
674
675
   asm(R"(
676
      addl %[z0],%[w0]
677
      adcl %[z1],%[w1]
678
      adcl $0,%[w2]
679
680
      addl %[z0],%[w0]
681
      adcl %[z1],%[w1]
682
      adcl $0,%[w2]
683
      )"
684
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
685
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
686
      : "cc");
687
688
#elif defined(BOTAN_MP_USE_X86_64_ASM)
689
690
3.71G
   word z0 = 0, z1 = 0;
691
692
3.71G
   asm("mulq %[y]"
693
3.71G
        : "=a"(z0),"=d"(z1)
694
3.71G
        : "a"(x), [y]"rm"(y)
695
3.71G
        : "cc");
696
697
3.71G
   asm(R"(
698
3.71G
      addq %[z0],%[w0]
699
3.71G
      adcq %[z1],%[w1]
700
3.71G
      adcq $0,%[w2]
701
3.71G
702
3.71G
      addq %[z0],%[w0]
703
3.71G
      adcq %[z1],%[w1]
704
3.71G
      adcq $0,%[w2]
705
3.71G
      )"
706
3.71G
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
707
3.71G
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
708
3.71G
      : "cc");
709
710
#else
711
   word carry = 0;
712
   x = word_madd2(x, y, &carry);
713
   y = carry;
714
715
   word top = (y >> (BOTAN_MP_WORD_BITS-1));
716
   y <<= 1;
717
   y |= (x >> (BOTAN_MP_WORD_BITS-1));
718
   x <<= 1;
719
720
   carry = 0;
721
   *w0 = word_add(*w0, x, &carry);
722
   *w1 = word_add(*w1, y, &carry);
723
   *w2 = word_add(*w2, top, &carry);
724
#endif
725
3.71G
   }
726
727
// Remove the asm helper macros so they do not leak out of this header
#ifdef ASM
  #undef ASM
  #undef DO_8_TIMES
  #undef ADD_OR_SUBTRACT
  #undef ADDSUB2_OP
  #undef ADDSUB3_OP
  #undef LINMUL_OP
  #undef MULADD_OP
#endif
736
737
}
738
739
#endif