Coverage Report

Created: 2022-01-14 08:07

/src/botan/build/include/botan/internal/mp_asmi.h
Line
Count
Source
1
/*
2
* Lowest Level MPI Algorithms
3
* (C) 1999-2010 Jack Lloyd
4
*     2006 Luca Piccarreta
5
*
6
* Botan is released under the Simplified BSD License (see license.txt)
7
*/
8
9
#ifndef BOTAN_MP_ASM_INTERNAL_H_
10
#define BOTAN_MP_ASM_INTERNAL_H_
11
12
#include <botan/types.h>
13
#include <botan/internal/mul128.h>
14
15
namespace Botan {
16
17
/*
* Select `dword`, an integer type twice as wide as `word`, for the
* portable multiply paths below. BOTAN_HAS_MP_DWORD is defined only
* when such a type is available: uint64_t for 32-bit words, or
* uint128_t for 64-bit words when BOTAN_TARGET_HAS_NATIVE_UINT128 is
* set. Any word size other than 32 or 64 is a compile-time error.
*/
#if (BOTAN_MP_WORD_BITS == 32)
18
  typedef uint64_t dword;
19
  #define BOTAN_HAS_MP_DWORD
20
21
#elif (BOTAN_MP_WORD_BITS == 64)
22
  #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
23
    typedef uint128_t dword;
24
    #define BOTAN_HAS_MP_DWORD
25
  #else
26
    // No native 128 bit integer type; use mul64x64_128 instead
27
  #endif
28
29
#else
30
  #error BOTAN_MP_WORD_BITS must be 32 or 64
31
#endif
32
33
/*
* Choose an inline assembly backend. One is enabled only when GCC-style
* inline asm is allowed and the target architecture matches the
* configured word size (x86-32 with 32-bit words, x86-64 with 64-bit
* words). If neither macro ends up defined, every function below uses
* its portable C++ path.
*/
#if defined(BOTAN_USE_GCC_INLINE_ASM)
34
35
  #if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32)
36
    #define BOTAN_MP_USE_X86_32_ASM
37
  #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64)
38
    #define BOTAN_MP_USE_X86_64_ASM
39
  #endif
40
41
#endif
42
43
/*
44
* Word Multiply/Add
*
* Computes the double-word value a * b + *c.
* Returns the low word and writes the high word back to *c, so *c acts
* as both the addend on entry and the carry on exit.
*
* Four implementations are selected at compile time: x86-32 asm,
* x86-64 asm, a double-width `dword` multiply, and a fallback for
* 64-bit words built on mul64x64_128 with explicit carry detection.
45
*/
46
inline word word_madd2(word a, word b, word* c)
47
217M
   {
48
#if defined(BOTAN_MP_USE_X86_32_ASM)
49
   asm(R"(
50
      mull %[b]
51
      addl %[c],%[a]
52
      adcl $0,%[carry]
53
      )"
54
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
55
      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
56
57
   return a;
58
59
#elif defined(BOTAN_MP_USE_X86_64_ASM)
60
217M
   asm(R"(
61
217M
      mulq %[b]
62
217M
      addq %[c],%[a]
63
217M
      adcq $0,%[carry]
64
217M
      )"
65
217M
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c)
66
217M
      : "0"(a), "1"(b), [c]"g"(*c) : "cc");
67
68
217M
   return a;
69
70
#elif defined(BOTAN_HAS_MP_DWORD)
71
   const dword s = static_cast<dword>(a) * b + *c;
72
   *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
73
   return static_cast<word>(s);
74
#else
75
   static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");
76
77
   word hi = 0, lo = 0;
78
79
   mul64x64_128(a, b, &lo, &hi);
80
81
   lo += *c;
82
   hi += (lo < *c); // carry?
83
84
   *c = hi;
85
   return lo;
86
#endif
87
217M
   }
88
89
/*
90
* Word Multiply/Add, with an extra addend
*
* Computes the double-word value a * b + c + *d.
* Returns the low word and writes the high word back to *d, so *d acts
* as both an addend on entry and the carry on exit.
*
* The same four compile-time implementations as word_madd2 are used;
* each adds c and *d in two separate steps and folds each carry into
* the high word.
91
*/
92
inline word word_madd3(word a, word b, word c, word* d)
93
413M
   {
94
#if defined(BOTAN_MP_USE_X86_32_ASM)
95
   asm(R"(
96
      mull %[b]
97
98
      addl %[c],%[a]
99
      adcl $0,%[carry]
100
101
      addl %[d],%[a]
102
      adcl $0,%[carry]
103
      )"
104
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
105
      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
106
107
   return a;
108
109
#elif defined(BOTAN_MP_USE_X86_64_ASM)
110
413M
   asm(R"(
111
413M
      mulq %[b]
112
413M
113
413M
      addq %[c],%[a]
114
413M
      adcq $0,%[carry]
115
413M
116
413M
      addq %[d],%[a]
117
413M
      adcq $0,%[carry]
118
413M
      )"
119
413M
      : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d)
120
413M
      : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc");
121
122
413M
   return a;
123
124
#elif defined(BOTAN_HAS_MP_DWORD)
125
   const dword s = static_cast<dword>(a) * b + c + *d;
126
   *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS);
127
   return static_cast<word>(s);
128
#else
129
   static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size");
130
131
   word hi = 0, lo = 0;
132
133
   mul64x64_128(a, b, &lo, &hi);
134
135
   lo += c;
136
   hi += (lo < c); // carry?
137
138
   lo += *d;
139
   hi += (lo < *d); // carry?
140
141
   *d = hi;
142
   return lo;
143
#endif
144
413M
   }
145
146
/*
* Building blocks for the x86 inline assembly in the word_* and word8_*
* functions. Each macro expands to a run of ASM("...") string
* fragments. ASM itself is defined after this block; that works because
* a macro body is only expanded where the macro is used.
*
* The fragments refer to the named asm operands [x], [y], [z] and
* [carry], which the asm statement using them must supply.
*
* ADDSUB2_OP(OPERATION, INDEX)
*    Loads y[INDEX] into the carry register, then applies OPERATION
*    from that register to x[INDEX] in memory.
* ADDSUB3_OP(OPERATION, INDEX)
*    Loads x[INDEX] into the carry register, applies OPERATION with
*    y[INDEX], and stores the result to z[INDEX].
* LINMUL_OP(WRITE_TO, INDEX)
*    Multiplies x[INDEX] by y and adds the incoming carry. The low word
*    is stored to WRITE_TO[INDEX] and the high word is left in carry.
*    Uses eax/edx (rax/rdx) directly.
* MULADD_OP(IGNORED, INDEX)
*    Like LINMUL_OP, but also adds z[INDEX] and always stores to
*    z[INDEX]. The first argument is not used.
* ADD_OR_SUBTRACT(CORE_CODE)
*    Rotates carry right by one bit so its low bit lands in the CPU
*    carry flag, runs CORE_CODE, then turns the resulting carry flag
*    back into 0 or 1 in carry (sbb of the register with itself,
*    followed by neg).
*
* The 32-bit variants use 4-byte element offsets and l-suffixed
* instructions; the 64-bit variants use 8-byte offsets and q-suffixed
* instructions.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)
147
148
#define ADDSUB2_OP(OPERATION, INDEX)                     \
149
        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
150
        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")   \
151
152
#define ADDSUB3_OP(OPERATION, INDEX)                     \
153
        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
154
        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
155
        ASM("movl %[carry], 4*" #INDEX "(%[z])")         \
156
157
#define LINMUL_OP(WRITE_TO, INDEX)                       \
158
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
159
        ASM("mull %[y]")                                 \
160
        ASM("addl %[carry],%%eax")                       \
161
        ASM("adcl $0,%%edx")                             \
162
        ASM("movl %%edx,%[carry]")                       \
163
        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
164
165
#define MULADD_OP(IGNORED, INDEX)                        \
166
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
167
        ASM("mull %[y]")                                 \
168
        ASM("addl %[carry],%%eax")                       \
169
        ASM("adcl $0,%%edx")                             \
170
        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
171
        ASM("adcl $0,%%edx")                             \
172
        ASM("movl %%edx,%[carry]")                       \
173
        ASM("movl %%eax, 4*" #INDEX " (%[z])")
174
175
#define ADD_OR_SUBTRACT(CORE_CODE)     \
176
        ASM("rorl %[carry]")           \
177
        CORE_CODE                      \
178
        ASM("sbbl %[carry],%[carry]")  \
179
        ASM("negl %[carry]")
180
181
#elif defined(BOTAN_MP_USE_X86_64_ASM)
182
183
#define ADDSUB2_OP(OPERATION, INDEX)                     \
184
        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
185
        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")   \
186
187
#define ADDSUB3_OP(OPERATION, INDEX)                     \
188
        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
189
        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
190
        ASM("movq %[carry], 8*" #INDEX "(%[z])")         \
191
192
#define LINMUL_OP(WRITE_TO, INDEX)                       \
193
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
194
        ASM("mulq %[y]")                                 \
195
        ASM("addq %[carry],%%rax")                       \
196
        ASM("adcq $0,%%rdx")                             \
197
        ASM("movq %%rdx,%[carry]")                       \
198
        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
199
200
#define MULADD_OP(IGNORED, INDEX)                        \
201
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
202
        ASM("mulq %[y]")                                 \
203
        ASM("addq %[carry],%%rax")                       \
204
        ASM("adcq $0,%%rdx")                             \
205
        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
206
        ASM("adcq $0,%%rdx")                             \
207
        ASM("movq %%rdx,%[carry]")                       \
208
        ASM("movq %%rax, 8*" #INDEX " (%[z])")
209
210
#define ADD_OR_SUBTRACT(CORE_CODE)     \
211
        ASM("rorq %[carry]")           \
212
        CORE_CODE                      \
213
        ASM("sbbq %[carry],%[carry]")  \
214
        ASM("negq %[carry]")
215
216
#endif
217
218
/*
* Helpers defined only when one of the asm backends defined
* ADD_OR_SUBTRACT.
*
* ASM(x) appends a newline and tab to the string literal x, so that
* adjacent fragments concatenate into one instruction per line.
* DO_8_TIMES(MACRO, ARG) expands MACRO(ARG, i) for i = 0..7, giving the
* fully unrolled body for an eight-word block.
*/
#if defined(ADD_OR_SUBTRACT)
219
220
#define ASM(x) x "\n\t"
221
222
#define DO_8_TIMES(MACRO, ARG) \
223
        MACRO(ARG, 0) \
224
        MACRO(ARG, 1) \
225
        MACRO(ARG, 2) \
226
        MACRO(ARG, 3) \
227
        MACRO(ARG, 4) \
228
        MACRO(ARG, 5) \
229
        MACRO(ARG, 6) \
230
        MACRO(ARG, 7)
231
232
#endif
233
234
/*
235
* Word Addition
*
* Returns x + y + *carry, truncated to one word, and stores the carry
* out of that addition (0 or 1) in *carry.
*
* The asm paths feed only the low bit of *carry into the add (see
* ADD_OR_SUBTRACT), while the portable path adds the whole value of
* *carry. NOTE(review): callers presumably always pass 0 or 1, for
* which both paths agree - confirm.
236
*/
237
inline word word_add(word x, word y, word* carry)
238
3.23G
   {
239
#if defined(BOTAN_MP_USE_X86_32_ASM)
240
   asm(
241
      ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
242
      : [x]"=r"(x), [carry]"=r"(*carry)
243
      : "0"(x), [y]"rm"(y), "1"(*carry)
244
      : "cc");
245
   return x;
246
247
#elif defined(BOTAN_MP_USE_X86_64_ASM)
248
249
3.23G
   asm(
250
3.23G
      ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
251
3.23G
      : [x]"=r"(x), [carry]"=r"(*carry)
252
3.23G
      : "0"(x), [y]"rm"(y), "1"(*carry)
253
3.23G
      : "cc");
254
3.23G
   return x;
255
256
#else
257
   word z = x + y;
258
   // Unsigned wraparound: the sum is smaller than an operand iff it overflowed
   word c1 = (z < x);
259
   z += *carry;
260
   // *carry still holds the incoming value here, so this detects the second overflow
   *carry = c1 | (z < *carry);
261
   return z;
262
#endif
263
3.23G
   }
264
265
/*
266
* Eight Word Block Addition, Two Argument
*
* In place: x[i] = x[i] + y[i] for i = 0..7, with the carry propagated
* from each word into the next, starting from the carry argument.
* Returns the carry out of the last word.
267
*/
268
inline word word8_add2(word x[8], const word y[8], word carry)
269
12.5M
   {
270
#if defined(BOTAN_MP_USE_X86_32_ASM)
271
   asm(
272
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
273
      : [carry]"=r"(carry)
274
      : [x]"r"(x), [y]"r"(y), "0"(carry)
275
      : "cc", "memory");
276
277
#elif defined(BOTAN_MP_USE_X86_64_ASM)
278
279
12.5M
   asm(
280
12.5M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
281
12.5M
      : [carry]"=r"(carry)
282
12.5M
      : [x]"r"(x), [y]"r"(y), "0"(carry)
283
12.5M
      : "cc", "memory");
284
285
#else
286
   x[0] = word_add(x[0], y[0], &carry);
287
   x[1] = word_add(x[1], y[1], &carry);
288
   x[2] = word_add(x[2], y[2], &carry);
289
   x[3] = word_add(x[3], y[3], &carry);
290
   x[4] = word_add(x[4], y[4], &carry);
291
   x[5] = word_add(x[5], y[5], &carry);
292
   x[6] = word_add(x[6], y[6], &carry);
293
   x[7] = word_add(x[7], y[7], &carry);
294
#endif
295
296
12.5M
   return carry;
297
12.5M
   }
298
299
/*
300
* Eight Word Block Addition, Three Argument
*
* z[i] = x[i] + y[i] for i = 0..7, with the carry propagated from each
* word into the next, starting from the carry argument.
* Returns the carry out of the last word.
301
*/
302
inline word word8_add3(word z[8], const word x[8],
303
                       const word y[8], word carry)
304
314M
   {
305
#if defined(BOTAN_MP_USE_X86_32_ASM)
306
   asm(
307
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
308
      : [carry]"=r"(carry)
309
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
310
      : "cc", "memory");
311
312
#elif defined(BOTAN_MP_USE_X86_64_ASM)
313
314M
   asm(
314
314M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
315
314M
      : [carry]"=r"(carry)
316
314M
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
317
314M
      : "cc", "memory");
318
319
#else
320
   z[0] = word_add(x[0], y[0], &carry);
321
   z[1] = word_add(x[1], y[1], &carry);
322
   z[2] = word_add(x[2], y[2], &carry);
323
   z[3] = word_add(x[3], y[3], &carry);
324
   z[4] = word_add(x[4], y[4], &carry);
325
   z[5] = word_add(x[5], y[5], &carry);
326
   z[6] = word_add(x[6], y[6], &carry);
327
   z[7] = word_add(x[7], y[7], &carry);
328
#endif
329
330
314M
   return carry;
331
314M
   }
332
333
/*
334
* Word Subtraction
*
* Returns x - y - *carry, truncated to one word, and stores the borrow
* out of that subtraction (0 or 1) in *carry.
*
* The asm paths feed only the low bit of *carry into the subtract (see
* ADD_OR_SUBTRACT), while the portable path subtracts the whole value
* of *carry. NOTE(review): callers presumably always pass 0 or 1, for
* which both paths agree - confirm.
335
*/
336
inline word word_sub(word x, word y, word* carry)
337
4.79G
   {
338
#if defined(BOTAN_MP_USE_X86_32_ASM)
339
   asm(
340
      ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
341
      : [x]"=r"(x), [carry]"=r"(*carry)
342
      : "0"(x), [y]"rm"(y), "1"(*carry)
343
      : "cc");
344
   return x;
345
346
#elif defined(BOTAN_MP_USE_X86_64_ASM)
347
4.79G
   asm(
348
4.79G
      ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
349
4.79G
      : [x]"=r"(x), [carry]"=r"(*carry)
350
4.79G
      : "0"(x), [y]"rm"(y), "1"(*carry)
351
4.79G
      : "cc");
352
4.79G
   return x;
353
354
#else
355
   word t0 = x - y;
356
   // Unsigned wraparound: the difference exceeds the minuend iff a borrow occurred
   word c1 = (t0 > x);
357
   word z = t0 - *carry;
358
   *carry = c1 | (z > t0);
359
   return z;
360
#endif
361
4.79G
   }
362
363
/*
364
* Eight Word Block Subtraction, Two Argument
*
* In place: x[i] = x[i] - y[i] for i = 0..7, with the borrow propagated
* from each word into the next, starting from the carry argument.
* Returns the borrow out of the last word.
365
*/
366
inline word word8_sub2(word x[8], const word y[8], word carry)
367
31.3M
   {
368
#if defined(BOTAN_MP_USE_X86_32_ASM)
369
   asm(
370
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
371
      : [carry]"=r"(carry)
372
      : [x]"r"(x), [y]"r"(y), "0"(carry)
373
      : "cc", "memory");
374
375
#elif defined(BOTAN_MP_USE_X86_64_ASM)
376
31.3M
   asm(
377
31.3M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
378
31.3M
      : [carry]"=r"(carry)
379
31.3M
      : [x]"r"(x), [y]"r"(y), "0"(carry)
380
31.3M
      : "cc", "memory");
381
382
#else
383
   x[0] = word_sub(x[0], y[0], &carry);
384
   x[1] = word_sub(x[1], y[1], &carry);
385
   x[2] = word_sub(x[2], y[2], &carry);
386
   x[3] = word_sub(x[3], y[3], &carry);
387
   x[4] = word_sub(x[4], y[4], &carry);
388
   x[5] = word_sub(x[5], y[5], &carry);
389
   x[6] = word_sub(x[6], y[6], &carry);
390
   x[7] = word_sub(x[7], y[7], &carry);
391
#endif
392
393
31.3M
   return carry;
394
31.3M
   }
395
396
/*
397
* Eight Word Block Reversed Subtraction, Two Argument
*
* In place, with the operands swapped relative to word8_sub2:
* x[i] = y[i] - x[i] for i = 0..7, with the borrow propagated from each
* word into the next, starting from the carry argument.
* Returns the borrow out of the last word.
*
* The asm paths reuse the three-operand ADDSUB3_OP by binding its [x]
* operand to y and both its [y] and [z] operands to x.
398
*/
399
inline word word8_sub2_rev(word x[8], const word y[8], word carry)
400
3.24M
   {
401
#if defined(BOTAN_MP_USE_X86_32_ASM)
402
   asm(
403
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
404
      : [carry]"=r"(carry)
405
      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
406
      : "cc", "memory");
407
408
#elif defined(BOTAN_MP_USE_X86_64_ASM)
409
3.24M
   asm(
410
3.24M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
411
3.24M
      : [carry]"=r"(carry)
412
3.24M
      : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
413
3.24M
      : "cc", "memory");
414
415
#else
416
   x[0] = word_sub(y[0], x[0], &carry);
417
   x[1] = word_sub(y[1], x[1], &carry);
418
   x[2] = word_sub(y[2], x[2], &carry);
419
   x[3] = word_sub(y[3], x[3], &carry);
420
   x[4] = word_sub(y[4], x[4], &carry);
421
   x[5] = word_sub(y[5], x[5], &carry);
422
   x[6] = word_sub(y[6], x[6], &carry);
423
   x[7] = word_sub(y[7], x[7], &carry);
424
#endif
425
426
3.24M
   return carry;
427
3.24M
   }
428
429
/*
430
* Eight Word Block Subtraction, Three Argument
*
* z[i] = x[i] - y[i] for i = 0..7, with the borrow propagated from each
* word into the next, starting from the carry argument.
* Returns the borrow out of the last word.
431
*/
432
inline word word8_sub3(word z[8], const word x[8],
433
                       const word y[8], word carry)
434
673M
   {
435
#if defined(BOTAN_MP_USE_X86_32_ASM)
436
   asm(
437
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
438
      : [carry]"=r"(carry)
439
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
440
      : "cc", "memory");
441
442
#elif defined(BOTAN_MP_USE_X86_64_ASM)
443
673M
   asm(
444
673M
      ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
445
673M
      : [carry]"=r"(carry)
446
673M
      : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
447
673M
      : "cc", "memory");
448
449
#else
450
   z[0] = word_sub(x[0], y[0], &carry);
451
   z[1] = word_sub(x[1], y[1], &carry);
452
   z[2] = word_sub(x[2], y[2], &carry);
453
   z[3] = word_sub(x[3], y[3], &carry);
454
   z[4] = word_sub(x[4], y[4], &carry);
455
   z[5] = word_sub(x[5], y[5], &carry);
456
   z[6] = word_sub(x[6], y[6], &carry);
457
   z[7] = word_sub(x[7], y[7], &carry);
458
#endif
459
460
673M
   return carry;
461
673M
   }
462
463
/*
464
* Eight Word Block Linear Multiplication, Two Argument
*
* In place: for i = 0..7, x[i] becomes the low word of x[i] * y + carry
* and carry becomes the high word, which feeds the next element.
* Returns the carry left over after the last word.
*
* The asm paths load from and store to x[] through a pointer operand
* that is not listed as a memory operand, so they declare a "memory"
* clobber. Without it the compiler may cache or reorder accesses to
* x[] across the asm statement.
465
*/
466
inline word word8_linmul2(word x[8], word y, word carry)
467
361M
   {
468
#if defined(BOTAN_MP_USE_X86_32_ASM)
469
   asm(
470
      DO_8_TIMES(LINMUL_OP, "x")
471
      : [carry]"=r"(carry)
472
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
473
      : "cc", "%eax", "%edx", "memory");
474
475
#elif defined(BOTAN_MP_USE_X86_64_ASM)
476
361M
   asm(
477
361M
      DO_8_TIMES(LINMUL_OP, "x")
478
361M
      : [carry]"=r"(carry)
479
361M
      : [x]"r"(x), [y]"rm"(y), "0"(carry)
480
361M
      : "cc", "%rax", "%rdx", "memory");
481
482
#else
483
   x[0] = word_madd2(x[0], y, &carry);
484
   x[1] = word_madd2(x[1], y, &carry);
485
   x[2] = word_madd2(x[2], y, &carry);
486
   x[3] = word_madd2(x[3], y, &carry);
487
   x[4] = word_madd2(x[4], y, &carry);
488
   x[5] = word_madd2(x[5], y, &carry);
489
   x[6] = word_madd2(x[6], y, &carry);
490
   x[7] = word_madd2(x[7], y, &carry);
491
#endif
492
493
361M
   return carry;
494
361M
   }
495
496
/*
497
* Eight Word Block Linear Multiplication, Three Argument
*
* For i = 0..7, z[i] becomes the low word of x[i] * y + carry and carry
* becomes the high word, which feeds the next element.
* Returns the carry left over after the last word.
*
* The asm paths read x[] and write z[] through pointer operands that
* are not listed as memory operands, so they declare a "memory"
* clobber. Without it the compiler may cache or reorder accesses to
* those arrays across the asm statement.
498
*/
499
inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
500
29.8M
   {
501
#if defined(BOTAN_MP_USE_X86_32_ASM)
502
   asm(
503
      DO_8_TIMES(LINMUL_OP, "z")
504
      : [carry]"=r"(carry)
505
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
506
      : "cc", "%eax", "%edx", "memory");
507
508
#elif defined(BOTAN_MP_USE_X86_64_ASM)
509
29.8M
   asm(
510
29.8M
      DO_8_TIMES(LINMUL_OP, "z")
511
29.8M
      : [carry]"=r"(carry)
512
29.8M
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
513
29.8M
      : "cc", "%rax", "%rdx", "memory");
514
515
#else
516
   z[0] = word_madd2(x[0], y, &carry);
517
   z[1] = word_madd2(x[1], y, &carry);
518
   z[2] = word_madd2(x[2], y, &carry);
519
   z[3] = word_madd2(x[3], y, &carry);
520
   z[4] = word_madd2(x[4], y, &carry);
521
   z[5] = word_madd2(x[5], y, &carry);
522
   z[6] = word_madd2(x[6], y, &carry);
523
   z[7] = word_madd2(x[7], y, &carry);
524
#endif
525
526
29.8M
   return carry;
527
29.8M
   }
528
529
/*
530
* Eight Word Block Multiply/Add
*
* For i = 0..7, z[i] becomes the low word of x[i] * y + z[i] + carry
* and carry becomes the high word, which feeds the next element.
* Returns the carry left over after the last word.
*
* The asm paths read x[] and read and write z[] through pointer
* operands that are not listed as memory operands, so they declare a
* "memory" clobber. Without it the compiler may cache or reorder
* accesses to those arrays across the asm statement.
531
*/
532
inline word word8_madd3(word z[8], const word x[8], word y, word carry)
533
150M
   {
534
#if defined(BOTAN_MP_USE_X86_32_ASM)
535
   asm(
536
      DO_8_TIMES(MULADD_OP, "")
537
      : [carry]"=r"(carry)
538
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
539
      : "cc", "%eax", "%edx", "memory");
540
541
#elif defined(BOTAN_MP_USE_X86_64_ASM)
542
150M
   asm(
543
150M
      DO_8_TIMES(MULADD_OP, "")
544
150M
      : [carry]"=r"(carry)
545
150M
      : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
546
150M
      : "cc", "%rax", "%rdx", "memory");
547
548
#else
549
   z[0] = word_madd3(x[0], y, z[0], &carry);
550
   z[1] = word_madd3(x[1], y, z[1], &carry);
551
   z[2] = word_madd3(x[2], y, z[2], &carry);
552
   z[3] = word_madd3(x[3], y, z[3], &carry);
553
   z[4] = word_madd3(x[4], y, z[4], &carry);
554
   z[5] = word_madd3(x[5], y, z[5], &carry);
555
   z[6] = word_madd3(x[6], y, z[6], &carry);
556
   z[7] = word_madd3(x[7], y, z[7], &carry);
557
#endif
558
559
150M
   return carry;
560
150M
   }
561
562
/*
563
* Multiply-Add Accumulator
564
* (w2,w1,w0) += x * y
*
* Treats *w2:*w1:*w0 as a three-word accumulator, with *w0 the least
* significant word, and adds the two-word product x * y to it in
* place. A carry out of *w2 is discarded.
*
* The asm paths first compute the product into (z1,z0) with one mul,
* then add it with an add/adc/adc chain.
565
*/
566
inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
567
17.3G
   {
568
#if defined(BOTAN_MP_USE_X86_32_ASM)
569
   word z0 = 0, z1 = 0;
570
571
   asm("mull %[y]"
572
        : "=a"(z0),"=d"(z1)
573
        : "a"(x), [y]"rm"(y)
574
        : "cc");
575
576
   asm(R"(
577
       addl %[z0],%[w0]
578
       adcl %[z1],%[w1]
579
       adcl $0,%[w2]
580
       )"
581
       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
582
       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
583
       : "cc");
584
585
#elif defined(BOTAN_MP_USE_X86_64_ASM)
586
17.3G
   word z0 = 0, z1 = 0;
587
588
17.3G
   asm("mulq %[y]"
589
17.3G
        : "=a"(z0),"=d"(z1)
590
17.3G
        : "a"(x), [y]"rm"(y)
591
17.3G
        : "cc");
592
593
17.3G
   asm(R"(
594
17.3G
       addq %[z0],%[w0]
595
17.3G
       adcq %[z1],%[w1]
596
17.3G
       adcq $0,%[w2]
597
17.3G
       )"
598
17.3G
       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
599
17.3G
       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
600
17.3G
       : "cc");
601
602
#else
603
   // Seed the multiply-add with the old *w0: word_madd2 then returns the
   // low word of x * y + *w0 and leaves the high word in carry
   word carry = *w0;
604
   *w0 = word_madd2(x, y, &carry);
605
   *w1 += carry;
606
   // *w1 wrapped around iff it is now smaller than what was added to it
   *w2 += (*w1 < carry);
607
#endif
608
17.3G
   }
609
610
/*
611
* 3-word addition
612
* (w2,w1,w0) += x
*
* Treats *w2:*w1:*w0 as a three-word accumulator, with *w0 the least
* significant word, and adds the single word x to it in place. The
* carry ripples from *w0 up through *w2; a carry out of *w2 is
* discarded.
613
*/
614
inline void word3_add(word* w2, word* w1, word* w0, word x)
615
1.44G
   {
616
#if defined(BOTAN_MP_USE_X86_32_ASM)
617
   asm(R"(
618
      addl %[x],%[w0]
619
      adcl $0,%[w1]
620
      adcl $0,%[w2]
621
      )"
622
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
623
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
624
      : "cc");
625
626
#elif defined(BOTAN_MP_USE_X86_64_ASM)
627
1.44G
   asm(R"(
628
1.44G
      addq %[x],%[w0]
629
1.44G
      adcq $0,%[w1]
630
1.44G
      adcq $0,%[w2]
631
1.44G
      )"
632
1.44G
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
633
1.44G
      : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
634
1.44G
      : "cc");
635
636
#else
637
   *w0 += x;
638
   // c1 and c2 are the carries out of *w0 and *w1, found via unsigned wraparound
   word c1 = (*w0 < x);
639
   *w1 += c1;
640
   word c2 = (*w1 < c1);
641
   *w2 += c2;
642
#endif
643
1.44G
   }
644
645
/*
646
* Multiply-Add Accumulator
647
* (w2,w1,w0) += 2 * x * y
*
* Treats *w2:*w1:*w0 as a three-word accumulator, with *w0 the least
* significant word, and adds twice the two-word product x * y to it in
* place. A carry out of *w2 is discarded.
*
* The asm paths compute the product once and run the add/adc/adc chain
* twice. The portable path doubles the product with a one-bit left
* shift across three words, then adds it once.
648
*/
649
inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
650
4.59G
   {
651
#if defined(BOTAN_MP_USE_X86_32_ASM)
652
   word z0 = 0, z1 = 0;
653
654
   asm("mull %[y]"
655
        : "=a"(z0),"=d"(z1)
656
        : "a"(x), [y]"rm"(y)
657
        : "cc");
658
659
   asm(R"(
660
      addl %[z0],%[w0]
661
      adcl %[z1],%[w1]
662
      adcl $0,%[w2]
663
664
      addl %[z0],%[w0]
665
      adcl %[z1],%[w1]
666
      adcl $0,%[w2]
667
      )"
668
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
669
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
670
      : "cc");
671
672
#elif defined(BOTAN_MP_USE_X86_64_ASM)
673
4.59G
   word z0 = 0, z1 = 0;
674
675
4.59G
   asm("mulq %[y]"
676
4.59G
        : "=a"(z0),"=d"(z1)
677
4.59G
        : "a"(x), [y]"rm"(y)
678
4.59G
        : "cc");
679
680
4.59G
   asm(R"(
681
4.59G
      addq %[z0],%[w0]
682
4.59G
      adcq %[z1],%[w1]
683
4.59G
      adcq $0,%[w2]
684
4.59G
685
4.59G
      addq %[z0],%[w0]
686
4.59G
      adcq %[z1],%[w1]
687
4.59G
      adcq $0,%[w2]
688
4.59G
      )"
689
4.59G
      : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
690
4.59G
      : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
691
4.59G
      : "cc");
692
693
#else
694
   // Reuse the parameters as the product: x becomes the low word, y the high word
   word carry = 0;
695
   x = word_madd2(x, y, &carry);
696
   y = carry;
697
698
   // Double the product by shifting (y,x) left one bit; top catches the bit shifted out of y
   word top = (y >> (BOTAN_MP_WORD_BITS-1));
699
   y <<= 1;
700
   y |= (x >> (BOTAN_MP_WORD_BITS-1));
701
   x <<= 1;
702
703
   // Add the three-word value (top,y,x) to the accumulator with a rippling carry
   carry = 0;
704
   *w0 = word_add(*w0, x, &carry);
705
   *w1 = word_add(*w1, y, &carry);
706
   *w2 = word_add(*w2, top, &carry);
707
#endif
708
4.59G
   }
709
710
/*
* Remove the asm helper macros so they do not leak into files that
* include this header. ASM is defined only when an asm backend was
* selected, so it serves as the guard for the whole group.
*/
#if defined(ASM)
711
  #undef ASM
712
  #undef DO_8_TIMES
713
  #undef ADD_OR_SUBTRACT
714
  #undef ADDSUB2_OP
715
  #undef ADDSUB3_OP
716
  #undef LINMUL_OP
717
  #undef MULADD_OP
718
#endif
719
720
}
721
722
#endif