/src/botan/build/include/botan/internal/mp_asmi.h
Line | Count | Source |
1 | | /* |
2 | | * Lowest Level MPI Algorithms |
3 | | * (C) 1999-2010 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #ifndef BOTAN_MP_ASM_INTERNAL_H_ |
10 | | #define BOTAN_MP_ASM_INTERNAL_H_ |
11 | | |
12 | | #include <botan/types.h> |
13 | | #include <botan/internal/mul128.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | #if (BOTAN_MP_WORD_BITS == 32) |
18 | | typedef uint64_t dword; |
19 | | #define BOTAN_HAS_MP_DWORD |
20 | | |
21 | | #elif (BOTAN_MP_WORD_BITS == 64) |
22 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
23 | | typedef uint128_t dword; |
24 | | #define BOTAN_HAS_MP_DWORD |
25 | | #else |
26 | | // No native 128 bit integer type; use mul64x64_128 instead |
27 | | #endif |
28 | | |
29 | | #else |
30 | | #error BOTAN_MP_WORD_BITS must be 32 or 64 |
31 | | #endif |
32 | | |
33 | | #if defined(BOTAN_USE_GCC_INLINE_ASM) |
34 | | |
35 | | #if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32) |
36 | | #define BOTAN_MP_USE_X86_32_ASM |
37 | | #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) |
38 | | #define BOTAN_MP_USE_X86_64_ASM |
39 | | #endif |
40 | | |
41 | | #endif |
42 | | |
43 | | /* |
44 | | * Word Multiply/Add |
45 | | */ |
46 | | inline word word_madd2(word a, word b, word* c) |
47 | 132M | { |
48 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
49 | | asm(R"( |
50 | | mull %[b] |
51 | | addl %[c],%[a] |
52 | | adcl $0,%[carry] |
53 | | )" |
54 | | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) |
55 | | : "0"(a), "1"(b), [c]"g"(*c) : "cc"); |
56 | | |
57 | | return a; |
58 | | |
59 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
60 | 132M | asm(R"( |
61 | 132M | mulq %[b] |
62 | 132M | addq %[c],%[a] |
63 | 132M | adcq $0,%[carry] |
64 | 132M | )" |
65 | 132M | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) |
66 | 132M | : "0"(a), "1"(b), [c]"g"(*c) : "cc"); |
67 | | |
68 | 132M | return a; |
69 | | |
70 | | #elif defined(BOTAN_HAS_MP_DWORD) |
71 | | const dword s = static_cast<dword>(a) * b + *c; |
72 | | *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
73 | | return static_cast<word>(s); |
74 | | #else |
75 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
76 | | |
77 | | word hi = 0, lo = 0; |
78 | | |
79 | | mul64x64_128(a, b, &lo, &hi); |
80 | | |
81 | | lo += *c; |
82 | | hi += (lo < *c); // carry? |
83 | | |
84 | | *c = hi; |
85 | | return lo; |
86 | | #endif |
87 | 132M | } |
88 | | |
89 | | /* |
90 | | * Word Multiply/Add |
91 | | */ |
92 | | inline word word_madd3(word a, word b, word c, word* d) |
93 | 306M | { |
94 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
95 | | asm(R"( |
96 | | mull %[b] |
97 | | |
98 | | addl %[c],%[a] |
99 | | adcl $0,%[carry] |
100 | | |
101 | | addl %[d],%[a] |
102 | | adcl $0,%[carry] |
103 | | )" |
104 | | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) |
105 | | : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); |
106 | | |
107 | | return a; |
108 | | |
109 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
110 | 306M | asm(R"( |
111 | 306M | mulq %[b] |
112 | 306M | addq %[c],%[a] |
113 | 306M | adcq $0,%[carry] |
114 | 306M | addq %[d],%[a] |
115 | 306M | adcq $0,%[carry] |
116 | 306M | )" |
117 | 306M | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) |
118 | 306M | : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); |
119 | | |
120 | 306M | return a; |
121 | | |
122 | | #elif defined(BOTAN_HAS_MP_DWORD) |
123 | | const dword s = static_cast<dword>(a) * b + c + *d; |
124 | | *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
125 | | return static_cast<word>(s); |
126 | | #else |
127 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
128 | | |
129 | | word hi = 0, lo = 0; |
130 | | |
131 | | mul64x64_128(a, b, &lo, &hi); |
132 | | |
133 | | lo += c; |
134 | | hi += (lo < c); // carry? |
135 | | |
136 | | lo += *d; |
137 | | hi += (lo < *d); // carry? |
138 | | |
139 | | *d = hi; |
140 | | return lo; |
141 | | #endif |
142 | 306M | } |
143 | | |
/*
* Building blocks for the inline assembly below.
*
* Each ASM("...") item expands to one line of assembler text; adjacent
* items concatenate into the single string handed to asm(). ASM itself
* is defined further down, which is fine because macros are expanded at
* the point of use.
*
*  ADDSUB2_OP(OP, I)  x[I] = x[I] OP y[I]          (%[carry] is scratch)
*  ADDSUB3_OP(OP, I)  z[I] = x[I] OP y[I]          (%[carry] is scratch)
*  LINMUL_OP(DST, I)  DST[I] = low(x[I]*y + carry), carry = high part
*  MULADD_OP(_, I)    z[I] = low(x[I]*y + carry + z[I]), carry = high part
*  ADD_OR_SUBTRACT(CODE)
*     Rotates %[carry] right by one so its low bit lands in the CPU carry
*     flag, runs CODE (expected to be adc/sbb instructions, with mov in
*     between which leaves flags alone), then turns the final carry flag
*     back into 0 or 1 via sbb/neg.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
        ASM("movl %[carry], 4*" #INDEX "(%[z])")

#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[z])")

#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorl %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbl %[carry],%[carry]")                    \
        ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
        ASM("movq %[carry], 8*" #INDEX "(%[z])")

#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[z])")

#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorq %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbq %[carry],%[carry]")                    \
        ASM("negq %[carry]")

#endif

#if defined(ADD_OR_SUBTRACT)

// One assembler statement per ASM() item
#define ASM(x) x "\n\t"

// Expands MACRO(ARG, i) for i = 0..7, i.e. one step per word of a block
#define DO_8_TIMES(MACRO, ARG) \
        MACRO(ARG, 0) \
        MACRO(ARG, 1) \
        MACRO(ARG, 2) \
        MACRO(ARG, 3) \
        MACRO(ARG, 4) \
        MACRO(ARG, 5) \
        MACRO(ARG, 6) \
        MACRO(ARG, 7)

#endif
231 | | |
232 | | /* |
233 | | * Word Addition |
234 | | */ |
235 | | inline word word_add(word x, word y, word* carry) |
236 | 2.11G | { |
237 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
238 | | asm( |
239 | | ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) |
240 | | : [x]"=r"(x), [carry]"=r"(*carry) |
241 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
242 | | : "cc"); |
243 | | return x; |
244 | | |
245 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
246 | | |
247 | 2.11G | asm( |
248 | 2.11G | ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) |
249 | 2.11G | : [x]"=r"(x), [carry]"=r"(*carry) |
250 | 2.11G | : "0"(x), [y]"rm"(y), "1"(*carry) |
251 | 2.11G | : "cc"); |
252 | 2.11G | return x; |
253 | | |
254 | | #else |
255 | | word z = x + y; |
256 | | word c1 = (z < x); |
257 | | z += *carry; |
258 | | *carry = c1 | (z < *carry); |
259 | | return z; |
260 | | #endif |
261 | 2.11G | } |
262 | | |
263 | | /* |
264 | | * Eight Word Block Addition, Two Argument |
265 | | */ |
266 | | inline word word8_add2(word x[8], const word y[8], word carry) |
267 | 9.92M | { |
268 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
269 | | asm( |
270 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) |
271 | | : [carry]"=r"(carry) |
272 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
273 | | : "cc", "memory"); |
274 | | return carry; |
275 | | |
276 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
277 | | |
278 | 9.92M | asm( |
279 | 9.92M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) |
280 | 9.92M | : [carry]"=r"(carry) |
281 | 9.92M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
282 | 9.92M | : "cc", "memory"); |
283 | 9.92M | return carry; |
284 | | |
285 | | #else |
286 | | x[0] = word_add(x[0], y[0], &carry); |
287 | | x[1] = word_add(x[1], y[1], &carry); |
288 | | x[2] = word_add(x[2], y[2], &carry); |
289 | | x[3] = word_add(x[3], y[3], &carry); |
290 | | x[4] = word_add(x[4], y[4], &carry); |
291 | | x[5] = word_add(x[5], y[5], &carry); |
292 | | x[6] = word_add(x[6], y[6], &carry); |
293 | | x[7] = word_add(x[7], y[7], &carry); |
294 | | return carry; |
295 | | #endif |
296 | 9.92M | } |
297 | | |
298 | | /* |
299 | | * Eight Word Block Addition, Three Argument |
300 | | */ |
301 | | inline word word8_add3(word z[8], const word x[8], |
302 | | const word y[8], word carry) |
303 | 286M | { |
304 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
305 | | asm( |
306 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) |
307 | | : [carry]"=r"(carry) |
308 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
309 | | : "cc", "memory"); |
310 | | return carry; |
311 | | |
312 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
313 | | |
314 | 286M | asm( |
315 | 286M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) |
316 | 286M | : [carry]"=r"(carry) |
317 | 286M | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
318 | 286M | : "cc", "memory"); |
319 | 286M | return carry; |
320 | | |
321 | | #else |
322 | | z[0] = word_add(x[0], y[0], &carry); |
323 | | z[1] = word_add(x[1], y[1], &carry); |
324 | | z[2] = word_add(x[2], y[2], &carry); |
325 | | z[3] = word_add(x[3], y[3], &carry); |
326 | | z[4] = word_add(x[4], y[4], &carry); |
327 | | z[5] = word_add(x[5], y[5], &carry); |
328 | | z[6] = word_add(x[6], y[6], &carry); |
329 | | z[7] = word_add(x[7], y[7], &carry); |
330 | | return carry; |
331 | | #endif |
332 | 286M | } |
333 | | |
334 | | /* |
335 | | * Word Subtraction |
336 | | */ |
337 | | inline word word_sub(word x, word y, word* carry) |
338 | 2.89G | { |
339 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
340 | | asm( |
341 | | ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) |
342 | | : [x]"=r"(x), [carry]"=r"(*carry) |
343 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
344 | | : "cc"); |
345 | | return x; |
346 | | |
347 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
348 | | |
349 | 2.89G | asm( |
350 | 2.89G | ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) |
351 | 2.89G | : [x]"=r"(x), [carry]"=r"(*carry) |
352 | 2.89G | : "0"(x), [y]"rm"(y), "1"(*carry) |
353 | 2.89G | : "cc"); |
354 | 2.89G | return x; |
355 | | |
356 | | #else |
357 | | word t0 = x - y; |
358 | | word c1 = (t0 > x); |
359 | | word z = t0 - *carry; |
360 | | *carry = c1 | (z > t0); |
361 | | return z; |
362 | | #endif |
363 | 2.89G | } |
364 | | |
365 | | /* |
366 | | * Eight Word Block Subtraction, Two Argument |
367 | | */ |
368 | | inline word word8_sub2(word x[8], const word y[8], word carry) |
369 | 26.1M | { |
370 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
371 | | asm( |
372 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) |
373 | | : [carry]"=r"(carry) |
374 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
375 | | : "cc", "memory"); |
376 | | return carry; |
377 | | |
378 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
379 | | |
380 | 26.1M | asm( |
381 | 26.1M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) |
382 | 26.1M | : [carry]"=r"(carry) |
383 | 26.1M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
384 | 26.1M | : "cc", "memory"); |
385 | 26.1M | return carry; |
386 | | |
387 | | #else |
388 | | x[0] = word_sub(x[0], y[0], &carry); |
389 | | x[1] = word_sub(x[1], y[1], &carry); |
390 | | x[2] = word_sub(x[2], y[2], &carry); |
391 | | x[3] = word_sub(x[3], y[3], &carry); |
392 | | x[4] = word_sub(x[4], y[4], &carry); |
393 | | x[5] = word_sub(x[5], y[5], &carry); |
394 | | x[6] = word_sub(x[6], y[6], &carry); |
395 | | x[7] = word_sub(x[7], y[7], &carry); |
396 | | return carry; |
397 | | #endif |
398 | 26.1M | } |
399 | | |
400 | | /* |
401 | | * Eight Word Block Subtraction, Two Argument |
402 | | */ |
403 | | inline word word8_sub2_rev(word x[8], const word y[8], word carry) |
404 | 2.51M | { |
405 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
406 | | asm( |
407 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
408 | | : [carry]"=r"(carry) |
409 | | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
410 | | : "cc", "memory"); |
411 | | return carry; |
412 | | |
413 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
414 | | |
415 | 2.51M | asm( |
416 | 2.51M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
417 | 2.51M | : [carry]"=r"(carry) |
418 | 2.51M | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
419 | 2.51M | : "cc", "memory"); |
420 | 2.51M | return carry; |
421 | | |
422 | | #else |
423 | | x[0] = word_sub(y[0], x[0], &carry); |
424 | | x[1] = word_sub(y[1], x[1], &carry); |
425 | | x[2] = word_sub(y[2], x[2], &carry); |
426 | | x[3] = word_sub(y[3], x[3], &carry); |
427 | | x[4] = word_sub(y[4], x[4], &carry); |
428 | | x[5] = word_sub(y[5], x[5], &carry); |
429 | | x[6] = word_sub(y[6], x[6], &carry); |
430 | | x[7] = word_sub(y[7], x[7], &carry); |
431 | | return carry; |
432 | | #endif |
433 | 2.51M | } |
434 | | |
435 | | /* |
436 | | * Eight Word Block Subtraction, Three Argument |
437 | | */ |
438 | | inline word word8_sub3(word z[8], const word x[8], |
439 | | const word y[8], word carry) |
440 | 603M | { |
441 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
442 | | asm( |
443 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
444 | | : [carry]"=r"(carry) |
445 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
446 | | : "cc", "memory"); |
447 | | return carry; |
448 | | |
449 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
450 | | |
451 | 603M | asm( |
452 | 603M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
453 | 603M | : [carry]"=r"(carry) |
454 | 603M | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
455 | 603M | : "cc", "memory"); |
456 | 603M | return carry; |
457 | | |
458 | | #else |
459 | | z[0] = word_sub(x[0], y[0], &carry); |
460 | | z[1] = word_sub(x[1], y[1], &carry); |
461 | | z[2] = word_sub(x[2], y[2], &carry); |
462 | | z[3] = word_sub(x[3], y[3], &carry); |
463 | | z[4] = word_sub(x[4], y[4], &carry); |
464 | | z[5] = word_sub(x[5], y[5], &carry); |
465 | | z[6] = word_sub(x[6], y[6], &carry); |
466 | | z[7] = word_sub(x[7], y[7], &carry); |
467 | | return carry; |
468 | | #endif |
469 | 603M | } |
470 | | |
471 | | /* |
472 | | * Eight Word Block Linear Multiplication |
473 | | */ |
474 | | inline word word8_linmul2(word x[8], word y, word carry) |
475 | 281M | { |
476 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
477 | | asm( |
478 | | DO_8_TIMES(LINMUL_OP, "x") |
479 | | : [carry]"=r"(carry) |
480 | | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
481 | | : "cc", "%eax", "%edx"); |
482 | | return carry; |
483 | | |
484 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
485 | | |
486 | 281M | asm( |
487 | 281M | DO_8_TIMES(LINMUL_OP, "x") |
488 | 281M | : [carry]"=r"(carry) |
489 | 281M | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
490 | 281M | : "cc", "%rax", "%rdx"); |
491 | 281M | return carry; |
492 | | |
493 | | #else |
494 | | x[0] = word_madd2(x[0], y, &carry); |
495 | | x[1] = word_madd2(x[1], y, &carry); |
496 | | x[2] = word_madd2(x[2], y, &carry); |
497 | | x[3] = word_madd2(x[3], y, &carry); |
498 | | x[4] = word_madd2(x[4], y, &carry); |
499 | | x[5] = word_madd2(x[5], y, &carry); |
500 | | x[6] = word_madd2(x[6], y, &carry); |
501 | | x[7] = word_madd2(x[7], y, &carry); |
502 | | return carry; |
503 | | #endif |
504 | 281M | } |
505 | | |
506 | | /* |
507 | | * Eight Word Block Linear Multiplication |
508 | | */ |
509 | | inline word word8_linmul3(word z[8], const word x[8], word y, word carry) |
510 | 23.5M | { |
511 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
512 | | asm( |
513 | | DO_8_TIMES(LINMUL_OP, "z") |
514 | | : [carry]"=r"(carry) |
515 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
516 | | : "cc", "%eax", "%edx"); |
517 | | return carry; |
518 | | |
519 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
520 | 23.5M | asm( |
521 | 23.5M | DO_8_TIMES(LINMUL_OP, "z") |
522 | 23.5M | : [carry]"=r"(carry) |
523 | 23.5M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
524 | 23.5M | : "cc", "%rax", "%rdx"); |
525 | 23.5M | return carry; |
526 | | |
527 | | #else |
528 | | z[0] = word_madd2(x[0], y, &carry); |
529 | | z[1] = word_madd2(x[1], y, &carry); |
530 | | z[2] = word_madd2(x[2], y, &carry); |
531 | | z[3] = word_madd2(x[3], y, &carry); |
532 | | z[4] = word_madd2(x[4], y, &carry); |
533 | | z[5] = word_madd2(x[5], y, &carry); |
534 | | z[6] = word_madd2(x[6], y, &carry); |
535 | | z[7] = word_madd2(x[7], y, &carry); |
536 | | return carry; |
537 | | #endif |
538 | 23.5M | } |
539 | | |
540 | | /* |
541 | | * Eight Word Block Multiply/Add |
542 | | */ |
543 | | inline word word8_madd3(word z[8], const word x[8], word y, word carry) |
544 | 134M | { |
545 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
546 | | asm( |
547 | | DO_8_TIMES(MULADD_OP, "") |
548 | | : [carry]"=r"(carry) |
549 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
550 | | : "cc", "%eax", "%edx"); |
551 | | return carry; |
552 | | |
553 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
554 | | |
555 | 134M | asm( |
556 | 134M | DO_8_TIMES(MULADD_OP, "") |
557 | 134M | : [carry]"=r"(carry) |
558 | 134M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
559 | 134M | : "cc", "%rax", "%rdx"); |
560 | 134M | return carry; |
561 | | |
562 | | #else |
563 | | z[0] = word_madd3(x[0], y, z[0], &carry); |
564 | | z[1] = word_madd3(x[1], y, z[1], &carry); |
565 | | z[2] = word_madd3(x[2], y, z[2], &carry); |
566 | | z[3] = word_madd3(x[3], y, z[3], &carry); |
567 | | z[4] = word_madd3(x[4], y, z[4], &carry); |
568 | | z[5] = word_madd3(x[5], y, z[5], &carry); |
569 | | z[6] = word_madd3(x[6], y, z[6], &carry); |
570 | | z[7] = word_madd3(x[7], y, z[7], &carry); |
571 | | return carry; |
572 | | #endif |
573 | 134M | } |
574 | | |
575 | | /* |
576 | | * Multiply-Add Accumulator |
577 | | * (w2,w1,w0) += x * y |
578 | | */ |
579 | | inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) |
580 | 14.4G | { |
581 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
582 | | word z0 = 0, z1 = 0; |
583 | | |
584 | | asm("mull %[y]" |
585 | | : "=a"(z0),"=d"(z1) |
586 | | : "a"(x), [y]"rm"(y) |
587 | | : "cc"); |
588 | | |
589 | | asm(R"( |
590 | | addl %[z0],%[w0] |
591 | | adcl %[z1],%[w1] |
592 | | adcl $0,%[w2] |
593 | | )" |
594 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
595 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
596 | | : "cc"); |
597 | | |
598 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
599 | | |
600 | 14.4G | word z0 = 0, z1 = 0; |
601 | | |
602 | 14.4G | asm("mulq %[y]" |
603 | 14.4G | : "=a"(z0),"=d"(z1) |
604 | 14.4G | : "a"(x), [y]"rm"(y) |
605 | 14.4G | : "cc"); |
606 | | |
607 | 14.4G | asm(R"( |
608 | 14.4G | addq %[z0],%[w0] |
609 | 14.4G | adcq %[z1],%[w1] |
610 | 14.4G | adcq $0,%[w2] |
611 | 14.4G | )" |
612 | 14.4G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
613 | 14.4G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
614 | 14.4G | : "cc"); |
615 | | |
616 | | #else |
617 | | word carry = *w0; |
618 | | *w0 = word_madd2(x, y, &carry); |
619 | | *w1 += carry; |
620 | | *w2 += (*w1 < carry); |
621 | | #endif |
622 | 14.4G | } |
623 | | |
624 | | /* |
625 | | * 3-word addition |
626 | | * (w2,w1,w0) += x |
627 | | */ |
628 | | inline void word3_add(word* w2, word* w1, word* w0, word x) |
629 | 1.18G | { |
630 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
631 | | asm(R"( |
632 | | addl %[x],%[w0] |
633 | | adcl $0,%[w1] |
634 | | adcl $0,%[w2] |
635 | | )" |
636 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
637 | | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
638 | | : "cc"); |
639 | | |
640 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
641 | | |
642 | 1.18G | asm(R"( |
643 | 1.18G | addq %[x],%[w0] |
644 | 1.18G | adcq $0,%[w1] |
645 | 1.18G | adcq $0,%[w2] |
646 | 1.18G | )" |
647 | 1.18G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
648 | 1.18G | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
649 | 1.18G | : "cc"); |
650 | | |
651 | | #else |
652 | | *w0 += x; |
653 | | word c1 = (*w0 < x); |
654 | | *w1 += c1; |
655 | | word c2 = (*w1 < c1); |
656 | | *w2 += c2; |
657 | | #endif |
658 | 1.18G | } |
659 | | |
660 | | /* |
661 | | * Multiply-Add Accumulator |
662 | | * (w2,w1,w0) += 2 * x * y |
663 | | */ |
664 | | inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) |
665 | 3.71G | { |
666 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
667 | | |
668 | | word z0 = 0, z1 = 0; |
669 | | |
670 | | asm("mull %[y]" |
671 | | : "=a"(z0),"=d"(z1) |
672 | | : "a"(x), [y]"rm"(y) |
673 | | : "cc"); |
674 | | |
675 | | asm(R"( |
676 | | addl %[z0],%[w0] |
677 | | adcl %[z1],%[w1] |
678 | | adcl $0,%[w2] |
679 | | |
680 | | addl %[z0],%[w0] |
681 | | adcl %[z1],%[w1] |
682 | | adcl $0,%[w2] |
683 | | )" |
684 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
685 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
686 | | : "cc"); |
687 | | |
688 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
689 | | |
690 | 3.71G | word z0 = 0, z1 = 0; |
691 | | |
692 | 3.71G | asm("mulq %[y]" |
693 | 3.71G | : "=a"(z0),"=d"(z1) |
694 | 3.71G | : "a"(x), [y]"rm"(y) |
695 | 3.71G | : "cc"); |
696 | | |
697 | 3.71G | asm(R"( |
698 | 3.71G | addq %[z0],%[w0] |
699 | 3.71G | adcq %[z1],%[w1] |
700 | 3.71G | adcq $0,%[w2] |
701 | 3.71G | |
702 | 3.71G | addq %[z0],%[w0] |
703 | 3.71G | adcq %[z1],%[w1] |
704 | 3.71G | adcq $0,%[w2] |
705 | 3.71G | )" |
706 | 3.71G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
707 | 3.71G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
708 | 3.71G | : "cc"); |
709 | | |
710 | | #else |
711 | | word carry = 0; |
712 | | x = word_madd2(x, y, &carry); |
713 | | y = carry; |
714 | | |
715 | | word top = (y >> (BOTAN_MP_WORD_BITS-1)); |
716 | | y <<= 1; |
717 | | y |= (x >> (BOTAN_MP_WORD_BITS-1)); |
718 | | x <<= 1; |
719 | | |
720 | | carry = 0; |
721 | | *w0 = word_add(*w0, x, &carry); |
722 | | *w1 = word_add(*w1, y, &carry); |
723 | | *w2 = word_add(*w2, top, &carry); |
724 | | #endif |
725 | 3.71G | } |
726 | | |
/*
* The assembly helper macros are private to this header; remove them so
* they do not leak into files that include it. ASM is only defined when
* one of the x86 assembly variants was selected.
*/
#ifdef ASM
   #undef ADDSUB2_OP
   #undef ADDSUB3_OP
   #undef LINMUL_OP
   #undef MULADD_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
736 | | |
737 | | } |
738 | | |
739 | | #endif |