/src/botan/build/include/botan/internal/mp_asmi.h
Line | Count | Source |
1 | | /* |
2 | | * Lowest Level MPI Algorithms |
3 | | * (C) 1999-2010 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #ifndef BOTAN_MP_ASM_INTERNAL_H_ |
10 | | #define BOTAN_MP_ASM_INTERNAL_H_ |
11 | | |
12 | | #include <botan/types.h> |
13 | | #include <botan/internal/mul128.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | #if (BOTAN_MP_WORD_BITS == 32) |
18 | | typedef uint64_t dword; |
19 | | #define BOTAN_HAS_MP_DWORD |
20 | | |
21 | | #elif (BOTAN_MP_WORD_BITS == 64) |
22 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
23 | | typedef uint128_t dword; |
24 | | #define BOTAN_HAS_MP_DWORD |
25 | | #else |
26 | | // No native 128 bit integer type; use mul64x64_128 instead |
27 | | #endif |
28 | | |
29 | | #else |
30 | | #error BOTAN_MP_WORD_BITS must be 32 or 64 |
31 | | #endif |
32 | | |
/*
* Enable the x86 inline assembly paths only when GCC-style inline asm is
* usable and the word size is 32 bits on x86-32 or 64 bits on x86-64.
*/
#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32)
#define BOTAN_MP_USE_X86_32_ASM
#elif defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64)
#define BOTAN_MP_USE_X86_64_ASM
#endif
42 | | |
43 | | /* |
44 | | * Word Multiply/Add |
45 | | */ |
46 | | inline word word_madd2(word a, word b, word* c) |
47 | 217M | { |
48 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
49 | | asm(R"( |
50 | | mull %[b] |
51 | | addl %[c],%[a] |
52 | | adcl $0,%[carry] |
53 | | )" |
54 | | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) |
55 | | : "0"(a), "1"(b), [c]"g"(*c) : "cc"); |
56 | | |
57 | | return a; |
58 | | |
59 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
60 | 217M | asm(R"( |
61 | 217M | mulq %[b] |
62 | 217M | addq %[c],%[a] |
63 | 217M | adcq $0,%[carry] |
64 | 217M | )" |
65 | 217M | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*c) |
66 | 217M | : "0"(a), "1"(b), [c]"g"(*c) : "cc"); |
67 | | |
68 | 217M | return a; |
69 | | |
70 | | #elif defined(BOTAN_HAS_MP_DWORD) |
71 | | const dword s = static_cast<dword>(a) * b + *c; |
72 | | *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
73 | | return static_cast<word>(s); |
74 | | #else |
75 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
76 | | |
77 | | word hi = 0, lo = 0; |
78 | | |
79 | | mul64x64_128(a, b, &lo, &hi); |
80 | | |
81 | | lo += *c; |
82 | | hi += (lo < *c); // carry? |
83 | | |
84 | | *c = hi; |
85 | | return lo; |
86 | | #endif |
87 | 217M | } |
88 | | |
89 | | /* |
90 | | * Word Multiply/Add |
91 | | */ |
92 | | inline word word_madd3(word a, word b, word c, word* d) |
93 | 413M | { |
94 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
95 | | asm(R"( |
96 | | mull %[b] |
97 | | |
98 | | addl %[c],%[a] |
99 | | adcl $0,%[carry] |
100 | | |
101 | | addl %[d],%[a] |
102 | | adcl $0,%[carry] |
103 | | )" |
104 | | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) |
105 | | : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); |
106 | | |
107 | | return a; |
108 | | |
109 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
110 | 413M | asm(R"( |
111 | 413M | mulq %[b] |
112 | 413M | |
113 | 413M | addq %[c],%[a] |
114 | 413M | adcq $0,%[carry] |
115 | 413M | |
116 | 413M | addq %[d],%[a] |
117 | 413M | adcq $0,%[carry] |
118 | 413M | )" |
119 | 413M | : [a]"=a"(a), [b]"=rm"(b), [carry]"=&d"(*d) |
120 | 413M | : "0"(a), "1"(b), [c]"g"(c), [d]"g"(*d) : "cc"); |
121 | | |
122 | 413M | return a; |
123 | | |
124 | | #elif defined(BOTAN_HAS_MP_DWORD) |
125 | | const dword s = static_cast<dword>(a) * b + c + *d; |
126 | | *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
127 | | return static_cast<word>(s); |
128 | | #else |
129 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
130 | | |
131 | | word hi = 0, lo = 0; |
132 | | |
133 | | mul64x64_128(a, b, &lo, &hi); |
134 | | |
135 | | lo += c; |
136 | | hi += (lo < c); // carry? |
137 | | |
138 | | lo += *d; |
139 | | hi += (lo < *d); // carry? |
140 | | |
141 | | *d = hi; |
142 | | return lo; |
143 | | #endif |
144 | 413M | } |
145 | | |
/*
* Building blocks for the x86 inline assembly below. Each helper expands to a
* sequence of ASM("...") strings; IDX selects which word of the 8 word block
* is processed and INSN is the add/subtract-with-carry mnemonic to emit.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

// x[IDX] = x[IDX] INSN y[IDX], using %[carry] as scratch
#define ADDSUB2_OP(INSN, IDX)                          \
   ASM("movl 4*" #IDX "(%[y]), %[carry]")              \
   ASM(INSN " %[carry], 4*" #IDX "(%[x])")

// z[IDX] = x[IDX] INSN y[IDX], using %[carry] as scratch
#define ADDSUB3_OP(INSN, IDX)                          \
   ASM("movl 4*" #IDX "(%[x]), %[carry]")              \
   ASM(INSN " 4*" #IDX "(%[y]), %[carry]")             \
   ASM("movl %[carry], 4*" #IDX "(%[z])")

// DEST[IDX] = low(x[IDX] * y + carry); carry = high(...)
#define LINMUL_OP(DEST, IDX)                           \
   ASM("movl 4*" #IDX "(%[x]),%%eax")                  \
   ASM("mull %[y]")                                    \
   ASM("addl %[carry],%%eax")                          \
   ASM("adcl $0,%%edx")                                \
   ASM("movl %%edx,%[carry]")                          \
   ASM("movl %%eax, 4*" #IDX "(%[" DEST "])")

// z[IDX] = low(x[IDX] * y + carry + z[IDX]); carry = high(...)
#define MULADD_OP(UNUSED, IDX)                         \
   ASM("movl 4*" #IDX "(%[x]),%%eax")                  \
   ASM("mull %[y]")                                    \
   ASM("addl %[carry],%%eax")                          \
   ASM("adcl $0,%%edx")                                \
   ASM("addl 4*" #IDX "(%[z]),%%eax")                  \
   ASM("adcl $0,%%edx")                                \
   ASM("movl %%edx,%[carry]")                          \
   ASM("movl %%eax, 4*" #IDX " (%[z])")

// Rotate %[carry] to seed the carry flag, run BODY, then turn the
// resulting carry flag back into a 0/1 value in %[carry]
#define ADD_OR_SUBTRACT(BODY)                          \
   ASM("rorl %[carry]")                                \
   BODY                                                \
   ASM("sbbl %[carry],%[carry]")                       \
   ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

// x[IDX] = x[IDX] INSN y[IDX], using %[carry] as scratch
#define ADDSUB2_OP(INSN, IDX)                          \
   ASM("movq 8*" #IDX "(%[y]), %[carry]")              \
   ASM(INSN " %[carry], 8*" #IDX "(%[x])")

// z[IDX] = x[IDX] INSN y[IDX], using %[carry] as scratch
#define ADDSUB3_OP(INSN, IDX)                          \
   ASM("movq 8*" #IDX "(%[x]), %[carry]")              \
   ASM(INSN " 8*" #IDX "(%[y]), %[carry]")             \
   ASM("movq %[carry], 8*" #IDX "(%[z])")

// DEST[IDX] = low(x[IDX] * y + carry); carry = high(...)
#define LINMUL_OP(DEST, IDX)                           \
   ASM("movq 8*" #IDX "(%[x]),%%rax")                  \
   ASM("mulq %[y]")                                    \
   ASM("addq %[carry],%%rax")                          \
   ASM("adcq $0,%%rdx")                                \
   ASM("movq %%rdx,%[carry]")                          \
   ASM("movq %%rax, 8*" #IDX "(%[" DEST "])")

// z[IDX] = low(x[IDX] * y + carry + z[IDX]); carry = high(...)
#define MULADD_OP(UNUSED, IDX)                         \
   ASM("movq 8*" #IDX "(%[x]),%%rax")                  \
   ASM("mulq %[y]")                                    \
   ASM("addq %[carry],%%rax")                          \
   ASM("adcq $0,%%rdx")                                \
   ASM("addq 8*" #IDX "(%[z]),%%rax")                  \
   ASM("adcq $0,%%rdx")                                \
   ASM("movq %%rdx,%[carry]")                          \
   ASM("movq %%rax, 8*" #IDX " (%[z])")

// Rotate %[carry] to seed the carry flag, run BODY, then turn the
// resulting carry flag back into a 0/1 value in %[carry]
#define ADD_OR_SUBTRACT(BODY)                          \
   ASM("rorq %[carry]")                                \
   BODY                                                \
   ASM("sbbq %[carry],%[carry]")                       \
   ASM("negq %[carry]")

#endif
217 | | |
#ifdef ADD_OR_SUBTRACT

// Terminate one assembly statement so that adjacent strings concatenate
// into a multi-line asm template
#define ASM(x) x "\n\t"

// Expand OP(ARG, i) once for each word index i = 0..7
#define DO_8_TIMES(OP, ARG) \
   OP(ARG, 0)               \
   OP(ARG, 1)               \
   OP(ARG, 2)               \
   OP(ARG, 3)               \
   OP(ARG, 4)               \
   OP(ARG, 5)               \
   OP(ARG, 6)               \
   OP(ARG, 7)

#endif
233 | | |
234 | | /* |
235 | | * Word Addition |
236 | | */ |
237 | | inline word word_add(word x, word y, word* carry) |
238 | 3.23G | { |
239 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
240 | | asm( |
241 | | ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) |
242 | | : [x]"=r"(x), [carry]"=r"(*carry) |
243 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
244 | | : "cc"); |
245 | | return x; |
246 | | |
247 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
248 | | |
249 | 3.23G | asm( |
250 | 3.23G | ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) |
251 | 3.23G | : [x]"=r"(x), [carry]"=r"(*carry) |
252 | 3.23G | : "0"(x), [y]"rm"(y), "1"(*carry) |
253 | 3.23G | : "cc"); |
254 | 3.23G | return x; |
255 | | |
256 | | #else |
257 | | word z = x + y; |
258 | | word c1 = (z < x); |
259 | | z += *carry; |
260 | | *carry = c1 | (z < *carry); |
261 | | return z; |
262 | | #endif |
263 | 3.23G | } |
264 | | |
265 | | /* |
266 | | * Eight Word Block Addition, Two Argument |
267 | | */ |
268 | | inline word word8_add2(word x[8], const word y[8], word carry) |
269 | 12.5M | { |
270 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
271 | | asm( |
272 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) |
273 | | : [carry]"=r"(carry) |
274 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
275 | | : "cc", "memory"); |
276 | | |
277 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
278 | | |
279 | 12.5M | asm( |
280 | 12.5M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) |
281 | 12.5M | : [carry]"=r"(carry) |
282 | 12.5M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
283 | 12.5M | : "cc", "memory"); |
284 | | |
285 | | #else |
286 | | x[0] = word_add(x[0], y[0], &carry); |
287 | | x[1] = word_add(x[1], y[1], &carry); |
288 | | x[2] = word_add(x[2], y[2], &carry); |
289 | | x[3] = word_add(x[3], y[3], &carry); |
290 | | x[4] = word_add(x[4], y[4], &carry); |
291 | | x[5] = word_add(x[5], y[5], &carry); |
292 | | x[6] = word_add(x[6], y[6], &carry); |
293 | | x[7] = word_add(x[7], y[7], &carry); |
294 | | #endif |
295 | | |
296 | 12.5M | return carry; |
297 | 12.5M | } |
298 | | |
299 | | /* |
300 | | * Eight Word Block Addition, Three Argument |
301 | | */ |
302 | | inline word word8_add3(word z[8], const word x[8], |
303 | | const word y[8], word carry) |
304 | 314M | { |
305 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
306 | | asm( |
307 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) |
308 | | : [carry]"=r"(carry) |
309 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
310 | | : "cc", "memory"); |
311 | | |
312 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
313 | 314M | asm( |
314 | 314M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) |
315 | 314M | : [carry]"=r"(carry) |
316 | 314M | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
317 | 314M | : "cc", "memory"); |
318 | | |
319 | | #else |
320 | | z[0] = word_add(x[0], y[0], &carry); |
321 | | z[1] = word_add(x[1], y[1], &carry); |
322 | | z[2] = word_add(x[2], y[2], &carry); |
323 | | z[3] = word_add(x[3], y[3], &carry); |
324 | | z[4] = word_add(x[4], y[4], &carry); |
325 | | z[5] = word_add(x[5], y[5], &carry); |
326 | | z[6] = word_add(x[6], y[6], &carry); |
327 | | z[7] = word_add(x[7], y[7], &carry); |
328 | | #endif |
329 | | |
330 | 314M | return carry; |
331 | 314M | } |
332 | | |
333 | | /* |
334 | | * Word Subtraction |
335 | | */ |
336 | | inline word word_sub(word x, word y, word* carry) |
337 | 4.79G | { |
338 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
339 | | asm( |
340 | | ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) |
341 | | : [x]"=r"(x), [carry]"=r"(*carry) |
342 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
343 | | : "cc"); |
344 | | return x; |
345 | | |
346 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
347 | 4.79G | asm( |
348 | 4.79G | ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) |
349 | 4.79G | : [x]"=r"(x), [carry]"=r"(*carry) |
350 | 4.79G | : "0"(x), [y]"rm"(y), "1"(*carry) |
351 | 4.79G | : "cc"); |
352 | 4.79G | return x; |
353 | | |
354 | | #else |
355 | | word t0 = x - y; |
356 | | word c1 = (t0 > x); |
357 | | word z = t0 - *carry; |
358 | | *carry = c1 | (z > t0); |
359 | | return z; |
360 | | #endif |
361 | 4.79G | } |
362 | | |
363 | | /* |
364 | | * Eight Word Block Subtraction, Two Argument |
365 | | */ |
366 | | inline word word8_sub2(word x[8], const word y[8], word carry) |
367 | 31.3M | { |
368 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
369 | | asm( |
370 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) |
371 | | : [carry]"=r"(carry) |
372 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
373 | | : "cc", "memory"); |
374 | | |
375 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
376 | 31.3M | asm( |
377 | 31.3M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) |
378 | 31.3M | : [carry]"=r"(carry) |
379 | 31.3M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
380 | 31.3M | : "cc", "memory"); |
381 | | |
382 | | #else |
383 | | x[0] = word_sub(x[0], y[0], &carry); |
384 | | x[1] = word_sub(x[1], y[1], &carry); |
385 | | x[2] = word_sub(x[2], y[2], &carry); |
386 | | x[3] = word_sub(x[3], y[3], &carry); |
387 | | x[4] = word_sub(x[4], y[4], &carry); |
388 | | x[5] = word_sub(x[5], y[5], &carry); |
389 | | x[6] = word_sub(x[6], y[6], &carry); |
390 | | x[7] = word_sub(x[7], y[7], &carry); |
391 | | #endif |
392 | | |
393 | 31.3M | return carry; |
394 | 31.3M | } |
395 | | |
396 | | /* |
397 | | * Eight Word Block Subtraction, Two Argument |
398 | | */ |
399 | | inline word word8_sub2_rev(word x[8], const word y[8], word carry) |
400 | 3.24M | { |
401 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
402 | | asm( |
403 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
404 | | : [carry]"=r"(carry) |
405 | | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
406 | | : "cc", "memory"); |
407 | | |
408 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
409 | 3.24M | asm( |
410 | 3.24M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
411 | 3.24M | : [carry]"=r"(carry) |
412 | 3.24M | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
413 | 3.24M | : "cc", "memory"); |
414 | | |
415 | | #else |
416 | | x[0] = word_sub(y[0], x[0], &carry); |
417 | | x[1] = word_sub(y[1], x[1], &carry); |
418 | | x[2] = word_sub(y[2], x[2], &carry); |
419 | | x[3] = word_sub(y[3], x[3], &carry); |
420 | | x[4] = word_sub(y[4], x[4], &carry); |
421 | | x[5] = word_sub(y[5], x[5], &carry); |
422 | | x[6] = word_sub(y[6], x[6], &carry); |
423 | | x[7] = word_sub(y[7], x[7], &carry); |
424 | | #endif |
425 | | |
426 | 3.24M | return carry; |
427 | 3.24M | } |
428 | | |
429 | | /* |
430 | | * Eight Word Block Subtraction, Three Argument |
431 | | */ |
432 | | inline word word8_sub3(word z[8], const word x[8], |
433 | | const word y[8], word carry) |
434 | 673M | { |
435 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
436 | | asm( |
437 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
438 | | : [carry]"=r"(carry) |
439 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
440 | | : "cc", "memory"); |
441 | | |
442 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
443 | 673M | asm( |
444 | 673M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
445 | 673M | : [carry]"=r"(carry) |
446 | 673M | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
447 | 673M | : "cc", "memory"); |
448 | | |
449 | | #else |
450 | | z[0] = word_sub(x[0], y[0], &carry); |
451 | | z[1] = word_sub(x[1], y[1], &carry); |
452 | | z[2] = word_sub(x[2], y[2], &carry); |
453 | | z[3] = word_sub(x[3], y[3], &carry); |
454 | | z[4] = word_sub(x[4], y[4], &carry); |
455 | | z[5] = word_sub(x[5], y[5], &carry); |
456 | | z[6] = word_sub(x[6], y[6], &carry); |
457 | | z[7] = word_sub(x[7], y[7], &carry); |
458 | | #endif |
459 | | |
460 | 673M | return carry; |
461 | 673M | } |
462 | | |
463 | | /* |
464 | | * Eight Word Block Linear Multiplication |
465 | | */ |
466 | | inline word word8_linmul2(word x[8], word y, word carry) |
467 | 361M | { |
468 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
469 | | asm( |
470 | | DO_8_TIMES(LINMUL_OP, "x") |
471 | | : [carry]"=r"(carry) |
472 | | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
473 | | : "cc", "%eax", "%edx"); |
474 | | |
475 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
476 | 361M | asm( |
477 | 361M | DO_8_TIMES(LINMUL_OP, "x") |
478 | 361M | : [carry]"=r"(carry) |
479 | 361M | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
480 | 361M | : "cc", "%rax", "%rdx"); |
481 | | |
482 | | #else |
483 | | x[0] = word_madd2(x[0], y, &carry); |
484 | | x[1] = word_madd2(x[1], y, &carry); |
485 | | x[2] = word_madd2(x[2], y, &carry); |
486 | | x[3] = word_madd2(x[3], y, &carry); |
487 | | x[4] = word_madd2(x[4], y, &carry); |
488 | | x[5] = word_madd2(x[5], y, &carry); |
489 | | x[6] = word_madd2(x[6], y, &carry); |
490 | | x[7] = word_madd2(x[7], y, &carry); |
491 | | #endif |
492 | | |
493 | 361M | return carry; |
494 | 361M | } |
495 | | |
496 | | /* |
497 | | * Eight Word Block Linear Multiplication |
498 | | */ |
499 | | inline word word8_linmul3(word z[8], const word x[8], word y, word carry) |
500 | 29.8M | { |
501 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
502 | | asm( |
503 | | DO_8_TIMES(LINMUL_OP, "z") |
504 | | : [carry]"=r"(carry) |
505 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
506 | | : "cc", "%eax", "%edx"); |
507 | | |
508 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
509 | 29.8M | asm( |
510 | 29.8M | DO_8_TIMES(LINMUL_OP, "z") |
511 | 29.8M | : [carry]"=r"(carry) |
512 | 29.8M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
513 | 29.8M | : "cc", "%rax", "%rdx"); |
514 | | |
515 | | #else |
516 | | z[0] = word_madd2(x[0], y, &carry); |
517 | | z[1] = word_madd2(x[1], y, &carry); |
518 | | z[2] = word_madd2(x[2], y, &carry); |
519 | | z[3] = word_madd2(x[3], y, &carry); |
520 | | z[4] = word_madd2(x[4], y, &carry); |
521 | | z[5] = word_madd2(x[5], y, &carry); |
522 | | z[6] = word_madd2(x[6], y, &carry); |
523 | | z[7] = word_madd2(x[7], y, &carry); |
524 | | #endif |
525 | | |
526 | 29.8M | return carry; |
527 | 29.8M | } |
528 | | |
529 | | /* |
530 | | * Eight Word Block Multiply/Add |
531 | | */ |
532 | | inline word word8_madd3(word z[8], const word x[8], word y, word carry) |
533 | 150M | { |
534 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
535 | | asm( |
536 | | DO_8_TIMES(MULADD_OP, "") |
537 | | : [carry]"=r"(carry) |
538 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
539 | | : "cc", "%eax", "%edx"); |
540 | | |
541 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
542 | 150M | asm( |
543 | 150M | DO_8_TIMES(MULADD_OP, "") |
544 | 150M | : [carry]"=r"(carry) |
545 | 150M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
546 | 150M | : "cc", "%rax", "%rdx"); |
547 | | |
548 | | #else |
549 | | z[0] = word_madd3(x[0], y, z[0], &carry); |
550 | | z[1] = word_madd3(x[1], y, z[1], &carry); |
551 | | z[2] = word_madd3(x[2], y, z[2], &carry); |
552 | | z[3] = word_madd3(x[3], y, z[3], &carry); |
553 | | z[4] = word_madd3(x[4], y, z[4], &carry); |
554 | | z[5] = word_madd3(x[5], y, z[5], &carry); |
555 | | z[6] = word_madd3(x[6], y, z[6], &carry); |
556 | | z[7] = word_madd3(x[7], y, z[7], &carry); |
557 | | #endif |
558 | | |
559 | 150M | return carry; |
560 | 150M | } |
561 | | |
562 | | /* |
563 | | * Multiply-Add Accumulator |
564 | | * (w2,w1,w0) += x * y |
565 | | */ |
566 | | inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) |
567 | 17.3G | { |
568 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
569 | | word z0 = 0, z1 = 0; |
570 | | |
571 | | asm("mull %[y]" |
572 | | : "=a"(z0),"=d"(z1) |
573 | | : "a"(x), [y]"rm"(y) |
574 | | : "cc"); |
575 | | |
576 | | asm(R"( |
577 | | addl %[z0],%[w0] |
578 | | adcl %[z1],%[w1] |
579 | | adcl $0,%[w2] |
580 | | )" |
581 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
582 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
583 | | : "cc"); |
584 | | |
585 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
586 | 17.3G | word z0 = 0, z1 = 0; |
587 | | |
588 | 17.3G | asm("mulq %[y]" |
589 | 17.3G | : "=a"(z0),"=d"(z1) |
590 | 17.3G | : "a"(x), [y]"rm"(y) |
591 | 17.3G | : "cc"); |
592 | | |
593 | 17.3G | asm(R"( |
594 | 17.3G | addq %[z0],%[w0] |
595 | 17.3G | adcq %[z1],%[w1] |
596 | 17.3G | adcq $0,%[w2] |
597 | 17.3G | )" |
598 | 17.3G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
599 | 17.3G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
600 | 17.3G | : "cc"); |
601 | | |
602 | | #else |
603 | | word carry = *w0; |
604 | | *w0 = word_madd2(x, y, &carry); |
605 | | *w1 += carry; |
606 | | *w2 += (*w1 < carry); |
607 | | #endif |
608 | 17.3G | } |
609 | | |
610 | | /* |
611 | | * 3-word addition |
612 | | * (w2,w1,w0) += x |
613 | | */ |
614 | | inline void word3_add(word* w2, word* w1, word* w0, word x) |
615 | 1.44G | { |
616 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
617 | | asm(R"( |
618 | | addl %[x],%[w0] |
619 | | adcl $0,%[w1] |
620 | | adcl $0,%[w2] |
621 | | )" |
622 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
623 | | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
624 | | : "cc"); |
625 | | |
626 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
627 | 1.44G | asm(R"( |
628 | 1.44G | addq %[x],%[w0] |
629 | 1.44G | adcq $0,%[w1] |
630 | 1.44G | adcq $0,%[w2] |
631 | 1.44G | )" |
632 | 1.44G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
633 | 1.44G | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
634 | 1.44G | : "cc"); |
635 | | |
636 | | #else |
637 | | *w0 += x; |
638 | | word c1 = (*w0 < x); |
639 | | *w1 += c1; |
640 | | word c2 = (*w1 < c1); |
641 | | *w2 += c2; |
642 | | #endif |
643 | 1.44G | } |
644 | | |
645 | | /* |
646 | | * Multiply-Add Accumulator |
647 | | * (w2,w1,w0) += 2 * x * y |
648 | | */ |
649 | | inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) |
650 | 4.59G | { |
651 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
652 | | word z0 = 0, z1 = 0; |
653 | | |
654 | | asm("mull %[y]" |
655 | | : "=a"(z0),"=d"(z1) |
656 | | : "a"(x), [y]"rm"(y) |
657 | | : "cc"); |
658 | | |
659 | | asm(R"( |
660 | | addl %[z0],%[w0] |
661 | | adcl %[z1],%[w1] |
662 | | adcl $0,%[w2] |
663 | | |
664 | | addl %[z0],%[w0] |
665 | | adcl %[z1],%[w1] |
666 | | adcl $0,%[w2] |
667 | | )" |
668 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
669 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
670 | | : "cc"); |
671 | | |
672 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
673 | 4.59G | word z0 = 0, z1 = 0; |
674 | | |
675 | 4.59G | asm("mulq %[y]" |
676 | 4.59G | : "=a"(z0),"=d"(z1) |
677 | 4.59G | : "a"(x), [y]"rm"(y) |
678 | 4.59G | : "cc"); |
679 | | |
680 | 4.59G | asm(R"( |
681 | 4.59G | addq %[z0],%[w0] |
682 | 4.59G | adcq %[z1],%[w1] |
683 | 4.59G | adcq $0,%[w2] |
684 | 4.59G | |
685 | 4.59G | addq %[z0],%[w0] |
686 | 4.59G | adcq %[z1],%[w1] |
687 | 4.59G | adcq $0,%[w2] |
688 | 4.59G | )" |
689 | 4.59G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
690 | 4.59G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
691 | 4.59G | : "cc"); |
692 | | |
693 | | #else |
694 | | word carry = 0; |
695 | | x = word_madd2(x, y, &carry); |
696 | | y = carry; |
697 | | |
698 | | word top = (y >> (BOTAN_MP_WORD_BITS-1)); |
699 | | y <<= 1; |
700 | | y |= (x >> (BOTAN_MP_WORD_BITS-1)); |
701 | | x <<= 1; |
702 | | |
703 | | carry = 0; |
704 | | *w0 = word_add(*w0, x, &carry); |
705 | | *w1 = word_add(*w1, y, &carry); |
706 | | *w2 = word_add(*w2, top, &carry); |
707 | | #endif |
708 | 4.59G | } |
709 | | |
// Do not leak the assembly helper macros to code including this header
#ifdef ASM
   #undef ADDSUB2_OP
   #undef ADDSUB3_OP
   #undef LINMUL_OP
   #undef MULADD_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
719 | | |
720 | | } |
721 | | |
722 | | #endif |