/src/botan/build/include/botan/internal/mp_asmi.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Lowest Level MPI Algorithms |
3 | | * (C) 1999-2010 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #ifndef BOTAN_MP_ASM_INTERNAL_H_ |
10 | | #define BOTAN_MP_ASM_INTERNAL_H_ |
11 | | |
12 | | #include <botan/types.h> |
13 | | #include <botan/internal/mul128.h> |
14 | | |
15 | | namespace Botan { |
16 | | |
17 | | #if(BOTAN_MP_WORD_BITS == 32) /* Pick BOTAN_MP_DWORD, an integer type with twice BOTAN_MP_WORD_BITS bits, when the compiler offers one */ |
18 | | #define BOTAN_MP_DWORD uint64_t |
19 | | |
20 | | #elif(BOTAN_MP_WORD_BITS == 64) |
21 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
22 | | #define BOTAN_MP_DWORD uint128_t |
23 | | #else |
24 | | // No native 128 bit integer type; BOTAN_MP_DWORD stays undefined and the portable code paths below use mul64x64_128 instead |
25 | | #endif |
26 | | |
27 | | #else |
28 | | #error BOTAN_MP_WORD_BITS must be 32 or 64 |
29 | | #endif |
30 | | |
31 | | #if defined(BOTAN_USE_GCC_INLINE_ASM) /* Enable an x86 inline asm flavor only when GCC style asm is available and the target architecture matches the word size */ |
32 | | |
33 | | #if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32) |
34 | | #define BOTAN_MP_USE_X86_32_ASM |
35 | | #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64) |
36 | | #define BOTAN_MP_USE_X86_64_ASM |
37 | | #endif |
38 | | |
39 | | #endif |
40 | | |
41 | | /* |
42 | | * Word Multiply/Add |
| | * |
| | * Computes the two-word value a*b + *c, returns its low word and stores |
| | * its high word in *c. *c is therefore the addend on entry and the |
| | * carry out on exit. a*b + *c always fits in two words, so adding the |
| | * carry into the high word cannot overflow. |
| | * |
| | * One of four implementations is chosen by the preprocessor: x86-32 asm, |
| | * x86-64 asm, a BOTAN_MP_DWORD wide multiply, or mul64x64_128 followed by |
| | * a manual carry check. In the asm variants mul leaves the product in the |
| | * d:a register pair, so *c is bound to the d register as an early-clobber |
| | * output (=&d) and its incoming value is passed separately as [c]. |
43 | | */ |
44 | 0 | inline word word_madd2(word a, word b, word* c) { |
45 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
46 | | asm(R"( |
47 | | mull %[b] |
48 | | addl %[c],%[a] |
49 | | adcl $0,%[carry] |
50 | | )" |
51 | | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*c) |
52 | | : "0"(a), "1"(b), [c] "g"(*c) |
53 | | : "cc"); |
54 | | |
55 | | return a; |
56 | | |
57 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
58 | 0 | asm(R"( |
59 | 0 | mulq %[b] |
60 | 0 | addq %[c],%[a] |
61 | 0 | adcq $0,%[carry] |
62 | 0 | )" |
63 | 0 | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*c) |
64 | 0 | : "0"(a), "1"(b), [c] "g"(*c) |
65 | 0 | : "cc"); |
66 | |
|
67 | 0 | return a; |
68 | |
|
69 | | #elif defined(BOTAN_MP_DWORD) |
70 | | const BOTAN_MP_DWORD s = static_cast<BOTAN_MP_DWORD>(a) * b + *c; |
71 | | *c = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
72 | | return static_cast<word>(s); |
73 | | #else |
74 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
75 | | |
76 | | word hi = 0, lo = 0; |
77 | | |
78 | | mul64x64_128(a, b, &lo, &hi); |
79 | | |
80 | | lo += *c; |
81 | | hi += (lo < *c); // carry? |
82 | | |
83 | | *c = hi; |
84 | | return lo; |
85 | | #endif |
86 | 0 | } |
87 | | |
88 | | /* |
89 | | * Word Multiply/Add |
| | * |
| | * Three-addend variant: computes the two-word value a*b + c + *d, returns |
| | * its low word and stores its high word in *d. The sum always fits in two |
| | * words, since (W-1)*(W-1) + 2*(W-1) = W*W - 1 for W = 2^BOTAN_MP_WORD_BITS. |
| | * |
| | * The implementation is chosen exactly as in word_madd2; each addend is |
| | * folded in with its own add / add-with-carry pair (asm) or its own |
| | * wraparound check (portable fallback). |
90 | | */ |
91 | 908 | inline word word_madd3(word a, word b, word c, word* d) { |
92 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
93 | | asm(R"( |
94 | | mull %[b] |
95 | | |
96 | | addl %[c],%[a] |
97 | | adcl $0,%[carry] |
98 | | |
99 | | addl %[d],%[a] |
100 | | adcl $0,%[carry] |
101 | | )" |
102 | | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*d) |
103 | | : "0"(a), "1"(b), [c] "g"(c), [d] "g"(*d) |
104 | | : "cc"); |
105 | | |
106 | | return a; |
107 | | |
108 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
109 | 908 | asm(R"( |
110 | 908 | mulq %[b] |
111 | 908 | |
112 | 908 | addq %[c],%[a] |
113 | 908 | adcq $0,%[carry] |
114 | 908 | |
115 | 908 | addq %[d],%[a] |
116 | 908 | adcq $0,%[carry] |
117 | 908 | )" |
118 | 908 | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*d) |
119 | 908 | : "0"(a), "1"(b), [c] "g"(c), [d] "g"(*d) |
120 | 908 | : "cc"); |
121 | | |
122 | 908 | return a; |
123 | | |
124 | | #elif defined(BOTAN_MP_DWORD) |
125 | | const BOTAN_MP_DWORD s = static_cast<BOTAN_MP_DWORD>(a) * b + c + *d; |
126 | | *d = static_cast<word>(s >> BOTAN_MP_WORD_BITS); |
127 | | return static_cast<word>(s); |
128 | | #else |
129 | | static_assert(BOTAN_MP_WORD_BITS == 64, "Unexpected word size"); |
130 | | |
131 | | word hi = 0, lo = 0; |
132 | | |
133 | | mul64x64_128(a, b, &lo, &hi); |
134 | | |
135 | | lo += c; |
136 | | hi += (lo < c); // carry? |
137 | | |
138 | | lo += *d; |
139 | | hi += (lo < *d); // carry? |
140 | | |
141 | | *d = hi; |
142 | | return lo; |
143 | | #endif |
144 | 908 | } |
145 | | |
146 | | #if defined(BOTAN_MP_USE_X86_32_ASM) /* Asm template fragments for the eight-word routines below; 32-bit flavor: l-suffixed instructions, eax/edx, 4-byte element stride */ |
147 | | |
148 | | #define ADDSUB2_OP(OPERATION, INDEX) /* load y[INDEX] into the carry register, then apply OPERATION (adc or sbb) from it into x[INDEX] in memory */ \ |
149 | | ASM("movl 4*" #INDEX "(%[y]), %[carry]") \ |
150 | | ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") |
151 | | |
152 | | #define ADDSUB3_OP(OPERATION, INDEX) /* load x[INDEX] into the carry register, apply OPERATION with y[INDEX], store the result to z[INDEX] */ \ |
153 | | ASM("movl 4*" #INDEX "(%[x]), %[carry]") \ |
154 | | ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \ |
155 | | ASM("movl %[carry], 4*" #INDEX "(%[z])") |
156 | | |
157 | | #define LINMUL_OP(WRITE_TO, INDEX) /* edx:eax = x[INDEX] * y + carry; the low word is stored to WRITE_TO[INDEX], the high word becomes the new carry */ \ |
158 | | ASM("movl 4*" #INDEX "(%[x]),%%eax") \ |
159 | | ASM("mull %[y]") \ |
160 | | ASM("addl %[carry],%%eax") \ |
161 | | ASM("adcl $0,%%edx") \ |
162 | | ASM("movl %%edx,%[carry]") \ |
163 | | ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])") |
164 | | |
165 | | #define MULADD_OP(IGNORED, INDEX) /* edx:eax = x[INDEX] * y + carry + z[INDEX]; the low word is stored back to z[INDEX], the high word becomes the new carry; the first argument is unused */ \ |
166 | | ASM("movl 4*" #INDEX "(%[x]),%%eax") \ |
167 | | ASM("mull %[y]") \ |
168 | | ASM("addl %[carry],%%eax") \ |
169 | | ASM("adcl $0,%%edx") \ |
170 | | ASM("addl 4*" #INDEX "(%[z]),%%eax") \ |
171 | | ASM("adcl $0,%%edx") \ |
172 | | ASM("movl %%edx,%[carry]") \ |
173 | | ASM("movl %%eax, 4*" #INDEX " (%[z])") |
174 | | |
175 | | #define ADD_OR_SUBTRACT(CORE_CODE) /* ror by one moves bit 0 of carry into the CPU carry flag before CORE_CODE runs; afterwards sbb of the register with itself followed by neg turns the final flag back into 0 or 1 */ \ |
176 | | ASM("rorl %[carry]") \ |
177 | | CORE_CODE \ |
178 | | ASM("sbbl %[carry],%[carry]") \ |
179 | | ASM("negl %[carry]") |
180 | | |
181 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) /* 64-bit flavor: same structure as above with q-suffixed instructions, rax/rdx and 8-byte element stride */ |
182 | | |
183 | | #define ADDSUB2_OP(OPERATION, INDEX) \ |
184 | | ASM("movq 8*" #INDEX "(%[y]), %[carry]") \ |
185 | | ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") |
186 | | |
187 | | #define ADDSUB3_OP(OPERATION, INDEX) \ |
188 | | ASM("movq 8*" #INDEX "(%[x]), %[carry]") \ |
189 | | ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \ |
190 | | ASM("movq %[carry], 8*" #INDEX "(%[z])") |
191 | | |
192 | | #define LINMUL_OP(WRITE_TO, INDEX) \ |
193 | | ASM("movq 8*" #INDEX "(%[x]),%%rax") \ |
194 | | ASM("mulq %[y]") \ |
195 | | ASM("addq %[carry],%%rax") \ |
196 | | ASM("adcq $0,%%rdx") \ |
197 | | ASM("movq %%rdx,%[carry]") \ |
198 | | ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])") |
199 | | |
200 | | #define MULADD_OP(IGNORED, INDEX) \ |
201 | | ASM("movq 8*" #INDEX "(%[x]),%%rax") \ |
202 | | ASM("mulq %[y]") \ |
203 | | ASM("addq %[carry],%%rax") \ |
204 | | ASM("adcq $0,%%rdx") \ |
205 | | ASM("addq 8*" #INDEX "(%[z]),%%rax") \ |
206 | | ASM("adcq $0,%%rdx") \ |
207 | | ASM("movq %%rdx,%[carry]") \ |
208 | | ASM("movq %%rax, 8*" #INDEX " (%[z])") |
209 | | |
210 | | #define ADD_OR_SUBTRACT(CORE_CODE) \ |
211 | | ASM("rorq %[carry]") \ |
212 | | CORE_CODE \ |
213 | | ASM("sbbq %[carry],%[carry]") \ |
214 | | ASM("negq %[carry]") |
215 | | |
216 | | #endif |
217 | | |
218 | | #if defined(ADD_OR_SUBTRACT) /* These helpers are only defined when one of the asm flavors above was selected */ |
219 | | |
220 | | #define ASM(x) x "\n\t" /* append a newline and tab to one instruction so that adjacent string literals concatenate into a single asm template */ |
221 | | |
222 | | #define DO_8_TIMES(MACRO, ARG) /* expand MACRO(ARG, i) once for each i from 0 to 7 */ \ |
223 | | MACRO(ARG, 0) \ |
224 | | MACRO(ARG, 1) \ |
225 | | MACRO(ARG, 2) \ |
226 | | MACRO(ARG, 3) \ |
227 | | MACRO(ARG, 4) \ |
228 | | MACRO(ARG, 5) \ |
229 | | MACRO(ARG, 6) \ |
230 | | MACRO(ARG, 7) |
231 | | |
232 | | #endif |
233 | | |
234 | | /* |
235 | | * Word Addition |
| | * |
| | * Returns the low word of x + y + *carry and writes the carry out |
| | * (0 or 1) back to *carry. |
| | * |
| | * *carry is expected to be 0 or 1 on entry: the asm variants only move |
| | * bit 0 of it into the CPU carry flag, while the portable variant adds |
| | * the whole value. |
| | * |
| | * NOTE(review): in the asm variants the carry register is rotated before |
| | * y is read, yet the carry output is not marked early-clobber. Confirm |
| | * the compiler can never place y in that same register, for example when |
| | * y and *carry are known to hold the same value. |
236 | | */ |
237 | 0 | inline word word_add(word x, word y, word* carry) { |
238 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
239 | | asm(ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) |
240 | | : [x] "=r"(x), [carry] "=r"(*carry) |
241 | | : "0"(x), [y] "rm"(y), "1"(*carry) |
242 | | : "cc"); |
243 | | return x; |
244 | | |
245 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
246 | |
|
247 | 0 | asm(ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) |
248 | 0 | : [x] "=r"(x), [carry] "=r"(*carry) |
249 | 0 | : "0"(x), [y] "rm"(y), "1"(*carry) |
250 | 0 | : "cc"); |
251 | 0 | return x; |
252 | |
|
253 | | #else |
254 | | word z = x + y; |
255 | | word c1 = (z < x); |
256 | | z += *carry; |
257 | | *carry = c1 | (z < *carry); |
258 | | return z; |
259 | | #endif |
260 | 0 | } |
261 | | |
262 | | /* |
263 | | * Eight Word Block Addition, Two Argument |
| | * |
| | * In place: x[i] = x[i] + y[i] + carry for i = 0..7, with the carry |
| | * propagated from each word to the next. Returns the carry out of the |
| | * last word. |
| | * |
| | * The asm variants write x[] through a pointer that is only an input |
| | * operand, which is why memory is listed as clobbered. |
264 | | */ |
265 | 0 | inline word word8_add2(word x[8], const word y[8], word carry) { |
266 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
267 | | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) |
268 | | : [carry] "=r"(carry) |
269 | | : [x] "r"(x), [y] "r"(y), "0"(carry) |
270 | | : "cc", "memory"); |
271 | | |
272 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
273 | |
|
274 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) |
275 | 0 | : [carry] "=r"(carry) |
276 | 0 | : [x] "r"(x), [y] "r"(y), "0"(carry) |
277 | 0 | : "cc", "memory"); |
278 | |
|
279 | | #else |
280 | | x[0] = word_add(x[0], y[0], &carry); |
281 | | x[1] = word_add(x[1], y[1], &carry); |
282 | | x[2] = word_add(x[2], y[2], &carry); |
283 | | x[3] = word_add(x[3], y[3], &carry); |
284 | | x[4] = word_add(x[4], y[4], &carry); |
285 | | x[5] = word_add(x[5], y[5], &carry); |
286 | | x[6] = word_add(x[6], y[6], &carry); |
287 | | x[7] = word_add(x[7], y[7], &carry); |
288 | | #endif |
289 | |
|
290 | 0 | return carry; |
291 | 0 | } |
292 | | |
293 | | /* |
294 | | * Eight Word Block Addition, Three Argument |
| | * |
| | * z[i] = x[i] + y[i] + carry for i = 0..7, with the carry propagated |
| | * from each word to the next. Returns the carry out of the last word. |
| | * |
| | * The asm variants write z[] through a pointer that is only an input |
| | * operand, which is why memory is listed as clobbered. |
295 | | */ |
296 | 0 | inline word word8_add3(word z[8], const word x[8], const word y[8], word carry) { |
297 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
298 | | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) |
299 | | : [carry] "=r"(carry) |
300 | | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry) |
301 | | : "cc", "memory"); |
302 | | |
303 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
304 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) |
305 | 0 | : [carry] "=r"(carry) |
306 | 0 | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry) |
307 | 0 | : "cc", "memory"); |
308 | |
|
309 | | #else |
310 | | z[0] = word_add(x[0], y[0], &carry); |
311 | | z[1] = word_add(x[1], y[1], &carry); |
312 | | z[2] = word_add(x[2], y[2], &carry); |
313 | | z[3] = word_add(x[3], y[3], &carry); |
314 | | z[4] = word_add(x[4], y[4], &carry); |
315 | | z[5] = word_add(x[5], y[5], &carry); |
316 | | z[6] = word_add(x[6], y[6], &carry); |
317 | | z[7] = word_add(x[7], y[7], &carry); |
318 | | #endif |
319 | |
|
320 | 0 | return carry; |
321 | 0 | } |
322 | | |
323 | | /* |
324 | | * Word Subtraction |
| | * |
| | * Returns the low word of x - y - *carry and writes the borrow out |
| | * (0 or 1) back to *carry. |
| | * |
| | * *carry is expected to be 0 or 1 on entry: the asm variants only move |
| | * bit 0 of it into the CPU carry flag, while the portable variant |
| | * subtracts the whole value. |
| | * |
| | * NOTE(review): in the asm variants the carry register is rotated before |
| | * y is read, yet the carry output is not marked early-clobber. Confirm |
| | * the compiler can never place y in that same register, for example when |
| | * y and *carry are known to hold the same value. |
325 | | */ |
326 | 0 | inline word word_sub(word x, word y, word* carry) { |
327 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
328 | | asm(ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) |
329 | | : [x] "=r"(x), [carry] "=r"(*carry) |
330 | | : "0"(x), [y] "rm"(y), "1"(*carry) |
331 | | : "cc"); |
332 | | return x; |
333 | | |
334 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
335 | 0 | asm(ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) |
336 | 0 | : [x] "=r"(x), [carry] "=r"(*carry) |
337 | 0 | : "0"(x), [y] "rm"(y), "1"(*carry) |
338 | 0 | : "cc"); |
339 | 0 | return x; |
340 | |
|
341 | | #else |
342 | | word t0 = x - y; |
343 | | word c1 = (t0 > x); |
344 | | word z = t0 - *carry; |
345 | | *carry = c1 | (z > t0); |
346 | | return z; |
347 | | #endif |
348 | 0 | } |
349 | | |
350 | | /* |
351 | | * Eight Word Block Subtraction, Two Argument |
| | * |
| | * In place: x[i] = x[i] - y[i] - carry for i = 0..7, with the borrow |
| | * propagated from each word to the next. Returns the borrow out of the |
| | * last word. |
| | * |
| | * The asm variants write x[] through a pointer that is only an input |
| | * operand, which is why memory is listed as clobbered. |
352 | | */ |
353 | 0 | inline word word8_sub2(word x[8], const word y[8], word carry) { |
354 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
355 | | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) |
356 | | : [carry] "=r"(carry) |
357 | | : [x] "r"(x), [y] "r"(y), "0"(carry) |
358 | | : "cc", "memory"); |
359 | | |
360 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
361 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) |
362 | 0 | : [carry] "=r"(carry) |
363 | 0 | : [x] "r"(x), [y] "r"(y), "0"(carry) |
364 | 0 | : "cc", "memory"); |
365 | |
|
366 | | #else |
367 | | x[0] = word_sub(x[0], y[0], &carry); |
368 | | x[1] = word_sub(x[1], y[1], &carry); |
369 | | x[2] = word_sub(x[2], y[2], &carry); |
370 | | x[3] = word_sub(x[3], y[3], &carry); |
371 | | x[4] = word_sub(x[4], y[4], &carry); |
372 | | x[5] = word_sub(x[5], y[5], &carry); |
373 | | x[6] = word_sub(x[6], y[6], &carry); |
374 | | x[7] = word_sub(x[7], y[7], &carry); |
375 | | #endif |
376 | |
|
377 | 0 | return carry; |
378 | 0 | } |
379 | | |
380 | | /* |
381 | | * Eight Word Block Subtraction, Two Argument, Reversed Operands |
| | * |
| | * In place: x[i] = y[i] - x[i] - carry for i = 0..7, with the borrow |
| | * propagated from each word to the next. Returns the borrow out of the |
| | * last word. |
| | * |
| | * The asm variants reuse the three-argument ADDSUB3_OP with the operand |
| | * names rebound: [x] is the pointer y, while both [y] and [z] are the |
| | * pointer x, so the difference is written back over x. |
382 | | */ |
383 | 0 | inline word word8_sub2_rev(word x[8], const word y[8], word carry) { |
384 | 0 | #if defined(BOTAN_MP_USE_X86_32_ASM) |
385 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
386 | 0 | : [carry] "=r"(carry) |
387 | 0 | : [x] "r"(y), [y] "r"(x), [z] "r"(x), "0"(carry) |
388 | 0 | : "cc", "memory"); |
389 | 0 |
|
390 | 0 | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
391 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
392 | 0 | : [carry] "=r"(carry) |
393 | 0 | : [x] "r"(y), [y] "r"(x), [z] "r"(x), "0"(carry) |
394 | 0 | : "cc", "memory"); |
395 | 0 |
|
396 | 0 | #else |
397 | 0 | x[0] = word_sub(y[0], x[0], &carry); |
398 | 0 | x[1] = word_sub(y[1], x[1], &carry); |
399 | 0 | x[2] = word_sub(y[2], x[2], &carry); |
400 | 0 | x[3] = word_sub(y[3], x[3], &carry); |
401 | 0 | x[4] = word_sub(y[4], x[4], &carry); |
402 | 0 | x[5] = word_sub(y[5], x[5], &carry); |
403 | 0 | x[6] = word_sub(y[6], x[6], &carry); |
404 | 0 | x[7] = word_sub(y[7], x[7], &carry); |
405 | 0 | #endif |
406 | 0 |
|
407 | 0 | return carry; |
408 | 0 | } |
409 | | |
410 | | /* |
411 | | * Eight Word Block Subtraction, Three Argument |
| | * |
| | * z[i] = x[i] - y[i] - carry for i = 0..7, with the borrow propagated |
| | * from each word to the next. Returns the borrow out of the last word. |
| | * |
| | * The asm variants write z[] through a pointer that is only an input |
| | * operand, which is why memory is listed as clobbered. |
412 | | */ |
413 | 0 | inline word word8_sub3(word z[8], const word x[8], const word y[8], word carry) { |
414 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
415 | | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
416 | | : [carry] "=r"(carry) |
417 | | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry) |
418 | | : "cc", "memory"); |
419 | | |
420 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
421 | 0 | asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
422 | 0 | : [carry] "=r"(carry) |
423 | 0 | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry) |
424 | 0 | : "cc", "memory"); |
425 | |
|
426 | | #else |
427 | | z[0] = word_sub(x[0], y[0], &carry); |
428 | | z[1] = word_sub(x[1], y[1], &carry); |
429 | | z[2] = word_sub(x[2], y[2], &carry); |
430 | | z[3] = word_sub(x[3], y[3], &carry); |
431 | | z[4] = word_sub(x[4], y[4], &carry); |
432 | | z[5] = word_sub(x[5], y[5], &carry); |
433 | | z[6] = word_sub(x[6], y[6], &carry); |
434 | | z[7] = word_sub(x[7], y[7], &carry); |
435 | | #endif |
436 | |
|
437 | 0 | return carry; |
438 | 0 | } |
439 | | |
440 | | /* |
441 | | * Eight Word Block Linear Multiplication |
| | * |
| | * In place: for i = 0..7, x[i] becomes the low word of x[i]*y + carry and |
| | * carry becomes the high word. Returns the carry out of the last word. |
| | * |
| | * The asm variants store into x[] through a pointer that is only an input |
| | * operand, so memory is listed as clobbered (as the block add/sub routines |
| | * already do) to keep the compiler from caching or reordering accesses to |
| | * the array around the asm statement. The a and d registers are named as |
| | * clobbers because LINMUL_OP uses them directly for the multiply. |
442 | | */ |
443 | 0 | inline word word8_linmul2(word x[8], word y, word carry) { |
444 | 0 | #if defined(BOTAN_MP_USE_X86_32_ASM) |
445 | 0 | asm(DO_8_TIMES(LINMUL_OP, "x") : [carry] "=r"(carry) : [x] "r"(x), [y] "rm"(y), "0"(carry) : "cc", "memory", "%eax", "%edx"); |
446 | 0 |
|
447 | 0 | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
448 | 0 | asm(DO_8_TIMES(LINMUL_OP, "x") : [carry] "=r"(carry) : [x] "r"(x), [y] "rm"(y), "0"(carry) : "cc", "memory", "%rax", "%rdx"); |
449 | 0 |
|
450 | 0 | #else |
451 | 0 | x[0] = word_madd2(x[0], y, &carry); |
452 | 0 | x[1] = word_madd2(x[1], y, &carry); |
453 | 0 | x[2] = word_madd2(x[2], y, &carry); |
454 | 0 | x[3] = word_madd2(x[3], y, &carry); |
455 | 0 | x[4] = word_madd2(x[4], y, &carry); |
456 | 0 | x[5] = word_madd2(x[5], y, &carry); |
457 | 0 | x[6] = word_madd2(x[6], y, &carry); |
458 | 0 | x[7] = word_madd2(x[7], y, &carry); |
459 | 0 | #endif |
460 | 0 |
|
461 | 0 | return carry; |
462 | 0 | } |
463 | | |
464 | | /* |
465 | | * Eight Word Block Linear Multiplication |
| | * |
| | * Three-argument form: for i = 0..7, z[i] becomes the low word of |
| | * x[i]*y + carry and carry becomes the high word. Returns the carry out |
| | * of the last word. |
| | * |
| | * The asm variants read x[] and store into z[] through pointers that are |
| | * only input operands, so memory is listed as clobbered (as the block |
| | * add/sub routines already do) to keep the compiler from caching or |
| | * reordering accesses to the arrays around the asm statement. The a and d |
| | * registers are named as clobbers because LINMUL_OP uses them directly. |
466 | | */ |
467 | 0 | inline word word8_linmul3(word z[8], const word x[8], word y, word carry) { |
468 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
469 | | asm(DO_8_TIMES(LINMUL_OP, "z") |
470 | | : [carry] "=r"(carry) |
471 | | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry) |
472 | | : "cc", "memory", "%eax", "%edx"); |
473 | | |
474 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
475 | 0 | asm(DO_8_TIMES(LINMUL_OP, "z") |
476 | 0 | : [carry] "=r"(carry) |
477 | 0 | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry) |
478 | 0 | : "cc", "memory", "%rax", "%rdx"); |
479 | |
|
480 | | #else |
481 | | z[0] = word_madd2(x[0], y, &carry); |
482 | | z[1] = word_madd2(x[1], y, &carry); |
483 | | z[2] = word_madd2(x[2], y, &carry); |
484 | | z[3] = word_madd2(x[3], y, &carry); |
485 | | z[4] = word_madd2(x[4], y, &carry); |
486 | | z[5] = word_madd2(x[5], y, &carry); |
487 | | z[6] = word_madd2(x[6], y, &carry); |
488 | | z[7] = word_madd2(x[7], y, &carry); |
489 | | #endif |
490 | |
|
491 | 0 | return carry; |
492 | 0 | } |
493 | | |
494 | | /* |
495 | | * Eight Word Block Multiply/Add |
| | * |
| | * For i = 0..7, z[i] becomes the low word of x[i]*y + z[i] + carry and |
| | * carry becomes the high word. Returns the carry out of the last word. |
| | * |
| | * The asm variants read x[] and read and write z[] through pointers that |
| | * are only input operands, so memory is listed as clobbered (as the block |
| | * add/sub routines already do) to keep the compiler from caching or |
| | * reordering accesses to the arrays around the asm statement. The a and d |
| | * registers are named as clobbers because MULADD_OP uses them directly. |
496 | | */ |
497 | 392 | inline word word8_madd3(word z[8], const word x[8], word y, word carry) { |
498 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
499 | | asm(DO_8_TIMES(MULADD_OP, "") |
500 | | : [carry] "=r"(carry) |
501 | | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry) |
502 | | : "cc", "memory", "%eax", "%edx"); |
503 | | |
504 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
505 | 392 | asm(DO_8_TIMES(MULADD_OP, "") |
506 | 392 | : [carry] "=r"(carry) |
507 | 392 | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry) |
508 | 392 | : "cc", "memory", "%rax", "%rdx"); |
509 | | |
510 | | #else |
511 | | z[0] = word_madd3(x[0], y, z[0], &carry); |
512 | | z[1] = word_madd3(x[1], y, z[1], &carry); |
513 | | z[2] = word_madd3(x[2], y, z[2], &carry); |
514 | | z[3] = word_madd3(x[3], y, z[3], &carry); |
515 | | z[4] = word_madd3(x[4], y, z[4], &carry); |
516 | | z[5] = word_madd3(x[5], y, z[5], &carry); |
517 | | z[6] = word_madd3(x[6], y, z[6], &carry); |
518 | | z[7] = word_madd3(x[7], y, z[7], &carry); |
519 | | #endif |
520 | | |
521 | 392 | return carry; |
522 | 392 | } |
523 | | |
524 | | /* |
525 | | * Multiply-Add Accumulator |
526 | | * (w2,w1,w0) += x * y |
| | * |
| | * Adds the two-word product x*y into the three-word accumulator whose |
| | * least significant word is *w0 and most significant word is *w2. Any |
| | * carry out of *w2 is discarded. |
| | * |
| | * The asm variants use two statements: the first multiplies and captures |
| | * the product from the a (low) and d (high) registers into z0 and z1, the |
| | * second adds z0 and z1 into *w0 and *w1 and propagates the carry into |
| | * *w2. The portable variant passes *w0 as the addend of word_madd2 and |
| | * then adds the returned high word into *w1, carrying into *w2. |
527 | | */ |
528 | 62.0k | inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) { |
529 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
530 | | word z0 = 0, z1 = 0; |
531 | | |
532 | | asm("mull %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc"); |
533 | | |
534 | | asm(R"( |
535 | | addl %[z0],%[w0] |
536 | | adcl %[z1],%[w1] |
537 | | adcl $0,%[w2] |
538 | | )" |
539 | | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
540 | | : [z0] "r"(z0), [z1] "r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
541 | | : "cc"); |
542 | | |
543 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
544 | 62.0k | word z0 = 0, z1 = 0; |
545 | | |
546 | 62.0k | asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc"); |
547 | | |
548 | 62.0k | asm(R"( |
549 | 62.0k | addq %[z0],%[w0] |
550 | 62.0k | adcq %[z1],%[w1] |
551 | 62.0k | adcq $0,%[w2] |
552 | 62.0k | )" |
553 | 62.0k | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
554 | 62.0k | : [z0] "r"(z0), [z1] "r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
555 | 62.0k | : "cc"); |
556 | | |
557 | | #else |
558 | | word carry = *w0; |
559 | | *w0 = word_madd2(x, y, &carry); |
560 | | *w1 += carry; |
561 | | *w2 += (*w1 < carry); |
562 | | #endif |
563 | 62.0k | } |
564 | | |
565 | | /* |
566 | | * 3-word addition |
567 | | * (w2,w1,w0) += x |
| | * |
| | * Adds the single word x into the three-word accumulator whose least |
| | * significant word is *w0, propagating the carry through *w1 into *w2. |
| | * Any carry out of *w2 is discarded. |
| | * |
| | * The portable variant detects each carry by checking whether the sum |
| | * wrapped around to a value smaller than the addend. |
568 | | */ |
569 | 0 | inline void word3_add(word* w2, word* w1, word* w0, word x) { |
570 | 0 | #if defined(BOTAN_MP_USE_X86_32_ASM) |
571 | 0 | asm(R"( |
572 | 0 | addl %[x],%[w0] |
573 | 0 | adcl $0,%[w1] |
574 | 0 | adcl $0,%[w2] |
575 | 0 | )" |
576 | 0 | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
577 | 0 | : [x] "r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
578 | 0 | : "cc"); |
579 | 0 |
|
580 | 0 | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
581 | 0 | asm(R"( |
582 | 0 | addq %[x],%[w0] |
583 | 0 | adcq $0,%[w1] |
584 | 0 | adcq $0,%[w2] |
585 | 0 | )" |
586 | 0 | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
587 | 0 | : [x] "r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
588 | 0 | : "cc"); |
589 | 0 |
|
590 | 0 | #else |
591 | 0 | *w0 += x; |
592 | 0 | word c1 = (*w0 < x); |
593 | 0 | *w1 += c1; |
594 | 0 | word c2 = (*w1 < c1); |
595 | 0 | *w2 += c2; |
596 | 0 | #endif |
597 | 0 | } |
598 | | |
599 | | /* |
600 | | * Multiply-Add Accumulator |
601 | | * (w2,w1,w0) += 2 * x * y |
| | * |
| | * Adds twice the two-word product x*y into the three-word accumulator |
| | * whose least significant word is *w0. Any carry out of *w2 is discarded. |
| | * |
| | * The asm variants multiply once and then run the same three-instruction |
| | * accumulate sequence twice. The portable variant instead doubles the |
| | * product first: it shifts the (high, low) pair left by one bit into three |
| | * words (top, y, x) and adds those into the accumulator with a single |
| | * word_add carry chain. |
602 | | */ |
603 | 0 | inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) { |
604 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
605 | | word z0 = 0, z1 = 0; |
606 | | |
607 | | asm("mull %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc"); |
608 | | |
609 | | asm(R"( |
610 | | addl %[z0],%[w0] |
611 | | adcl %[z1],%[w1] |
612 | | adcl $0,%[w2] |
613 | | |
614 | | addl %[z0],%[w0] |
615 | | adcl %[z1],%[w1] |
616 | | adcl $0,%[w2] |
617 | | )" |
618 | | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
619 | | : [z0] "r"(z0), [z1] "r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
620 | | : "cc"); |
621 | | |
622 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
623 | 0 | word z0 = 0, z1 = 0; |
624 | |
|
625 | 0 | asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc"); |
626 | |
|
627 | 0 | asm(R"( |
628 | 0 | addq %[z0],%[w0] |
629 | 0 | adcq %[z1],%[w1] |
630 | 0 | adcq $0,%[w2] |
631 | 0 |
|
632 | 0 | addq %[z0],%[w0] |
633 | 0 | adcq %[z1],%[w1] |
634 | 0 | adcq $0,%[w2] |
635 | 0 | )" |
636 | 0 | : [w0] "=r"(*w0), [w1] "=r"(*w1), [w2] "=r"(*w2) |
637 | 0 | : [z0] "r"(z0), [z1] "r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
638 | 0 | : "cc"); |
639 | |
|
640 | | #else |
641 | | word carry = 0; |
642 | | x = word_madd2(x, y, &carry); |
643 | | y = carry; |
644 | | |
645 | | word top = (y >> (BOTAN_MP_WORD_BITS - 1)); |
646 | | y <<= 1; |
647 | | y |= (x >> (BOTAN_MP_WORD_BITS - 1)); |
648 | | x <<= 1; |
649 | | |
650 | | carry = 0; |
651 | | *w0 = word_add(*w0, x, &carry); |
652 | | *w1 = word_add(*w1, y, &carry); |
653 | | *w2 = word_add(*w2, top, &carry); |
654 | | #endif |
655 | 0 | } |
656 | | |
657 | | #if defined(ASM) /* Undefine the asm helper macros defined earlier in this header so they are not visible past this point */ |
658 | | #undef ASM |
659 | | #undef DO_8_TIMES |
660 | | #undef ADD_OR_SUBTRACT |
661 | | #undef ADDSUB2_OP |
662 | | #undef ADDSUB3_OP |
663 | | #undef LINMUL_OP |
664 | | #undef MULADD_OP |
665 | | #endif |
666 | | |
667 | | } // namespace Botan |
668 | | |
669 | | #endif |