/src/botan/build/include/botan/internal/mp_asmi.h
Line | Count | Source |
1 | | /* |
2 | | * Lowest Level MPI Algorithms |
3 | | * (C) 1999-2010 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #ifndef BOTAN_MP_ASM_INTERNAL_H_ |
10 | | #define BOTAN_MP_ASM_INTERNAL_H_ |
11 | | |
12 | | #include <botan/internal/mp_madd.h> |
13 | | |
14 | | namespace Botan { |
15 | | |
#if defined(BOTAN_MP_USE_X86_32_ASM) || defined(BOTAN_MP_USE_X86_64_ASM)

/*
* Building blocks for the inline asm templates used in this file.
*
* ASM(x) turns one instruction string into one line of assembler text by
* appending a newline and a tab. It is defined ahead of the macros that
* mention it so the file reads top to bottom.
*/
#define ASM(x) x "\n\t"

/*
* Expands to MACRO(ARG, 0) MACRO(ARG, 1) ... MACRO(ARG, 7), i.e. one copy
* of the per-word instruction sequence for each word of an 8 word block.
*/
#define DO_8_TIMES(MACRO, ARG) \
   MACRO(ARG, 0) \
   MACRO(ARG, 1) \
   MACRO(ARG, 2) \
   MACRO(ARG, 3) \
   MACRO(ARG, 4) \
   MACRO(ARG, 5) \
   MACRO(ARG, 6) \
   MACRO(ARG, 7)

#endif

/*
* Per-word instruction sequences. All of them expect the enclosing asm
* statement to provide the named operands they mention (%[x], %[y], %[z],
* %[carry]). INDEX is the word index; it is stringified and scaled by the
* word size (4 or 8 bytes) to form the memory displacement.
*
* ADDSUB2_OP(OPERATION, INDEX)
*    Loads y[INDEX] into the %[carry] register (used purely as scratch
*    here) and applies OPERATION (adc/sbb) from it onto x[INDEX] in memory.
*    The running carry/borrow lives in the CPU carry flag, which mov does
*    not modify.
*
* ADDSUB3_OP(OPERATION, INDEX)
*    Loads x[INDEX] into the %[carry] scratch register, applies OPERATION
*    with y[INDEX], and stores the result to z[INDEX].
*
* LINMUL_OP(WRITE_TO, INDEX)
*    Multiplies x[INDEX] by %[y], adds %[carry] to the double-word product,
*    stores the low word to WRITE_TO[INDEX] (WRITE_TO is an operand name
*    given as a string, "x" or "z") and leaves the high word in %[carry].
*    Uses the accumulator and data registers (eax/edx or rax/rdx).
*
* MULADD_OP(IGNORED, INDEX)
*    As LINMUL_OP, but additionally adds the previous value of z[INDEX]
*    and always writes to z[INDEX]. The first argument is unused; it exists
*    so the macro can be driven by DO_8_TIMES.
*
* ADD_OR_SUBTRACT(CORE_CODE)
*    Wraps an adc/sbb chain: rotates %[carry] right by one so that its low
*    bit lands in the carry flag, runs CORE_CODE, then converts the final
*    carry flag back into 0 or 1 in %[carry] (sbb r,r gives 0 or all-ones,
*    neg turns all-ones into 1).
*
* Note: none of the definitions ends in a line continuation, so they do
* not depend on being followed by a blank line.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

#define ADDSUB2_OP(OPERATION, INDEX)                      \
   ASM("movl 4*" #INDEX "(%[y]), %[carry]")               \
   ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

#define ADDSUB3_OP(OPERATION, INDEX)                      \
   ASM("movl 4*" #INDEX "(%[x]), %[carry]")               \
   ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")         \
   ASM("movl %[carry], 4*" #INDEX "(%[z])")

#define LINMUL_OP(WRITE_TO, INDEX)                        \
   ASM("movl 4*" #INDEX "(%[x]),%%eax")                   \
   ASM("mull %[y]")                                       \
   ASM("addl %[carry],%%eax")                             \
   ASM("adcl $0,%%edx")                                   \
   ASM("movl %%edx,%[carry]")                             \
   ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

#define MULADD_OP(IGNORED, INDEX)                         \
   ASM("movl 4*" #INDEX "(%[x]),%%eax")                   \
   ASM("mull %[y]")                                       \
   ASM("addl %[carry],%%eax")                             \
   ASM("adcl $0,%%edx")                                   \
   ASM("addl 4*" #INDEX "(%[z]),%%eax")                   \
   ASM("adcl $0,%%edx")                                   \
   ASM("movl %%edx,%[carry]")                             \
   ASM("movl %%eax, 4*" #INDEX " (%[z])")

#define ADD_OR_SUBTRACT(CORE_CODE)                        \
   ASM("rorl %[carry]")                                   \
   CORE_CODE                                              \
   ASM("sbbl %[carry],%[carry]")                          \
   ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

#define ADDSUB2_OP(OPERATION, INDEX)                      \
   ASM("movq 8*" #INDEX "(%[y]), %[carry]")               \
   ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

#define ADDSUB3_OP(OPERATION, INDEX)                      \
   ASM("movq 8*" #INDEX "(%[x]), %[carry]")               \
   ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")         \
   ASM("movq %[carry], 8*" #INDEX "(%[z])")

#define LINMUL_OP(WRITE_TO, INDEX)                        \
   ASM("movq 8*" #INDEX "(%[x]),%%rax")                   \
   ASM("mulq %[y]")                                       \
   ASM("addq %[carry],%%rax")                             \
   ASM("adcq $0,%%rdx")                                   \
   ASM("movq %%rdx,%[carry]")                             \
   ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

#define MULADD_OP(IGNORED, INDEX)                         \
   ASM("movq 8*" #INDEX "(%[x]),%%rax")                   \
   ASM("mulq %[y]")                                       \
   ASM("addq %[carry],%%rax")                             \
   ASM("adcq $0,%%rdx")                                   \
   ASM("addq 8*" #INDEX "(%[z]),%%rax")                   \
   ASM("adcq $0,%%rdx")                                   \
   ASM("movq %%rdx,%[carry]")                             \
   ASM("movq %%rax, 8*" #INDEX " (%[z])")

#define ADD_OR_SUBTRACT(CORE_CODE)                        \
   ASM("rorq %[carry]")                                   \
   CORE_CODE                                              \
   ASM("sbbq %[carry],%[carry]")                          \
   ASM("negq %[carry]")

#endif
103 | | |
104 | | /* |
105 | | * Word Addition |
106 | | */ |
107 | | inline word word_add(word x, word y, word* carry) |
108 | 2.71G | { |
109 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
110 | | asm( |
111 | | ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]")) |
112 | | : [x]"=r"(x), [carry]"=r"(*carry) |
113 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
114 | | : "cc"); |
115 | | return x; |
116 | | |
117 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
118 | | |
119 | 2.71G | asm( |
120 | 2.71G | ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]")) |
121 | 2.71G | : [x]"=r"(x), [carry]"=r"(*carry) |
122 | 2.71G | : "0"(x), [y]"rm"(y), "1"(*carry) |
123 | 2.71G | : "cc"); |
124 | 2.71G | return x; |
125 | 2.71G | |
126 | | #else |
127 | | word z = x + y; |
128 | | word c1 = (z < x); |
129 | | z += *carry; |
130 | | *carry = c1 | (z < *carry); |
131 | | return z; |
132 | | #endif |
133 | | } |
134 | | |
135 | | /* |
136 | | * Eight Word Block Addition, Two Argument |
137 | | */ |
138 | | inline word word8_add2(word x[8], const word y[8], word carry) |
139 | 46.0M | { |
140 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
141 | | asm( |
142 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl")) |
143 | | : [carry]"=r"(carry) |
144 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
145 | | : "cc", "memory"); |
146 | | return carry; |
147 | | |
148 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
149 | | |
150 | 46.0M | asm( |
151 | 46.0M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq")) |
152 | 46.0M | : [carry]"=r"(carry) |
153 | 46.0M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
154 | 46.0M | : "cc", "memory"); |
155 | 46.0M | return carry; |
156 | 46.0M | |
157 | | #else |
158 | | x[0] = word_add(x[0], y[0], &carry); |
159 | | x[1] = word_add(x[1], y[1], &carry); |
160 | | x[2] = word_add(x[2], y[2], &carry); |
161 | | x[3] = word_add(x[3], y[3], &carry); |
162 | | x[4] = word_add(x[4], y[4], &carry); |
163 | | x[5] = word_add(x[5], y[5], &carry); |
164 | | x[6] = word_add(x[6], y[6], &carry); |
165 | | x[7] = word_add(x[7], y[7], &carry); |
166 | | return carry; |
167 | | #endif |
168 | | } |
169 | | |
170 | | /* |
171 | | * Eight Word Block Addition, Three Argument |
172 | | */ |
173 | | inline word word8_add3(word z[8], const word x[8], |
174 | | const word y[8], word carry) |
175 | 396M | { |
176 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
177 | | asm( |
178 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl")) |
179 | | : [carry]"=r"(carry) |
180 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
181 | | : "cc", "memory"); |
182 | | return carry; |
183 | | |
184 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
185 | | |
186 | 396M | asm( |
187 | 396M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq")) |
188 | 396M | : [carry]"=r"(carry) |
189 | 396M | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
190 | 396M | : "cc", "memory"); |
191 | 396M | return carry; |
192 | 396M | |
193 | | #else |
194 | | z[0] = word_add(x[0], y[0], &carry); |
195 | | z[1] = word_add(x[1], y[1], &carry); |
196 | | z[2] = word_add(x[2], y[2], &carry); |
197 | | z[3] = word_add(x[3], y[3], &carry); |
198 | | z[4] = word_add(x[4], y[4], &carry); |
199 | | z[5] = word_add(x[5], y[5], &carry); |
200 | | z[6] = word_add(x[6], y[6], &carry); |
201 | | z[7] = word_add(x[7], y[7], &carry); |
202 | | return carry; |
203 | | #endif |
204 | | } |
205 | | |
206 | | /* |
207 | | * Word Subtraction |
208 | | */ |
209 | | inline word word_sub(word x, word y, word* carry) |
210 | 4.07G | { |
211 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
212 | | asm( |
213 | | ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]")) |
214 | | : [x]"=r"(x), [carry]"=r"(*carry) |
215 | | : "0"(x), [y]"rm"(y), "1"(*carry) |
216 | | : "cc"); |
217 | | return x; |
218 | | |
219 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
220 | | |
221 | 4.07G | asm( |
222 | 4.07G | ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]")) |
223 | 4.07G | : [x]"=r"(x), [carry]"=r"(*carry) |
224 | 4.07G | : "0"(x), [y]"rm"(y), "1"(*carry) |
225 | 4.07G | : "cc"); |
226 | 4.07G | return x; |
227 | 4.07G | |
228 | | #else |
229 | | word t0 = x - y; |
230 | | word c1 = (t0 > x); |
231 | | word z = t0 - *carry; |
232 | | *carry = c1 | (z > t0); |
233 | | return z; |
234 | | #endif |
235 | | } |
236 | | |
237 | | /* |
238 | | * Eight Word Block Subtraction, Two Argument |
239 | | */ |
240 | | inline word word8_sub2(word x[8], const word y[8], word carry) |
241 | 57.7M | { |
242 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
243 | | asm( |
244 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl")) |
245 | | : [carry]"=r"(carry) |
246 | | : [x]"r"(x), [y]"r"(y), "0"(carry) |
247 | | : "cc", "memory"); |
248 | | return carry; |
249 | | |
250 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
251 | | |
252 | 57.7M | asm( |
253 | 57.7M | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq")) |
254 | 57.7M | : [carry]"=r"(carry) |
255 | 57.7M | : [x]"r"(x), [y]"r"(y), "0"(carry) |
256 | 57.7M | : "cc", "memory"); |
257 | 57.7M | return carry; |
258 | 57.7M | |
259 | | #else |
260 | | x[0] = word_sub(x[0], y[0], &carry); |
261 | | x[1] = word_sub(x[1], y[1], &carry); |
262 | | x[2] = word_sub(x[2], y[2], &carry); |
263 | | x[3] = word_sub(x[3], y[3], &carry); |
264 | | x[4] = word_sub(x[4], y[4], &carry); |
265 | | x[5] = word_sub(x[5], y[5], &carry); |
266 | | x[6] = word_sub(x[6], y[6], &carry); |
267 | | x[7] = word_sub(x[7], y[7], &carry); |
268 | | return carry; |
269 | | #endif |
270 | | } |
271 | | |
272 | | /* |
273 | | * Eight Word Block Subtraction, Two Argument |
274 | | */ |
275 | | inline word word8_sub2_rev(word x[8], const word y[8], word carry) |
276 | 164k | { |
277 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
278 | | asm( |
279 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
280 | | : [carry]"=r"(carry) |
281 | | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
282 | | : "cc", "memory"); |
283 | | return carry; |
284 | | |
285 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
286 | | |
287 | 164k | asm( |
288 | 164k | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
289 | 164k | : [carry]"=r"(carry) |
290 | 164k | : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry) |
291 | 164k | : "cc", "memory"); |
292 | 164k | return carry; |
293 | 164k | |
294 | | #else |
295 | | x[0] = word_sub(y[0], x[0], &carry); |
296 | | x[1] = word_sub(y[1], x[1], &carry); |
297 | | x[2] = word_sub(y[2], x[2], &carry); |
298 | | x[3] = word_sub(y[3], x[3], &carry); |
299 | | x[4] = word_sub(y[4], x[4], &carry); |
300 | | x[5] = word_sub(y[5], x[5], &carry); |
301 | | x[6] = word_sub(y[6], x[6], &carry); |
302 | | x[7] = word_sub(y[7], x[7], &carry); |
303 | | return carry; |
304 | | #endif |
305 | | } |
306 | | |
307 | | /* |
308 | | * Eight Word Block Subtraction, Three Argument |
309 | | */ |
310 | | inline word word8_sub3(word z[8], const word x[8], |
311 | | const word y[8], word carry) |
312 | 1.13G | { |
313 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
314 | | asm( |
315 | | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl")) |
316 | | : [carry]"=r"(carry) |
317 | | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
318 | | : "cc", "memory"); |
319 | | return carry; |
320 | | |
321 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
322 | | |
323 | 1.13G | asm( |
324 | 1.13G | ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq")) |
325 | 1.13G | : [carry]"=r"(carry) |
326 | 1.13G | : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry) |
327 | 1.13G | : "cc", "memory"); |
328 | 1.13G | return carry; |
329 | 1.13G | |
330 | | #else |
331 | | z[0] = word_sub(x[0], y[0], &carry); |
332 | | z[1] = word_sub(x[1], y[1], &carry); |
333 | | z[2] = word_sub(x[2], y[2], &carry); |
334 | | z[3] = word_sub(x[3], y[3], &carry); |
335 | | z[4] = word_sub(x[4], y[4], &carry); |
336 | | z[5] = word_sub(x[5], y[5], &carry); |
337 | | z[6] = word_sub(x[6], y[6], &carry); |
338 | | z[7] = word_sub(x[7], y[7], &carry); |
339 | | return carry; |
340 | | #endif |
341 | | } |
342 | | |
343 | | /* |
344 | | * Eight Word Block Linear Multiplication |
345 | | */ |
346 | | inline word word8_linmul2(word x[8], word y, word carry) |
347 | 859M | { |
348 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
349 | | asm( |
350 | | DO_8_TIMES(LINMUL_OP, "x") |
351 | | : [carry]"=r"(carry) |
352 | | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
353 | | : "cc", "%eax", "%edx"); |
354 | | return carry; |
355 | | |
356 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
357 | | |
358 | 859M | asm( |
359 | 859M | DO_8_TIMES(LINMUL_OP, "x") |
360 | 859M | : [carry]"=r"(carry) |
361 | 859M | : [x]"r"(x), [y]"rm"(y), "0"(carry) |
362 | 859M | : "cc", "%rax", "%rdx"); |
363 | 859M | return carry; |
364 | 859M | |
365 | | #else |
366 | | x[0] = word_madd2(x[0], y, &carry); |
367 | | x[1] = word_madd2(x[1], y, &carry); |
368 | | x[2] = word_madd2(x[2], y, &carry); |
369 | | x[3] = word_madd2(x[3], y, &carry); |
370 | | x[4] = word_madd2(x[4], y, &carry); |
371 | | x[5] = word_madd2(x[5], y, &carry); |
372 | | x[6] = word_madd2(x[6], y, &carry); |
373 | | x[7] = word_madd2(x[7], y, &carry); |
374 | | return carry; |
375 | | #endif |
376 | | } |
377 | | |
378 | | /* |
379 | | * Eight Word Block Linear Multiplication |
380 | | */ |
381 | | inline word word8_linmul3(word z[8], const word x[8], word y, word carry) |
382 | 35.0M | { |
383 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
384 | | asm( |
385 | | DO_8_TIMES(LINMUL_OP, "z") |
386 | | : [carry]"=r"(carry) |
387 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
388 | | : "cc", "%eax", "%edx"); |
389 | | return carry; |
390 | | |
391 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
392 | | asm( |
393 | 35.0M | DO_8_TIMES(LINMUL_OP, "z") |
394 | 35.0M | : [carry]"=r"(carry) |
395 | 35.0M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
396 | 35.0M | : "cc", "%rax", "%rdx"); |
397 | 35.0M | return carry; |
398 | 35.0M | |
399 | | #else |
400 | | z[0] = word_madd2(x[0], y, &carry); |
401 | | z[1] = word_madd2(x[1], y, &carry); |
402 | | z[2] = word_madd2(x[2], y, &carry); |
403 | | z[3] = word_madd2(x[3], y, &carry); |
404 | | z[4] = word_madd2(x[4], y, &carry); |
405 | | z[5] = word_madd2(x[5], y, &carry); |
406 | | z[6] = word_madd2(x[6], y, &carry); |
407 | | z[7] = word_madd2(x[7], y, &carry); |
408 | | return carry; |
409 | | #endif |
410 | | } |
411 | | |
412 | | /* |
413 | | * Eight Word Block Multiply/Add |
414 | | */ |
415 | | inline word word8_madd3(word z[8], const word x[8], word y, word carry) |
416 | 258M | { |
417 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
418 | | asm( |
419 | | DO_8_TIMES(MULADD_OP, "") |
420 | | : [carry]"=r"(carry) |
421 | | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
422 | | : "cc", "%eax", "%edx"); |
423 | | return carry; |
424 | | |
425 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
426 | | |
427 | 258M | asm( |
428 | 258M | DO_8_TIMES(MULADD_OP, "") |
429 | 258M | : [carry]"=r"(carry) |
430 | 258M | : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry) |
431 | 258M | : "cc", "%rax", "%rdx"); |
432 | 258M | return carry; |
433 | 258M | |
434 | | #else |
435 | | z[0] = word_madd3(x[0], y, z[0], &carry); |
436 | | z[1] = word_madd3(x[1], y, z[1], &carry); |
437 | | z[2] = word_madd3(x[2], y, z[2], &carry); |
438 | | z[3] = word_madd3(x[3], y, z[3], &carry); |
439 | | z[4] = word_madd3(x[4], y, z[4], &carry); |
440 | | z[5] = word_madd3(x[5], y, z[5], &carry); |
441 | | z[6] = word_madd3(x[6], y, z[6], &carry); |
442 | | z[7] = word_madd3(x[7], y, z[7], &carry); |
443 | | return carry; |
444 | | #endif |
445 | | } |
446 | | |
447 | | /* |
448 | | * Multiply-Add Accumulator |
449 | | * (w2,w1,w0) += x * y |
450 | | */ |
451 | | inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y) |
452 | 20.5G | { |
453 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
454 | | word z0 = 0, z1 = 0; |
455 | | |
456 | | asm("mull %[y]" |
457 | | : "=a"(z0),"=d"(z1) |
458 | | : "a"(x), [y]"rm"(y) |
459 | | : "cc"); |
460 | | |
461 | | asm(R"( |
462 | | addl %[z0],%[w0] |
463 | | adcl %[z1],%[w1] |
464 | | adcl $0,%[w2] |
465 | | )" |
466 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
467 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
468 | | : "cc"); |
469 | | |
470 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
471 | | |
472 | 20.5G | word z0 = 0, z1 = 0; |
473 | 20.5G | |
474 | 20.5G | asm("mulq %[y]" |
475 | 20.5G | : "=a"(z0),"=d"(z1) |
476 | 20.5G | : "a"(x), [y]"rm"(y) |
477 | 20.5G | : "cc"); |
478 | 20.5G | |
479 | 20.5G | asm(R"( |
480 | 20.5G | addq %[z0],%[w0] |
481 | 20.5G | adcq %[z1],%[w1] |
482 | 20.5G | adcq $0,%[w2] |
483 | 20.5G | )" |
484 | 20.5G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
485 | 20.5G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
486 | 20.5G | : "cc"); |
487 | 20.5G | |
488 | | #else |
489 | | word carry = *w0; |
490 | | *w0 = word_madd2(x, y, &carry); |
491 | | *w1 += carry; |
492 | | *w2 += (*w1 < carry); |
493 | | #endif |
494 | | } |
495 | | |
496 | | /* |
497 | | * 3-word addition |
498 | | * (w2,w1,w0) += x |
499 | | */ |
500 | | inline void word3_add(word* w2, word* w1, word* w0, word x) |
501 | 1.00G | { |
502 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
503 | | asm(R"( |
504 | | addl %[x],%[w0] |
505 | | adcl $0,%[w1] |
506 | | adcl $0,%[w2] |
507 | | )" |
508 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
509 | | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
510 | | : "cc"); |
511 | | |
512 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
513 | | |
514 | 1.00G | asm(R"( |
515 | 1.00G | addq %[x],%[w0] |
516 | 1.00G | adcq $0,%[w1] |
517 | 1.00G | adcq $0,%[w2] |
518 | 1.00G | )" |
519 | 1.00G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
520 | 1.00G | : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2) |
521 | 1.00G | : "cc"); |
522 | 1.00G | |
523 | | #else |
524 | | *w0 += x; |
525 | | word c1 = (*w0 < x); |
526 | | *w1 += c1; |
527 | | word c2 = (*w1 < c1); |
528 | | *w2 += c2; |
529 | | #endif |
530 | | } |
531 | | |
532 | | /* |
533 | | * Multiply-Add Accumulator |
534 | | * (w2,w1,w0) += 2 * x * y |
535 | | */ |
536 | | inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y) |
537 | 5.12G | { |
538 | | #if defined(BOTAN_MP_USE_X86_32_ASM) |
539 | | |
540 | | word z0 = 0, z1 = 0; |
541 | | |
542 | | asm("mull %[y]" |
543 | | : "=a"(z0),"=d"(z1) |
544 | | : "a"(x), [y]"rm"(y) |
545 | | : "cc"); |
546 | | |
547 | | asm(R"( |
548 | | addl %[z0],%[w0] |
549 | | adcl %[z1],%[w1] |
550 | | adcl $0,%[w2] |
551 | | |
552 | | addl %[z0],%[w0] |
553 | | adcl %[z1],%[w1] |
554 | | adcl $0,%[w2] |
555 | | )" |
556 | | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
557 | | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
558 | | : "cc"); |
559 | | |
560 | | #elif defined(BOTAN_MP_USE_X86_64_ASM) |
561 | | |
562 | 5.12G | word z0 = 0, z1 = 0; |
563 | 5.12G | |
564 | 5.12G | asm("mulq %[y]" |
565 | 5.12G | : "=a"(z0),"=d"(z1) |
566 | 5.12G | : "a"(x), [y]"rm"(y) |
567 | 5.12G | : "cc"); |
568 | 5.12G | |
569 | 5.12G | asm(R"( |
570 | 5.12G | addq %[z0],%[w0] |
571 | 5.12G | adcq %[z1],%[w1] |
572 | 5.12G | adcq $0,%[w2] |
573 | 5.12G | |
574 | 5.12G | addq %[z0],%[w0] |
575 | 5.12G | adcq %[z1],%[w1] |
576 | 5.12G | adcq $0,%[w2] |
577 | 5.12G | )" |
578 | 5.12G | : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2) |
579 | 5.12G | : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2) |
580 | 5.12G | : "cc"); |
581 | 5.12G | |
582 | | #else |
583 | | word carry = 0; |
584 | | x = word_madd2(x, y, &carry); |
585 | | y = carry; |
586 | | |
587 | | word top = (y >> (BOTAN_MP_WORD_BITS-1)); |
588 | | y <<= 1; |
589 | | y |= (x >> (BOTAN_MP_WORD_BITS-1)); |
590 | | x <<= 1; |
591 | | |
592 | | carry = 0; |
593 | | *w0 = word_add(*w0, x, &carry); |
594 | | *w1 = word_add(*w1, y, &carry); |
595 | | *w2 = word_add(*w2, top, &carry); |
596 | | #endif |
597 | | } |
598 | | |
/*
* Remove the asm helper macros again so they do not leak into files that
* include this header. ASM is only defined when one of the asm paths was
* enabled, so it doubles as the "were the helpers defined" marker.
*/
#ifdef ASM
   #undef MULADD_OP
   #undef LINMUL_OP
   #undef ADDSUB3_OP
   #undef ADDSUB2_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
608 | | |
609 | | } |
610 | | |
611 | | #endif |