/src/botan/build/include/internal/botan/internal/mp_asmi.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Lowest Level MPI Algorithms |
3 | | * (C) 1999-2010,2025 Jack Lloyd |
4 | | * 2006 Luca Piccarreta |
5 | | * |
6 | | * Botan is released under the Simplified BSD License (see license.txt) |
7 | | */ |
8 | | |
9 | | #ifndef BOTAN_MP_ASM_INTERNAL_H_ |
10 | | #define BOTAN_MP_ASM_INTERNAL_H_ |
11 | | |
12 | | #include <botan/compiler.h> |
13 | | #include <botan/types.h> |
14 | | #include <botan/internal/target_info.h> |
15 | | #include <concepts> |
16 | | |
17 | | #if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
18 | | #include <botan/internal/donna128.h> |
19 | | #endif |
20 | | |
21 | | namespace Botan { |
22 | | |
23 | | #if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64) |
24 | | #define BOTAN_MP_USE_X86_64_ASM |
25 | | #endif |
26 | | |
27 | | /* |
28 | | * Expressing an add with carry is sadly quite difficult in standard C/C++. |
29 | | * |
30 | | * Compilers will recognize various idioms and generate a reasonable carry |
31 | | * chain. Unfortunately which idioms the compiler will understand varies, so we |
32 | | * have to decide what to do based on the compiler. This is fragile; what will |
33 | | * work varies not just based on compiler but also version, target architecture, |
34 | | * and optimization flags. When this flag is true (and the double-width type is native), the portable part of word_add sums in the double-width type; otherwise it detects the carry by comparison. |
35 | | */ |
36 | | #if defined(__clang__) |
37 | | static constexpr bool use_dword_for_word_add = false; |
38 | | #else |
39 | | static constexpr bool use_dword_for_word_add = true; |
40 | | #endif |
41 | | |
42 | | /* |
43 | | * Concept for allowed multiprecision word types: satisfied only by uint32_t and uint64_t |
44 | | */ |
45 | | template <typename T> |
46 | | concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>); |
47 | | |
48 | | template <WordType W> |
49 | | struct WordInfo {}; |
50 | | // The primary template is empty; the two specializations below supply the per-width constants and the double-width type |
51 | | template <> |
52 | | struct WordInfo<uint32_t> { |
53 | | public: |
54 | | static const constexpr size_t bytes = 4; |
55 | | static const constexpr size_t bits = 32; |
56 | | static const constexpr uint32_t max = 0xFFFFFFFF; |
57 | | static const constexpr uint32_t top_bit = 0x80000000; |
58 | | // dword is the double-width type; for 32-bit words it is always the built-in uint64_t |
59 | | typedef uint64_t dword; |
60 | | static const constexpr bool dword_is_native = true; |
61 | | }; |
62 | | // 64-bit words: dword is uint128_t when BOTAN_TARGET_HAS_NATIVE_UINT128 is defined, otherwise donna128 |
63 | | template <> |
64 | | struct WordInfo<uint64_t> { |
65 | | public: |
66 | | static const constexpr size_t bytes = 8; |
67 | | static const constexpr size_t bits = 64; |
68 | | static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF; |
69 | | static const constexpr uint64_t top_bit = 0x8000000000000000; |
70 | | // dword_is_native records which of the two double-width types was selected |
71 | | #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128) |
72 | | typedef uint128_t dword; |
73 | | static const constexpr bool dword_is_native = true; |
74 | | #else |
75 | | typedef donna128 dword; |
76 | | static const constexpr bool dword_is_native = false; |
77 | | #endif |
78 | | }; |
79 | | |
80 | | /* |
81 | | * Word Multiply/Add: computes a*b + *c, returns the low word and stores the high word in *c |
82 | | */ |
83 | | template <WordType W>
84 | 30.9M | inline constexpr auto word_madd2(W a, W b, W* c) -> W {
85 | 30.9M | #if defined(BOTAN_MP_USE_X86_64_ASM)
86 | 30.9M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // asm only for 64-bit words at run time
87 | 30.9M | asm(R"(
88 | 30.9M | mulq %[b]
89 | 30.9M | addq %[c],%[a]
90 | 30.9M | adcq $0,%[carry]
91 | 30.9M | )"
92 | 30.9M | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*c)
93 | 30.9M | : "0"(a), "1"(b), [c] "g"(*c) // NOTE(review): "g" also admits immediates; confirm a constant wider than 32 bits can never reach addq here
94 | 30.9M | : "cc");
95 | | // a (rax) now holds the low word and *c (rdx) the high word
96 | 30.9M | return a;
97 | 30.9M | }
98 | 0 | #endif
99 | | // Portable path: form the sum in the double-width type, then split it
100 | 0 | typedef typename WordInfo<W>::dword dword;
101 | 0 | const dword s = dword(a) * b + *c;
102 | 0 | *c = static_cast<W>(s >> WordInfo<W>::bits);
103 | 0 | return static_cast<W>(s);
104 | 30.9M | }
105 | | |
106 | | /*
107 | | * Word Multiply/Add: computes a*b + c + *d, returns the low word and stores the high word in *d
108 | | */
109 | | template <WordType W>
110 | 400M | inline constexpr auto word_madd3(W a, W b, W c, W* d) -> W {
111 | 400M | #if defined(BOTAN_MP_USE_X86_64_ASM)
112 | 400M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // asm only for 64-bit words at run time
113 | 400M | asm(R"(
114 | 400M | mulq %[b]
115 | 400M |
116 | 400M | addq %[c],%[a]
117 | 400M | adcq $0,%[carry]
118 | 400M |
119 | 400M | addq %[d],%[a]
120 | 400M | adcq $0,%[carry]
121 | 400M | )"
122 | 400M | : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*d)
123 | 400M | : "0"(a), "1"(b), [c] "g"(c), [d] "g"(*d) // NOTE(review): "g" also admits immediates; confirm a constant wider than 32 bits can never reach addq here
124 | 400M | : "cc");
125 | | // a (rax) now holds the low word and *d (rdx) the high word
126 | 400M | return a;
127 | 400M | }
128 | 0 | #endif
129 | | // Portable path; the sum fits in two words because (B-1)^2 + 2*(B-1) = B^2 - 1 for B = 2^bits
130 | 0 | typedef typename WordInfo<W>::dword dword;
131 | 0 | const dword s = dword(a) * b + c + *d;
132 | 0 | *d = static_cast<W>(s >> WordInfo<W>::bits);
133 | 0 | return static_cast<W>(s);
134 | 400M | }
135 | | |
136 | | #if defined(BOTAN_MP_USE_X86_64_ASM)
137 | | // ASM(x): one instruction string followed by newline+tab, so adjacent uses concatenate into a listing
138 | | #define ASM(x) x "\n\t"
139 | | // DO_8_TIMES: expands MACRO(ARG, i) for i = 0..7
140 | | #define DO_8_TIMES(MACRO, ARG) \
141 | | MACRO(ARG, 0) \
142 | | MACRO(ARG, 1) \
143 | | MACRO(ARG, 2) \
144 | | MACRO(ARG, 3) \
145 | | MACRO(ARG, 4) \
146 | | MACRO(ARG, 5) \
147 | | MACRO(ARG, 6) \
148 | | MACRO(ARG, 7)
149 | | // ADDSUB2_OP: loads y[INDEX] into %[carry], then applies OPERATION with it to x[INDEX] in memory
150 | | #define ADDSUB2_OP(OPERATION, INDEX) \
151 | | ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
152 | | ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
153 | | // ADDSUB3_OP: loads x[INDEX] into %[carry], applies OPERATION with y[INDEX], stores the result to z[INDEX]
154 | | #define ADDSUB3_OP(OPERATION, INDEX) \
155 | | ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
156 | | ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
157 | | ASM("movq %[carry], 8*" #INDEX "(%[z])")
158 | | // LINMUL_OP: WRITE_TO[INDEX] = low word of x[INDEX]*y + carry; %[carry] = high word. Uses rax and rdx.
159 | | #define LINMUL_OP(WRITE_TO, INDEX) \
160 | | ASM("movq 8*" #INDEX "(%[x]),%%rax") \
161 | | ASM("mulq %[y]") \
162 | | ASM("addq %[carry],%%rax") \
163 | | ASM("adcq $0,%%rdx") \
164 | | ASM("movq %%rdx,%[carry]") \
165 | | ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
166 | | // MULADD_OP: z[INDEX] = low word of x[INDEX]*y + carry + z[INDEX]; %[carry] = high word. First argument is unused.
167 | | #define MULADD_OP(IGNORED, INDEX) \
168 | | ASM("movq 8*" #INDEX "(%[x]),%%rax") \
169 | | ASM("mulq %[y]") \
170 | | ASM("addq %[carry],%%rax") \
171 | | ASM("adcq $0,%%rdx") \
172 | | ASM("addq 8*" #INDEX "(%[z]),%%rax") \
173 | | ASM("adcq $0,%%rdx") \
174 | | ASM("movq %%rdx,%[carry]") \
175 | | ASM("movq %%rax, 8*" #INDEX " (%[z])")
176 | | // ADD_OR_SUBTRACT: rorq moves bit 0 of %[carry] into CF, CORE_CODE chains on CF, then sbbq/negq turn the final CF into 0 or 1
177 | | #define ADD_OR_SUBTRACT(CORE_CODE) \
178 | | ASM("rorq %[carry]") \
179 | | CORE_CODE \
180 | | ASM("sbbq %[carry],%[carry]") \
181 | | ASM("negq %[carry]")
182 | | // These helper macros are #undef'd again at the end of this header
183 | | #endif
184 | | |
185 | | /*
186 | | * Word Addition: returns x + y + (*carry & 1) modulo 2^bits and stores the carry out (0 or 1) in *carry
187 | | */
188 | | template <WordType W>
189 | 456M | inline constexpr auto word_add(W x, W y, W* carry) -> W {
190 | 456M | #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
191 | 456M | if(!std::is_constant_evaluated()) { // the builtins are skipped during constant evaluation
192 | | if constexpr(std::same_as<W, unsigned int>) {
193 | | return __builtin_addc(x, y, *carry & 1, carry);
194 | 456M | } else if constexpr(std::same_as<W, unsigned long>) {
195 | 456M | return __builtin_addcl(x, y, *carry & 1, carry);
196 | | } else if constexpr(std::same_as<W, unsigned long long>) {
197 | | return __builtin_addcll(x, y, *carry & 1, carry);
198 | | }
199 | 456M | }
200 | 0 | #endif
201 | | // Portable paths: used when no builtin applies and during constant evaluation
202 | | if constexpr(WordInfo<W>::dword_is_native && use_dword_for_word_add) {
203 | | /*
204 | | TODO(Botan4) this is largely a performance hack for GCCs that don't
205 | | support __builtin_addc, if we increase the minimum supported version of
206 | | GCC to GCC 14 then we can remove this and not worry about it
207 | | */
208 | | const W cb = *carry & 1;
209 | | const auto s = typename WordInfo<W>::dword(x) + y + cb;
210 | | *carry = static_cast<W>(s >> WordInfo<W>::bits);
211 | | return static_cast<W>(s);
212 | 456M | } else {
213 | 456M | const W cb = *carry & 1;
214 | 456M | W z = x + y;
215 | 456M | W c1 = (z < x); // the sum wrapped, so x + y produced a carry
216 | 456M | z += cb;
217 | 456M | *carry = c1 | (z < cb); // at most one of the two additions can wrap
218 | 456M | return z;
219 | 456M | }
220 | 456M | }
221 | | |
222 | | /*
223 | | * Eight Word Block Addition, Two Argument: x[0..7] += y[0..7]; bit 0 of carry is the carry in, the carry out (0 or 1) is returned
224 | | */
225 | | template <WordType W>
226 | 13.6M | inline constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W {
227 | 13.6M | #if defined(BOTAN_MP_USE_X86_64_ASM)
228 | 13.6M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // asm only for 64-bit words at run time
229 | 13.6M | asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
230 | 13.6M | : [carry] "=r"(carry)
231 | 13.6M | : [x] "r"(x), [y] "r"(y), "0"(carry)
232 | 13.6M | : "cc", "memory");
233 | 13.6M | return carry;
234 | 13.6M | }
235 | 0 | #endif
236 | | // Portable path: chain word_add across the eight words, least significant first
237 | 0 | x[0] = word_add(x[0], y[0], &carry);
238 | 0 | x[1] = word_add(x[1], y[1], &carry);
239 | 0 | x[2] = word_add(x[2], y[2], &carry);
240 | 0 | x[3] = word_add(x[3], y[3], &carry);
241 | 0 | x[4] = word_add(x[4], y[4], &carry);
242 | 0 | x[5] = word_add(x[5], y[5], &carry);
243 | 0 | x[6] = word_add(x[6], y[6], &carry);
244 | 0 | x[7] = word_add(x[7], y[7], &carry);
245 | 0 | return carry;
246 | 13.6M | }
247 | | |
248 | | /*
249 | | * Eight Word Block Addition, Three Argument: z[0..7] = x[0..7] + y[0..7]; bit 0 of carry is the carry in, the carry out (0 or 1) is returned
250 | | */
251 | | template <WordType W>
252 | 5.41M | inline constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W {
253 | 5.41M | #if defined(BOTAN_MP_USE_X86_64_ASM)
254 | 5.41M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // asm only for 64-bit words at run time
255 | 5.41M | asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
256 | 5.41M | : [carry] "=r"(carry)
257 | 5.41M | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
258 | 5.41M | : "cc", "memory");
259 | 5.41M | return carry;
260 | 5.41M | }
261 | 0 | #endif
262 | | // Portable path: chain word_add across the eight words, least significant first
263 | 0 | z[0] = word_add(x[0], y[0], &carry);
264 | 0 | z[1] = word_add(x[1], y[1], &carry);
265 | 0 | z[2] = word_add(x[2], y[2], &carry);
266 | 0 | z[3] = word_add(x[3], y[3], &carry);
267 | 0 | z[4] = word_add(x[4], y[4], &carry);
268 | 0 | z[5] = word_add(x[5], y[5], &carry);
269 | 0 | z[6] = word_add(x[6], y[6], &carry);
270 | 0 | z[7] = word_add(x[7], y[7], &carry);
271 | 0 | return carry;
272 | 5.41M | }
273 | | |
274 | | /*
275 | | * Word Subtraction: returns x - y - (*carry & 1) modulo 2^bits and stores the borrow out (0 or 1) in *carry
276 | | */
277 | | template <WordType W>
278 | 990M | inline constexpr auto word_sub(W x, W y, W* carry) -> W {
279 | 990M | #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
280 | 990M | if(!std::is_constant_evaluated()) { // the builtins are skipped during constant evaluation
281 | | if constexpr(std::same_as<W, unsigned int>) {
282 | | return __builtin_subc(x, y, *carry & 1, carry);
283 | 990M | } else if constexpr(std::same_as<W, unsigned long>) {
284 | 990M | return __builtin_subcl(x, y, *carry & 1, carry);
285 | | } else if constexpr(std::same_as<W, unsigned long long>) {
286 | | return __builtin_subcll(x, y, *carry & 1, carry);
287 | | }
288 | 990M | }
289 | 0 | #endif
290 | | // Portable path: detect each borrow by checking whether the difference wrapped upwards
291 | 0 | const W cb = *carry & 1;
292 | 990M | W t0 = x - y;
293 | 990M | W c1 = (t0 > x); // the difference wrapped, so x - y borrowed
294 | 990M | W z = t0 - cb;
295 | 990M | *carry = c1 | (z > t0); // at most one of the two subtractions can wrap
296 | 990M | return z;
297 | 990M | }
298 | | |
299 | | /*
300 | | * Eight Word Block Subtraction, Two Argument: x[0..7] -= y[0..7]; bit 0 of carry is the borrow in, the borrow out (0 or 1) is returned
301 | | */
302 | | template <WordType W>
303 | 5.78M | inline constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W {
304 | 5.78M | #if defined(BOTAN_MP_USE_X86_64_ASM)
305 | 5.78M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // volatile: the stores to x[] must survive even if the caller ignores the returned borrow
306 | 5.78M | asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
307 | 5.78M | : [carry] "=r"(carry)
308 | 5.78M | : [x] "r"(x), [y] "r"(y), "0"(carry)
309 | 5.78M | : "cc", "memory");
310 | 5.78M | return carry;
311 | 5.78M | }
312 | 0 | #endif
313 | | // Portable path: chain word_sub across the eight words, least significant first
314 | 0 | x[0] = word_sub(x[0], y[0], &carry);
315 | 0 | x[1] = word_sub(x[1], y[1], &carry);
316 | 0 | x[2] = word_sub(x[2], y[2], &carry);
317 | 0 | x[3] = word_sub(x[3], y[3], &carry);
318 | 0 | x[4] = word_sub(x[4], y[4], &carry);
319 | 0 | x[5] = word_sub(x[5], y[5], &carry);
320 | 0 | x[6] = word_sub(x[6], y[6], &carry);
321 | 0 | x[7] = word_sub(x[7], y[7], &carry);
322 | 0 | return carry;
323 | 5.78M | }
324 | | |
325 | | /*
326 | | * Eight Word Block Subtraction, Two Argument, Reversed: x[0..7] = y[0..7] - x[0..7]; bit 0 of carry is the borrow in, the borrow out (0 or 1) is returned
327 | | */
328 | | template <WordType W>
329 | 2.46M | inline constexpr auto word8_sub2_rev(W x[8], const W y[8], W carry) -> W {
330 | 2.46M | #if defined(BOTAN_MP_USE_X86_64_ASM)
331 | 2.46M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // volatile: the stores to x[] must survive even if the caller ignores the returned borrow
332 | 2.46M | asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
333 | 2.46M | : [carry] "=r"(carry)
334 | 2.46M | : [x] "r"(y), [y] "r"(x), [z] "r"(x), "0"(carry) // operands swapped: asm x is our y, asm y and z are our x
335 | 2.46M | : "cc", "memory");
336 | 2.46M | return carry;
337 | 2.46M | }
338 | 0 | #endif
339 | | // Portable path: chain word_sub across the eight words with the operands reversed
340 | 0 | x[0] = word_sub(y[0], x[0], &carry);
341 | 0 | x[1] = word_sub(y[1], x[1], &carry);
342 | 0 | x[2] = word_sub(y[2], x[2], &carry);
343 | 0 | x[3] = word_sub(y[3], x[3], &carry);
344 | 0 | x[4] = word_sub(y[4], x[4], &carry);
345 | 0 | x[5] = word_sub(y[5], x[5], &carry);
346 | 0 | x[6] = word_sub(y[6], x[6], &carry);
347 | 0 | x[7] = word_sub(y[7], x[7], &carry);
348 | 0 | return carry;
349 | 2.46M | }
350 | | |
351 | | /*
352 | | * Eight Word Block Subtraction, Three Argument: z[0..7] = x[0..7] - y[0..7]; bit 0 of carry is the borrow in, the borrow out (0 or 1) is returned
353 | | */
354 | | template <WordType W>
355 | 335M | inline constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W {
356 | 335M | #if defined(BOTAN_MP_USE_X86_64_ASM)
357 | 335M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // asm only for 64-bit words at run time
358 | 335M | asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
359 | 335M | : [carry] "=r"(carry)
360 | 335M | : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
361 | 335M | : "cc", "memory");
362 | 335M | return carry;
363 | 335M | }
364 | 0 | #endif
365 | | // Portable path: chain word_sub across the eight words, least significant first
366 | 0 | z[0] = word_sub(x[0], y[0], &carry);
367 | 0 | z[1] = word_sub(x[1], y[1], &carry);
368 | 0 | z[2] = word_sub(x[2], y[2], &carry);
369 | 0 | z[3] = word_sub(x[3], y[3], &carry);
370 | 0 | z[4] = word_sub(x[4], y[4], &carry);
371 | 0 | z[5] = word_sub(x[5], y[5], &carry);
372 | 0 | z[6] = word_sub(x[6], y[6], &carry);
373 | 0 | z[7] = word_sub(x[7], y[7], &carry);
374 | 0 | return carry;
375 | 335M | }
376 | | |
377 | | /*
378 | | * Eight Word Block Linear Multiplication, Two Argument: x[0..7] = x[0..7] * y + carry; the outgoing high word is returned
379 | | */
380 | | template <WordType W>
381 | 1.25M | inline constexpr auto word8_linmul2(W x[8], W y, W carry) -> W {
382 | 1.25M | #if defined(BOTAN_MP_USE_X86_64_ASM)
383 | 1.25M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // volatile + "memory": the asm reads and writes x[] through the pointer operand
384 | 1.25M | asm volatile(DO_8_TIMES(LINMUL_OP, "x")
385 | 1.25M | : [carry] "=r"(carry)
386 | 1.25M | : [x] "r"(x), [y] "rm"(y), "0"(carry)
387 | 1.25M | : "cc", "memory", "%rax", "%rdx");
388 | 1.25M | return carry;
389 | 1.25M | }
390 | 0 | #endif
391 | | // Portable path: chain word_madd2 across the eight words, least significant first
392 | 0 | x[0] = word_madd2(x[0], y, &carry);
393 | 0 | x[1] = word_madd2(x[1], y, &carry);
394 | 0 | x[2] = word_madd2(x[2], y, &carry);
395 | 0 | x[3] = word_madd2(x[3], y, &carry);
396 | 0 | x[4] = word_madd2(x[4], y, &carry);
397 | 0 | x[5] = word_madd2(x[5], y, &carry);
398 | 0 | x[6] = word_madd2(x[6], y, &carry);
399 | 0 | x[7] = word_madd2(x[7], y, &carry);
400 | 0 | return carry;
401 | 1.25M | }
402 | | |
403 | | /*
404 | | * Eight Word Block Linear Multiplication, Three Argument: z[0..7] = x[0..7] * y + carry; the outgoing high word is returned
405 | | */
406 | | template <WordType W>
407 | 3.68M | inline constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W {
408 | 3.68M | #if defined(BOTAN_MP_USE_X86_64_ASM)
409 | 3.68M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // volatile + "memory": the asm reads x[] and writes z[] through pointer operands
410 | 3.68M | asm volatile(DO_8_TIMES(LINMUL_OP, "z")
411 | 3.68M | : [carry] "=r"(carry)
412 | 3.68M | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
413 | 3.68M | : "cc", "memory", "%rax", "%rdx");
414 | 3.68M | return carry;
415 | 3.68M | }
416 | 0 | #endif
417 | | // Portable path: chain word_madd2 across the eight words, least significant first
418 | 0 | z[0] = word_madd2(x[0], y, &carry);
419 | 0 | z[1] = word_madd2(x[1], y, &carry);
420 | 0 | z[2] = word_madd2(x[2], y, &carry);
421 | 0 | z[3] = word_madd2(x[3], y, &carry);
422 | 0 | z[4] = word_madd2(x[4], y, &carry);
423 | 0 | z[5] = word_madd2(x[5], y, &carry);
424 | 0 | z[6] = word_madd2(x[6], y, &carry);
425 | 0 | z[7] = word_madd2(x[7], y, &carry);
426 | 0 | return carry;
427 | 3.68M | }
428 | | |
429 | | /*
430 | | * Eight Word Block Multiply/Add: z[0..7] = x[0..7] * y + z[0..7] + carry; the outgoing high word is returned
431 | | */
432 | | template <WordType W>
433 | 145M | inline constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W {
434 | 145M | #if defined(BOTAN_MP_USE_X86_64_ASM)
435 | 145M | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) { // volatile + "memory": the asm reads x[] and updates z[] through pointer operands
436 | 145M | asm volatile(DO_8_TIMES(MULADD_OP, "")
437 | 145M | : [carry] "=r"(carry)
438 | 145M | : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
439 | 145M | : "cc", "memory", "%rax", "%rdx");
440 | 145M | return carry;
441 | 145M | }
442 | 0 | #endif
443 | | // Portable path: chain word_madd3 across the eight words, least significant first
444 | 0 | z[0] = word_madd3(x[0], y, z[0], &carry);
445 | 0 | z[1] = word_madd3(x[1], y, z[1], &carry);
446 | 0 | z[2] = word_madd3(x[2], y, z[2], &carry);
447 | 0 | z[3] = word_madd3(x[3], y, z[3], &carry);
448 | 0 | z[4] = word_madd3(x[4], y, z[4], &carry);
449 | 0 | z[5] = word_madd3(x[5], y, z[5], &carry);
450 | 0 | z[6] = word_madd3(x[6], y, z[6], &carry);
451 | 0 | z[7] = word_madd3(x[7], y, z[7], &carry);
452 | 0 | return carry;
453 | 145M | }
454 | | |
455 | | /**
456 | | * Helper for 3-word accumulators
457 | | *
458 | | * A number of algorithms especially Comba multiplication and
459 | | * Montgomery reduction can take advantage of wide accumulators, which
460 | | * consume inputs via addition with outputs extracted from the low
461 | | * bits.
462 | | */
463 | | template <WordType W>
464 | | class word3 final {
465 | | #if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
466 | | // Variant 1: the accumulator is one unsigned _BitInt that is 3*bits wide
467 | | public:
468 | 140M | constexpr word3() { m_w = 0; }
469 | | // Accumulate the full product x*y
470 | 4.02G | inline constexpr void mul(W x, W y) { m_w += static_cast<W3>(x) * y; }
471 | | // Accumulate twice the product x*y
472 | 630M | inline constexpr void mul_x2(W x, W y) { m_w += static_cast<W3>(x) * y * 2; }
473 | | // Accumulate the single word x
474 | 531M | inline constexpr void add(W x) { m_w += x; }
475 | | // Return the low word and shift the accumulator down by one word
476 | 1.36G | inline constexpr W extract() {
477 | 1.36G | W r = static_cast<W>(m_w);
478 | 1.36G | m_w >>= WordInfo<W>::bits;
479 | 1.36G | return r;
480 | 1.36G | }
481 | | // Compute r = (low word) * p_dash, accumulate r*p0, drop the low word, and return r
482 | 265M | inline constexpr W monty_step(W p0, W p_dash) {
483 | 265M | const W w0 = static_cast<W>(m_w);
484 | 265M | const W r = w0 * p_dash;
485 | 265M | mul(r, p0);
486 | 265M | m_w >>= WordInfo<W>::bits;
487 | 265M | return r;
488 | 265M | }
489 | | // Shortcut step: shift down by one word, add the old low word back in, and return that old low word
490 | | inline constexpr W monty_step_pdash1() {
491 | | const W r = static_cast<W>(m_w);
492 | | m_w >>= WordInfo<W>::bits;
493 | | m_w += static_cast<W3>(r);
494 | | return r;
495 | | }
496 | | // W3 is the 3*bits-wide backing type
497 | | private:
498 | | __extension__ typedef unsigned _BitInt(WordInfo<W>::bits * 3) W3;
499 | | W3 m_w;
500 | | #else
501 | | // Variant 2: the accumulator is three separate words, m_w0 being the least significant
502 | | public:
503 | | constexpr word3() {
504 | | m_w2 = 0;
505 | | m_w1 = 0;
506 | | m_w0 = 0;
507 | | }
508 | | // Accumulate the full product x*y
509 | | inline constexpr void mul(W x, W y) {
510 | | #if defined(BOTAN_MP_USE_X86_64_ASM)
511 | | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
512 | | W z0 = 0, z1 = 0;
513 | | // z1:z0 = x * y (mulq leaves the product in rdx:rax)
514 | | asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
515 | | // Add z1:z0 into w1:w0 and propagate the carry into w2
516 | | asm(R"(
517 | | addq %[z0],%[w0]
518 | | adcq %[z1],%[w1]
519 | | adcq $0,%[w2]
520 | | )"
521 | | : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
522 | | : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
523 | | : "cc");
524 | | return;
525 | | }
526 | | #endif
527 | | // Portable path: fold m_w0 into the double-width product, then ripple the high word upwards
528 | | typedef typename WordInfo<W>::dword dword;
529 | | const dword s = dword(x) * y + m_w0;
530 | | W carry = static_cast<W>(s >> WordInfo<W>::bits);
531 | | m_w0 = static_cast<W>(s);
532 | | m_w1 += carry;
533 | | m_w2 += (m_w1 < carry);
534 | | }
535 | | // Accumulate twice the product x*y
536 | | inline constexpr void mul_x2(W x, W y) {
537 | | #if defined(BOTAN_MP_USE_X86_64_ASM)
538 | | if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
539 | | W z0 = 0, z1 = 0;
540 | | // z1:z0 = x * y (mulq leaves the product in rdx:rax)
541 | | asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
542 | | // Add z1:z0 into the accumulator twice rather than doubling the product first
543 | | asm(R"(
544 | | addq %[z0],%[w0]
545 | | adcq %[z1],%[w1]
546 | | adcq $0,%[w2]
547 | |
548 | | addq %[z0],%[w0]
549 | | adcq %[z1],%[w1]
550 | | adcq $0,%[w2]
551 | | )"
552 | | : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
553 | | : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
554 | | : "cc");
555 | | return;
556 | | }
557 | | #endif
558 | | // Portable path: compute the product once, reusing x and y to hold its low and high words
559 | | W carry = 0;
560 | | x = word_madd2(x, y, &carry);
561 | | y = carry;
562 | | // First addition of the product (y:x) into the accumulator
563 | | carry = 0;
564 | | m_w0 = word_add(m_w0, x, &carry);
565 | | m_w1 = word_add(m_w1, y, &carry);
566 | | m_w2 += carry;
567 | | // Second addition of the same product
568 | | carry = 0;
569 | | m_w0 = word_add(m_w0, x, &carry);
570 | | m_w1 = word_add(m_w1, y, &carry);
571 | | m_w2 += carry;
572 | | }
573 | | // Accumulate the single word x
574 | | inline constexpr void add(W x) {
575 | | constexpr W z = 0;
576 | | // Adding z (zero) to m_w1 just propagates the carry from the low word
577 | | W carry = 0;
578 | | m_w0 = word_add(m_w0, x, &carry);
579 | | m_w1 = word_add(m_w1, z, &carry);
580 | | m_w2 += carry;
581 | | }
582 | | // Return the low word and shift the accumulator down by one word
583 | | inline constexpr W extract() {
584 | | W r = m_w0;
585 | | m_w0 = m_w1;
586 | | m_w1 = m_w2;
587 | | m_w2 = 0;
588 | | return r;
589 | | }
590 | | // Compute r = m_w0 * p_dash, accumulate r*p0, drop the low word, and return r
591 | | inline constexpr W monty_step(W p0, W p_dash) {
592 | | W r = m_w0 * p_dash;
593 | | mul(r, p0);
594 | | m_w0 = m_w1;
595 | | m_w1 = m_w2;
596 | | m_w2 = 0;
597 | | return r;
598 | | }
599 | | // Shortcut step: shift down by one word, add the old low word back in, and return that old low word
600 | | inline constexpr W monty_step_pdash1() {
601 | | // If p_dash == 1 then p[0] = -1 and everything simplifies
602 | | const W r = m_w0;
603 | | m_w0 += m_w1;
604 | | m_w1 = m_w2 + (m_w0 < m_w1); // m_w1 is still the old middle word here, so the compare detects the wrap
605 | | m_w2 = 0;
606 | | return r;
607 | | }
608 | | // Accumulator words, least significant first
609 | | private:
610 | | W m_w0, m_w1, m_w2;
611 | | #endif
612 | | };
613 | | |
614 | | #if defined(ASM) // remove the asm helper macros defined earlier in this header
615 | | #undef ASM
616 | | #undef DO_8_TIMES
617 | | #undef ADD_OR_SUBTRACT
618 | | #undef ADDSUB2_OP
619 | | #undef ADDSUB3_OP
620 | | #undef LINMUL_OP
621 | | #undef MULADD_OP
622 | | #endif
623 | | |
624 | | } // namespace Botan |
625 | | |
626 | | #endif |