/src/libgcrypt/mpi/ec-inline.h
/* ec-inline.h - EC inline addition/subtraction helpers
 * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef GCRY_EC_INLINE_H
#define GCRY_EC_INLINE_H

#include "mpi-internal.h"
#include "longlong.h"
#include "ec-context.h"
#include "../cipher/bithelp.h"
#include "../cipher/bufhelp.h"


#if BYTES_PER_MPI_LIMB == 8

/* 64-bit limb definitions for 64-bit architectures. */

#define LIMBS_PER_LIMB64 1
#define LOAD64(x, pos) ((x)[pos])
#define STORE64(x, pos, v) ((x)[pos] = (mpi_limb_t)(v))
#define LIMB_TO64(v) ((mpi_limb_t)(v))
#define LIMB_FROM64(v) ((mpi_limb_t)(v))
#define HIBIT_LIMB64(v) ((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 1))
#define HI32_LIMB64(v) (u32)((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 32))
#define LO32_LIMB64(v) ((u32)(v))
#define LIMB64_C(hi, lo) (((mpi_limb_t)(u32)(hi) << 32) | (u32)(lo))
#define MASK_AND64(mask, val) ((mask) & (val))
#define LIMB_OR64(val1, val2) ((val1) | (val2))
#define STORE64_COND(x, pos, mask1, val1, mask2, val2) \
  ((x)[(pos)] = ((mask1) & (val1)) | ((mask2) & (val2)))

typedef mpi_limb_t mpi_limb64_t;

static inline u32
LOAD32(mpi_ptr_t x, unsigned int pos)
{
  unsigned int shr = (pos % 2) * 32;
  return (x[pos / 2] >> shr);
}

static inline mpi_limb64_t
LIMB64_HILO(u32 hi, u32 lo)
{
  mpi_limb64_t v = hi;
  return (v << 32) | lo;
}
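
/* Illustrative sketch (not part of the original header; the function
 * name is made up): how the helpers above compose on a 64-bit limb
 * build, where mpi_limb64_t is plain mpi_limb_t.  */
#if 0 /* example only */
static inline void
example_limb64_roundtrip (mpi_ptr_t wp)
{
  mpi_limb64_t v = LIMB64_HILO (1, 2); /* 0x0000000100000002 */

  STORE64 (wp, 0, v);       /* wp[0] = v */
  v = LOAD64 (wp, 0);       /* read it back */

  /* Split into 32-bit halves again: hi = 1, lo = 2.  */
  (void)HI32_LIMB64 (v);
  (void)LO32_LIMB64 (v);
}
#endif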


/* x86-64 addition/subtraction helpers. */
#if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4

#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("addq %8, %2\n" \
           "adcq %7, %1\n" \
           "adcq %6, %0\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B2)), \
             "1" ((mpi_limb_t)(B1)), \
             "2" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("subq %8, %2\n" \
           "sbbq %7, %1\n" \
           "sbbq %6, %0\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B2)), \
             "1" ((mpi_limb_t)(B1)), \
             "2" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("addq %11, %3\n" \
           "adcq %10, %2\n" \
           "adcq %9, %1\n" \
           "adcq %8, %0\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C3)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("subq %11, %3\n" \
           "sbbq %10, %2\n" \
           "sbbq %9, %1\n" \
           "sbbq %8, %0\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C3)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("addq %14, %4\n" \
           "adcq %13, %3\n" \
           "adcq %12, %2\n" \
           "adcq %11, %1\n" \
           "adcq %10, %0\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B4)), \
             "1" ((mpi_limb_t)(B3)), \
             "2" ((mpi_limb_t)(B2)), \
             "3" ((mpi_limb_t)(B1)), \
             "4" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C4)), \
             "rme" ((mpi_limb_t)(C3)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("subq %14, %4\n" \
           "sbbq %13, %3\n" \
           "sbbq %12, %2\n" \
           "sbbq %11, %1\n" \
           "sbbq %10, %0\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B4)), \
             "1" ((mpi_limb_t)(B3)), \
             "2" ((mpi_limb_t)(B2)), \
             "3" ((mpi_limb_t)(B1)), \
             "4" ((mpi_limb_t)(B0)), \
             "rme" ((mpi_limb_t)(C4)), \
             "rme" ((mpi_limb_t)(C3)), \
             "rme" ((mpi_limb_t)(C2)), \
             "rme" ((mpi_limb_t)(C1)), \
             "rme" ((mpi_limb_t)(C0)) \
           : "cc")

#endif /* __x86_64__ */
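
/* Illustrative sketch (not part of the original header; the function
 * name is made up): the ADDn/SUBn helpers take limbs most-significant
 * first and propagate the carry through the whole chain, e.g. a
 * 256-bit addition on a 64-bit limb build.  Any carry out of the top
 * limb is discarded.  */
#if 0 /* example only */
static inline void
example_add256 (mpi_limb64_t w[4], const mpi_limb64_t u[4],
                const mpi_limb64_t v[4])
{
  mpi_limb64_t w3, w2, w1, w0;

  /* (w3:w2:w1:w0) = u + v mod 2^256 */
  ADD4_LIMB64 (w3, w2, w1, w0,
               u[3], u[2], u[1], u[0],
               v[3], v[2], v[1], v[0]);
  w[3] = w3; w[2] = w2; w[1] = w1; w[0] = w0;
}
#endif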


/* ARM AArch64 addition/subtraction helpers. */
#if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4

#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("adds %2, %5, %8\n" \
           "adcs %1, %4, %7\n" \
           "adc %0, %3, %6\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("subs %2, %5, %8\n" \
           "sbcs %1, %4, %7\n" \
           "sbc %0, %3, %6\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("adds %3, %7, %11\n" \
           "adcs %2, %6, %10\n" \
           "adcs %1, %5, %9\n" \
           "adc %0, %4, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("subs %3, %7, %11\n" \
           "sbcs %2, %6, %10\n" \
           "sbcs %1, %5, %9\n" \
           "sbc %0, %4, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("adds %4, %9, %14\n" \
           "adcs %3, %8, %13\n" \
           "adcs %2, %7, %12\n" \
           "adcs %1, %6, %11\n" \
           "adc %0, %5, %10\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B4)), \
             "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("subs %4, %9, %14\n" \
           "sbcs %3, %8, %13\n" \
           "sbcs %2, %7, %12\n" \
           "sbcs %1, %6, %11\n" \
           "sbc %0, %5, %10\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B4)), \
             "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#endif /* __aarch64__ */


/* PowerPC64 addition/subtraction helpers. */
#if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4

#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("addc %2, %8, %5\n" \
           "adde %1, %7, %4\n" \
           "adde %0, %6, %3\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc", "r0")

#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("subfc %2, %8, %5\n" \
           "subfe %1, %7, %4\n" \
           "subfe %0, %6, %3\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc", "r0")

#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("addc %3, %11, %7\n" \
           "adde %2, %10, %6\n" \
           "adde %1, %9, %5\n" \
           "adde %0, %8, %4\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("subfc %3, %11, %7\n" \
           "subfe %2, %10, %6\n" \
           "subfe %1, %9, %5\n" \
           "subfe %0, %8, %4\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("addc %4, %14, %9\n" \
           "adde %3, %13, %8\n" \
           "adde %2, %12, %7\n" \
           "adde %1, %11, %6\n" \
           "adde %0, %10, %5\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B4)), \
             "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("subfc %4, %14, %9\n" \
           "subfe %3, %13, %8\n" \
           "subfe %2, %12, %7\n" \
           "subfe %1, %11, %6\n" \
           "subfe %0, %10, %5\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "r" ((mpi_limb_t)(B4)), \
             "r" ((mpi_limb_t)(B3)), \
             "r" ((mpi_limb_t)(B2)), \
             "r" ((mpi_limb_t)(B1)), \
             "r" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#endif /* __powerpc__ */


/* s390x/zSeries addition/subtraction helpers. */
#if defined (__s390x__) && defined(HAVE_CPU_ARCH_S390X) && __GNUC__ >= 4

#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("algr %2, %8\n" \
           "alcgr %1, %7\n" \
           "alcgr %0, %6\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B2)), \
             "1" ((mpi_limb_t)(B1)), \
             "2" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  __asm__ ("slgr %2, %8\n" \
           "slbgr %1, %7\n" \
           "slbgr %0, %6\n" \
           : "=r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B2)), \
             "1" ((mpi_limb_t)(B1)), \
             "2" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("algr %3, %11\n" \
           "alcgr %2, %10\n" \
           "alcgr %1, %9\n" \
           "alcgr %0, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("slgr %3, %11\n" \
           "slbgr %2, %10\n" \
           "slbgr %1, %9\n" \
           "slbgr %0, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("algr %4, %14\n" \
           "alcgr %3, %13\n" \
           "alcgr %2, %12\n" \
           "alcgr %1, %11\n" \
           "alcgr %0, %10\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B4)), \
             "1" ((mpi_limb_t)(B3)), \
             "2" ((mpi_limb_t)(B2)), \
             "3" ((mpi_limb_t)(B1)), \
             "4" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  __asm__ ("slgr %4, %14\n" \
           "slbgr %3, %13\n" \
           "slbgr %2, %12\n" \
           "slbgr %1, %11\n" \
           "slbgr %0, %10\n" \
           : "=r" (A4), \
             "=&r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B4)), \
             "1" ((mpi_limb_t)(B3)), \
             "2" ((mpi_limb_t)(B2)), \
             "3" ((mpi_limb_t)(B1)), \
             "4" ((mpi_limb_t)(B0)), \
             "r" ((mpi_limb_t)(C4)), \
             "r" ((mpi_limb_t)(C3)), \
             "r" ((mpi_limb_t)(C2)), \
             "r" ((mpi_limb_t)(C1)), \
             "r" ((mpi_limb_t)(C0)) \
           : "cc")

#endif /* __s390x__ */


/* Common 64-bit arch addition/subtraction macros. */

#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \
  add_ssaaaa(A1, A0, B1, B0, C1, C0)

#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \
  sub_ddmmss(A1, A0, B1, B0, C1, C0)
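
/* Illustrative note (not part of the original header): with 64-bit
 * limbs these map directly to the longlong.h primitives; the pair
 * (A1, A0) receives the 128-bit sum/difference of (B1:B0) and (C1:C0)
 * modulo 2^128.  The function name below is made up.  */
#if 0 /* example only */
static inline void
example_add128 (mpi_limb64_t *rh, mpi_limb64_t *rl,
                mpi_limb64_t bh, mpi_limb64_t bl,
                mpi_limb64_t ch, mpi_limb64_t cl)
{
  mpi_limb64_t h, l;
  ADD2_LIMB64 (h, l, bh, bl, ch, cl); /* (h:l) = (bh:bl) + (ch:cl) */
  *rh = h;
  *rl = l;
}
#endif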

#endif /* BYTES_PER_MPI_LIMB == 8 */


#if BYTES_PER_MPI_LIMB == 4

/* 64-bit limb definitions for 32-bit architectures. */

#define LIMBS_PER_LIMB64 2
#define LIMB_FROM64(v) ((v).lo)
#define HIBIT_LIMB64(v) ((v).hi >> (BITS_PER_MPI_LIMB - 1))
#define HI32_LIMB64(v) ((v).hi)
#define LO32_LIMB64(v) ((v).lo)
#define LOAD32(x, pos) ((x)[pos])
#define LIMB64_C(hi, lo) { (lo), (hi) }

typedef struct
{
  mpi_limb_t lo;
  mpi_limb_t hi;
} mpi_limb64_t;

static inline mpi_limb64_t
LOAD64(const mpi_ptr_t x, unsigned int pos)
{
  mpi_limb64_t v;
  v.lo = x[pos * 2 + 0];
  v.hi = x[pos * 2 + 1];
  return v;
}

static inline void
STORE64(mpi_ptr_t x, unsigned int pos, mpi_limb64_t v)
{
  x[pos * 2 + 0] = v.lo;
  x[pos * 2 + 1] = v.hi;
}

static inline mpi_limb64_t
MASK_AND64(mpi_limb_t mask, mpi_limb64_t val)
{
  val.lo &= mask;
  val.hi &= mask;
  return val;
}

static inline mpi_limb64_t
LIMB_OR64(mpi_limb64_t val1, mpi_limb64_t val2)
{
  val1.lo |= val2.lo;
  val1.hi |= val2.hi;
  return val1;
}

static inline void
STORE64_COND(mpi_ptr_t x, unsigned int pos, mpi_limb_t mask1,
             mpi_limb64_t val1, mpi_limb_t mask2, mpi_limb64_t val2)
{
  x[pos * 2 + 0] = (mask1 & val1.lo) | (mask2 & val2.lo);
  x[pos * 2 + 1] = (mask1 & val1.hi) | (mask2 & val2.hi);
}

static inline mpi_limb64_t
LIMB_TO64(mpi_limb_t x)
{
  mpi_limb64_t v;
  v.lo = x;
  v.hi = 0;
  return v;
}

static inline mpi_limb64_t
LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo)
{
  mpi_limb64_t v;
  v.lo = lo;
  v.hi = hi;
  return v;
}
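
/* Illustrative sketch (not part of the original header; the function
 * name is made up): with 32-bit limbs a 64-bit "limb" is the struct
 * above, stored in little-endian limb order, so LOAD64(x, pos) reads
 * x[2*pos] into .lo and x[2*pos+1] into .hi.  */
#if 0 /* example only */
static inline void
example_limb64_on32 (mpi_ptr_t wp)
{
  mpi_limb64_t v = LIMB64_HILO (1, 2); /* v.hi = 1, v.lo = 2 */

  STORE64 (wp, 0, v);                  /* wp[0] = 2, wp[1] = 1 */
  v = LOAD64 (wp, 0);
  v = MASK_AND64 (~(mpi_limb_t)0, v);  /* mask applies to both halves */
  (void)v;
}
#endif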


/* i386 addition/subtraction helpers. */
#if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4

#define ADD2_LIMB32_CARRY_OUT(carry, a1, a0, b1, b0, c1, c0) \
  __asm__ ("addl %7, %2\n" \
           "adcl %6, %1\n" \
           "sbbl %0, %0\n" \
           : "=r" (carry), \
             "=&r" (a1), \
             "=&r" (a0) \
           : "0" ((mpi_limb_t)(0)), \
             "1" ((mpi_limb_t)(b1)), \
             "2" ((mpi_limb_t)(b0)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define ADD2_LIMB32_CARRY_IN_OUT(a1, a0, b1, b0, c1, c0, carry) \
  __asm__ ("addl $1, %0\n" \
           "adcl %7, %2\n" \
           "adcl %6, %1\n" \
           "sbbl %0, %0\n" \
           : "=r" (carry), \
             "=&r" (a1), \
             "=&r" (a0) \
           : "0" ((mpi_limb_t)(carry)), \
             "1" ((mpi_limb_t)(b1)), \
             "2" ((mpi_limb_t)(b0)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define ADD2_LIMB32_CARRY_IN(a1, a0, b1, b0, c1, c0, carry) \
  __asm__ ("addl $1, %2\n" \
           "adcl %7, %1\n" \
           "adcl %6, %0\n" \
           : "=r" (a1), \
             "=&r" (a0), \
             "=&g" (carry) \
           : "0" ((mpi_limb_t)(b1)), \
             "1" ((mpi_limb_t)(b0)), \
             "2" ((mpi_limb_t)(carry)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define ADD4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) do { \
    mpi_limb_t __carry4_32; \
    ADD2_LIMB32_CARRY_OUT(__carry4_32, a1, a0, b1, b0, c1, c0); \
    ADD2_LIMB32_CARRY_IN(a3, a2, b3, b2, c3, c2, __carry4_32); \
  } while (0)

#define ADD6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \
                    c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry6_32; \
    ADD2_LIMB32_CARRY_OUT(__carry6_32, a1, a0, b1, b0, c1, c0); \
    ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry6_32); \
    ADD2_LIMB32_CARRY_IN(a5, a4, b5, b4, c5, c4, __carry6_32); \
  } while (0)

#define ADD8_LIMB32(a7, a6, a5, a4, a3, a2, a1, a0, \
                    b7, b6, b5, b4, b3, b2, b1, b0, \
                    c7, c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry8_32; \
    ADD2_LIMB32_CARRY_OUT(__carry8_32, a1, a0, b1, b0, c1, c0); \
    ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry8_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry8_32); \
    ADD2_LIMB32_CARRY_IN(a7, a6, b7, b6, c7, c6, __carry8_32); \
  } while (0)

#define ADD10_LIMB32(a9, a8, a7, a6, a5, a4, a3, a2, a1, a0, \
                     b9, b8, b7, b6, b5, b4, b3, b2, b1, b0, \
                     c9, c8, c7, c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry10_32; \
    ADD2_LIMB32_CARRY_OUT(__carry10_32, a1, a0, b1, b0, c1, c0); \
    ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry10_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry10_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry10_32); \
    ADD2_LIMB32_CARRY_IN(a9, a8, b9, b8, c9, c8, __carry10_32); \
  } while (0)

#define ADD14_LIMB32(a13, a12, a11, a10, a9, a8, a7, \
                     a6, a5, a4, a3, a2, a1, a0, \
                     b13, b12, b11, b10, b9, b8, b7, \
                     b6, b5, b4, b3, b2, b1, b0, \
                     c13, c12, c11, c10, c9, c8, c7, \
                     c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry14_32; \
    ADD2_LIMB32_CARRY_OUT(__carry14_32, a1, a0, b1, b0, c1, c0); \
    ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry14_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry14_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry14_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a9, a8, b9, b8, c9, c8, __carry14_32); \
    ADD2_LIMB32_CARRY_IN_OUT(a11, a10, b11, b10, c11, c10, __carry14_32); \
    ADD2_LIMB32_CARRY_IN(a13, a12, b13, b12, c13, c12, __carry14_32); \
  } while (0)

#define SUB2_LIMB32_CARRY_OUT(carry, a1, a0, b1, b0, c1, c0) \
  __asm__ ("subl %7, %2\n" \
           "sbbl %6, %1\n" \
           "sbbl %0, %0\n" \
           : "=r" (carry), \
             "=&r" (a1), \
             "=&r" (a0) \
           : "0" ((mpi_limb_t)(0)), \
             "1" ((mpi_limb_t)(b1)), \
             "2" ((mpi_limb_t)(b0)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define SUB2_LIMB32_CARRY_IN_OUT(a1, a0, b1, b0, c1, c0, carry) \
  __asm__ ("addl $1, %0\n" \
           "sbbl %7, %2\n" \
           "sbbl %6, %1\n" \
           "sbbl %0, %0\n" \
           : "=r" (carry), \
             "=&r" (a1), \
             "=&r" (a0) \
           : "0" ((mpi_limb_t)(carry)), \
             "1" ((mpi_limb_t)(b1)), \
             "2" ((mpi_limb_t)(b0)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define SUB2_LIMB32_CARRY_IN(a1, a0, b1, b0, c1, c0, carry) \
  __asm__ ("addl $1, %2\n" \
           "sbbl %7, %1\n" \
           "sbbl %6, %0\n" \
           : "=r" (a1), \
             "=&r" (a0), \
             "=&g" (carry) \
           : "0" ((mpi_limb_t)(b1)), \
             "1" ((mpi_limb_t)(b0)), \
             "2" ((mpi_limb_t)(carry)), \
             "re" ((mpi_limb_t)(c1)), \
             "re" ((mpi_limb_t)(c0)) \
           : "cc")

#define SUB4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) do { \
    mpi_limb_t __carry4_32; \
    SUB2_LIMB32_CARRY_OUT(__carry4_32, a1, a0, b1, b0, c1, c0); \
    SUB2_LIMB32_CARRY_IN(a3, a2, b3, b2, c3, c2, __carry4_32); \
  } while (0)

#define SUB6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \
                    c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry6_32; \
    SUB2_LIMB32_CARRY_OUT(__carry6_32, a1, a0, b1, b0, c1, c0); \
    SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry6_32); \
    SUB2_LIMB32_CARRY_IN(a5, a4, b5, b4, c5, c4, __carry6_32); \
  } while (0)

#define SUB8_LIMB32(a7, a6, a5, a4, a3, a2, a1, a0, \
                    b7, b6, b5, b4, b3, b2, b1, b0, \
                    c7, c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry8_32; \
    SUB2_LIMB32_CARRY_OUT(__carry8_32, a1, a0, b1, b0, c1, c0); \
    SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry8_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry8_32); \
    SUB2_LIMB32_CARRY_IN(a7, a6, b7, b6, c7, c6, __carry8_32); \
  } while (0)

#define SUB10_LIMB32(a9, a8, a7, a6, a5, a4, a3, a2, a1, a0, \
                     b9, b8, b7, b6, b5, b4, b3, b2, b1, b0, \
                     c9, c8, c7, c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry10_32; \
    SUB2_LIMB32_CARRY_OUT(__carry10_32, a1, a0, b1, b0, c1, c0); \
    SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry10_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry10_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry10_32); \
    SUB2_LIMB32_CARRY_IN(a9, a8, b9, b8, c9, c8, __carry10_32); \
  } while (0)

#define SUB14_LIMB32(a13, a12, a11, a10, a9, a8, a7, \
                     a6, a5, a4, a3, a2, a1, a0, \
                     b13, b12, b11, b10, b9, b8, b7, \
                     b6, b5, b4, b3, b2, b1, b0, \
                     c13, c12, c11, c10, c9, c8, c7, \
                     c6, c5, c4, c3, c2, c1, c0) do { \
    mpi_limb_t __carry14_32; \
    SUB2_LIMB32_CARRY_OUT(__carry14_32, a1, a0, b1, b0, c1, c0); \
    SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry14_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry14_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry14_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a9, a8, b9, b8, c9, c8, __carry14_32); \
    SUB2_LIMB32_CARRY_IN_OUT(a11, a10, b11, b10, c11, c10, __carry14_32); \
    SUB2_LIMB32_CARRY_IN(a13, a12, b13, b12, c13, c12, __carry14_32); \
  } while (0)

#endif /* __i386__ */
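
/* Note on the carry chaining above (editorial, not in the original
 * header): the i386 helpers thread the carry/borrow through a general
 * register because the flags do not survive between separate asm
 * statements.  "sbbl %0, %0" materializes the flag as an all-zeros or
 * all-ones mask, and the "addl $1, %0" that opens the next link
 * regenerates the flag: mask + 1 wraps to 0 and sets CF exactly when
 * the mask was all-ones.  */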


/* ARM addition/subtraction helpers. */
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS

#define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("adds %3, %3, %11\n" \
           "adcs %2, %2, %10\n" \
           "adcs %1, %1, %9\n" \
           "adc %0, %0, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "Ir" ((mpi_limb_t)(C3)), \
             "Ir" ((mpi_limb_t)(C2)), \
             "Ir" ((mpi_limb_t)(C1)), \
             "Ir" ((mpi_limb_t)(C0)) \
           : "cc")

#define ADD6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
                    C5, C4, C3, C2, C1, C0) do { \
    mpi_limb_t __carry6_32; \
    __asm__ ("adds %3, %3, %10\n" \
             "adcs %2, %2, %9\n" \
             "adcs %1, %1, %8\n" \
             "adc %0, %0, %0\n" \
             : "=r" (__carry6_32), \
               "=&r" (A2), \
               "=&r" (A1), \
               "=&r" (A0) \
             : "0" ((mpi_limb_t)(0)), \
               "1" ((mpi_limb_t)(B2)), \
               "2" ((mpi_limb_t)(B1)), \
               "3" ((mpi_limb_t)(B0)), \
               "Ir" ((mpi_limb_t)(C2)), \
               "Ir" ((mpi_limb_t)(C1)), \
               "Ir" ((mpi_limb_t)(C0)) \
             : "cc"); \
    ADD4_LIMB32(A5, A4, A3, __carry6_32, B5, B4, B3, __carry6_32, \
                C5, C4, C3, 0xffffffffU); \
  } while (0)
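
/* Note on ADD6_LIMB32 above (editorial, not in the original header):
 * the first asm block leaves the carry of the low three words in
 * __carry6_32 as 0 or 1.  Feeding it into ADD4_LIMB32 together with
 * the constant 0xffffffffU re-creates the carry flag in the low word:
 * 1 + 0xffffffff wraps to 0 with carry out, while 0 + 0xffffffff gives
 * no carry, so the upper words see exactly the carry of the lower
 * half.  The word written back into __carry6_32 is a don't-care.
 * SUB6_LIMB32 below plays the same trick with a borrow mask.  */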

#define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("subs %3, %3, %11\n" \
           "sbcs %2, %2, %10\n" \
           "sbcs %1, %1, %9\n" \
           "sbc %0, %0, %8\n" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "0" ((mpi_limb_t)(B3)), \
             "1" ((mpi_limb_t)(B2)), \
             "2" ((mpi_limb_t)(B1)), \
             "3" ((mpi_limb_t)(B0)), \
             "Ir" ((mpi_limb_t)(C3)), \
             "Ir" ((mpi_limb_t)(C2)), \
             "Ir" ((mpi_limb_t)(C1)), \
             "Ir" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
                    C5, C4, C3, C2, C1, C0) do { \
    mpi_limb_t __borrow6_32; \
    __asm__ ("subs %3, %3, %9\n" \
             "sbcs %2, %2, %8\n" \
             "sbcs %1, %1, %7\n" \
             "sbc %0, %0, %0\n" \
             : "=r" (__borrow6_32), \
               "=&r" (A2), \
               "=&r" (A1), \
               "=&r" (A0) \
             : "1" ((mpi_limb_t)(B2)), \
               "2" ((mpi_limb_t)(B1)), \
               "3" ((mpi_limb_t)(B0)), \
               "Ir" ((mpi_limb_t)(C2)), \
               "Ir" ((mpi_limb_t)(C1)), \
               "Ir" ((mpi_limb_t)(C0)) \
             : "cc"); \
    SUB4_LIMB32(A5, A4, A3, __borrow6_32, B5, B4, B3, 0, \
                C5, C4, C3, -__borrow6_32); \
  } while (0)

#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */

#if defined (__hppa) && __GNUC__ >= 4
#define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("add %7,%11,%3\n\t" \
           "addc %6,%10,%2\n\t" \
           "addc %5,%9,%1\n\t" \
           "addc %4,%8,%0" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "rM" ((mpi_limb_t)(B3)), \
             "rM" ((mpi_limb_t)(B2)), \
             "rM" ((mpi_limb_t)(B1)), \
             "rM" ((mpi_limb_t)(B0)), \
             "rM" ((mpi_limb_t)(C3)), \
             "rM" ((mpi_limb_t)(C2)), \
             "rM" ((mpi_limb_t)(C1)), \
             "rM" ((mpi_limb_t)(C0)) \
           : "cc")

#define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  __asm__ ("sub %7,%11,%3\n\t" \
           "subb %6,%10,%2\n\t" \
           "subb %5,%9,%1\n\t" \
           "subb %4,%8,%0\n\t" \
           : "=r" (A3), \
             "=&r" (A2), \
             "=&r" (A1), \
             "=&r" (A0) \
           : "rM" ((mpi_limb_t)(B3)), \
             "rM" ((mpi_limb_t)(B2)), \
             "rM" ((mpi_limb_t)(B1)), \
             "rM" ((mpi_limb_t)(B0)), \
             "rM" ((mpi_limb_t)(C3)), \
             "rM" ((mpi_limb_t)(C2)), \
             "rM" ((mpi_limb_t)(C1)), \
             "rM" ((mpi_limb_t)(C0)) \
           : "cc")

#endif /* __hppa */

/* Common 32-bit arch addition/subtraction macros. */

#if defined(ADD4_LIMB32)
/* A[0..1] = B[0..1] + C[0..1] */
#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \
  ADD4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \
              B1.hi, B1.lo, B0.hi, B0.lo, \
              C1.hi, C1.lo, C0.hi, C0.lo)
#else
/* A[0..1] = B[0..1] + C[0..1] */
#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) do { \
    mpi_limb_t __carry2_0, __carry2_1; \
    add_ssaaaa(__carry2_0, A0.lo, 0, B0.lo, 0, C0.lo); \
    add_ssaaaa(__carry2_1, A0.hi, 0, B0.hi, 0, C0.hi); \
    add_ssaaaa(__carry2_1, A0.hi, __carry2_1, A0.hi, 0, __carry2_0); \
    add_ssaaaa(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \
    add_ssaaaa(A1.hi, A1.lo, A1.hi, A1.lo, 0, __carry2_1); \
  } while (0)
#endif
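
/* Note on the fallback above (editorial, not in the original header):
 * without a suitable asm helper, the 64-bit carry chain is built from
 * longlong.h's 32-bit add_ssaaaa: the low and high halves of the low
 * limb are added with explicit carry words, the carries are folded
 * together, and the combined carry is finally added into the high
 * limb.  The SUB2_LIMB64 fallback further below mirrors this with
 * sub_ddmmss and negated borrow words.  */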

#if defined(ADD6_LIMB32)
/* A[0..2] = B[0..2] + C[0..2] */
#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  ADD6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
              B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
              C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#endif

#if defined(ADD8_LIMB32)
/* A[0..3] = B[0..3] + C[0..3] */
#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  ADD8_LIMB32(A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
              B3.hi, B3.lo, B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
              C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#elif defined(ADD6_LIMB32)
/* A[0..3] = B[0..3] + C[0..3] */
#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
    mpi_limb_t __carry4; \
    ADD6_LIMB32(__carry4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
                0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
                0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \
    ADD4_LIMB32(A3.hi, A3.lo, A2.hi, __carry4, \
                B3.hi, B3.lo, B2.hi, __carry4, \
                C3.hi, C3.lo, C2.hi, 0xffffffffU); \
  } while (0)
#endif

#if defined(ADD10_LIMB32)
/* A[0..4] = B[0..4] + C[0..4] */
#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  ADD10_LIMB32(A4.hi, A4.lo, A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, \
               A0.hi, A0.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \
               B1.hi, B1.lo, B0.hi, B0.lo, C4.hi, C4.lo, C3.hi, C3.lo, \
               C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#endif

#if defined(ADD14_LIMB32)
/* A[0..6] = B[0..6] + C[0..6] */
#define ADD7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
                    C6, C5, C4, C3, C2, C1, C0) \
  ADD14_LIMB32(A6.hi, A6.lo, A5.hi, A5.lo, A4.hi, A4.lo, A3.hi, A3.lo, \
               A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, B6.hi, B6.lo, \
               B5.hi, B5.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \
               B1.hi, B1.lo, B0.hi, B0.lo, C6.hi, C6.lo, C5.hi, C5.lo, \
               C4.hi, C4.lo, C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, \
               C0.hi, C0.lo)
#endif

#if defined(SUB4_LIMB32)
/* A[0..1] = B[0..1] - C[0..1] */
#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \
  SUB4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \
              B1.hi, B1.lo, B0.hi, B0.lo, \
              C1.hi, C1.lo, C0.hi, C0.lo)
#else
/* A[0..1] = B[0..1] - C[0..1] */
#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) do { \
    mpi_limb_t __borrow2_0, __borrow2_1; \
    sub_ddmmss(__borrow2_0, A0.lo, 0, B0.lo, 0, C0.lo); \
    sub_ddmmss(__borrow2_1, A0.hi, 0, B0.hi, 0, C0.hi); \
    sub_ddmmss(__borrow2_1, A0.hi, __borrow2_1, A0.hi, 0, -__borrow2_0); \
    sub_ddmmss(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \
    sub_ddmmss(A1.hi, A1.lo, A1.hi, A1.lo, 0, -__borrow2_1); \
  } while (0)
#endif

#if defined(SUB6_LIMB32)
/* A[0..2] = B[0..2] - C[0..2] */
#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
  SUB6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
              B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
              C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#endif

#if defined(SUB8_LIMB32)
/* A[0..3] = B[0..3] - C[0..3] */
#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
  SUB8_LIMB32(A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
              B3.hi, B3.lo, B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
              C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#elif defined(SUB6_LIMB32)
/* A[0..3] = B[0..3] - C[0..3] */
#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
    mpi_limb_t __borrow4; \
    SUB6_LIMB32(__borrow4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
                0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
                0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \
    SUB4_LIMB32(A3.hi, A3.lo, A2.hi, __borrow4, \
                B3.hi, B3.lo, B2.hi, 0, \
                C3.hi, C3.lo, C2.hi, -__borrow4); \
  } while (0)
#endif

#if defined(SUB10_LIMB32)
/* A[0..4] = B[0..4] - C[0..4] */
#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) \
  SUB10_LIMB32(A4.hi, A4.lo, A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, \
               A0.hi, A0.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \
               B1.hi, B1.lo, B0.hi, B0.lo, C4.hi, C4.lo, C3.hi, C3.lo, \
               C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
#endif

#if defined(SUB14_LIMB32)
/* A[0..6] = B[0..6] - C[0..6] */
#define SUB7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
                    C6, C5, C4, C3, C2, C1, C0) \
  SUB14_LIMB32(A6.hi, A6.lo, A5.hi, A5.lo, A4.hi, A4.lo, A3.hi, A3.lo, \
               A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, B6.hi, B6.lo, \
               B5.hi, B5.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \
               B1.hi, B1.lo, B0.hi, B0.lo, C6.hi, C6.lo, C5.hi, C5.lo, \
               C4.hi, C4.lo, C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, \
               C0.hi, C0.lo)
#endif

#endif /* BYTES_PER_MPI_LIMB == 4 */


/* Common definitions. */
#define BITS_PER_MPI_LIMB64 (BITS_PER_MPI_LIMB * LIMBS_PER_LIMB64)
#define BYTES_PER_MPI_LIMB64 (BYTES_PER_MPI_LIMB * LIMBS_PER_LIMB64)


/* Common addition/subtraction macros. */

#ifndef ADD3_LIMB64
/* A[0..2] = B[0..2] + C[0..2] */
#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \
    mpi_limb64_t __carry3; \
    ADD2_LIMB64(__carry3, A0, zero, B0, zero, C0); \
    ADD2_LIMB64(A2, A1, B2, B1, C2, C1); \
    ADD2_LIMB64(A2, A1, A2, A1, zero, __carry3); \
  } while (0)
#endif

#ifndef ADD4_LIMB64
/* A[0..3] = B[0..3] + C[0..3] */
#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
    mpi_limb64_t __carry4; \
    ADD3_LIMB64(__carry4, A1, A0, zero, B1, B0, zero, C1, C0); \
    ADD2_LIMB64(A3, A2, B3, B2, C3, C2); \
    ADD2_LIMB64(A3, A2, A3, A2, zero, __carry4); \
  } while (0)
#endif

#ifndef ADD5_LIMB64
/* A[0..4] = B[0..4] + C[0..4] */
#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) do { \
    mpi_limb64_t __carry5; \
    ADD4_LIMB64(__carry5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \
    ADD2_LIMB64(A4, A3, B4, B3, C4, C3); \
    ADD2_LIMB64(A4, A3, A4, A3, zero, __carry5); \
  } while (0)
#endif

#ifndef ADD7_LIMB64
/* A[0..6] = B[0..6] + C[0..6] */
#define ADD7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
                    C6, C5, C4, C3, C2, C1, C0) do { \
    mpi_limb64_t __carry7; \
    ADD4_LIMB64(__carry7, A2, A1, A0, zero, B2, B1, B0, \
                zero, C2, C1, C0); \
    ADD5_LIMB64(A6, A5, A4, A3, __carry7, B6, B5, B4, B3, \
                __carry7, C6, C5, C4, C3, LIMB64_HILO(-1, -1)); \
  } while (0)
#endif
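
/* Note on ADD7_LIMB64 above (editorial, not in the original header):
 * the low three limbs are added with ADD4_LIMB64 so their carry lands
 * in __carry7 as 0 or 1.  Adding the all-ones constant
 * LIMB64_HILO(-1, -1) to __carry7 inside ADD5_LIMB64 regenerates that
 * carry for the upper limbs: 1 + (2^64 - 1) wraps to 0 with carry out,
 * while 0 + (2^64 - 1) does not carry.  */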

#ifndef SUB3_LIMB64
/* A[0..2] = B[0..2] - C[0..2] */
#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \
    mpi_limb64_t __borrow3; \
    SUB2_LIMB64(__borrow3, A0, zero, B0, zero, C0); \
    SUB2_LIMB64(A2, A1, B2, B1, C2, C1); \
    SUB2_LIMB64(A2, A1, A2, A1, zero, LIMB_TO64(-LIMB_FROM64(__borrow3))); \
  } while (0)
#endif

#ifndef SUB4_LIMB64
/* A[0..3] = B[0..3] - C[0..3] */
#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
    mpi_limb64_t __borrow4; \
    SUB3_LIMB64(__borrow4, A1, A0, zero, B1, B0, zero, C1, C0); \
    SUB2_LIMB64(A3, A2, B3, B2, C3, C2); \
    SUB2_LIMB64(A3, A2, A3, A2, zero, LIMB_TO64(-LIMB_FROM64(__borrow4))); \
  } while (0)
#endif

#ifndef SUB5_LIMB64
/* A[0..4] = B[0..4] - C[0..4] */
#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
                    C4, C3, C2, C1, C0) do { \
    mpi_limb64_t __borrow5; \
    SUB4_LIMB64(__borrow5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \
    SUB2_LIMB64(A4, A3, B4, B3, C4, C3); \
    SUB2_LIMB64(A4, A3, A4, A3, zero, LIMB_TO64(-LIMB_FROM64(__borrow5))); \
  } while (0)
#endif

#ifndef SUB7_LIMB64
/* A[0..6] = B[0..6] - C[0..6] */
#define SUB7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
                    C6, C5, C4, C3, C2, C1, C0) do { \
    mpi_limb64_t __borrow7; \
    SUB4_LIMB64(__borrow7, A2, A1, A0, zero, B2, B1, B0, \
                zero, C2, C1, C0); \
    SUB5_LIMB64(A6, A5, A4, A3, __borrow7, B6, B5, B4, B3, zero, \
                C6, C5, C4, C3, LIMB_TO64(-LIMB_FROM64(__borrow7))); \
  } while (0)
#endif
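
/* Illustrative sketch (not part of the original header; the function
 * name is made up): the generic ADDn/SUBn fallbacks above expand to a
 * variable named 'zero', so callers (as in libgcrypt's ec-nist.c) are
 * expected to have a local 'mpi_limb64_t zero = LIMB_TO64(0);' in
 * scope.  */
#if 0 /* example only */
static inline void
example_sub192 (mpi_limb64_t w[3], const mpi_limb64_t u[3],
                const mpi_limb64_t v[3])
{
  mpi_limb64_t zero = LIMB_TO64 (0); /* required by the fallbacks */
  mpi_limb64_t w2, w1, w0;

  SUB3_LIMB64 (w2, w1, w0, u[2], u[1], u[0], v[2], v[1], v[0]);
  w[2] = w2; w[1] = w1; w[0] = w0;
  (void)zero; /* unused when an arch-specific SUB3_LIMB64 is defined */
}
#endif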


#if defined(WORDS_BIGENDIAN) || (BITS_PER_MPI_LIMB64 != BITS_PER_MPI_LIMB)
#define LOAD64_UNALIGNED(x, pos) \
  LIMB64_HILO(LOAD32(x, 2 * (pos) + 2), LOAD32(x, 2 * (pos) + 1))
#else
#define LOAD64_UNALIGNED(x, pos) \
  buf_get_le64((const byte *)(&(x)[pos]) + 4)
#endif
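
/* Note on LOAD64_UNALIGNED above (editorial, not in the original
 * header): it reads the 64-bit value starting at 32-bit word offset
 * 2*pos + 1 of the limb array, i.e. bits [32*(2*pos+1), 32*(2*pos+3))
 * of the number.  On little-endian builds with 64-bit limbs this is a
 * single unaligned byte-wise load; otherwise it is stitched together
 * from two aligned 32-bit loads.  */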


/* Helper functions. */

static inline int
mpi_nbits_more_than (gcry_mpi_t w, unsigned int nbits)
{
  unsigned int nbits_nlimbs;
  mpi_limb_t wlimb;
  unsigned int n;

  nbits_nlimbs = (nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB;

  /* Note: Assumes that 'w' is normalized. */

  if (w->nlimbs > nbits_nlimbs)
    return 1;
  if (w->nlimbs < nbits_nlimbs)
    return 0;
  if ((nbits % BITS_PER_MPI_LIMB) == 0)
    return 0;

  wlimb = w->d[nbits_nlimbs - 1];
  if (wlimb == 0)
    log_bug ("mpi_nbits_more_than: input mpi not normalized\n");

  count_leading_zeros (n, wlimb);

  return (BITS_PER_MPI_LIMB - n) > (nbits % BITS_PER_MPI_LIMB);
}
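
/* Illustrative sketch (not part of the original header; the function
 * name is made up): typical use is a cheap range check before picking
 * a curve-specific reduction path.  */
#if 0 /* example only */
static inline int
example_fits_256bits (gcry_mpi_t w)
{
  /* Returns 1 if w has at most 256 significant bits. */
  return !mpi_nbits_more_than (w, 256);
}
#endif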

#endif /* GCRY_EC_INLINE_H */