/src/libgcrypt/mpi/ec-inline.h
Line | Count | Source |
1 | | /* ec-inline.h - EC inline addition/subtraction helpers |
2 | | * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
3 | | * |
4 | | * This file is part of Libgcrypt. |
5 | | * |
6 | | * Libgcrypt is free software; you can redistribute it and/or modify |
7 | | * it under the terms of the GNU Lesser General Public License as |
8 | | * published by the Free Software Foundation; either version 2.1 of |
9 | | * the License, or (at your option) any later version. |
10 | | * |
11 | | * Libgcrypt is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this program; if not, see <http://www.gnu.org/licenses/>. |
18 | | */ |
19 | | |
20 | | #ifndef GCRY_EC_INLINE_H |
21 | | #define GCRY_EC_INLINE_H |
22 | | |
23 | | #include "mpi-internal.h" |
24 | | #include "longlong.h" |
25 | | #include "ec-context.h" |
26 | | #include "../cipher/bithelp.h" |
27 | | #include "../cipher/bufhelp.h" |
28 | | |
29 | | |
30 | | #if BYTES_PER_MPI_LIMB == 8 |
31 | | |
32 | | /* 64-bit limb definitions for 64-bit architectures. */ |
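 | | /* With 64-bit limbs, a logical 64-bit limb is simply mpi_limb_t: the |
 | |  * LOAD64/STORE64 macros below reduce to plain array accesses and the |
 | |  * LIMB_TO64/LIMB_FROM64 conversions are identity casts. */ |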
33 | | |
34 | 0 | #define LIMBS_PER_LIMB64 1 |
35 | 0 | #define LOAD64(x, pos) ((x)[pos]) |
36 | 0 | #define STORE64(x, pos, v) ((x)[pos] = (mpi_limb_t)(v)) |
37 | 0 | #define LIMB_TO64(v) ((mpi_limb_t)(v)) |
38 | | #define LIMB_FROM64(v) ((mpi_limb_t)(v)) |
39 | | #define HIBIT_LIMB64(v) ((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 1)) |
40 | 0 | #define HI32_LIMB64(v) (u32)((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 32)) |
41 | 0 | #define LO32_LIMB64(v) ((u32)(v)) |
42 | 0 | #define LIMB64_C(hi, lo) (((mpi_limb_t)(u32)(hi) << 32) | (u32)(lo)) |
43 | | #define MASK_AND64(mask, val) ((mask) & (val)) |
44 | 0 | #define LIMB_OR64(val1, val2) ((val1) | (val2)) |
45 | | #define STORE64_COND(x, pos, mask1, val1, mask2, val2) \ |
46 | 0 | ((x)[(pos)] = ((mask1) & (val1)) | ((mask2) & (val2))) |
47 | | |
48 | | typedef mpi_limb_t mpi_limb64_t; |
49 | | |
50 | | static inline u32 |
51 | | LOAD32(mpi_ptr_t x, unsigned int pos) |
52 | 0 | { |
53 | 0 | unsigned int shr = (pos % 2) * 32; |
54 | 0 | return (x[pos / 2] >> shr); |
55 | 0 | } |
56 | | |
57 | | static inline mpi_limb64_t |
58 | | LIMB64_HILO(u32 hi, u32 lo) |
59 | 0 | { |
60 | 0 | mpi_limb64_t v = hi; |
61 | 0 | return (v << 32) | lo; |
62 | 0 | } |
63 | | |
64 | | |
65 | | /* x86-64 addition/subtraction helpers. */ |
66 | | #if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4 |
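 | | /* Note on the asm chains below: the B operands are tied to the output |
 | |  * registers through the matching constraints ("0", "1", ...), so each |
 | |  * chain works in place on registers preloaded with B, adding the C |
 | |  * operands ("rme": register, memory, or 32-bit sign-extended |
 | |  * immediate) with add/adc so that the carry ripples from the least |
 | |  * significant limb (A0) upwards. */ |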
67 | | |
68 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
69 | 0 | __asm__ ("addq %8, %2\n" \ |
70 | 0 | "adcq %7, %1\n" \ |
71 | 0 | "adcq %6, %0\n" \ |
72 | 0 | : "=r" (A2), \ |
73 | 0 | "=&r" (A1), \ |
74 | 0 | "=&r" (A0) \ |
75 | 0 | : "0" ((mpi_limb_t)(B2)), \ |
76 | 0 | "1" ((mpi_limb_t)(B1)), \ |
77 | 0 | "2" ((mpi_limb_t)(B0)), \ |
78 | 0 | "rme" ((mpi_limb_t)(C2)), \ |
79 | 0 | "rme" ((mpi_limb_t)(C1)), \ |
80 | 0 | "rme" ((mpi_limb_t)(C0)) \ |
81 | 0 | : "cc") |
82 | | |
83 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
84 | | __asm__ ("subq %8, %2\n" \ |
85 | | "sbbq %7, %1\n" \ |
86 | | "sbbq %6, %0\n" \ |
87 | | : "=r" (A2), \ |
88 | | "=&r" (A1), \ |
89 | | "=&r" (A0) \ |
90 | | : "0" ((mpi_limb_t)(B2)), \ |
91 | | "1" ((mpi_limb_t)(B1)), \ |
92 | | "2" ((mpi_limb_t)(B0)), \ |
93 | | "rme" ((mpi_limb_t)(C2)), \ |
94 | | "rme" ((mpi_limb_t)(C1)), \ |
95 | | "rme" ((mpi_limb_t)(C0)) \ |
96 | | : "cc") |
97 | | |
98 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
99 | 0 | __asm__ ("addq %11, %3\n" \ |
100 | 0 | "adcq %10, %2\n" \ |
101 | 0 | "adcq %9, %1\n" \ |
102 | 0 | "adcq %8, %0\n" \ |
103 | 0 | : "=r" (A3), \ |
104 | 0 | "=&r" (A2), \ |
105 | 0 | "=&r" (A1), \ |
106 | 0 | "=&r" (A0) \ |
107 | 0 | : "0" ((mpi_limb_t)(B3)), \ |
108 | 0 | "1" ((mpi_limb_t)(B2)), \ |
109 | 0 | "2" ((mpi_limb_t)(B1)), \ |
110 | 0 | "3" ((mpi_limb_t)(B0)), \ |
111 | 0 | "rme" ((mpi_limb_t)(C3)), \ |
112 | 0 | "rme" ((mpi_limb_t)(C2)), \ |
113 | 0 | "rme" ((mpi_limb_t)(C1)), \ |
114 | 0 | "rme" ((mpi_limb_t)(C0)) \ |
115 | 0 | : "cc") |
116 | | |
117 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
118 | 0 | __asm__ ("subq %11, %3\n" \ |
119 | 0 | "sbbq %10, %2\n" \ |
120 | 0 | "sbbq %9, %1\n" \ |
121 | 0 | "sbbq %8, %0\n" \ |
122 | 0 | : "=r" (A3), \ |
123 | 0 | "=&r" (A2), \ |
124 | 0 | "=&r" (A1), \ |
125 | 0 | "=&r" (A0) \ |
126 | 0 | : "0" ((mpi_limb_t)(B3)), \ |
127 | 0 | "1" ((mpi_limb_t)(B2)), \ |
128 | 0 | "2" ((mpi_limb_t)(B1)), \ |
129 | 0 | "3" ((mpi_limb_t)(B0)), \ |
130 | 0 | "rme" ((mpi_limb_t)(C3)), \ |
131 | 0 | "rme" ((mpi_limb_t)(C2)), \ |
132 | 0 | "rme" ((mpi_limb_t)(C1)), \ |
133 | 0 | "rme" ((mpi_limb_t)(C0)) \ |
134 | 0 | : "cc") |
135 | | |
136 | | #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
137 | | C4, C3, C2, C1, C0) \ |
138 | 0 | __asm__ ("addq %14, %4\n" \ |
139 | 0 | "adcq %13, %3\n" \ |
140 | 0 | "adcq %12, %2\n" \ |
141 | 0 | "adcq %11, %1\n" \ |
142 | 0 | "adcq %10, %0\n" \ |
143 | 0 | : "=r" (A4), \ |
144 | 0 | "=&r" (A3), \ |
145 | 0 | "=&r" (A2), \ |
146 | 0 | "=&r" (A1), \ |
147 | 0 | "=&r" (A0) \ |
148 | 0 | : "0" ((mpi_limb_t)(B4)), \ |
149 | 0 | "1" ((mpi_limb_t)(B3)), \ |
150 | 0 | "2" ((mpi_limb_t)(B2)), \ |
151 | 0 | "3" ((mpi_limb_t)(B1)), \ |
152 | 0 | "4" ((mpi_limb_t)(B0)), \ |
153 | 0 | "rme" ((mpi_limb_t)(C4)), \ |
154 | 0 | "rme" ((mpi_limb_t)(C3)), \ |
155 | 0 | "rme" ((mpi_limb_t)(C2)), \ |
156 | 0 | "rme" ((mpi_limb_t)(C1)), \ |
157 | 0 | "rme" ((mpi_limb_t)(C0)) \ |
158 | 0 | : "cc") |
159 | | |
160 | | #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
161 | | C4, C3, C2, C1, C0) \ |
162 | 0 | __asm__ ("subq %14, %4\n" \ |
163 | 0 | "sbbq %13, %3\n" \ |
164 | 0 | "sbbq %12, %2\n" \ |
165 | 0 | "sbbq %11, %1\n" \ |
166 | 0 | "sbbq %10, %0\n" \ |
167 | 0 | : "=r" (A4), \ |
168 | 0 | "=&r" (A3), \ |
169 | 0 | "=&r" (A2), \ |
170 | 0 | "=&r" (A1), \ |
171 | 0 | "=&r" (A0) \ |
172 | 0 | : "0" ((mpi_limb_t)(B4)), \ |
173 | 0 | "1" ((mpi_limb_t)(B3)), \ |
174 | 0 | "2" ((mpi_limb_t)(B2)), \ |
175 | 0 | "3" ((mpi_limb_t)(B1)), \ |
176 | 0 | "4" ((mpi_limb_t)(B0)), \ |
177 | 0 | "rme" ((mpi_limb_t)(C4)), \ |
178 | 0 | "rme" ((mpi_limb_t)(C3)), \ |
179 | 0 | "rme" ((mpi_limb_t)(C2)), \ |
180 | 0 | "rme" ((mpi_limb_t)(C1)), \ |
181 | 0 | "rme" ((mpi_limb_t)(C0)) \ |
182 | 0 | : "cc") |
183 | | |
184 | | #endif /* __x86_64__ */ |
185 | | |
186 | | |
187 | | /* ARM AArch64 addition/subtraction helpers. */ |
188 | | #if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4 |
189 | | |
190 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
191 | | __asm__ ("adds %2, %5, %8\n" \ |
192 | | "adcs %1, %4, %7\n" \ |
193 | | "adc %0, %3, %6\n" \ |
194 | | : "=r" (A2), \ |
195 | | "=&r" (A1), \ |
196 | | "=&r" (A0) \ |
197 | | : "r" ((mpi_limb_t)(B2)), \ |
198 | | "r" ((mpi_limb_t)(B1)), \ |
199 | | "r" ((mpi_limb_t)(B0)), \ |
200 | | "r" ((mpi_limb_t)(C2)), \ |
201 | | "r" ((mpi_limb_t)(C1)), \ |
202 | | "r" ((mpi_limb_t)(C0)) \ |
203 | | : "cc") |
204 | | |
205 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
206 | | __asm__ ("subs %2, %5, %8\n" \ |
207 | | "sbcs %1, %4, %7\n" \ |
208 | | "sbc %0, %3, %6\n" \ |
209 | | : "=r" (A2), \ |
210 | | "=&r" (A1), \ |
211 | | "=&r" (A0) \ |
212 | | : "r" ((mpi_limb_t)(B2)), \ |
213 | | "r" ((mpi_limb_t)(B1)), \ |
214 | | "r" ((mpi_limb_t)(B0)), \ |
215 | | "r" ((mpi_limb_t)(C2)), \ |
216 | | "r" ((mpi_limb_t)(C1)), \ |
217 | | "r" ((mpi_limb_t)(C0)) \ |
218 | | : "cc") |
219 | | |
220 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
221 | | __asm__ ("adds %3, %7, %11\n" \ |
222 | | "adcs %2, %6, %10\n" \ |
223 | | "adcs %1, %5, %9\n" \ |
224 | | "adc %0, %4, %8\n" \ |
225 | | : "=r" (A3), \ |
226 | | "=&r" (A2), \ |
227 | | "=&r" (A1), \ |
228 | | "=&r" (A0) \ |
229 | | : "r" ((mpi_limb_t)(B3)), \ |
230 | | "r" ((mpi_limb_t)(B2)), \ |
231 | | "r" ((mpi_limb_t)(B1)), \ |
232 | | "r" ((mpi_limb_t)(B0)), \ |
233 | | "r" ((mpi_limb_t)(C3)), \ |
234 | | "r" ((mpi_limb_t)(C2)), \ |
235 | | "r" ((mpi_limb_t)(C1)), \ |
236 | | "r" ((mpi_limb_t)(C0)) \ |
237 | | : "cc") |
238 | | |
239 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
240 | | __asm__ ("subs %3, %7, %11\n" \ |
241 | | "sbcs %2, %6, %10\n" \ |
242 | | "sbcs %1, %5, %9\n" \ |
243 | | "sbc %0, %4, %8\n" \ |
244 | | : "=r" (A3), \ |
245 | | "=&r" (A2), \ |
246 | | "=&r" (A1), \ |
247 | | "=&r" (A0) \ |
248 | | : "r" ((mpi_limb_t)(B3)), \ |
249 | | "r" ((mpi_limb_t)(B2)), \ |
250 | | "r" ((mpi_limb_t)(B1)), \ |
251 | | "r" ((mpi_limb_t)(B0)), \ |
252 | | "r" ((mpi_limb_t)(C3)), \ |
253 | | "r" ((mpi_limb_t)(C2)), \ |
254 | | "r" ((mpi_limb_t)(C1)), \ |
255 | | "r" ((mpi_limb_t)(C0)) \ |
256 | | : "cc") |
257 | | |
258 | | #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
259 | | C4, C3, C2, C1, C0) \ |
260 | | __asm__ ("adds %4, %9, %14\n" \ |
261 | | "adcs %3, %8, %13\n" \ |
262 | | "adcs %2, %7, %12\n" \ |
263 | | "adcs %1, %6, %11\n" \ |
264 | | "adc %0, %5, %10\n" \ |
265 | | : "=r" (A4), \ |
266 | | "=&r" (A3), \ |
267 | | "=&r" (A2), \ |
268 | | "=&r" (A1), \ |
269 | | "=&r" (A0) \ |
270 | | : "r" ((mpi_limb_t)(B4)), \ |
271 | | "r" ((mpi_limb_t)(B3)), \ |
272 | | "r" ((mpi_limb_t)(B2)), \ |
273 | | "r" ((mpi_limb_t)(B1)), \ |
274 | | "r" ((mpi_limb_t)(B0)), \ |
275 | | "r" ((mpi_limb_t)(C4)), \ |
276 | | "r" ((mpi_limb_t)(C3)), \ |
277 | | "r" ((mpi_limb_t)(C2)), \ |
278 | | "r" ((mpi_limb_t)(C1)), \ |
279 | | "r" ((mpi_limb_t)(C0)) \ |
280 | | : "cc") |
281 | | |
282 | | #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
283 | | C4, C3, C2, C1, C0) \ |
284 | | __asm__ ("subs %4, %9, %14\n" \ |
285 | | "sbcs %3, %8, %13\n" \ |
286 | | "sbcs %2, %7, %12\n" \ |
287 | | "sbcs %1, %6, %11\n" \ |
288 | | "sbc %0, %5, %10\n" \ |
289 | | : "=r" (A4), \ |
290 | | "=&r" (A3), \ |
291 | | "=&r" (A2), \ |
292 | | "=&r" (A1), \ |
293 | | "=&r" (A0) \ |
294 | | : "r" ((mpi_limb_t)(B4)), \ |
295 | | "r" ((mpi_limb_t)(B3)), \ |
296 | | "r" ((mpi_limb_t)(B2)), \ |
297 | | "r" ((mpi_limb_t)(B1)), \ |
298 | | "r" ((mpi_limb_t)(B0)), \ |
299 | | "r" ((mpi_limb_t)(C4)), \ |
300 | | "r" ((mpi_limb_t)(C3)), \ |
301 | | "r" ((mpi_limb_t)(C2)), \ |
302 | | "r" ((mpi_limb_t)(C1)), \ |
303 | | "r" ((mpi_limb_t)(C0)) \ |
304 | | : "cc") |
305 | | |
306 | | #endif /* __aarch64__ */ |
307 | | |
308 | | |
309 | | /* PowerPC64 addition/subtraction helpers. */ |
310 | | #if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4 |
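 | | /* Note: addc/adde and subfc/subfe take operands in "rt, ra, rb" form, |
 | |  * where subfc computes rb - ra ("subtract from"), so the C operand is |
 | |  * listed before the B operand in the asm below; the carry/borrow is |
 | |  * propagated through the XER carry bit by the adde/subfe forms. */ |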
311 | | |
312 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
313 | | __asm__ ("addc %2, %8, %5\n" \ |
314 | | "adde %1, %7, %4\n" \ |
315 | | "adde %0, %6, %3\n" \ |
316 | | : "=r" (A2), \ |
317 | | "=&r" (A1), \ |
318 | | "=&r" (A0) \ |
319 | | : "r" ((mpi_limb_t)(B2)), \ |
320 | | "r" ((mpi_limb_t)(B1)), \ |
321 | | "r" ((mpi_limb_t)(B0)), \ |
322 | | "r" ((mpi_limb_t)(C2)), \ |
323 | | "r" ((mpi_limb_t)(C1)), \ |
324 | | "r" ((mpi_limb_t)(C0)) \ |
325 | | : "cc", "r0") |
326 | | |
327 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
328 | | __asm__ ("subfc %2, %8, %5\n" \ |
329 | | "subfe %1, %7, %4\n" \ |
330 | | "subfe %0, %6, %3\n" \ |
331 | | : "=r" (A2), \ |
332 | | "=&r" (A1), \ |
333 | | "=&r" (A0) \ |
334 | | : "r" ((mpi_limb_t)(B2)), \ |
335 | | "r" ((mpi_limb_t)(B1)), \ |
336 | | "r" ((mpi_limb_t)(B0)), \ |
337 | | "r" ((mpi_limb_t)(C2)), \ |
338 | | "r" ((mpi_limb_t)(C1)), \ |
339 | | "r" ((mpi_limb_t)(C0)) \ |
340 | | : "cc", "r0") |
341 | | |
342 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
343 | | __asm__ ("addc %3, %11, %7\n" \ |
344 | | "adde %2, %10, %6\n" \ |
345 | | "adde %1, %9, %5\n" \ |
346 | | "adde %0, %8, %4\n" \ |
347 | | : "=r" (A3), \ |
348 | | "=&r" (A2), \ |
349 | | "=&r" (A1), \ |
350 | | "=&r" (A0) \ |
351 | | : "r" ((mpi_limb_t)(B3)), \ |
352 | | "r" ((mpi_limb_t)(B2)), \ |
353 | | "r" ((mpi_limb_t)(B1)), \ |
354 | | "r" ((mpi_limb_t)(B0)), \ |
355 | | "r" ((mpi_limb_t)(C3)), \ |
356 | | "r" ((mpi_limb_t)(C2)), \ |
357 | | "r" ((mpi_limb_t)(C1)), \ |
358 | | "r" ((mpi_limb_t)(C0)) \ |
359 | | : "cc") |
360 | | |
361 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
362 | | __asm__ ("subfc %3, %11, %7\n" \ |
363 | | "subfe %2, %10, %6\n" \ |
364 | | "subfe %1, %9, %5\n" \ |
365 | | "subfe %0, %8, %4\n" \ |
366 | | : "=r" (A3), \ |
367 | | "=&r" (A2), \ |
368 | | "=&r" (A1), \ |
369 | | "=&r" (A0) \ |
370 | | : "r" ((mpi_limb_t)(B3)), \ |
371 | | "r" ((mpi_limb_t)(B2)), \ |
372 | | "r" ((mpi_limb_t)(B1)), \ |
373 | | "r" ((mpi_limb_t)(B0)), \ |
374 | | "r" ((mpi_limb_t)(C3)), \ |
375 | | "r" ((mpi_limb_t)(C2)), \ |
376 | | "r" ((mpi_limb_t)(C1)), \ |
377 | | "r" ((mpi_limb_t)(C0)) \ |
378 | | : "cc") |
379 | | |
380 | | #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
381 | | C4, C3, C2, C1, C0) \ |
382 | | __asm__ ("addc %4, %14, %9\n" \ |
383 | | "adde %3, %13, %8\n" \ |
384 | | "adde %2, %12, %7\n" \ |
385 | | "adde %1, %11, %6\n" \ |
386 | | "adde %0, %10, %5\n" \ |
387 | | : "=r" (A4), \ |
388 | | "=&r" (A3), \ |
389 | | "=&r" (A2), \ |
390 | | "=&r" (A1), \ |
391 | | "=&r" (A0) \ |
392 | | : "r" ((mpi_limb_t)(B4)), \ |
393 | | "r" ((mpi_limb_t)(B3)), \ |
394 | | "r" ((mpi_limb_t)(B2)), \ |
395 | | "r" ((mpi_limb_t)(B1)), \ |
396 | | "r" ((mpi_limb_t)(B0)), \ |
397 | | "r" ((mpi_limb_t)(C4)), \ |
398 | | "r" ((mpi_limb_t)(C3)), \ |
399 | | "r" ((mpi_limb_t)(C2)), \ |
400 | | "r" ((mpi_limb_t)(C1)), \ |
401 | | "r" ((mpi_limb_t)(C0)) \ |
402 | | : "cc") |
403 | | |
404 | | #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
405 | | C4, C3, C2, C1, C0) \ |
406 | | __asm__ ("subfc %4, %14, %9\n" \ |
407 | | "subfe %3, %13, %8\n" \ |
408 | | "subfe %2, %12, %7\n" \ |
409 | | "subfe %1, %11, %6\n" \ |
410 | | "subfe %0, %10, %5\n" \ |
411 | | : "=r" (A4), \ |
412 | | "=&r" (A3), \ |
413 | | "=&r" (A2), \ |
414 | | "=&r" (A1), \ |
415 | | "=&r" (A0) \ |
416 | | : "r" ((mpi_limb_t)(B4)), \ |
417 | | "r" ((mpi_limb_t)(B3)), \ |
418 | | "r" ((mpi_limb_t)(B2)), \ |
419 | | "r" ((mpi_limb_t)(B1)), \ |
420 | | "r" ((mpi_limb_t)(B0)), \ |
421 | | "r" ((mpi_limb_t)(C4)), \ |
422 | | "r" ((mpi_limb_t)(C3)), \ |
423 | | "r" ((mpi_limb_t)(C2)), \ |
424 | | "r" ((mpi_limb_t)(C1)), \ |
425 | | "r" ((mpi_limb_t)(C0)) \ |
426 | | : "cc") |
427 | | |
428 | | #endif /* __powerpc__ */ |
429 | | |
430 | | |
431 | | /* s390x/zSeries addition/subtraction helpers. */ |
432 | | #if defined (__s390x__) && defined(HAVE_CPU_ARCH_S390X) && __GNUC__ >= 4 |
433 | | |
434 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
435 | | __asm__ ("algr %2, %8\n" \ |
436 | | "alcgr %1, %7\n" \ |
437 | | "alcgr %0, %6\n" \ |
438 | | : "=r" (A2), \ |
439 | | "=&r" (A1), \ |
440 | | "=&r" (A0) \ |
441 | | : "0" ((mpi_limb_t)(B2)), \ |
442 | | "1" ((mpi_limb_t)(B1)), \ |
443 | | "2" ((mpi_limb_t)(B0)), \ |
444 | | "r" ((mpi_limb_t)(C2)), \ |
445 | | "r" ((mpi_limb_t)(C1)), \ |
446 | | "r" ((mpi_limb_t)(C0)) \ |
447 | | : "cc") |
448 | | |
449 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
450 | | __asm__ ("slgr %2, %8\n" \ |
451 | | "slbgr %1, %7\n" \ |
452 | | "slbgr %0, %6\n" \ |
453 | | : "=r" (A2), \ |
454 | | "=&r" (A1), \ |
455 | | "=&r" (A0) \ |
456 | | : "0" ((mpi_limb_t)(B2)), \ |
457 | | "1" ((mpi_limb_t)(B1)), \ |
458 | | "2" ((mpi_limb_t)(B0)), \ |
459 | | "r" ((mpi_limb_t)(C2)), \ |
460 | | "r" ((mpi_limb_t)(C1)), \ |
461 | | "r" ((mpi_limb_t)(C0)) \ |
462 | | : "cc") |
463 | | |
464 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
465 | | __asm__ ("algr %3, %11\n" \ |
466 | | "alcgr %2, %10\n" \ |
467 | | "alcgr %1, %9\n" \ |
468 | | "alcgr %0, %8\n" \ |
469 | | : "=r" (A3), \ |
470 | | "=&r" (A2), \ |
471 | | "=&r" (A1), \ |
472 | | "=&r" (A0) \ |
473 | | : "0" ((mpi_limb_t)(B3)), \ |
474 | | "1" ((mpi_limb_t)(B2)), \ |
475 | | "2" ((mpi_limb_t)(B1)), \ |
476 | | "3" ((mpi_limb_t)(B0)), \ |
477 | | "r" ((mpi_limb_t)(C3)), \ |
478 | | "r" ((mpi_limb_t)(C2)), \ |
479 | | "r" ((mpi_limb_t)(C1)), \ |
480 | | "r" ((mpi_limb_t)(C0)) \ |
481 | | : "cc") |
482 | | |
483 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
484 | | __asm__ ("slgr %3, %11\n" \ |
485 | | "slbgr %2, %10\n" \ |
486 | | "slbgr %1, %9\n" \ |
487 | | "slbgr %0, %8\n" \ |
488 | | : "=r" (A3), \ |
489 | | "=&r" (A2), \ |
490 | | "=&r" (A1), \ |
491 | | "=&r" (A0) \ |
492 | | : "0" ((mpi_limb_t)(B3)), \ |
493 | | "1" ((mpi_limb_t)(B2)), \ |
494 | | "2" ((mpi_limb_t)(B1)), \ |
495 | | "3" ((mpi_limb_t)(B0)), \ |
496 | | "r" ((mpi_limb_t)(C3)), \ |
497 | | "r" ((mpi_limb_t)(C2)), \ |
498 | | "r" ((mpi_limb_t)(C1)), \ |
499 | | "r" ((mpi_limb_t)(C0)) \ |
500 | | : "cc") |
501 | | |
502 | | #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
503 | | C4, C3, C2, C1, C0) \ |
504 | | __asm__ ("algr %4, %14\n" \ |
505 | | "alcgr %3, %13\n" \ |
506 | | "alcgr %2, %12\n" \ |
507 | | "alcgr %1, %11\n" \ |
508 | | "alcgr %0, %10\n" \ |
509 | | : "=r" (A4), \ |
510 | | "=&r" (A3), \ |
511 | | "=&r" (A2), \ |
512 | | "=&r" (A1), \ |
513 | | "=&r" (A0) \ |
514 | | : "0" ((mpi_limb_t)(B4)), \ |
515 | | "1" ((mpi_limb_t)(B3)), \ |
516 | | "2" ((mpi_limb_t)(B2)), \ |
517 | | "3" ((mpi_limb_t)(B1)), \ |
518 | | "4" ((mpi_limb_t)(B0)), \ |
519 | | "r" ((mpi_limb_t)(C4)), \ |
520 | | "r" ((mpi_limb_t)(C3)), \ |
521 | | "r" ((mpi_limb_t)(C2)), \ |
522 | | "r" ((mpi_limb_t)(C1)), \ |
523 | | "r" ((mpi_limb_t)(C0)) \ |
524 | | : "cc") |
525 | | |
526 | | #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
527 | | C4, C3, C2, C1, C0) \ |
528 | | __asm__ ("slgr %4, %14\n" \ |
529 | | "slbgr %3, %13\n" \ |
530 | | "slbgr %2, %12\n" \ |
531 | | "slbgr %1, %11\n" \ |
532 | | "slbgr %0, %10\n" \ |
533 | | : "=r" (A4), \ |
534 | | "=&r" (A3), \ |
535 | | "=&r" (A2), \ |
536 | | "=&r" (A1), \ |
537 | | "=&r" (A0) \ |
538 | | : "0" ((mpi_limb_t)(B4)), \ |
539 | | "1" ((mpi_limb_t)(B3)), \ |
540 | | "2" ((mpi_limb_t)(B2)), \ |
541 | | "3" ((mpi_limb_t)(B1)), \ |
542 | | "4" ((mpi_limb_t)(B0)), \ |
543 | | "r" ((mpi_limb_t)(C4)), \ |
544 | | "r" ((mpi_limb_t)(C3)), \ |
545 | | "r" ((mpi_limb_t)(C2)), \ |
546 | | "r" ((mpi_limb_t)(C1)), \ |
547 | | "r" ((mpi_limb_t)(C0)) \ |
548 | | : "cc") |
549 | | |
550 | | #endif /* __s390x__ */ |
551 | | |
552 | | |
553 | | /* Common 64-bit arch addition/subtraction macros. */ |
554 | | |
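 | | /* These map directly onto the two-limb primitives from longlong.h: |
 | |  * add_ssaaaa computes a double-limb sum and sub_ddmmss a double-limb |
 | |  * difference, handling the carry/borrow between the two halves. */ |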
555 | | #define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \ |
556 | 0 | add_ssaaaa(A1, A0, B1, B0, C1, C0) |
557 | | |
558 | | #define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \ |
559 | | sub_ddmmss(A1, A0, B1, B0, C1, C0) |
560 | | |
561 | | #endif /* BYTES_PER_MPI_LIMB == 8 */ |
562 | | |
563 | | |
564 | | #if BYTES_PER_MPI_LIMB == 4 |
565 | | |
566 | | /* 64-bit limb definitions for 32-bit architectures. */ |
567 | | |
568 | | #define LIMBS_PER_LIMB64 2 |
569 | | #define LIMB_FROM64(v) ((v).lo) |
570 | | #define HIBIT_LIMB64(v) ((v).hi >> (BITS_PER_MPI_LIMB - 1)) |
571 | | #define HI32_LIMB64(v) ((v).hi) |
572 | | #define LO32_LIMB64(v) ((v).lo) |
573 | | #define LOAD32(x, pos) ((x)[pos]) |
574 | | #define LIMB64_C(hi, lo) { (lo), (hi) } |
575 | | |
576 | | typedef struct |
577 | | { |
578 | | mpi_limb_t lo; |
579 | | mpi_limb_t hi; |
580 | | } mpi_limb64_t; |
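 | | /* On 32-bit limbs a logical 64-bit limb is emulated as a pair of |
 | |  * native limbs, least significant half first, matching the |
 | |  * least-significant-limb-first layout of MPI limb arrays; LOAD64 and |
 | |  * STORE64 below therefore access two consecutive native limbs. */ |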
581 | | |
582 | | static inline mpi_limb64_t |
583 | | LOAD64(const mpi_ptr_t x, unsigned int pos) |
584 | | { |
585 | | mpi_limb64_t v; |
586 | | v.lo = x[pos * 2 + 0]; |
587 | | v.hi = x[pos * 2 + 1]; |
588 | | return v; |
589 | | } |
590 | | |
591 | | static inline void |
592 | | STORE64(mpi_ptr_t x, unsigned int pos, mpi_limb64_t v) |
593 | | { |
594 | | x[pos * 2 + 0] = v.lo; |
595 | | x[pos * 2 + 1] = v.hi; |
596 | | } |
597 | | |
598 | | static inline mpi_limb64_t |
599 | | MASK_AND64(mpi_limb_t mask, mpi_limb64_t val) |
600 | | { |
601 | | val.lo &= mask; |
602 | | val.hi &= mask; |
603 | | return val; |
604 | | } |
605 | | |
606 | | static inline mpi_limb64_t |
607 | | LIMB_OR64(mpi_limb64_t val1, mpi_limb64_t val2) |
608 | | { |
609 | | val1.lo |= val2.lo; |
610 | | val1.hi |= val2.hi; |
611 | | return val1; |
612 | | } |
613 | | |
614 | | static inline void |
615 | | STORE64_COND(mpi_ptr_t x, unsigned int pos, mpi_limb_t mask1, |
616 | | mpi_limb64_t val1, mpi_limb_t mask2, mpi_limb64_t val2) |
617 | | { |
618 | | x[pos * 2 + 0] = (mask1 & val1.lo) | (mask2 & val2.lo); |
619 | | x[pos * 2 + 1] = (mask1 & val1.hi) | (mask2 & val2.hi); |
620 | | } |
621 | | |
622 | | static inline mpi_limb64_t |
623 | | LIMB_TO64(mpi_limb_t x) |
624 | | { |
625 | | mpi_limb64_t v; |
626 | | v.lo = x; |
627 | | v.hi = 0; |
628 | | return v; |
629 | | } |
630 | | |
631 | | static inline mpi_limb64_t |
632 | | LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) |
633 | | { |
634 | | mpi_limb64_t v; |
635 | | v.lo = lo; |
636 | | v.hi = hi; |
637 | | return v; |
638 | | } |
639 | | |
640 | | |
641 | | /* i386 addition/subtraction helpers. */ |
642 | | #if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4 |
643 | | |
644 | | #define ADD4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \ |
645 | | __asm__ ("addl %11, %3\n" \ |
646 | | "adcl %10, %2\n" \ |
647 | | "adcl %9, %1\n" \ |
648 | | "adcl %8, %0\n" \ |
649 | | : "=r" (a3), \ |
650 | | "=&r" (a2), \ |
651 | | "=&r" (a1), \ |
652 | | "=&r" (a0) \ |
653 | | : "0" ((mpi_limb_t)(b3)), \ |
654 | | "1" ((mpi_limb_t)(b2)), \ |
655 | | "2" ((mpi_limb_t)(b1)), \ |
656 | | "3" ((mpi_limb_t)(b0)), \ |
657 | | "g" ((mpi_limb_t)(c3)), \ |
658 | | "g" ((mpi_limb_t)(c2)), \ |
659 | | "g" ((mpi_limb_t)(c1)), \ |
660 | | "g" ((mpi_limb_t)(c0)) \ |
661 | | : "cc") |
662 | | |
663 | | #define ADD6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \ |
664 | | c5, c4, c3, c2, c1, c0) do { \ |
665 | | mpi_limb_t __carry6_32; \ |
666 | | __asm__ ("addl %10, %3\n" \ |
667 | | "adcl %9, %2\n" \ |
668 | | "adcl %8, %1\n" \ |
669 | | "sbbl %0, %0\n" \ |
670 | | : "=r" (__carry6_32), \ |
671 | | "=&r" (a2), \ |
672 | | "=&r" (a1), \ |
673 | | "=&r" (a0) \ |
674 | | : "0" ((mpi_limb_t)(0)), \ |
675 | | "1" ((mpi_limb_t)(b2)), \ |
676 | | "2" ((mpi_limb_t)(b1)), \ |
677 | | "3" ((mpi_limb_t)(b0)), \ |
678 | | "g" ((mpi_limb_t)(c2)), \ |
679 | | "g" ((mpi_limb_t)(c1)), \ |
680 | | "g" ((mpi_limb_t)(c0)) \ |
681 | | : "cc"); \ |
682 | | __asm__ ("addl $1, %3\n" \ |
683 | | "adcl %10, %2\n" \ |
684 | | "adcl %9, %1\n" \ |
685 | | "adcl %8, %0\n" \ |
686 | | : "=r" (a5), \ |
687 | | "=&r" (a4), \ |
688 | | "=&r" (a3), \ |
689 | | "=&r" (__carry6_32) \ |
690 | | : "0" ((mpi_limb_t)(b5)), \ |
691 | | "1" ((mpi_limb_t)(b4)), \ |
692 | | "2" ((mpi_limb_t)(b3)), \ |
693 | | "3" ((mpi_limb_t)(__carry6_32)), \ |
694 | | "g" ((mpi_limb_t)(c5)), \ |
695 | | "g" ((mpi_limb_t)(c4)), \ |
696 | | "g" ((mpi_limb_t)(c3)) \ |
697 | | : "cc"); \ |
698 | | } while (0) |
699 | | |
700 | | #define SUB4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \ |
701 | | __asm__ ("subl %11, %3\n" \ |
702 | | "sbbl %10, %2\n" \ |
703 | | "sbbl %9, %1\n" \ |
704 | | "sbbl %8, %0\n" \ |
705 | | : "=r" (a3), \ |
706 | | "=&r" (a2), \ |
707 | | "=&r" (a1), \ |
708 | | "=&r" (a0) \ |
709 | | : "0" ((mpi_limb_t)(b3)), \ |
710 | | "1" ((mpi_limb_t)(b2)), \ |
711 | | "2" ((mpi_limb_t)(b1)), \ |
712 | | "3" ((mpi_limb_t)(b0)), \ |
713 | | "g" ((mpi_limb_t)(c3)), \ |
714 | | "g" ((mpi_limb_t)(c2)), \ |
715 | | "g" ((mpi_limb_t)(c1)), \ |
716 | | "g" ((mpi_limb_t)(c0)) \ |
717 | | : "cc") |
718 | | |
719 | | #define SUB6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \ |
720 | | c5, c4, c3, c2, c1, c0) do { \ |
721 | | mpi_limb_t __borrow6_32; \ |
722 | | __asm__ ("subl %10, %3\n" \ |
723 | | "sbbl %9, %2\n" \ |
724 | | "sbbl %8, %1\n" \ |
725 | | "sbbl %0, %0\n" \ |
726 | | : "=r" (__borrow6_32), \ |
727 | | "=&r" (a2), \ |
728 | | "=&r" (a1), \ |
729 | | "=&r" (a0) \ |
730 | | : "0" ((mpi_limb_t)(0)), \ |
731 | | "1" ((mpi_limb_t)(b2)), \ |
732 | | "2" ((mpi_limb_t)(b1)), \ |
733 | | "3" ((mpi_limb_t)(b0)), \ |
734 | | "g" ((mpi_limb_t)(c2)), \ |
735 | | "g" ((mpi_limb_t)(c1)), \ |
736 | | "g" ((mpi_limb_t)(c0)) \ |
737 | | : "cc"); \ |
738 | | __asm__ ("addl $1, %3\n" \ |
739 | | "sbbl %10, %2\n" \ |
740 | | "sbbl %9, %1\n" \ |
741 | | "sbbl %8, %0\n" \ |
742 | | : "=r" (a5), \ |
743 | | "=&r" (a4), \ |
744 | | "=&r" (a3), \ |
745 | | "=&r" (__borrow6_32) \ |
746 | | : "0" ((mpi_limb_t)(b5)), \ |
747 | | "1" ((mpi_limb_t)(b4)), \ |
748 | | "2" ((mpi_limb_t)(b3)), \ |
749 | | "3" ((mpi_limb_t)(__borrow6_32)), \ |
750 | | "g" ((mpi_limb_t)(c5)), \ |
751 | | "g" ((mpi_limb_t)(c4)), \ |
752 | | "g" ((mpi_limb_t)(c3)) \ |
753 | | : "cc"); \ |
754 | | } while (0) |
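 | | /* How the 6-limb helpers above work: the first asm block adds (or |
 | |  * subtracts) the three low limbs and captures the final carry (or |
 | |  * borrow) as a 0 / 0xffffffff mask with "sbbl %0, %0"; the second |
 | |  * block converts that mask back into a carry flag with "addl $1" and |
 | |  * lets the adcl/sbbl chain fold it into the three high limbs. */ |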
755 | | |
756 | | #endif /* __i386__ */ |
757 | | |
758 | | |
759 | | /* ARM addition/subtraction helpers. */ |
760 | | #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS |
761 | | |
762 | | #define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
763 | | __asm__ ("adds %3, %7, %11\n" \ |
764 | | "adcs %2, %6, %10\n" \ |
765 | | "adcs %1, %5, %9\n" \ |
766 | | "adc %0, %4, %8\n" \ |
767 | | : "=r" (A3), \ |
768 | | "=&r" (A2), \ |
769 | | "=&r" (A1), \ |
770 | | "=&r" (A0) \ |
771 | | : "r" ((mpi_limb_t)(B3)), \ |
772 | | "r" ((mpi_limb_t)(B2)), \ |
773 | | "r" ((mpi_limb_t)(B1)), \ |
774 | | "r" ((mpi_limb_t)(B0)), \ |
775 | | "Ir" ((mpi_limb_t)(C3)), \ |
776 | | "Ir" ((mpi_limb_t)(C2)), \ |
777 | | "Ir" ((mpi_limb_t)(C1)), \ |
778 | | "Ir" ((mpi_limb_t)(C0)) \ |
779 | | : "cc") |
780 | | |
781 | | #define ADD6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \ |
782 | | C5, C4, C3, C2, C1, C0) do { \ |
783 | | mpi_limb_t __carry6_32; \ |
784 | | __asm__ ("adds %3, %7, %10\n" \ |
785 | | "adcs %2, %6, %9\n" \ |
786 | | "adcs %1, %5, %8\n" \ |
787 | | "adc %0, %4, %4\n" \ |
788 | | : "=r" (__carry6_32), \ |
789 | | "=&r" (A2), \ |
790 | | "=&r" (A1), \ |
791 | | "=&r" (A0) \ |
792 | | : "r" ((mpi_limb_t)(0)), \ |
793 | | "r" ((mpi_limb_t)(B2)), \ |
794 | | "r" ((mpi_limb_t)(B1)), \ |
795 | | "r" ((mpi_limb_t)(B0)), \ |
796 | | "Ir" ((mpi_limb_t)(C2)), \ |
797 | | "Ir" ((mpi_limb_t)(C1)), \ |
798 | | "Ir" ((mpi_limb_t)(C0)) \ |
799 | | : "cc"); \ |
800 | | ADD4_LIMB32(A5, A4, A3, __carry6_32, B5, B4, B3, __carry6_32, \ |
801 | | C5, C4, C3, 0xffffffffU); \ |
802 | | } while (0) |
803 | | |
804 | | #define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ |
805 | | __asm__ ("subs %3, %7, %11\n" \ |
806 | | "sbcs %2, %6, %10\n" \ |
807 | | "sbcs %1, %5, %9\n" \ |
808 | | "sbc %0, %4, %8\n" \ |
809 | | : "=r" (A3), \ |
810 | | "=&r" (A2), \ |
811 | | "=&r" (A1), \ |
812 | | "=&r" (A0) \ |
813 | | : "r" ((mpi_limb_t)(B3)), \ |
814 | | "r" ((mpi_limb_t)(B2)), \ |
815 | | "r" ((mpi_limb_t)(B1)), \ |
816 | | "r" ((mpi_limb_t)(B0)), \ |
817 | | "Ir" ((mpi_limb_t)(C3)), \ |
818 | | "Ir" ((mpi_limb_t)(C2)), \ |
819 | | "Ir" ((mpi_limb_t)(C1)), \ |
820 | | "Ir" ((mpi_limb_t)(C0)) \ |
821 | | : "cc") |
822 | | |
823 | | |
824 | | #define SUB6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \ |
825 | | C5, C4, C3, C2, C1, C0) do { \ |
826 | | mpi_limb_t __borrow6_32; \ |
827 | | __asm__ ("subs %3, %7, %10\n" \ |
828 | | "sbcs %2, %6, %9\n" \ |
829 | | "sbcs %1, %5, %8\n" \ |
830 | | "sbc %0, %4, %4\n" \ |
831 | | : "=r" (__borrow6_32), \ |
832 | | "=&r" (A2), \ |
833 | | "=&r" (A1), \ |
834 | | "=&r" (A0) \ |
835 | | : "r" ((mpi_limb_t)(0)), \ |
836 | | "r" ((mpi_limb_t)(B2)), \ |
837 | | "r" ((mpi_limb_t)(B1)), \ |
838 | | "r" ((mpi_limb_t)(B0)), \ |
839 | | "Ir" ((mpi_limb_t)(C2)), \ |
840 | | "Ir" ((mpi_limb_t)(C1)), \ |
841 | | "Ir" ((mpi_limb_t)(C0)) \ |
842 | | : "cc"); \ |
843 | | SUB4_LIMB32(A5, A4, A3, __borrow6_32, B5, B4, B3, 0, \ |
844 | | C5, C4, C3, -__borrow6_32); \ |
845 | | } while (0) |
846 | | |
847 | | #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ |
848 | | |
849 | | |
850 | | /* Common 32-bit arch addition/subtraction macros. */ |
851 | | |
852 | | #if defined(ADD4_LIMB32) |
853 | | /* A[0..1] = B[0..1] + C[0..1] */ |
854 | | #define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \ |
855 | | ADD4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \ |
856 | | B1.hi, B1.lo, B0.hi, B0.lo, \ |
857 | | C1.hi, C1.lo, C0.hi, C0.lo) |
858 | | #else |
859 | | /* A[0..1] = B[0..1] + C[0..1] */ |
860 | | #define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) do { \ |
861 | | mpi_limb_t __carry2_0, __carry2_1; \ |
862 | | add_ssaaaa(__carry2_0, A0.lo, 0, B0.lo, 0, C0.lo); \ |
863 | | add_ssaaaa(__carry2_1, A0.hi, 0, B0.hi, 0, C0.hi); \ |
864 | | add_ssaaaa(__carry2_1, A0.hi, __carry2_1, A0.hi, 0, __carry2_0); \ |
865 | | add_ssaaaa(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \ |
866 | | add_ssaaaa(A1.hi, A1.lo, A1.hi, A1.lo, 0, __carry2_1); \ |
867 | | } while (0) |
868 | | #endif |
869 | | |
870 | | #if defined(ADD6_LIMB32) |
871 | | /* A[0..2] = B[0..2] + C[0..2] */ |
872 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
873 | | ADD6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ |
874 | | B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ |
875 | | C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) |
876 | | #endif |
877 | | |
878 | | #if defined(ADD6_LIMB32) |
879 | | /* A[0..3] = B[0..3] + C[0..3] */ |
880 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ |
881 | | mpi_limb_t __carry4; \ |
882 | | ADD6_LIMB32(__carry4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ |
883 | | 0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ |
884 | | 0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \ |
885 | | ADD4_LIMB32(A3.hi, A3.lo, A2.hi, __carry4, \ |
886 | | B3.hi, B3.lo, B2.hi, __carry4, \ |
887 | | C3.hi, C3.lo, C2.hi, 0xffffffffU); \ |
888 | | } while (0) |
889 | | #endif |
890 | | |
891 | | #if defined(SUB4_LIMB32) |
892 | | /* A[0..1] = B[0..1] - C[0..1] */ |
893 | | #define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \ |
894 | | SUB4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \ |
895 | | B1.hi, B1.lo, B0.hi, B0.lo, \ |
896 | | C1.hi, C1.lo, C0.hi, C0.lo) |
897 | | #else |
898 | | /* A[0..1] = B[0..1] - C[0..1] */ |
899 | | #define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) do { \ |
900 | | mpi_limb_t __borrow2_0, __borrow2_1; \ |
901 | | sub_ddmmss(__borrow2_0, A0.lo, 0, B0.lo, 0, C0.lo); \ |
902 | | sub_ddmmss(__borrow2_1, A0.hi, 0, B0.hi, 0, C0.hi); \ |
903 | | sub_ddmmss(__borrow2_1, A0.hi, __borrow2_1, A0.hi, 0, -__borrow2_0); \ |
904 | | sub_ddmmss(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \ |
905 | | sub_ddmmss(A1.hi, A1.lo, A1.hi, A1.lo, 0, -__borrow2_1); \ |
906 | | } while (0) |
907 | | #endif |
908 | | |
909 | | #if defined(SUB6_LIMB32) |
910 | | /* A[0..2] = B[0..2] - C[0..2] */ |
911 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \ |
912 | | SUB6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ |
913 | | B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ |
914 | | C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) |
915 | | #endif |
916 | | |
917 | | #if defined(SUB6_LIMB32) |
918 | | /* A[0..3] = B[0..3] - C[0..3] */ |
919 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ |
920 | | mpi_limb_t __borrow4; \ |
921 | | SUB6_LIMB32(__borrow4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ |
922 | | 0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ |
923 | | 0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \ |
924 | | SUB4_LIMB32(A3.hi, A3.lo, A2.hi, __borrow4, \ |
925 | | B3.hi, B3.lo, B2.hi, 0, \ |
926 | | C3.hi, C3.lo, C2.hi, -__borrow4); \ |
927 | | } while (0) |
928 | | #endif |
929 | | |
930 | | #endif /* BYTES_PER_MPI_LIMB == 4 */ |
931 | | |
932 | | |
933 | | /* Common definitions. */ |
934 | | #define BITS_PER_MPI_LIMB64 (BITS_PER_MPI_LIMB * LIMBS_PER_LIMB64) |
935 | | #define BYTES_PER_MPI_LIMB64 (BYTES_PER_MPI_LIMB * LIMBS_PER_LIMB64) |
936 | | |
937 | | |
938 | | /* Common addition/subtraction macros. */ |
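 | | /* Note: the generic fallbacks below expect the caller to have a |
 | |  * variable 'zero' of type mpi_limb64_t with value 0 in scope, e.g. |
 | |  * 'const mpi_limb64_t zero = LIMB_TO64(0);'. */ |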
939 | | |
940 | | #ifndef ADD3_LIMB64 |
941 | | /* A[0..2] = B[0..2] + C[0..2] */ |
942 | | #define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \ |
943 | | mpi_limb64_t __carry3; \ |
944 | | ADD2_LIMB64(__carry3, A0, zero, B0, zero, C0); \ |
945 | | ADD2_LIMB64(A2, A1, B2, B1, C2, C1); \ |
946 | | ADD2_LIMB64(A2, A1, A2, A1, zero, __carry3); \ |
947 | | } while (0) |
948 | | #endif |
949 | | |
950 | | #ifndef ADD4_LIMB64 |
951 | | /* A[0..3] = B[0..3] + C[0..3] */ |
952 | | #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ |
953 | | mpi_limb64_t __carry4; \ |
954 | | ADD3_LIMB64(__carry4, A1, A0, zero, B1, B0, zero, C1, C0); \ |
955 | | ADD2_LIMB64(A3, A2, B3, B2, C3, C2); \ |
956 | | ADD2_LIMB64(A3, A2, A3, A2, zero, __carry4); \ |
957 | | } while (0) |
958 | | #endif |
959 | | |
960 | | #ifndef ADD5_LIMB64 |
961 | | /* A[0..4] = B[0..4] + C[0..4] */ |
962 | | #define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
963 | | C4, C3, C2, C1, C0) do { \ |
964 | | mpi_limb64_t __carry5; \ |
965 | | ADD4_LIMB64(__carry5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \ |
966 | | ADD2_LIMB64(A4, A3, B4, B3, C4, C3); \ |
967 | | ADD2_LIMB64(A4, A3, A4, A3, zero, __carry5); \ |
968 | | } while (0) |
969 | | #endif |
970 | | |
971 | | #ifndef ADD7_LIMB64 |
972 | | /* A[0..6] = B[0..6] + C[0..6] */ |
973 | | #define ADD7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \ |
974 | 0 | C6, C5, C4, C3, C2, C1, C0) do { \ |
975 | 0 | mpi_limb64_t __carry7; \ |
976 | 0 | ADD4_LIMB64(__carry7, A2, A1, A0, zero, B2, B1, B0, \ |
977 | 0 | zero, C2, C1, C0); \ |
978 | 0 | ADD5_LIMB64(A6, A5, A4, A3, __carry7, B6, B5, B4, B3, \ |
979 | 0 | __carry7, C6, C5, C4, C3, LIMB64_HILO(-1, -1)); \ |
980 | 0 | } while (0) |
981 | | #endif |
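 | | /* The ADD5_LIMB64 call inside ADD7_LIMB64 above re-injects the carry |
 | |  * from the low half: __carry7 holds 0 or 1, and adding the all-ones |
 | |  * constant LIMB64_HILO(-1, -1) to it produces a carry out exactly |
 | |  * when __carry7 is 1; the resulting low limb is written back into |
 | |  * __carry7 and discarded. */ |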
982 | | |
983 | | #ifndef SUB3_LIMB64 |
984 | | /* A[0..2] = B[0..2] - C[0..2] */ |
985 | | #define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \ |
986 | | mpi_limb64_t __borrow3; \ |
987 | | SUB2_LIMB64(__borrow3, A0, zero, B0, zero, C0); \ |
988 | | SUB2_LIMB64(A2, A1, B2, B1, C2, C1); \ |
989 | | SUB2_LIMB64(A2, A1, A2, A1, zero, LIMB_TO64(-LIMB_FROM64(__borrow3))); \ |
990 | | } while (0) |
991 | | #endif |
992 | | |
993 | | #ifndef SUB4_LIMB64 |
994 | | /* A[0..3] = B[0..3] - C[0..3] */ |
995 | | #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ |
996 | | mpi_limb64_t __borrow4; \ |
997 | | SUB3_LIMB64(__borrow4, A1, A0, zero, B1, B0, zero, C1, C0); \ |
998 | | SUB2_LIMB64(A3, A2, B3, B2, C3, C2); \ |
999 | | SUB2_LIMB64(A3, A2, A3, A2, zero, LIMB_TO64(-LIMB_FROM64(__borrow4))); \ |
1000 | | } while (0) |
1001 | | #endif |
1002 | | |
1003 | | #ifndef SUB5_LIMB64 |
1004 | | /* A[0..4] = B[0..4] - C[0..4] */ |
1005 | | #define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ |
1006 | | C4, C3, C2, C1, C0) do { \ |
1007 | | mpi_limb64_t __borrow5; \ |
1008 | | SUB4_LIMB64(__borrow5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \ |
1009 | | SUB2_LIMB64(A4, A3, B4, B3, C4, C3); \ |
1010 | | SUB2_LIMB64(A4, A3, A4, A3, zero, LIMB_TO64(-LIMB_FROM64(__borrow5))); \ |
1011 | | } while (0) |
1012 | | #endif |
1013 | | |
1014 | | #ifndef SUB7_LIMB64 |
1015 | | /* A[0..6] = B[0..6] - C[0..6] */ |
1016 | | #define SUB7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \ |
1017 | 0 | C6, C5, C4, C3, C2, C1, C0) do { \ |
1018 | 0 | mpi_limb64_t __borrow7; \ |
1019 | 0 | SUB4_LIMB64(__borrow7, A2, A1, A0, zero, B2, B1, B0, \ |
1020 | 0 | zero, C2, C1, C0); \ |
1021 | 0 | SUB5_LIMB64(A6, A5, A4, A3, __borrow7, B6, B5, B4, B3, zero, \ |
1022 | 0 | C6, C5, C4, C3, LIMB_TO64(-LIMB_FROM64(__borrow7))); \ |
1023 | 0 | } while (0) |
1024 | | #endif |
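 | | /* Illustrative sketch (editorial addition, not part of the original |
 | |  * header): one way a caller might add two 256-bit values held in |
 | |  * hypothetical limb arrays 'up' and 'vp' (mpi_ptr_t, 256 bits each) |
 | |  * and store the result into 'wp', in the style of the fixed-width |
 | |  * helpers above.  The names up/vp/wp are assumptions for |
 | |  * illustration only: |
 | |  * |
 | |  *   mpi_limb64_t a3, a2, a1, a0; |
 | |  * |
 | |  *   a3 = LOAD64 (up, 3); a2 = LOAD64 (up, 2); |
 | |  *   a1 = LOAD64 (up, 1); a0 = LOAD64 (up, 0); |
 | |  *   ADD4_LIMB64 (a3, a2, a1, a0, |
 | |  *                a3, a2, a1, a0, |
 | |  *                LOAD64 (vp, 3), LOAD64 (vp, 2), |
 | |  *                LOAD64 (vp, 1), LOAD64 (vp, 0)); |
 | |  *   STORE64 (wp, 3, a3); STORE64 (wp, 2, a2); |
 | |  *   STORE64 (wp, 1, a1); STORE64 (wp, 0, a0); |
 | |  * |
 | |  * (With the generic fallback macros a 'const mpi_limb64_t zero = |
 | |  * LIMB_TO64(0);' must also be in scope; the carry out of limb 3 is |
 | |  * discarded here.) */ |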
1025 | | |
1026 | | |
1027 | | #if defined(WORDS_BIGENDIAN) || (BITS_PER_MPI_LIMB64 != BITS_PER_MPI_LIMB) |
1028 | | #define LOAD64_UNALIGNED(x, pos) \ |
1029 | | LIMB64_HILO(LOAD32(x, 2 * (pos) + 2), LOAD32(x, 2 * (pos) + 1)) |
1030 | | #else |
1031 | | #define LOAD64_UNALIGNED(x, pos) \ |
1032 | | buf_get_le64((const byte *)(&(x)[pos]) + 4) |
1033 | | #endif |
1034 | | |
1035 | | |
1036 | | /* Helper functions. */ |
1037 | | |
1038 | | static inline int |
1039 | | mpi_nbits_more_than (gcry_mpi_t w, unsigned int nbits) |
1040 | 0 | { |
1041 | 0 | unsigned int nbits_nlimbs; |
1042 | 0 | mpi_limb_t wlimb; |
1043 | 0 | unsigned int n; |
1044 | |
1045 | 0 | nbits_nlimbs = (nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB; |
1046 | | |
1047 | | /* Note: Assumes that 'w' is normalized. */ |
1048 | |
1049 | 0 | if (w->nlimbs > nbits_nlimbs) |
1050 | 0 | return 1; |
1051 | 0 | if (w->nlimbs < nbits_nlimbs) |
1052 | 0 | return 0; |
1053 | 0 | if ((nbits % BITS_PER_MPI_LIMB) == 0) |
1054 | 0 | return 0; |
1055 | | |
1056 | 0 | wlimb = w->d[nbits_nlimbs - 1]; |
1057 | 0 | if (wlimb == 0) |
1058 | 0 | log_bug ("mpi_nbits_more_than: input mpi not normalized\n"); |
1059 | | |
1060 | 0 | count_leading_zeros (n, wlimb); |
1061 | |
1062 | 0 | return (BITS_PER_MPI_LIMB - n) > (nbits % BITS_PER_MPI_LIMB); |
1063 | 0 | } |
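 | | /* For example, mpi_nbits_more_than (w, 256) is non-zero exactly when |
 | |  * the bit length of (normalized) w exceeds 256. */ |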
1064 | | |
1065 | | #endif /* GCRY_EC_INLINE_H */ |