/src/boringssl/crypto/fipsmodule/bn/generic.c.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
2 | | * All rights reserved. |
3 | | * |
4 | | * This package is an SSL implementation written |
5 | | * by Eric Young (eay@cryptsoft.com). |
6 | | * The implementation was written so as to conform with Netscapes SSL. |
7 | | * |
8 | | * This library is free for commercial and non-commercial use as long as |
9 | | * the following conditions are aheared to. The following conditions |
10 | | * apply to all code found in this distribution, be it the RC4, RSA, |
11 | | * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
12 | | * included with this distribution is covered by the same copyright terms |
13 | | * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
14 | | * |
15 | | * Copyright remains Eric Young's, and as such any Copyright notices in |
16 | | * the code are not to be removed. |
17 | | * If this package is used in a product, Eric Young should be given attribution |
18 | | * as the author of the parts of the library used. |
19 | | * This can be in the form of a textual message at program startup or |
20 | | * in documentation (online or textual) provided with the package. |
21 | | * |
22 | | * Redistribution and use in source and binary forms, with or without |
23 | | * modification, are permitted provided that the following conditions |
24 | | * are met: |
25 | | * 1. Redistributions of source code must retain the copyright |
26 | | * notice, this list of conditions and the following disclaimer. |
27 | | * 2. Redistributions in binary form must reproduce the above copyright |
28 | | * notice, this list of conditions and the following disclaimer in the |
29 | | * documentation and/or other materials provided with the distribution. |
30 | | * 3. All advertising materials mentioning features or use of this software |
31 | | * must display the following acknowledgement: |
32 | | * "This product includes cryptographic software written by |
33 | | * Eric Young (eay@cryptsoft.com)" |
34 | | * The word 'cryptographic' can be left out if the rouines from the library |
35 | | * being used are not cryptographic related :-). |
36 | | * 4. If you include any Windows specific code (or a derivative thereof) from |
37 | | * the apps directory (application code) you must include an acknowledgement: |
38 | | * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" |
39 | | * |
40 | | * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
41 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
42 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
43 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
44 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
45 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
46 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
47 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
48 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
49 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
50 | | * SUCH DAMAGE. |
51 | | * |
52 | | * The licence and distribution terms for any publically available version or |
53 | | * derivative of this code cannot be changed. i.e. this code cannot simply be |
54 | | * copied and put under another distribution licence |
55 | | * [including the GNU Public Licence.] */ |
56 | | |
57 | | #include <openssl/bn.h> |
58 | | |
59 | | #include <assert.h> |
60 | | |
61 | | #include "internal.h" |
62 | | |
63 | | |
// Select which primitives are supplied by platform assembly. When
// BN_MUL_ASM is defined, the generic multiplication and squaring routines
// later in this file are compiled out; when BN_ADD_ASM is defined, the
// generic bn_add_words and bn_sub_words are compiled out.
#if !defined(OPENSSL_NO_ASM)

#if defined(OPENSSL_X86)
// See asm/bn-586.pl.
#define BN_ADD_ASM
#define BN_MUL_ASM
#endif

#if defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))
// See asm/x86_64-gcc.c
#define BN_ADD_ASM
#define BN_MUL_ASM
#endif

#if defined(OPENSSL_AARCH64)
// See asm/bn-armv8.pl.
#define BN_ADD_ASM
#endif

#endif  // !OPENSSL_NO_ASM
81 | | |
82 | | #if !defined(BN_MUL_ASM) |
83 | | |
// Single-word multiply helpers. |r|, |r0|, |r1| and |c| must be modifiable
// lvalues; |c| is a carry word that is both consumed and replaced.
//
// mul_add(r, a, w, c) -- (c, r) = w * a + r + c
// mul(r, a, w, c)     -- (c, r) = w * a + c
// sqr(r0, r1, a)      -- (r1, r0) = a * a
#ifdef BN_ULLONG

// A double-width BN_ULLONG is available: form the whole result in one value
// and split it with Lw and Hw.
#define mul_add(r, a, w, c)                           \
  do {                                                \
    BN_ULLONG acc = (BN_ULLONG)(w) * (a) + (r) + (c); \
    (r) = Lw(acc);                                    \
    (c) = Hw(acc);                                    \
  } while (0)

#define mul(r, a, w, c)                         \
  do {                                          \
    BN_ULLONG acc = (BN_ULLONG)(w) * (a) + (c); \
    (r) = Lw(acc);                              \
    (c) = Hw(acc);                              \
  } while (0)

#define sqr(r0, r1, a)                    \
  do {                                    \
    BN_ULLONG acc = (BN_ULLONG)(a) * (a); \
    (r0) = Lw(acc);                       \
    (r1) = Hw(acc);                       \
  } while (0)

#else

// No double-width type: BN_UMULT_LOHI yields the product as two words, and
// each carry out of a single-word addition is recovered by checking whether
// the sum wrapped below one of its operands.
#define mul_add(r, a, w, c)                     \
  do {                                          \
    BN_ULONG prod_hi, prod_lo;                  \
    BN_ULONG factor = (a);                      \
    BN_ULONG sum = (r);                         \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
    sum += (c);                                 \
    (c) = (sum < (c)) ? 1 : 0;                  \
    (c) += prod_hi;                             \
    sum += prod_lo;                             \
    (c) += (sum < prod_lo) ? 1 : 0;             \
    (r) = sum;                                  \
  } while (0)

#define mul(r, a, w, c)                         \
  do {                                          \
    BN_ULONG prod_hi, prod_lo, sum;             \
    BN_ULONG factor = (a);                      \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
    sum = prod_lo + (c);                        \
    (c) = prod_hi;                              \
    (c) += (sum < prod_lo) ? 1 : 0;             \
    (r) = sum;                                  \
  } while (0)

#define sqr(r0, r1, a)                 \
  do {                                 \
    BN_ULONG word = (a);               \
    BN_UMULT_LOHI(r0, r1, word, word); \
  } while (0)

#endif  // !BN_ULLONG
141 | | |
142 | | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, |
143 | 57.8M | BN_ULONG w) { |
144 | 57.8M | BN_ULONG c1 = 0; |
145 | | |
146 | 57.8M | if (num == 0) { |
147 | 0 | return c1; |
148 | 0 | } |
149 | | |
150 | 559M | while (num & ~3) { |
151 | 501M | mul_add(rp[0], ap[0], w, c1); |
152 | 501M | mul_add(rp[1], ap[1], w, c1); |
153 | 501M | mul_add(rp[2], ap[2], w, c1); |
154 | 501M | mul_add(rp[3], ap[3], w, c1); |
155 | 501M | ap += 4; |
156 | 501M | rp += 4; |
157 | 501M | num -= 4; |
158 | 501M | } |
159 | | |
160 | 119M | while (num) { |
161 | 62.0M | mul_add(rp[0], ap[0], w, c1); |
162 | 62.0M | ap++; |
163 | 62.0M | rp++; |
164 | 62.0M | num--; |
165 | 62.0M | } |
166 | | |
167 | 57.8M | return c1; |
168 | 57.8M | } |
169 | | |
170 | | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, |
171 | 1.83M | BN_ULONG w) { |
172 | 1.83M | BN_ULONG c1 = 0; |
173 | | |
174 | 1.83M | if (num == 0) { |
175 | 0 | return c1; |
176 | 0 | } |
177 | | |
178 | 8.80M | while (num & ~3) { |
179 | 6.96M | mul(rp[0], ap[0], w, c1); |
180 | 6.96M | mul(rp[1], ap[1], w, c1); |
181 | 6.96M | mul(rp[2], ap[2], w, c1); |
182 | 6.96M | mul(rp[3], ap[3], w, c1); |
183 | 6.96M | ap += 4; |
184 | 6.96M | rp += 4; |
185 | 6.96M | num -= 4; |
186 | 6.96M | } |
187 | 4.76M | while (num) { |
188 | 2.93M | mul(rp[0], ap[0], w, c1); |
189 | 2.93M | ap++; |
190 | 2.93M | rp++; |
191 | 2.93M | num--; |
192 | 2.93M | } |
193 | 1.83M | return c1; |
194 | 1.83M | } |
195 | | |
196 | 1.24M | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) { |
197 | 1.24M | if (n == 0) { |
198 | 0 | return; |
199 | 0 | } |
200 | | |
201 | 6.47M | while (n & ~3) { |
202 | 5.23M | sqr(r[0], r[1], a[0]); |
203 | 5.23M | sqr(r[2], r[3], a[1]); |
204 | 5.23M | sqr(r[4], r[5], a[2]); |
205 | 5.23M | sqr(r[6], r[7], a[3]); |
206 | 5.23M | a += 4; |
207 | 5.23M | r += 8; |
208 | 5.23M | n -= 4; |
209 | 5.23M | } |
210 | 3.78M | while (n) { |
211 | 2.54M | sqr(r[0], r[1], a[0]); |
212 | 2.54M | a++; |
213 | 2.54M | r += 2; |
214 | 2.54M | n--; |
215 | 2.54M | } |
216 | 1.24M | } |
217 | | |
// Accumulator helpers for the comba routines. Each adds a two-word product
// into the three-word number c=(c2,c1,c0), where c0 is the least significant
// word. c0, c1 and c2 must be modifiable lvalues; they are read and written
// more than once per invocation.
//
// mul_add_c(a,b,c0,c1,c2)    -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)

#ifdef BN_ULLONG

// Keep in mind that additions to multiplication result can not overflow,
// because its high half cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2)     \
  do {                                  \
    BN_ULONG hi;                        \
    BN_ULLONG t = (BN_ULLONG)(a) * (b); \
    t += (c0); /* no carry */           \
    (c0) = (BN_ULONG)Lw(t);             \
    hi = (BN_ULONG)Hw(t);               \
    (c1) += (hi);                       \
    (c2) += (c1) < hi;                  \
  } while (0)

// The product is added in two separate passes, each folding the current c0
// into a copy of the product and carrying the high word into c1 and c2.
#define mul_add_c2(a, b, c0, c1, c2)        \
  do {                                      \
    BN_ULONG hi;                            \
    BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
    BN_ULLONG tt = t + (c0); /* no carry */ \
    (c0) = (BN_ULONG)Lw(tt);                \
    hi = (BN_ULONG)Hw(tt);                  \
    (c1) += hi;                             \
    (c2) += (c1) < hi;                      \
    t += (c0); /* no carry */               \
    (c0) = (BN_ULONG)Lw(t);                 \
    hi = (BN_ULONG)Hw(t);                   \
    (c1) += hi;                             \
    (c2) += (c1) < hi;                      \
  } while (0)

#define sqr_add_c(a, i, c0, c1, c2)           \
  do {                                        \
    BN_ULONG hi;                              \
    BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
    t += (c0); /* no carry */                 \
    (c0) = (BN_ULONG)Lw(t);                   \
    hi = (BN_ULONG)Hw(t);                     \
    (c1) += hi;                               \
    (c2) += (c1) < hi;                        \
  } while (0)

#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#else

// Keep in mind that additions to hi can not overflow, because the high word of
// a multiplication result cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a), tb = (b);    \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, tb);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

// The product is added in two separate passes. |tt| holds the high word plus
// the first pass's carry so that |hi| stays untouched for the second pass.
#define mul_add_c2(a, b, c0, c1, c2)  \
  do {                                \
    BN_ULONG ta = (a), tb = (b);      \
    BN_ULONG lo, hi, tt;              \
    BN_UMULT_LOHI(lo, hi, ta, tb);    \
    (c0) += lo;                       \
    tt = hi + (((c0) < lo) ? 1 : 0);  \
    (c1) += tt;                       \
    (c2) += ((c1) < tt) ? 1 : 0;      \
    (c0) += lo;                       \
    hi += ((c0) < lo) ? 1 : 0;        \
    (c1) += hi;                       \
    (c2) += ((c1) < hi) ? 1 : 0;      \
  } while (0)

#define sqr_add_c(a, i, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a)[i];           \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#endif  // !BN_ULLONG
311 | | |
312 | 13.1M | void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) { |
313 | 13.1M | BN_ULONG c1, c2, c3; |
314 | | |
315 | 13.1M | c1 = 0; |
316 | 13.1M | c2 = 0; |
317 | 13.1M | c3 = 0; |
318 | 13.1M | mul_add_c(a[0], b[0], c1, c2, c3); |
319 | 13.1M | r[0] = c1; |
320 | 13.1M | c1 = 0; |
321 | 13.1M | mul_add_c(a[0], b[1], c2, c3, c1); |
322 | 13.1M | mul_add_c(a[1], b[0], c2, c3, c1); |
323 | 13.1M | r[1] = c2; |
324 | 13.1M | c2 = 0; |
325 | 13.1M | mul_add_c(a[2], b[0], c3, c1, c2); |
326 | 13.1M | mul_add_c(a[1], b[1], c3, c1, c2); |
327 | 13.1M | mul_add_c(a[0], b[2], c3, c1, c2); |
328 | 13.1M | r[2] = c3; |
329 | 13.1M | c3 = 0; |
330 | 13.1M | mul_add_c(a[0], b[3], c1, c2, c3); |
331 | 13.1M | mul_add_c(a[1], b[2], c1, c2, c3); |
332 | 13.1M | mul_add_c(a[2], b[1], c1, c2, c3); |
333 | 13.1M | mul_add_c(a[3], b[0], c1, c2, c3); |
334 | 13.1M | r[3] = c1; |
335 | 13.1M | c1 = 0; |
336 | 13.1M | mul_add_c(a[4], b[0], c2, c3, c1); |
337 | 13.1M | mul_add_c(a[3], b[1], c2, c3, c1); |
338 | 13.1M | mul_add_c(a[2], b[2], c2, c3, c1); |
339 | 13.1M | mul_add_c(a[1], b[3], c2, c3, c1); |
340 | 13.1M | mul_add_c(a[0], b[4], c2, c3, c1); |
341 | 13.1M | r[4] = c2; |
342 | 13.1M | c2 = 0; |
343 | 13.1M | mul_add_c(a[0], b[5], c3, c1, c2); |
344 | 13.1M | mul_add_c(a[1], b[4], c3, c1, c2); |
345 | 13.1M | mul_add_c(a[2], b[3], c3, c1, c2); |
346 | 13.1M | mul_add_c(a[3], b[2], c3, c1, c2); |
347 | 13.1M | mul_add_c(a[4], b[1], c3, c1, c2); |
348 | 13.1M | mul_add_c(a[5], b[0], c3, c1, c2); |
349 | 13.1M | r[5] = c3; |
350 | 13.1M | c3 = 0; |
351 | 13.1M | mul_add_c(a[6], b[0], c1, c2, c3); |
352 | 13.1M | mul_add_c(a[5], b[1], c1, c2, c3); |
353 | 13.1M | mul_add_c(a[4], b[2], c1, c2, c3); |
354 | 13.1M | mul_add_c(a[3], b[3], c1, c2, c3); |
355 | 13.1M | mul_add_c(a[2], b[4], c1, c2, c3); |
356 | 13.1M | mul_add_c(a[1], b[5], c1, c2, c3); |
357 | 13.1M | mul_add_c(a[0], b[6], c1, c2, c3); |
358 | 13.1M | r[6] = c1; |
359 | 13.1M | c1 = 0; |
360 | 13.1M | mul_add_c(a[0], b[7], c2, c3, c1); |
361 | 13.1M | mul_add_c(a[1], b[6], c2, c3, c1); |
362 | 13.1M | mul_add_c(a[2], b[5], c2, c3, c1); |
363 | 13.1M | mul_add_c(a[3], b[4], c2, c3, c1); |
364 | 13.1M | mul_add_c(a[4], b[3], c2, c3, c1); |
365 | 13.1M | mul_add_c(a[5], b[2], c2, c3, c1); |
366 | 13.1M | mul_add_c(a[6], b[1], c2, c3, c1); |
367 | 13.1M | mul_add_c(a[7], b[0], c2, c3, c1); |
368 | 13.1M | r[7] = c2; |
369 | 13.1M | c2 = 0; |
370 | 13.1M | mul_add_c(a[7], b[1], c3, c1, c2); |
371 | 13.1M | mul_add_c(a[6], b[2], c3, c1, c2); |
372 | 13.1M | mul_add_c(a[5], b[3], c3, c1, c2); |
373 | 13.1M | mul_add_c(a[4], b[4], c3, c1, c2); |
374 | 13.1M | mul_add_c(a[3], b[5], c3, c1, c2); |
375 | 13.1M | mul_add_c(a[2], b[6], c3, c1, c2); |
376 | 13.1M | mul_add_c(a[1], b[7], c3, c1, c2); |
377 | 13.1M | r[8] = c3; |
378 | 13.1M | c3 = 0; |
379 | 13.1M | mul_add_c(a[2], b[7], c1, c2, c3); |
380 | 13.1M | mul_add_c(a[3], b[6], c1, c2, c3); |
381 | 13.1M | mul_add_c(a[4], b[5], c1, c2, c3); |
382 | 13.1M | mul_add_c(a[5], b[4], c1, c2, c3); |
383 | 13.1M | mul_add_c(a[6], b[3], c1, c2, c3); |
384 | 13.1M | mul_add_c(a[7], b[2], c1, c2, c3); |
385 | 13.1M | r[9] = c1; |
386 | 13.1M | c1 = 0; |
387 | 13.1M | mul_add_c(a[7], b[3], c2, c3, c1); |
388 | 13.1M | mul_add_c(a[6], b[4], c2, c3, c1); |
389 | 13.1M | mul_add_c(a[5], b[5], c2, c3, c1); |
390 | 13.1M | mul_add_c(a[4], b[6], c2, c3, c1); |
391 | 13.1M | mul_add_c(a[3], b[7], c2, c3, c1); |
392 | 13.1M | r[10] = c2; |
393 | 13.1M | c2 = 0; |
394 | 13.1M | mul_add_c(a[4], b[7], c3, c1, c2); |
395 | 13.1M | mul_add_c(a[5], b[6], c3, c1, c2); |
396 | 13.1M | mul_add_c(a[6], b[5], c3, c1, c2); |
397 | 13.1M | mul_add_c(a[7], b[4], c3, c1, c2); |
398 | 13.1M | r[11] = c3; |
399 | 13.1M | c3 = 0; |
400 | 13.1M | mul_add_c(a[7], b[5], c1, c2, c3); |
401 | 13.1M | mul_add_c(a[6], b[6], c1, c2, c3); |
402 | 13.1M | mul_add_c(a[5], b[7], c1, c2, c3); |
403 | 13.1M | r[12] = c1; |
404 | 13.1M | c1 = 0; |
405 | 13.1M | mul_add_c(a[6], b[7], c2, c3, c1); |
406 | 13.1M | mul_add_c(a[7], b[6], c2, c3, c1); |
407 | 13.1M | r[13] = c2; |
408 | 13.1M | c2 = 0; |
409 | 13.1M | mul_add_c(a[7], b[7], c3, c1, c2); |
410 | 13.1M | r[14] = c3; |
411 | 13.1M | r[15] = c1; |
412 | 13.1M | } |
413 | | |
414 | 0 | void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) { |
415 | 0 | BN_ULONG c1, c2, c3; |
416 | |
|
417 | 0 | c1 = 0; |
418 | 0 | c2 = 0; |
419 | 0 | c3 = 0; |
420 | 0 | mul_add_c(a[0], b[0], c1, c2, c3); |
421 | 0 | r[0] = c1; |
422 | 0 | c1 = 0; |
423 | 0 | mul_add_c(a[0], b[1], c2, c3, c1); |
424 | 0 | mul_add_c(a[1], b[0], c2, c3, c1); |
425 | 0 | r[1] = c2; |
426 | 0 | c2 = 0; |
427 | 0 | mul_add_c(a[2], b[0], c3, c1, c2); |
428 | 0 | mul_add_c(a[1], b[1], c3, c1, c2); |
429 | 0 | mul_add_c(a[0], b[2], c3, c1, c2); |
430 | 0 | r[2] = c3; |
431 | 0 | c3 = 0; |
432 | 0 | mul_add_c(a[0], b[3], c1, c2, c3); |
433 | 0 | mul_add_c(a[1], b[2], c1, c2, c3); |
434 | 0 | mul_add_c(a[2], b[1], c1, c2, c3); |
435 | 0 | mul_add_c(a[3], b[0], c1, c2, c3); |
436 | 0 | r[3] = c1; |
437 | 0 | c1 = 0; |
438 | 0 | mul_add_c(a[3], b[1], c2, c3, c1); |
439 | 0 | mul_add_c(a[2], b[2], c2, c3, c1); |
440 | 0 | mul_add_c(a[1], b[3], c2, c3, c1); |
441 | 0 | r[4] = c2; |
442 | 0 | c2 = 0; |
443 | 0 | mul_add_c(a[2], b[3], c3, c1, c2); |
444 | 0 | mul_add_c(a[3], b[2], c3, c1, c2); |
445 | 0 | r[5] = c3; |
446 | 0 | c3 = 0; |
447 | 0 | mul_add_c(a[3], b[3], c1, c2, c3); |
448 | 0 | r[6] = c1; |
449 | 0 | r[7] = c2; |
450 | 0 | } |
451 | | |
452 | 3.09M | void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) { |
453 | 3.09M | BN_ULONG c1, c2, c3; |
454 | | |
455 | 3.09M | c1 = 0; |
456 | 3.09M | c2 = 0; |
457 | 3.09M | c3 = 0; |
458 | 3.09M | sqr_add_c(a, 0, c1, c2, c3); |
459 | 3.09M | r[0] = c1; |
460 | 3.09M | c1 = 0; |
461 | 3.09M | sqr_add_c2(a, 1, 0, c2, c3, c1); |
462 | 3.09M | r[1] = c2; |
463 | 3.09M | c2 = 0; |
464 | 3.09M | sqr_add_c(a, 1, c3, c1, c2); |
465 | 3.09M | sqr_add_c2(a, 2, 0, c3, c1, c2); |
466 | 3.09M | r[2] = c3; |
467 | 3.09M | c3 = 0; |
468 | 3.09M | sqr_add_c2(a, 3, 0, c1, c2, c3); |
469 | 3.09M | sqr_add_c2(a, 2, 1, c1, c2, c3); |
470 | 3.09M | r[3] = c1; |
471 | 3.09M | c1 = 0; |
472 | 3.09M | sqr_add_c(a, 2, c2, c3, c1); |
473 | 3.09M | sqr_add_c2(a, 3, 1, c2, c3, c1); |
474 | 3.09M | sqr_add_c2(a, 4, 0, c2, c3, c1); |
475 | 3.09M | r[4] = c2; |
476 | 3.09M | c2 = 0; |
477 | 3.09M | sqr_add_c2(a, 5, 0, c3, c1, c2); |
478 | 3.09M | sqr_add_c2(a, 4, 1, c3, c1, c2); |
479 | 3.09M | sqr_add_c2(a, 3, 2, c3, c1, c2); |
480 | 3.09M | r[5] = c3; |
481 | 3.09M | c3 = 0; |
482 | 3.09M | sqr_add_c(a, 3, c1, c2, c3); |
483 | 3.09M | sqr_add_c2(a, 4, 2, c1, c2, c3); |
484 | 3.09M | sqr_add_c2(a, 5, 1, c1, c2, c3); |
485 | 3.09M | sqr_add_c2(a, 6, 0, c1, c2, c3); |
486 | 3.09M | r[6] = c1; |
487 | 3.09M | c1 = 0; |
488 | 3.09M | sqr_add_c2(a, 7, 0, c2, c3, c1); |
489 | 3.09M | sqr_add_c2(a, 6, 1, c2, c3, c1); |
490 | 3.09M | sqr_add_c2(a, 5, 2, c2, c3, c1); |
491 | 3.09M | sqr_add_c2(a, 4, 3, c2, c3, c1); |
492 | 3.09M | r[7] = c2; |
493 | 3.09M | c2 = 0; |
494 | 3.09M | sqr_add_c(a, 4, c3, c1, c2); |
495 | 3.09M | sqr_add_c2(a, 5, 3, c3, c1, c2); |
496 | 3.09M | sqr_add_c2(a, 6, 2, c3, c1, c2); |
497 | 3.09M | sqr_add_c2(a, 7, 1, c3, c1, c2); |
498 | 3.09M | r[8] = c3; |
499 | 3.09M | c3 = 0; |
500 | 3.09M | sqr_add_c2(a, 7, 2, c1, c2, c3); |
501 | 3.09M | sqr_add_c2(a, 6, 3, c1, c2, c3); |
502 | 3.09M | sqr_add_c2(a, 5, 4, c1, c2, c3); |
503 | 3.09M | r[9] = c1; |
504 | 3.09M | c1 = 0; |
505 | 3.09M | sqr_add_c(a, 5, c2, c3, c1); |
506 | 3.09M | sqr_add_c2(a, 6, 4, c2, c3, c1); |
507 | 3.09M | sqr_add_c2(a, 7, 3, c2, c3, c1); |
508 | 3.09M | r[10] = c2; |
509 | 3.09M | c2 = 0; |
510 | 3.09M | sqr_add_c2(a, 7, 4, c3, c1, c2); |
511 | 3.09M | sqr_add_c2(a, 6, 5, c3, c1, c2); |
512 | 3.09M | r[11] = c3; |
513 | 3.09M | c3 = 0; |
514 | 3.09M | sqr_add_c(a, 6, c1, c2, c3); |
515 | 3.09M | sqr_add_c2(a, 7, 5, c1, c2, c3); |
516 | 3.09M | r[12] = c1; |
517 | 3.09M | c1 = 0; |
518 | 3.09M | sqr_add_c2(a, 7, 6, c2, c3, c1); |
519 | 3.09M | r[13] = c2; |
520 | 3.09M | c2 = 0; |
521 | 3.09M | sqr_add_c(a, 7, c3, c1, c2); |
522 | 3.09M | r[14] = c3; |
523 | 3.09M | r[15] = c1; |
524 | 3.09M | } |
525 | | |
526 | 12.0k | void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) { |
527 | 12.0k | BN_ULONG c1, c2, c3; |
528 | | |
529 | 12.0k | c1 = 0; |
530 | 12.0k | c2 = 0; |
531 | 12.0k | c3 = 0; |
532 | 12.0k | sqr_add_c(a, 0, c1, c2, c3); |
533 | 12.0k | r[0] = c1; |
534 | 12.0k | c1 = 0; |
535 | 12.0k | sqr_add_c2(a, 1, 0, c2, c3, c1); |
536 | 12.0k | r[1] = c2; |
537 | 12.0k | c2 = 0; |
538 | 12.0k | sqr_add_c(a, 1, c3, c1, c2); |
539 | 12.0k | sqr_add_c2(a, 2, 0, c3, c1, c2); |
540 | 12.0k | r[2] = c3; |
541 | 12.0k | c3 = 0; |
542 | 12.0k | sqr_add_c2(a, 3, 0, c1, c2, c3); |
543 | 12.0k | sqr_add_c2(a, 2, 1, c1, c2, c3); |
544 | 12.0k | r[3] = c1; |
545 | 12.0k | c1 = 0; |
546 | 12.0k | sqr_add_c(a, 2, c2, c3, c1); |
547 | 12.0k | sqr_add_c2(a, 3, 1, c2, c3, c1); |
548 | 12.0k | r[4] = c2; |
549 | 12.0k | c2 = 0; |
550 | 12.0k | sqr_add_c2(a, 3, 2, c3, c1, c2); |
551 | 12.0k | r[5] = c3; |
552 | 12.0k | c3 = 0; |
553 | 12.0k | sqr_add_c(a, 3, c1, c2, c3); |
554 | 12.0k | r[6] = c1; |
555 | 12.0k | r[7] = c2; |
556 | 12.0k | } |
557 | | |
558 | | #undef mul_add |
559 | | #undef mul |
560 | | #undef sqr |
561 | | #undef mul_add_c |
562 | | #undef mul_add_c2 |
563 | | #undef sqr_add_c |
564 | | #undef sqr_add_c2 |
565 | | |
566 | | #endif // !BN_MUL_ASM |
567 | | |
568 | | #if !defined(BN_ADD_ASM) |
569 | | |
570 | | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
571 | 25.2M | size_t n) { |
572 | 25.2M | if (n == 0) { |
573 | 2.31k | return 0; |
574 | 2.31k | } |
575 | | |
576 | 25.2M | BN_ULONG carry = 0; |
577 | 183M | while (n & ~3) { |
578 | 158M | r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry); |
579 | 158M | r[1] = CRYPTO_addc_w(a[1], b[1], carry, &carry); |
580 | 158M | r[2] = CRYPTO_addc_w(a[2], b[2], carry, &carry); |
581 | 158M | r[3] = CRYPTO_addc_w(a[3], b[3], carry, &carry); |
582 | 158M | a += 4; |
583 | 158M | b += 4; |
584 | 158M | r += 4; |
585 | 158M | n -= 4; |
586 | 158M | } |
587 | 29.7M | while (n) { |
588 | 4.50M | r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry); |
589 | 4.50M | a++; |
590 | 4.50M | b++; |
591 | 4.50M | r++; |
592 | 4.50M | n--; |
593 | 4.50M | } |
594 | 25.2M | return carry; |
595 | 25.2M | } |
596 | | |
597 | | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, |
598 | 39.6M | size_t n) { |
599 | 39.6M | if (n == 0) { |
600 | 623k | return (BN_ULONG)0; |
601 | 623k | } |
602 | | |
603 | 39.0M | BN_ULONG borrow = 0; |
604 | 176M | while (n & ~3) { |
605 | 137M | r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow); |
606 | 137M | r[1] = CRYPTO_subc_w(a[1], b[1], borrow, &borrow); |
607 | 137M | r[2] = CRYPTO_subc_w(a[2], b[2], borrow, &borrow); |
608 | 137M | r[3] = CRYPTO_subc_w(a[3], b[3], borrow, &borrow); |
609 | 137M | a += 4; |
610 | 137M | b += 4; |
611 | 137M | r += 4; |
612 | 137M | n -= 4; |
613 | 137M | } |
614 | 46.1M | while (n) { |
615 | 7.07M | r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow); |
616 | 7.07M | a++; |
617 | 7.07M | b++; |
618 | 7.07M | r++; |
619 | 7.07M | n--; |
620 | 7.07M | } |
621 | 39.0M | return borrow; |
622 | 39.6M | } |
623 | | |
624 | | #endif // !BN_ADD_ASM |