Coverage Report

Created: 2024-11-21 07:03

/src/boringssl/crypto/fipsmodule/bn/generic.c.inc
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2
 * All rights reserved.
3
 *
4
 * This package is an SSL implementation written
5
 * by Eric Young (eay@cryptsoft.com).
6
 * The implementation was written so as to conform with Netscapes SSL.
7
 *
8
 * This library is free for commercial and non-commercial use as long as
9
 * the following conditions are aheared to.  The following conditions
10
 * apply to all code found in this distribution, be it the RC4, RSA,
11
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12
 * included with this distribution is covered by the same copyright terms
13
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14
 *
15
 * Copyright remains Eric Young's, and as such any Copyright notices in
16
 * the code are not to be removed.
17
 * If this package is used in a product, Eric Young should be given attribution
18
 * as the author of the parts of the library used.
19
 * This can be in the form of a textual message at program startup or
20
 * in documentation (online or textual) provided with the package.
21
 *
22
 * Redistribution and use in source and binary forms, with or without
23
 * modification, are permitted provided that the following conditions
24
 * are met:
25
 * 1. Redistributions of source code must retain the copyright
26
 *    notice, this list of conditions and the following disclaimer.
27
 * 2. Redistributions in binary form must reproduce the above copyright
28
 *    notice, this list of conditions and the following disclaimer in the
29
 *    documentation and/or other materials provided with the distribution.
30
 * 3. All advertising materials mentioning features or use of this software
31
 *    must display the following acknowledgement:
32
 *    "This product includes cryptographic software written by
33
 *     Eric Young (eay@cryptsoft.com)"
34
 *    The word 'cryptographic' can be left out if the rouines from the library
35
 *    being used are not cryptographic related :-).
36
 * 4. If you include any Windows specific code (or a derivative thereof) from
37
 *    the apps directory (application code) you must include an acknowledgement:
38
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39
 *
40
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50
 * SUCH DAMAGE.
51
 *
52
 * The licence and distribution terms for any publically available version or
53
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
54
 * copied and put under another distribution licence
55
 * [including the GNU Public Licence.] */
56
57
#include <openssl/bn.h>
58
59
#include <assert.h>
60
61
#include "internal.h"
62
63
64
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
65
// See asm/bn-586.pl.
66
#define BN_ADD_ASM
67
#define BN_MUL_ASM
68
#endif
69
70
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
71
    (defined(__GNUC__) || defined(__clang__))
72
// See asm/x86_64-gcc.c
73
#define BN_ADD_ASM
74
#define BN_MUL_ASM
75
#endif
76
77
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
78
// See asm/bn-armv8.pl.
79
#define BN_ADD_ASM
80
#endif
81
82
#if !defined(BN_MUL_ASM)
83
84
#ifdef BN_ULLONG
85
#define mul_add(r, a, w, c)               \
86
2.06G
  do {                                    \
87
2.06G
    BN_ULLONG t;                          \
88
2.06G
    t = (BN_ULLONG)(w) * (a) + (r) + (c); \
89
2.06G
    (r) = Lw(t);                          \
90
2.06G
    (c) = Hw(t);                          \
91
2.06G
  } while (0)
92
93
#define mul(r, a, w, c)             \
94
30.8M
  do {                              \
95
30.8M
    BN_ULLONG t;                    \
96
30.8M
    t = (BN_ULLONG)(w) * (a) + (c); \
97
30.8M
    (r) = Lw(t);                    \
98
30.8M
    (c) = Hw(t);                    \
99
30.8M
  } while (0)
100
101
#define sqr(r0, r1, a)        \
102
23.4M
  do {                        \
103
23.4M
    BN_ULLONG t;              \
104
23.4M
    t = (BN_ULLONG)(a) * (a); \
105
23.4M
    (r0) = Lw(t);             \
106
23.4M
    (r1) = Hw(t);             \
107
23.4M
  } while (0)
108
109
#else
110
111
#define mul_add(r, a, w, c)             \
112
  do {                                  \
113
    BN_ULONG high, low, ret, tmp = (a); \
114
    ret = (r);                          \
115
    BN_UMULT_LOHI(low, high, w, tmp);   \
116
    ret += (c);                         \
117
    (c) = (ret < (c)) ? 1 : 0;          \
118
    (c) += high;                        \
119
    ret += low;                         \
120
    (c) += (ret < low) ? 1 : 0;         \
121
    (r) = ret;                          \
122
  } while (0)
123
124
#define mul(r, a, w, c)                \
125
  do {                                 \
126
    BN_ULONG high, low, ret, ta = (a); \
127
    BN_UMULT_LOHI(low, high, w, ta);   \
128
    ret = low + (c);                   \
129
    (c) = high;                        \
130
    (c) += (ret < low) ? 1 : 0;        \
131
    (r) = ret;                         \
132
  } while (0)
133
134
#define sqr(r0, r1, a)               \
135
  do {                               \
136
    BN_ULONG tmp = (a);              \
137
    BN_UMULT_LOHI(r0, r1, tmp, tmp); \
138
  } while (0)
139
140
#endif  // !BN_ULLONG
141
142
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
143
57.8M
                          BN_ULONG w) {
144
57.8M
  BN_ULONG c1 = 0;
145
146
57.8M
  if (num == 0) {
147
0
    return c1;
148
0
  }
149
150
559M
  while (num & ~3) {
151
501M
    mul_add(rp[0], ap[0], w, c1);
152
501M
    mul_add(rp[1], ap[1], w, c1);
153
501M
    mul_add(rp[2], ap[2], w, c1);
154
501M
    mul_add(rp[3], ap[3], w, c1);
155
501M
    ap += 4;
156
501M
    rp += 4;
157
501M
    num -= 4;
158
501M
  }
159
160
119M
  while (num) {
161
62.0M
    mul_add(rp[0], ap[0], w, c1);
162
62.0M
    ap++;
163
62.0M
    rp++;
164
62.0M
    num--;
165
62.0M
  }
166
167
57.8M
  return c1;
168
57.8M
}
169
170
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
171
1.83M
                      BN_ULONG w) {
172
1.83M
  BN_ULONG c1 = 0;
173
174
1.83M
  if (num == 0) {
175
0
    return c1;
176
0
  }
177
178
8.80M
  while (num & ~3) {
179
6.96M
    mul(rp[0], ap[0], w, c1);
180
6.96M
    mul(rp[1], ap[1], w, c1);
181
6.96M
    mul(rp[2], ap[2], w, c1);
182
6.96M
    mul(rp[3], ap[3], w, c1);
183
6.96M
    ap += 4;
184
6.96M
    rp += 4;
185
6.96M
    num -= 4;
186
6.96M
  }
187
4.76M
  while (num) {
188
2.93M
    mul(rp[0], ap[0], w, c1);
189
2.93M
    ap++;
190
2.93M
    rp++;
191
2.93M
    num--;
192
2.93M
  }
193
1.83M
  return c1;
194
1.83M
}
195
196
1.24M
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
197
1.24M
  if (n == 0) {
198
0
    return;
199
0
  }
200
201
6.47M
  while (n & ~3) {
202
5.23M
    sqr(r[0], r[1], a[0]);
203
5.23M
    sqr(r[2], r[3], a[1]);
204
5.23M
    sqr(r[4], r[5], a[2]);
205
5.23M
    sqr(r[6], r[7], a[3]);
206
5.23M
    a += 4;
207
5.23M
    r += 8;
208
5.23M
    n -= 4;
209
5.23M
  }
210
3.78M
  while (n) {
211
2.54M
    sqr(r[0], r[1], a[0]);
212
2.54M
    a++;
213
2.54M
    r += 2;
214
2.54M
    n--;
215
2.54M
  }
216
1.24M
}
217
218
// mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0)
219
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
220
// sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0)
221
// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
222
223
#ifdef BN_ULLONG
224
225
// Keep in mind that adding a single word to a multiplication result cannot overflow,
226
// because its high half cannot be all-ones.
227
#define mul_add_c(a, b, c0, c1, c2)     \
228
844M
  do {                                  \
229
844M
    BN_ULONG hi;                        \
230
844M
    BN_ULLONG t = (BN_ULLONG)(a) * (b); \
231
844M
    t += (c0); /* no carry */           \
232
844M
    (c0) = (BN_ULONG)Lw(t);             \
233
844M
    hi = (BN_ULONG)Hw(t);               \
234
844M
    (c1) += (hi);                       \
235
844M
    (c2) += (c1) < hi;                  \
236
844M
  } while (0)
237
238
#define mul_add_c2(a, b, c0, c1, c2)        \
239
86.6M
  do {                                      \
240
86.6M
    BN_ULONG hi;                            \
241
86.6M
    BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
242
86.6M
    BN_ULLONG tt = t + (c0); /* no carry */ \
243
86.6M
    (c0) = (BN_ULONG)Lw(tt);                \
244
86.6M
    hi = (BN_ULONG)Hw(tt);                  \
245
86.6M
    (c1) += hi;                             \
246
86.6M
    (c2) += (c1) < hi;                      \
247
86.6M
    t += (c0); /* no carry */               \
248
86.6M
    (c0) = (BN_ULONG)Lw(t);                 \
249
86.6M
    hi = (BN_ULONG)Hw(t);                   \
250
86.6M
    (c1) += hi;                             \
251
86.6M
    (c2) += (c1) < hi;                      \
252
86.6M
  } while (0)
253
254
#define sqr_add_c(a, i, c0, c1, c2)           \
255
24.7M
  do {                                        \
256
24.7M
    BN_ULONG hi;                              \
257
24.7M
    BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
258
24.7M
    t += (c0); /* no carry */                 \
259
24.7M
    (c0) = (BN_ULONG)Lw(t);                   \
260
24.7M
    hi = (BN_ULONG)Hw(t);                     \
261
24.7M
    (c1) += hi;                               \
262
24.7M
    (c2) += (c1) < hi;                        \
263
24.7M
  } while (0)
264
265
86.6M
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
266
267
#else
268
269
// Keep in mind that additions to hi can not overflow, because the high word of
270
// a multiplication result cannot be all-ones.
271
#define mul_add_c(a, b, c0, c1, c2) \
272
  do {                              \
273
    BN_ULONG ta = (a), tb = (b);    \
274
    BN_ULONG lo, hi;                \
275
    BN_UMULT_LOHI(lo, hi, ta, tb);  \
276
    (c0) += lo;                     \
277
    hi += ((c0) < lo) ? 1 : 0;      \
278
    (c1) += hi;                     \
279
    (c2) += ((c1) < hi) ? 1 : 0;    \
280
  } while (0)
281
282
#define mul_add_c2(a, b, c0, c1, c2) \
283
  do {                               \
284
    BN_ULONG ta = (a), tb = (b);     \
285
    BN_ULONG lo, hi, tt;             \
286
    BN_UMULT_LOHI(lo, hi, ta, tb);   \
287
    (c0) += lo;                      \
288
    tt = hi + (((c0) < lo) ? 1 : 0); \
289
    (c1) += tt;                      \
290
    (c2) += ((c1) < tt) ? 1 : 0;     \
291
    (c0) += lo;                      \
292
    hi += (c0 < lo) ? 1 : 0;         \
293
    (c1) += hi;                      \
294
    (c2) += ((c1) < hi) ? 1 : 0;     \
295
  } while (0)
296
297
#define sqr_add_c(a, i, c0, c1, c2) \
298
  do {                              \
299
    BN_ULONG ta = (a)[i];           \
300
    BN_ULONG lo, hi;                \
301
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
302
    (c0) += lo;                     \
303
    hi += (c0 < lo) ? 1 : 0;        \
304
    (c1) += hi;                     \
305
    (c2) += ((c1) < hi) ? 1 : 0;    \
306
  } while (0)
307
308
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
309
310
#endif  // !BN_ULLONG
311
312
13.1M
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
313
13.1M
  BN_ULONG c1, c2, c3;
314
315
13.1M
  c1 = 0;
316
13.1M
  c2 = 0;
317
13.1M
  c3 = 0;
318
13.1M
  mul_add_c(a[0], b[0], c1, c2, c3);
319
13.1M
  r[0] = c1;
320
13.1M
  c1 = 0;
321
13.1M
  mul_add_c(a[0], b[1], c2, c3, c1);
322
13.1M
  mul_add_c(a[1], b[0], c2, c3, c1);
323
13.1M
  r[1] = c2;
324
13.1M
  c2 = 0;
325
13.1M
  mul_add_c(a[2], b[0], c3, c1, c2);
326
13.1M
  mul_add_c(a[1], b[1], c3, c1, c2);
327
13.1M
  mul_add_c(a[0], b[2], c3, c1, c2);
328
13.1M
  r[2] = c3;
329
13.1M
  c3 = 0;
330
13.1M
  mul_add_c(a[0], b[3], c1, c2, c3);
331
13.1M
  mul_add_c(a[1], b[2], c1, c2, c3);
332
13.1M
  mul_add_c(a[2], b[1], c1, c2, c3);
333
13.1M
  mul_add_c(a[3], b[0], c1, c2, c3);
334
13.1M
  r[3] = c1;
335
13.1M
  c1 = 0;
336
13.1M
  mul_add_c(a[4], b[0], c2, c3, c1);
337
13.1M
  mul_add_c(a[3], b[1], c2, c3, c1);
338
13.1M
  mul_add_c(a[2], b[2], c2, c3, c1);
339
13.1M
  mul_add_c(a[1], b[3], c2, c3, c1);
340
13.1M
  mul_add_c(a[0], b[4], c2, c3, c1);
341
13.1M
  r[4] = c2;
342
13.1M
  c2 = 0;
343
13.1M
  mul_add_c(a[0], b[5], c3, c1, c2);
344
13.1M
  mul_add_c(a[1], b[4], c3, c1, c2);
345
13.1M
  mul_add_c(a[2], b[3], c3, c1, c2);
346
13.1M
  mul_add_c(a[3], b[2], c3, c1, c2);
347
13.1M
  mul_add_c(a[4], b[1], c3, c1, c2);
348
13.1M
  mul_add_c(a[5], b[0], c3, c1, c2);
349
13.1M
  r[5] = c3;
350
13.1M
  c3 = 0;
351
13.1M
  mul_add_c(a[6], b[0], c1, c2, c3);
352
13.1M
  mul_add_c(a[5], b[1], c1, c2, c3);
353
13.1M
  mul_add_c(a[4], b[2], c1, c2, c3);
354
13.1M
  mul_add_c(a[3], b[3], c1, c2, c3);
355
13.1M
  mul_add_c(a[2], b[4], c1, c2, c3);
356
13.1M
  mul_add_c(a[1], b[5], c1, c2, c3);
357
13.1M
  mul_add_c(a[0], b[6], c1, c2, c3);
358
13.1M
  r[6] = c1;
359
13.1M
  c1 = 0;
360
13.1M
  mul_add_c(a[0], b[7], c2, c3, c1);
361
13.1M
  mul_add_c(a[1], b[6], c2, c3, c1);
362
13.1M
  mul_add_c(a[2], b[5], c2, c3, c1);
363
13.1M
  mul_add_c(a[3], b[4], c2, c3, c1);
364
13.1M
  mul_add_c(a[4], b[3], c2, c3, c1);
365
13.1M
  mul_add_c(a[5], b[2], c2, c3, c1);
366
13.1M
  mul_add_c(a[6], b[1], c2, c3, c1);
367
13.1M
  mul_add_c(a[7], b[0], c2, c3, c1);
368
13.1M
  r[7] = c2;
369
13.1M
  c2 = 0;
370
13.1M
  mul_add_c(a[7], b[1], c3, c1, c2);
371
13.1M
  mul_add_c(a[6], b[2], c3, c1, c2);
372
13.1M
  mul_add_c(a[5], b[3], c3, c1, c2);
373
13.1M
  mul_add_c(a[4], b[4], c3, c1, c2);
374
13.1M
  mul_add_c(a[3], b[5], c3, c1, c2);
375
13.1M
  mul_add_c(a[2], b[6], c3, c1, c2);
376
13.1M
  mul_add_c(a[1], b[7], c3, c1, c2);
377
13.1M
  r[8] = c3;
378
13.1M
  c3 = 0;
379
13.1M
  mul_add_c(a[2], b[7], c1, c2, c3);
380
13.1M
  mul_add_c(a[3], b[6], c1, c2, c3);
381
13.1M
  mul_add_c(a[4], b[5], c1, c2, c3);
382
13.1M
  mul_add_c(a[5], b[4], c1, c2, c3);
383
13.1M
  mul_add_c(a[6], b[3], c1, c2, c3);
384
13.1M
  mul_add_c(a[7], b[2], c1, c2, c3);
385
13.1M
  r[9] = c1;
386
13.1M
  c1 = 0;
387
13.1M
  mul_add_c(a[7], b[3], c2, c3, c1);
388
13.1M
  mul_add_c(a[6], b[4], c2, c3, c1);
389
13.1M
  mul_add_c(a[5], b[5], c2, c3, c1);
390
13.1M
  mul_add_c(a[4], b[6], c2, c3, c1);
391
13.1M
  mul_add_c(a[3], b[7], c2, c3, c1);
392
13.1M
  r[10] = c2;
393
13.1M
  c2 = 0;
394
13.1M
  mul_add_c(a[4], b[7], c3, c1, c2);
395
13.1M
  mul_add_c(a[5], b[6], c3, c1, c2);
396
13.1M
  mul_add_c(a[6], b[5], c3, c1, c2);
397
13.1M
  mul_add_c(a[7], b[4], c3, c1, c2);
398
13.1M
  r[11] = c3;
399
13.1M
  c3 = 0;
400
13.1M
  mul_add_c(a[7], b[5], c1, c2, c3);
401
13.1M
  mul_add_c(a[6], b[6], c1, c2, c3);
402
13.1M
  mul_add_c(a[5], b[7], c1, c2, c3);
403
13.1M
  r[12] = c1;
404
13.1M
  c1 = 0;
405
13.1M
  mul_add_c(a[6], b[7], c2, c3, c1);
406
13.1M
  mul_add_c(a[7], b[6], c2, c3, c1);
407
13.1M
  r[13] = c2;
408
13.1M
  c2 = 0;
409
13.1M
  mul_add_c(a[7], b[7], c3, c1, c2);
410
13.1M
  r[14] = c3;
411
13.1M
  r[15] = c1;
412
13.1M
}
413
414
0
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
415
0
  BN_ULONG c1, c2, c3;
416
417
0
  c1 = 0;
418
0
  c2 = 0;
419
0
  c3 = 0;
420
0
  mul_add_c(a[0], b[0], c1, c2, c3);
421
0
  r[0] = c1;
422
0
  c1 = 0;
423
0
  mul_add_c(a[0], b[1], c2, c3, c1);
424
0
  mul_add_c(a[1], b[0], c2, c3, c1);
425
0
  r[1] = c2;
426
0
  c2 = 0;
427
0
  mul_add_c(a[2], b[0], c3, c1, c2);
428
0
  mul_add_c(a[1], b[1], c3, c1, c2);
429
0
  mul_add_c(a[0], b[2], c3, c1, c2);
430
0
  r[2] = c3;
431
0
  c3 = 0;
432
0
  mul_add_c(a[0], b[3], c1, c2, c3);
433
0
  mul_add_c(a[1], b[2], c1, c2, c3);
434
0
  mul_add_c(a[2], b[1], c1, c2, c3);
435
0
  mul_add_c(a[3], b[0], c1, c2, c3);
436
0
  r[3] = c1;
437
0
  c1 = 0;
438
0
  mul_add_c(a[3], b[1], c2, c3, c1);
439
0
  mul_add_c(a[2], b[2], c2, c3, c1);
440
0
  mul_add_c(a[1], b[3], c2, c3, c1);
441
0
  r[4] = c2;
442
0
  c2 = 0;
443
0
  mul_add_c(a[2], b[3], c3, c1, c2);
444
0
  mul_add_c(a[3], b[2], c3, c1, c2);
445
0
  r[5] = c3;
446
0
  c3 = 0;
447
0
  mul_add_c(a[3], b[3], c1, c2, c3);
448
0
  r[6] = c1;
449
0
  r[7] = c2;
450
0
}
451
452
3.09M
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
453
3.09M
  BN_ULONG c1, c2, c3;
454
455
3.09M
  c1 = 0;
456
3.09M
  c2 = 0;
457
3.09M
  c3 = 0;
458
3.09M
  sqr_add_c(a, 0, c1, c2, c3);
459
3.09M
  r[0] = c1;
460
3.09M
  c1 = 0;
461
3.09M
  sqr_add_c2(a, 1, 0, c2, c3, c1);
462
3.09M
  r[1] = c2;
463
3.09M
  c2 = 0;
464
3.09M
  sqr_add_c(a, 1, c3, c1, c2);
465
3.09M
  sqr_add_c2(a, 2, 0, c3, c1, c2);
466
3.09M
  r[2] = c3;
467
3.09M
  c3 = 0;
468
3.09M
  sqr_add_c2(a, 3, 0, c1, c2, c3);
469
3.09M
  sqr_add_c2(a, 2, 1, c1, c2, c3);
470
3.09M
  r[3] = c1;
471
3.09M
  c1 = 0;
472
3.09M
  sqr_add_c(a, 2, c2, c3, c1);
473
3.09M
  sqr_add_c2(a, 3, 1, c2, c3, c1);
474
3.09M
  sqr_add_c2(a, 4, 0, c2, c3, c1);
475
3.09M
  r[4] = c2;
476
3.09M
  c2 = 0;
477
3.09M
  sqr_add_c2(a, 5, 0, c3, c1, c2);
478
3.09M
  sqr_add_c2(a, 4, 1, c3, c1, c2);
479
3.09M
  sqr_add_c2(a, 3, 2, c3, c1, c2);
480
3.09M
  r[5] = c3;
481
3.09M
  c3 = 0;
482
3.09M
  sqr_add_c(a, 3, c1, c2, c3);
483
3.09M
  sqr_add_c2(a, 4, 2, c1, c2, c3);
484
3.09M
  sqr_add_c2(a, 5, 1, c1, c2, c3);
485
3.09M
  sqr_add_c2(a, 6, 0, c1, c2, c3);
486
3.09M
  r[6] = c1;
487
3.09M
  c1 = 0;
488
3.09M
  sqr_add_c2(a, 7, 0, c2, c3, c1);
489
3.09M
  sqr_add_c2(a, 6, 1, c2, c3, c1);
490
3.09M
  sqr_add_c2(a, 5, 2, c2, c3, c1);
491
3.09M
  sqr_add_c2(a, 4, 3, c2, c3, c1);
492
3.09M
  r[7] = c2;
493
3.09M
  c2 = 0;
494
3.09M
  sqr_add_c(a, 4, c3, c1, c2);
495
3.09M
  sqr_add_c2(a, 5, 3, c3, c1, c2);
496
3.09M
  sqr_add_c2(a, 6, 2, c3, c1, c2);
497
3.09M
  sqr_add_c2(a, 7, 1, c3, c1, c2);
498
3.09M
  r[8] = c3;
499
3.09M
  c3 = 0;
500
3.09M
  sqr_add_c2(a, 7, 2, c1, c2, c3);
501
3.09M
  sqr_add_c2(a, 6, 3, c1, c2, c3);
502
3.09M
  sqr_add_c2(a, 5, 4, c1, c2, c3);
503
3.09M
  r[9] = c1;
504
3.09M
  c1 = 0;
505
3.09M
  sqr_add_c(a, 5, c2, c3, c1);
506
3.09M
  sqr_add_c2(a, 6, 4, c2, c3, c1);
507
3.09M
  sqr_add_c2(a, 7, 3, c2, c3, c1);
508
3.09M
  r[10] = c2;
509
3.09M
  c2 = 0;
510
3.09M
  sqr_add_c2(a, 7, 4, c3, c1, c2);
511
3.09M
  sqr_add_c2(a, 6, 5, c3, c1, c2);
512
3.09M
  r[11] = c3;
513
3.09M
  c3 = 0;
514
3.09M
  sqr_add_c(a, 6, c1, c2, c3);
515
3.09M
  sqr_add_c2(a, 7, 5, c1, c2, c3);
516
3.09M
  r[12] = c1;
517
3.09M
  c1 = 0;
518
3.09M
  sqr_add_c2(a, 7, 6, c2, c3, c1);
519
3.09M
  r[13] = c2;
520
3.09M
  c2 = 0;
521
3.09M
  sqr_add_c(a, 7, c3, c1, c2);
522
3.09M
  r[14] = c3;
523
3.09M
  r[15] = c1;
524
3.09M
}
525
526
12.0k
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
527
12.0k
  BN_ULONG c1, c2, c3;
528
529
12.0k
  c1 = 0;
530
12.0k
  c2 = 0;
531
12.0k
  c3 = 0;
532
12.0k
  sqr_add_c(a, 0, c1, c2, c3);
533
12.0k
  r[0] = c1;
534
12.0k
  c1 = 0;
535
12.0k
  sqr_add_c2(a, 1, 0, c2, c3, c1);
536
12.0k
  r[1] = c2;
537
12.0k
  c2 = 0;
538
12.0k
  sqr_add_c(a, 1, c3, c1, c2);
539
12.0k
  sqr_add_c2(a, 2, 0, c3, c1, c2);
540
12.0k
  r[2] = c3;
541
12.0k
  c3 = 0;
542
12.0k
  sqr_add_c2(a, 3, 0, c1, c2, c3);
543
12.0k
  sqr_add_c2(a, 2, 1, c1, c2, c3);
544
12.0k
  r[3] = c1;
545
12.0k
  c1 = 0;
546
12.0k
  sqr_add_c(a, 2, c2, c3, c1);
547
12.0k
  sqr_add_c2(a, 3, 1, c2, c3, c1);
548
12.0k
  r[4] = c2;
549
12.0k
  c2 = 0;
550
12.0k
  sqr_add_c2(a, 3, 2, c3, c1, c2);
551
12.0k
  r[5] = c3;
552
12.0k
  c3 = 0;
553
12.0k
  sqr_add_c(a, 3, c1, c2, c3);
554
12.0k
  r[6] = c1;
555
12.0k
  r[7] = c2;
556
12.0k
}
557
558
#undef mul_add
559
#undef mul
560
#undef sqr
561
#undef mul_add_c
562
#undef mul_add_c2
563
#undef sqr_add_c
564
#undef sqr_add_c2
565
566
#endif  // !BN_MUL_ASM
567
568
#if !defined(BN_ADD_ASM)
569
570
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
571
25.2M
                      size_t n) {
572
25.2M
  if (n == 0) {
573
2.31k
    return 0;
574
2.31k
  }
575
576
25.2M
  BN_ULONG carry = 0;
577
183M
  while (n & ~3) {
578
158M
    r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry);
579
158M
    r[1] = CRYPTO_addc_w(a[1], b[1], carry, &carry);
580
158M
    r[2] = CRYPTO_addc_w(a[2], b[2], carry, &carry);
581
158M
    r[3] = CRYPTO_addc_w(a[3], b[3], carry, &carry);
582
158M
    a += 4;
583
158M
    b += 4;
584
158M
    r += 4;
585
158M
    n -= 4;
586
158M
  }
587
29.7M
  while (n) {
588
4.50M
    r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry);
589
4.50M
    a++;
590
4.50M
    b++;
591
4.50M
    r++;
592
4.50M
    n--;
593
4.50M
  }
594
25.2M
  return carry;
595
25.2M
}
596
597
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
598
39.6M
                      size_t n) {
599
39.6M
  if (n == 0) {
600
623k
    return (BN_ULONG)0;
601
623k
  }
602
603
39.0M
  BN_ULONG borrow = 0;
604
176M
  while (n & ~3) {
605
137M
    r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow);
606
137M
    r[1] = CRYPTO_subc_w(a[1], b[1], borrow, &borrow);
607
137M
    r[2] = CRYPTO_subc_w(a[2], b[2], borrow, &borrow);
608
137M
    r[3] = CRYPTO_subc_w(a[3], b[3], borrow, &borrow);
609
137M
    a += 4;
610
137M
    b += 4;
611
137M
    r += 4;
612
137M
    n -= 4;
613
137M
  }
614
46.1M
  while (n) {
615
7.07M
    r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow);
616
7.07M
    a++;
617
7.07M
    b++;
618
7.07M
    r++;
619
7.07M
    n--;
620
7.07M
  }
621
39.0M
  return borrow;
622
39.6M
}
623
624
#endif  // !BN_ADD_ASM