Coverage Report

Created: 2022-08-24 06:37

/src/wolfssl-normal-math/wolfcrypt/src/sp_int.c
Line
Count
Source (jump to first uncovered line)
1
/* sp_int.c
2
 *
3
 * Copyright (C) 2006-2022 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Implementation by Sean Parkinson. */
23
24
/*
25
DESCRIPTION
26
This library provides single precision (SP) integer math functions.
27
28
*/
29
#ifdef HAVE_CONFIG_H
30
    #include <config.h>
31
#endif
32
33
#include <wolfssl/wolfcrypt/settings.h>
34
35
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
36
37
#include <wolfssl/wolfcrypt/error-crypt.h>
38
#ifdef NO_INLINE
39
    #include <wolfssl/wolfcrypt/misc.h>
40
#else
41
    #define WOLFSSL_MISC_INCLUDED
42
    #include <wolfcrypt/src/misc.c>
43
#endif
44
45
/* SP Build Options:
46
 * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
47
 * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
48
 * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
49
 * WOLFSSL_SP_MATH:             Use only single precision math and algorithms
50
 *      it supports (no fastmath tfm.c or normal integer.c)
51
 * WOLFSSL_SP_MATH_ALL          Implementation of all MP functions
52
 *      (replacement for tfm.c and integer.c)
53
 * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large
54
 *      stack variables
55
 * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
56
 * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
57
 * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
58
 * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
59
 * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
60
 * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
61
 * WOLFSSL_SP_521               Enable ECC 521-bit SECP521R1 support
62
 * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
63
 * WOLFSSL_SP_X86_64_ASM        Enable Intel x64 assembly implementation
64
 * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly implementation
65
 * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly implementation
66
 * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly implementation
67
 * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly implementation
68
 *      (used with -mthumb)
69
 * WOLFSSL_SP_X86_64            Enable Intel x86 64-bit assembly speedups
70
 * WOLFSSL_SP_X86               Enable Intel x86 assembly speedups
71
 * WOLFSSL_SP_ARM64             Enable Aarch64 assembly speedups
72
 * WOLFSSL_SP_ARM32             Enable ARM32 assembly speedups
73
 * WOLFSSL_SP_ARM32_UDIV        Enable word divide asm that uses UDIV instr
74
 * WOLFSSL_SP_ARM_THUMB         Enable ARM Thumb assembly speedups
75
 *                              (explicitly uses register 'r7')
76
 * WOLFSSL_SP_PPC64             Enable PPC64 assembly speedups
77
 * WOLFSSL_SP_PPC               Enable PPC assembly speedups
78
 * WOLFSSL_SP_MIPS64            Enable MIPS64 assembly speedups
79
 * WOLFSSL_SP_MIPS              Enable MIPS assembly speedups
80
 * WOLFSSL_SP_RISCV64           Enable RISCV64 assembly speedups
81
 * WOLFSSL_SP_RISCV32           Enable RISCV32 assembly speedups
82
 * WOLFSSL_SP_S390X             Enable S390X assembly speedups
83
 * SP_WORD_SIZE                 Force 32 or 64 bit mode
84
 * WOLFSSL_SP_NONBLOCK          Enables "non blocking" mode for SP math, which
85
 *      will return FP_WOULDBLOCK for long operations and function must be
86
 *      called again until complete.
87
 * WOLFSSL_SP_FAST_NCT_EXPTMOD  Enables the faster non-constant time modular
88
 *      exponentiation implementation.
89
 * WOLFSSL_SP_INT_NEGATIVE      Enables negative values to be used.
90
 * WOLFSSL_SP_INT_DIGIT_ALIGN   Enable when unaligned access of sp_int_digit
91
 *                              pointer is not allowed.
92
 * WOLFSSL_SP_NO_DYN_STACK      Disable use of dynamic stack items.
93
 *                              Used with small code size and not small stack.
94
 * WOLFSSL_SP_FAST_MODEXP       Allow fast mod_exp with small C code
95
 */
96
97
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
98
#if defined(__clang__) && defined(__clang_major__) && \
99
    (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
100
    #undef WOLFSSL_SP_SMALL
101
#endif
102
103
#include <wolfssl/wolfcrypt/sp_int.h>
104
105
/* DECL_SP_INT: Declare one variable of type 'sp_int'. */
106
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
107
    !defined(WOLFSSL_SP_NO_MALLOC)
108
    /* Declare a variable that will be assigned a value on XMALLOC. */
109
    #define DECL_SP_INT(n, s)   \
110
20.1M
        sp_int* n = NULL
111
#else
112
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
113
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
114
        /* Declare a variable on the stack with the required data size. */
115
        #define DECL_SP_INT(n, s)               \
116
            byte    n##d[MP_INT_SIZEOF(s)];     \
117
            sp_int* n = (sp_int*)n##d
118
    #else
119
        /* Declare a variable on the stack. */
120
        #define DECL_SP_INT(n, s)               \
121
            sp_int n[1]
122
    #endif
123
#endif
124
125
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
126
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
127
    !defined(WOLFSSL_SP_NO_MALLOC)
128
    /* Dynamically allocate just enough data to support size. */
129
    #define ALLOC_SP_INT(n, s, err, h)                                         \
130
20.1M
    do {                                                                       \
131
20.1M
        if ((err) == MP_OKAY) {                                                \
132
20.1M
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), DYNAMIC_TYPE_BIGINT); \
133
20.1M
            if ((n) == NULL) {                                                 \
134
1.27k
                (err) = MP_MEM;                                                \
135
1.27k
            }                                                                  \
136
20.1M
        }                                                                      \
137
20.1M
    }                                                                          \
138
20.1M
    while (0)
139
140
    /* Dynamically allocate just enough data to support size - and set size. */
141
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
142
30.4k
    do {                                                                       \
143
30.4k
        ALLOC_SP_INT(n, s, err, h);                                            \
144
30.4k
        if ((err) == MP_OKAY) {                                                \
145
30.0k
            (n)->size = (s);                                                   \
146
30.0k
        }                                                                      \
147
30.4k
    }                                                                          \
148
30.4k
    while (0)
149
#else
150
    /* Array declared on stack - nothing to do. */
151
    #define ALLOC_SP_INT(n, s, err, h)
152
    /* Array declared on stack - no allocation needed, only set the size field.
     *
     * n    Pointer (or array) expression designating the sp_int.
     * s    Value to store in the size field.
     * err  Unused in this variant; accepted for parity with the heap variant.
     * h    Unused in this variant; accepted for parity with the heap variant.
     *
     * Arguments are parenthesized so that expressions such as 'arr + 1' or a
     * conditional expression expand correctly. The trailing semicolon is kept
     * so that existing call sites expand exactly as before.
     */
    #define ALLOC_SP_INT_SIZE(n, s, err, h)     \
        (n)->size = (s);
155
#endif
156
157
/* FREE_SP_INT: Free an 'sp_int' variable. */
158
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
159
    !defined(WOLFSSL_SP_NO_MALLOC)
160
    /* Free dynamically allocated data. */
161
    #define FREE_SP_INT(n, h)                   \
162
20.1M
    do {                                        \
163
20.1M
        if ((n) != NULL) {                      \
164
20.1M
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);   \
165
20.1M
        }                                       \
166
20.1M
    }                                           \
167
20.1M
    while (0)
168
#else
169
    /* Nothing to do as declared on stack. */
170
    #define FREE_SP_INT(n, h)
171
#endif
172
173
174
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
175
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
176
    !defined(WOLFSSL_SP_NO_MALLOC)
177
    /* Declare a variable that will be assigned a value on XMALLOC. */
178
    #define DECL_SP_INT_ARRAY(n, s, c)  \
179
12.7M
        sp_int* n##d = NULL;            \
180
12.7M
        sp_int* (n)[c] = { NULL, }
181
#else
182
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
183
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
184
        /* Declare a variable on the stack with the required data size. */
185
        #define DECL_SP_INT_ARRAY(n, s, c)          \
186
            byte    n##d[MP_INT_SIZEOF(s) * (c)];   \
187
            sp_int* (n)[c]
188
    #else
189
        /* Declare a variable on the stack. */
190
        #define DECL_SP_INT_ARRAY(n, s, c)      \
191
            sp_int n##d[c];                     \
192
            sp_int* (n)[c]
193
    #endif
194
#endif
195
196
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
197
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
198
    !defined(WOLFSSL_SP_NO_MALLOC)
199
    /* Dynamically allocate just enough data to support multiple sp_ints of the
200
     * required size. Use pointers into data to make up array and set sizes.
201
     */
202
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
203
5.64M
    do {                                                                       \
204
5.64M
        if ((err) == MP_OKAY) {                                                \
205
5.64M
            n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),               \
206
5.64M
                                                         DYNAMIC_TYPE_BIGINT); \
207
5.64M
            if (n##d == NULL) {                                                \
208
2.01k
                (err) = MP_MEM;                                                \
209
2.01k
            }                                                                  \
210
5.64M
            else {                                                             \
211
5.64M
                int n##ii;                                                     \
212
5.64M
                (n)[0] = n##d;                                                 \
213
5.64M
                (n)[0]->size = (s);                                            \
214
12.9M
                for (n##ii = 1; n##ii < (c); n##ii++) {                        \
215
7.34M
                    (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                 \
216
7.34M
                    (n)[n##ii]->size = (s);                                    \
217
7.34M
                }                                                              \
218
5.64M
            }                                                                  \
219
5.64M
        }                                                                      \
220
5.64M
    }                                                                          \
221
5.64M
    while (0)
222
#else
223
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
224
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
225
        /* Data declared on stack that supports multiple sp_ints of the
226
         * required size. Use pointers into data to make up array and set sizes.
227
         */
228
        #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                            \
229
        do {                                                                   \
230
            if ((err) == MP_OKAY) {                                            \
231
                int n##ii;                                                     \
232
                (n)[0] = (sp_int*)n##d;                                        \
233
                (n)[0]->size = (s);                                            \
234
                for (n##ii = 1; n##ii < (c); n##ii++) {                        \
235
                    (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                 \
236
                    (n)[n##ii]->size = (s);                                    \
237
                }                                                              \
238
            }                                                                  \
239
        }                                                                      \
240
        while (0)
241
    #else
242
        /* Data declared on stack that supports multiple sp_ints of the
243
         * required size. Set into array and set sizes.
244
         */
245
        #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                            \
246
        do {                                                                   \
247
            if ((err) == MP_OKAY) {                                            \
248
                int n##ii;                                                     \
249
                for (n##ii = 0; n##ii < (c); n##ii++) {                        \
250
                    (n)[n##ii] = &n##d[n##ii];                                 \
251
                    (n)[n##ii]->size = (s);                                    \
252
                }                                                              \
253
            }                                                                  \
254
        }                                                                      \
255
        while (0)
256
    #endif
257
#endif
258
259
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
260
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
261
    !defined(WOLFSSL_SP_NO_MALLOC)
262
    /* Free data variable that was dynamically allocated. */
263
    #define FREE_SP_INT_ARRAY(n, h)                 \
264
12.7M
    do {                                            \
265
12.7M
        if (n##d != NULL) {                         \
266
5.64M
            XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);    \
267
5.64M
        }                                           \
268
12.7M
    }                                               \
269
12.7M
    while (0)
270
#else
271
    /* Nothing to do as data declared on stack. */
272
    #define FREE_SP_INT_ARRAY(n, h)
273
#endif
274
275
276
#ifndef WOLFSSL_NO_ASM
277
    #ifdef __IAR_SYSTEMS_ICC__
278
        #define __asm__        asm
279
        #define __volatile__   volatile
280
    #endif /* __IAR_SYSTEMS_ICC__ */
281
    #ifdef __KEIL__
282
        #define __asm__        __asm
283
        #define __volatile__   volatile
284
    #endif
285
286
    #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
287
/*
288
 * CPU: x86_64
289
 */
290
291
/* Multiply va by vb and store double size result in: vh | vl */
292
#define SP_ASM_MUL(vl, vh, va, vb)                       \
293
471M
    __asm__ __volatile__ (                               \
294
471M
        "movq %[b], %%rax \n\t"                    \
295
471M
        "mulq %[a]    \n\t"                    \
296
471M
        "movq %%rax, %[l] \n\t"                    \
297
471M
        "movq %%rdx, %[h] \n\t"                    \
298
471M
        : [h] "+r" (vh), [l] "+r" (vl)                   \
299
471M
        : [a] "m" (va), [b] "m" (vb)                     \
300
471M
        : "memory", "%rax", "%rdx", "cc"                 \
301
471M
    )
302
/* Multiply va by vb and store double size result in: vo | vh | vl */
303
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
304
12.5M
    __asm__ __volatile__ (                               \
305
12.5M
        "movq %[b], %%rax \n\t"                    \
306
12.5M
        "mulq %[a]    \n\t"                    \
307
12.5M
        "movq $0   , %[o] \n\t"                    \
308
12.5M
        "movq %%rax, %[l] \n\t"                    \
309
12.5M
        "movq %%rdx, %[h] \n\t"                    \
310
12.5M
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
311
12.5M
        : [a] "m" (va), [b] "m" (vb)                     \
312
12.5M
        : "%rax", "%rdx", "cc"                           \
313
12.5M
    )
314
/* Multiply va by vb and add double size result into: vo | vh | vl */
315
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
316
5.83G
    __asm__ __volatile__ (                               \
317
5.83G
        "movq %[b], %%rax \n\t"                    \
318
5.83G
        "mulq %[a]    \n\t"                    \
319
5.83G
        "addq %%rax, %[l] \n\t"                    \
320
5.83G
        "adcq %%rdx, %[h] \n\t"                    \
321
5.83G
        "adcq $0   , %[o] \n\t"                    \
322
5.83G
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
323
5.83G
        : [a] "m" (va), [b] "m" (vb)                     \
324
5.83G
        : "%rax", "%rdx", "cc"                           \
325
5.83G
    )
326
/* Multiply va by vb and add double size result into: vh | vl */
327
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
328
8.45G
    __asm__ __volatile__ (                               \
329
8.45G
        "movq %[b], %%rax \n\t"                    \
330
8.45G
        "mulq %[a]    \n\t"                    \
331
8.45G
        "addq %%rax, %[l] \n\t"                    \
332
8.45G
        "adcq %%rdx, %[h] \n\t"                    \
333
8.45G
        : [l] "+r" (vl), [h] "+r" (vh)                   \
334
8.45G
        : [a] "m" (va), [b] "m" (vb)                     \
335
8.45G
        : "%rax", "%rdx", "cc"                           \
336
8.45G
    )
337
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
338
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
339
1.40G
    __asm__ __volatile__ (                               \
340
1.40G
        "movq %[b], %%rax \n\t"                    \
341
1.40G
        "mulq %[a]    \n\t"                    \
342
1.40G
        "addq %%rax, %[l] \n\t"                    \
343
1.40G
        "adcq %%rdx, %[h] \n\t"                    \
344
1.40G
        "adcq $0   , %[o] \n\t"                    \
345
1.40G
        "addq %%rax, %[l] \n\t"                    \
346
1.40G
        "adcq %%rdx, %[h] \n\t"                    \
347
1.40G
        "adcq $0   , %[o] \n\t"                    \
348
1.40G
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
349
1.40G
        : [a] "m" (va), [b] "m" (vb)                     \
350
1.40G
        : "%rax", "%rdx", "cc"                           \
351
1.40G
    )
352
/* Multiply va by vb and add double size result twice into: vo | vh | vl
353
 * Assumes first add will not overflow vh | vl
354
 */
355
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
356
86.5M
    __asm__ __volatile__ (                               \
357
86.5M
        "movq %[b], %%rax \n\t"                    \
358
86.5M
        "mulq %[a]    \n\t"                    \
359
86.5M
        "addq %%rax, %[l] \n\t"                    \
360
86.5M
        "adcq %%rdx, %[h] \n\t"                    \
361
86.5M
        "addq %%rax, %[l] \n\t"                    \
362
86.5M
        "adcq %%rdx, %[h] \n\t"                    \
363
86.5M
        "adcq $0   , %[o] \n\t"                    \
364
86.5M
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
365
86.5M
        : [a] "m" (va), [b] "m" (vb)                     \
366
86.5M
        : "%rax", "%rdx", "cc"                           \
367
86.5M
    )
368
/* Square va and store double size result in: vh | vl */
369
#define SP_ASM_SQR(vl, vh, va)                           \
370
70.4M
    __asm__ __volatile__ (                               \
371
70.4M
        "movq %[a], %%rax \n\t"                    \
372
70.4M
        "mulq %%rax   \n\t"                    \
373
70.4M
        "movq %%rax, %[l] \n\t"                    \
374
70.4M
        "movq %%rdx, %[h] \n\t"                    \
375
70.4M
        : [h] "+r" (vh), [l] "+r" (vl)                   \
376
70.4M
        : [a] "m" (va)                                   \
377
70.4M
        : "memory", "%rax", "%rdx", "cc"                 \
378
70.4M
    )
379
/* Square va and add double size result into: vo | vh | vl */
380
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
381
330M
    __asm__ __volatile__ (                               \
382
330M
        "movq %[a], %%rax \n\t"                    \
383
330M
        "mulq %%rax   \n\t"                    \
384
330M
        "addq %%rax, %[l] \n\t"                    \
385
330M
        "adcq %%rdx, %[h] \n\t"                    \
386
330M
        "adcq $0   , %[o] \n\t"                    \
387
330M
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
388
330M
        : [a] "m" (va)                                   \
389
330M
        : "%rax", "%rdx", "cc"                           \
390
330M
    )
391
/* Square va and add double size result into: vh | vl */
392
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
393
43.2M
    __asm__ __volatile__ (                               \
394
43.2M
        "movq %[a], %%rax \n\t"                    \
395
43.2M
        "mulq %%rax   \n\t"                    \
396
43.2M
        "addq %%rax, %[l] \n\t"                    \
397
43.2M
        "adcq %%rdx, %[h] \n\t"                    \
398
43.2M
        : [l] "+r" (vl), [h] "+r" (vh)                   \
399
43.2M
        : [a] "m" (va)                                   \
400
43.2M
        : "%rax", "%rdx", "cc"                           \
401
43.2M
    )
402
/* Add va into: vh | vl */
403
#define SP_ASM_ADDC(vl, vh, va)                          \
404
14.7G
    __asm__ __volatile__ (                               \
405
14.7G
        "addq %[a], %[l]  \n\t"                    \
406
14.7G
        "adcq $0  , %[h]  \n\t"                    \
407
14.7G
        : [l] "+r" (vl), [h] "+r" (vh)                   \
408
14.7G
        : [a] "m" (va)                                   \
409
14.7G
        : "cc"                                           \
410
14.7G
    )
411
/* Add va, variable in a register, into: vh | vl */
412
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
413
1.14G
    __asm__ __volatile__ (                               \
414
1.14G
        "addq %[a], %[l]  \n\t"                    \
415
1.14G
        "adcq $0  , %[h]  \n\t"                    \
416
1.14G
        : [l] "+r" (vl), [h] "+r" (vh)                   \
417
1.14G
        : [a] "r" (va)                                   \
418
1.14G
        : "cc"                                           \
419
1.14G
    )
420
/* Sub va from: vh | vl */
421
#define SP_ASM_SUBC(vl, vh, va)                          \
422
3.32G
    __asm__ __volatile__ (                               \
423
3.32G
        "subq %[a], %[l]  \n\t"                    \
424
3.32G
        "sbbq $0  , %[h]  \n\t"                    \
425
3.32G
        : [l] "+r" (vl), [h] "+r" (vh)                   \
426
3.32G
        : [a] "m" (va)                                   \
427
3.32G
        : "cc"                                           \
428
3.32G
    )
429
/* Add two times vc | vb | va into vo | vh | vl */
430
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
431
12.5M
    __asm__ __volatile__ (                               \
432
12.5M
        "addq %[a], %[l]  \n\t"                    \
433
12.5M
        "adcq %[b], %[h]  \n\t"                    \
434
12.5M
        "adcq %[c], %[o]  \n\t"                    \
435
12.5M
        "addq %[a], %[l]  \n\t"                    \
436
12.5M
        "adcq %[b], %[h]  \n\t"                    \
437
12.5M
        "adcq %[c], %[o]  \n\t"                    \
438
12.5M
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
439
12.5M
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
440
12.5M
        : "%rax", "%rdx", "cc"                           \
441
12.5M
    )
442
443
#ifndef WOLFSSL_SP_DIV_WORD_HALF
444
/* Divide a two digit number by a digit number and return. (hi | lo) / d
445
 *
446
 * Using divq instruction on Intel x64.
447
 *
448
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
449
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
450
 * @param  [in]  d   SP integer digit. Number to divide by.
451
 * @return  The division result.
452
 */
453
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    /* divq divides the 128-bit value rdx:rax by the operand, leaving the
     * quotient in rax and the remainder in rdx. Both registers are written by
     * the instruction, so both are declared as read-write operands: an
     * input-only operand must not be modified by the asm, otherwise the
     * compiler may assume rdx still holds 'hi' after this (inlined) statement.
     *
     * NOTE(review): divq raises a divide error when the quotient does not fit
     * in one digit (hi >= d, including d == 0); this function does not check
     * for that - confirm that callers guarantee hi < d.
     */
    __asm__ __volatile__ (
        "divq %[d]"
        : "+a" (lo), "+d" (hi)
        : [d] "r" (d)
        : "cc"
    );
    /* Quotient; the remainder left in 'hi' is intentionally discarded. */
    return lo;
}
464
#define SP_ASM_DIV_WORD
465
#endif
466
467
#define SP_INT_ASM_AVAILABLE
468
469
    #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
470
471
    #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
472
/*
473
 * CPU: x86
474
 */
475
476
/* Multiply va by vb and store double size result in: vh | vl */
477
#define SP_ASM_MUL(vl, vh, va, vb)                       \
478
    __asm__ __volatile__ (                               \
479
        "movl %[b], %%eax \n\t"                    \
480
        "mull %[a]    \n\t"                    \
481
        "movl %%eax, %[l] \n\t"                    \
482
        "movl %%edx, %[h] \n\t"                    \
483
        : [h] "+r" (vh), [l] "+r" (vl)                   \
484
        : [a] "m" (va), [b] "m" (vb)                     \
485
        : "memory", "eax", "edx", "cc"                   \
486
    )
487
/* Multiply va by vb and store double size result in: vo | vh | vl */
488
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
489
    __asm__ __volatile__ (                               \
490
        "movl %[b], %%eax \n\t"                    \
491
        "mull %[a]    \n\t"                    \
492
        "movl $0   , %[o] \n\t"                    \
493
        "movl %%eax, %[l] \n\t"                    \
494
        "movl %%edx, %[h] \n\t"                    \
495
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
496
        : [a] "m" (va), [b] "m" (vb)                     \
497
        : "eax", "edx", "cc"                             \
498
    )
499
/* Multiply va by vb and add double size result into: vo | vh | vl */
500
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
501
    __asm__ __volatile__ (                               \
502
        "movl %[b], %%eax \n\t"                    \
503
        "mull %[a]    \n\t"                    \
504
        "addl %%eax, %[l] \n\t"                    \
505
        "adcl %%edx, %[h] \n\t"                    \
506
        "adcl $0   , %[o] \n\t"                    \
507
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
508
        : [a] "r" (va), [b] "r" (vb)                     \
509
        : "eax", "edx", "cc"                             \
510
    )
511
/* Multiply va by vb and add double size result into: vh | vl */
512
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
513
    __asm__ __volatile__ (                               \
514
        "movl %[b], %%eax \n\t"                    \
515
        "mull %[a]    \n\t"                    \
516
        "addl %%eax, %[l] \n\t"                    \
517
        "adcl %%edx, %[h] \n\t"                    \
518
        : [l] "+r" (vl), [h] "+r" (vh)                   \
519
        : [a] "m" (va), [b] "m" (vb)                     \
520
        : "eax", "edx", "cc"                             \
521
    )
522
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
523
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
524
    __asm__ __volatile__ (                               \
525
        "movl %[b], %%eax \n\t"                    \
526
        "mull %[a]    \n\t"                    \
527
        "addl %%eax, %[l] \n\t"                    \
528
        "adcl %%edx, %[h] \n\t"                    \
529
        "adcl $0   , %[o] \n\t"                    \
530
        "addl %%eax, %[l] \n\t"                    \
531
        "adcl %%edx, %[h] \n\t"                    \
532
        "adcl $0   , %[o] \n\t"                    \
533
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
534
        : [a] "r" (va), [b] "r" (vb)                     \
535
        : "eax", "edx", "cc"                             \
536
    )
537
/* Multiply va by vb and add double size result twice into: vo | vh | vl
538
 * Assumes first add will not overflow vh | vl
539
 */
540
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
541
    __asm__ __volatile__ (                               \
542
        "movl %[b], %%eax \n\t"                    \
543
        "mull %[a]    \n\t"                    \
544
        "addl %%eax, %[l] \n\t"                    \
545
        "adcl %%edx, %[h] \n\t"                    \
546
        "addl %%eax, %[l] \n\t"                    \
547
        "adcl %%edx, %[h] \n\t"                    \
548
        "adcl $0   , %[o] \n\t"                    \
549
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
550
        : [a] "m" (va), [b] "m" (vb)                     \
551
        : "eax", "edx", "cc"                             \
552
    )
553
/* Square va and store double size result in: vh | vl */
554
#define SP_ASM_SQR(vl, vh, va)                           \
555
    __asm__ __volatile__ (                               \
556
        "movl %[a], %%eax \n\t"                    \
557
        "mull %%eax   \n\t"                    \
558
        "movl %%eax, %[l] \n\t"                    \
559
        "movl %%edx, %[h] \n\t"                    \
560
        : [h] "+r" (vh), [l] "+r" (vl)                   \
561
        : [a] "m" (va)                                   \
562
        : "memory", "eax", "edx", "cc"                   \
563
    )
564
/* Square va and add double size result into: vo | vh | vl */
565
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
566
    __asm__ __volatile__ (                               \
567
        "movl %[a], %%eax \n\t"                    \
568
        "mull %%eax   \n\t"                    \
569
        "addl %%eax, %[l] \n\t"                    \
570
        "adcl %%edx, %[h] \n\t"                    \
571
        "adcl $0   , %[o] \n\t"                    \
572
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
573
        : [a] "m" (va)                                   \
574
        : "eax", "edx", "cc"                             \
575
    )
576
/* Square va and add double size result into: vh | vl */
577
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
578
    __asm__ __volatile__ (                               \
579
        "movl %[a], %%eax \n\t"                    \
580
        "mull %%eax   \n\t"                    \
581
        "addl %%eax, %[l] \n\t"                    \
582
        "adcl %%edx, %[h] \n\t"                    \
583
        : [l] "+r" (vl), [h] "+r" (vh)                   \
584
        : [a] "m" (va)                                   \
585
        : "eax", "edx", "cc"                             \
586
    )
587
/* Add the single digit va, read from memory, into the two digit value:
 * vh | vl. The carry from the low digit is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addl   %[x], %[lo]     \n\t"                    \
        "adcl   $0  , %[hi]     \n\t"                    \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "m" (va)                                   \
        : "cc"                                           \
    )
596
/* Add the single digit va, held in a register, into the two digit value:
 * vh | vl. The carry from the low digit is added into vh.
 */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
    __asm__ __volatile__ (                               \
        "addl   %[x], %[lo]     \n\t"                    \
        "adcl   $0  , %[hi]     \n\t"                    \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
605
/* Subtract the single digit va from the two digit value: vh | vl.
 * The borrow from the low digit is taken from vh.
 */
#define SP_ASM_SUBC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subl   %[x], %[lo]     \n\t"                    \
        "sbbl   $0  , %[hi]     \n\t"                    \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "m" (va)                                   \
        : "cc"                                           \
    )
614
/* Add the three digit value vc | vb | va into vo | vh | vl twice.
 * The same add-with-carry chain is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addl   %[x], %[lo]     \n\t"                    \
        "adcl   %[y], %[hi]     \n\t"                    \
        "adcl   %[z], %[ov]     \n\t"                    \
        "addl   %[x], %[lo]     \n\t"                    \
        "adcl   %[y], %[hi]     \n\t"                    \
        "adcl   %[z], %[ov]     \n\t"                    \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc)       \
        : "cc"                                           \
    )
627
628
#ifndef WOLFSSL_SP_DIV_WORD_HALF
629
/* Divide a two digit number by a digit number and return. (hi | lo) / d
630
 *
631
 * Using divl instruction on Intel x64.
632
 *
633
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
634
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
635
 * @param  [in]  d   SP integer digit. Number to divide by.
636
 * @return  The division result.
637
 */
638
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
639
                                          sp_int_digit d)
640
{
641
    __asm__ __volatile__ (
642
        "divl %2"
643
        : "+a" (lo)
644
        : "d" (hi), "r" (d)
645
        : "cc"
646
    );
647
    return lo;
648
}
649
#define SP_ASM_DIV_WORD
650
#endif
651
652
#define SP_INT_ASM_AVAILABLE
653
654
    #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
655
656
    #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
657
/*
658
 * CPU: Aarch64
659
 */
660
661
/* Form the double size product of va and vb.
 * The low digit is placed in vl and the high digit in vh.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mul    %[lo], %[x], %[y]       \n\t"            \
        "umulh  %[hi], %[x], %[y]       \n\t"            \
        : [hi] "+r" (vh), [lo] "+r" (vl)                 \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "memory", "cc"                                 \
    )
670
/* Form the double size product of va and vb and store it as a three digit
 * value: vo | vh | vl. The top digit, vo, is set to zero.
 * x8 temporarily holds the low digit of the product.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[y]          \n\t"            \
        "umulh  %[hi], %[x], %[y]       \n\t"            \
        "mov    %[lo], x8               \n\t"            \
        "mov    %[ov], xzr              \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "x8"                                           \
    )
681
/* Multiply va by vb and accumulate the double size product into the three
 * digit value: vo | vh | vl. Carries are propagated up into vo.
 * x8 and x9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[y]          \n\t"            \
        "umulh  x9, %[x], %[y]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adcs   %[hi], %[hi], x9        \n\t"            \
        "adc    %[ov], %[ov], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "x8", "x9", "cc"                               \
    )
693
/* Multiply va by vb and accumulate the double size product into the two
 * digit value: vh | vl. Any carry out of vh is not recorded.
 * x8 and x9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[y]          \n\t"            \
        "umulh  x9, %[x], %[y]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adc    %[hi], %[hi], x9        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "x8", "x9", "cc"                               \
    )
704
/* Multiply va by vb and accumulate the double size product twice into the
 * three digit value: vo | vh | vl. Carries are propagated up into vo on
 * both additions.
 * x8 and x9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[y]          \n\t"            \
        "umulh  x9, %[x], %[y]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adcs   %[hi], %[hi], x9        \n\t"            \
        "adc    %[ov], %[ov], xzr       \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adcs   %[hi], %[hi], x9        \n\t"            \
        "adc    %[ov], %[ov], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "x8", "x9", "cc"                               \
    )
719
/* Multiply va by vb and accumulate the double size product twice into the
 * three digit value: vo | vh | vl.
 * The first addition is assumed not to overflow vh | vl, so only the second
 * addition carries into vo.
 * x8 and x9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[y]          \n\t"            \
        "umulh  x9, %[x], %[y]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adc    %[hi], %[hi], x9        \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adcs   %[hi], %[hi], x9        \n\t"            \
        "adc    %[ov], %[ov], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "x8", "x9", "cc"                               \
    )
735
/* Compute the square of va.
 * The low digit is placed in vl and the high digit in vh.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mul    %[lo], %[x], %[x]       \n\t"            \
        "umulh  %[hi], %[x], %[x]       \n\t"            \
        : [hi] "+r" (vh), [lo] "+r" (vl)                 \
        : [x] "r" (va)                                   \
        : "memory"                                       \
    )
744
/* Compute the square of va and accumulate the double size result into the
 * three digit value: vo | vh | vl. Carries are propagated up into vo.
 * x8 and x9 hold the low and high digits of the square.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[x]          \n\t"            \
        "umulh  x9, %[x], %[x]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adcs   %[hi], %[hi], x9        \n\t"            \
        "adc    %[ov], %[ov], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va)                                   \
        : "x8", "x9", "cc"                               \
    )
756
/* Compute the square of va and accumulate the double size result into the
 * two digit value: vh | vl. Any carry out of vh is not recorded.
 * x8 and x9 hold the low and high digits of the square.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "mul    x8, %[x], %[x]          \n\t"            \
        "umulh  x9, %[x], %[x]          \n\t"            \
        "adds   %[lo], %[lo], x8        \n\t"            \
        "adc    %[hi], %[hi], x9        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "x8", "x9", "cc"                               \
    )
767
/* Add the single digit va into the two digit value: vh | vl.
 * The carry from the low digit is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adc    %[hi], %[hi], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
776
/* Subtract the single digit va from the two digit value: vh | vl.
 * The borrow from the low digit is taken from vh.
 */
#define SP_ASM_SUBC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subs   %[lo], %[lo], %[x]      \n\t"            \
        "sbc    %[hi], %[hi], xzr       \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
785
/* Add the three digit value vc | vb | va into vo | vh | vl twice.
 * The same add-with-carry chain is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adcs   %[hi], %[hi], %[y]      \n\t"            \
        "adc    %[ov], %[ov], %[z]      \n\t"            \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adcs   %[hi], %[hi], %[y]      \n\t"            \
        "adc    %[ov], %[ov], %[z]      \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc)       \
        : "cc"                                           \
    )
798
799
#ifndef WOLFSSL_SP_DIV_WORD_HALF
800
/* Divide a two digit number by a digit number and return. (hi | lo) / d
801
 *
802
 * Using udiv instruction on Aarch64.
803
 * Constant time.
804
 *
805
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
806
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
807
 * @param  [in]  d   SP integer digit. Number to divide by.
808
 * @return  The division result.
809
 */
810
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
811
                                          sp_int_digit d)
812
{
813
    __asm__ __volatile__ (
814
        "lsr  x3, %[d], 48\n\t"
815
        "mov  x5, 16\n\t"
816
        "cmp  x3, 0\n\t"
817
        "mov  x4, 63\n\t"
818
        "csel x3, x5, xzr, eq\n\t"
819
        "sub  x4, x4, x3\n\t"
820
        "lsl  %[d], %[d], x3\n\t"
821
        "lsl  %[hi], %[hi], x3\n\t"
822
        "lsr  x5, %[lo], x4\n\t"
823
        "lsl  %[lo], %[lo], x3\n\t"
824
        "orr  %[hi], %[hi], x5, lsr 1\n\t"
825
826
        "lsr  x5, %[d], 32\n\t"
827
        "add  x5, x5, 1\n\t"
828
829
        "udiv x3, %[hi], x5\n\t"
830
        "lsl  x6, x3, 32\n\t"
831
        "mul  x4, %[d], x6\n\t"
832
        "umulh  x3, %[d], x6\n\t"
833
        "subs %[lo], %[lo], x4\n\t"
834
        "sbc  %[hi], %[hi], x3\n\t"
835
836
        "udiv x3, %[hi], x5\n\t"
837
        "lsl  x3, x3, 32\n\t"
838
        "add  x6, x6, x3\n\t"
839
        "mul  x4, %[d], x3\n\t"
840
        "umulh  x3, %[d], x3\n\t"
841
        "subs %[lo], %[lo], x4\n\t"
842
        "sbc  %[hi], %[hi], x3\n\t"
843
844
        "lsr  x3, %[lo], 32\n\t"
845
        "orr  x3, x3, %[hi], lsl 32\n\t"
846
847
        "udiv x3, x3, x5\n\t"
848
        "add  x6, x6, x3\n\t"
849
        "mul  x4, %[d], x3\n\t"
850
        "umulh  x3, %[d], x3\n\t"
851
        "subs %[lo], %[lo], x4\n\t"
852
        "sbc  %[hi], %[hi], x3\n\t"
853
854
        "lsr  x3, %[lo], 32\n\t"
855
        "orr  x3, x3, %[hi], lsl 32\n\t"
856
857
        "udiv x3, x3, x5\n\t"
858
        "add  x6, x6, x3\n\t"
859
        "mul  x4, %[d], x3\n\t"
860
        "sub  %[lo], %[lo], x4\n\t"
861
862
        "udiv x3, %[lo], %[d]\n\t"
863
        "add  %[hi], x6, x3\n\t"
864
865
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
866
        :
867
        : "x3", "x4", "x5", "x6"
868
    );
869
870
    return hi;
871
}
872
#define SP_ASM_DIV_WORD
873
#endif
874
875
#define SP_INT_ASM_AVAILABLE
876
877
    #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
878
879
    #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
880
        SP_WORD_SIZE == 32
881
/*
882
 * CPU: ARM32 or Cortex-M4 and similar
883
 */
884
885
/* Form the double size product of va and vb.
 * The low digit is placed in vl and the high digit in vh.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "umull  %[lo], %[hi], %[x], %[y]        \n\t"    \
        : [hi] "+r" (vh), [lo] "+r" (vl)                 \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "memory"                                       \
    )
893
/* Form the double size product of va and vb and store it as a three digit
 * value: vo | vh | vl. The top digit, vo, is set to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "umull  %[lo], %[hi], %[x], %[y]        \n\t"    \
        "mov    %[ov], #0                       \n\t"    \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        :                                                \
    )
902
/* Multiply va by vb and accumulate the double size product into the three
 * digit value: vo | vh | vl. Carries are propagated up into vo.
 * r8 and r9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "umull  r8, r9, %[x], %[y]      \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adcs   %[hi], %[hi], r9        \n\t"            \
        "adc    %[ov], %[ov], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "r8", "r9", "cc"                               \
    )
913
/* Multiply va by vb and accumulate the double size product into the two
 * digit value: vh | vl, using a single multiply-accumulate instruction.
 * Any carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "umlal  %[lo], %[hi], %[x], %[y]        \n\t"    \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va), [y] "r" (vb)                     \
        :                                                \
    )
921
/* Multiply va by vb and accumulate the double size product twice into the
 * three digit value: vo | vh | vl. Carries are propagated up into vo on
 * both additions.
 * r8 and r9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "umull  r8, r9, %[x], %[y]      \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adcs   %[hi], %[hi], r9        \n\t"            \
        "adc    %[ov], %[ov], #0        \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adcs   %[hi], %[hi], r9        \n\t"            \
        "adc    %[ov], %[ov], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "r8", "r9", "cc"                               \
    )
935
/* Multiply va by vb and accumulate the double size product twice into the
 * three digit value: vo | vh | vl.
 * The first addition is assumed not to overflow vh | vl, so only the second
 * addition carries into vo.
 * r8 and r9 hold the low and high digits of the product.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "umull  r8, r9, %[x], %[y]      \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adc    %[hi], %[hi], r9        \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adcs   %[hi], %[hi], r9        \n\t"            \
        "adc    %[ov], %[ov], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb)                     \
        : "r8", "r9", "cc"                               \
    )
950
/* Compute the square of va.
 * The low digit is placed in vl and the high digit in vh.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "umull  %[lo], %[hi], %[x], %[x]        \n\t"    \
        : [hi] "+r" (vh), [lo] "+r" (vl)                 \
        : [x] "r" (va)                                   \
        : "memory"                                       \
    )
958
/* Compute the square of va and accumulate the double size result into the
 * three digit value: vo | vh | vl. Carries are propagated up into vo.
 * r8 and r9 hold the low and high digits of the square.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "umull  r8, r9, %[x], %[x]      \n\t"            \
        "adds   %[lo], %[lo], r8        \n\t"            \
        "adcs   %[hi], %[hi], r9        \n\t"            \
        "adc    %[ov], %[ov], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va)                                   \
        : "r8", "r9", "cc"                               \
    )
969
/* Compute the square of va and accumulate the double size result into the
 * two digit value: vh | vl, using a single multiply-accumulate instruction.
 * Any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "umlal  %[lo], %[hi], %[x], %[x]        \n\t"    \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
977
/* Add the single digit va into the two digit value: vh | vl.
 * The carry from the low digit is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adc    %[hi], %[hi], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
986
/* Subtract the single digit va from the two digit value: vh | vl.
 * The borrow from the low digit is taken from vh.
 */
#define SP_ASM_SUBC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subs   %[lo], %[lo], %[x]      \n\t"            \
        "sbc    %[hi], %[hi], #0        \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh)                 \
        : [x] "r" (va)                                   \
        : "cc"                                           \
    )
995
/* Add the three digit value vc | vb | va into vo | vh | vl twice.
 * The same add-with-carry chain is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adcs   %[hi], %[hi], %[y]      \n\t"            \
        "adc    %[ov], %[ov], %[z]      \n\t"            \
        "adds   %[lo], %[lo], %[x]      \n\t"            \
        "adcs   %[hi], %[hi], %[y]      \n\t"            \
        "adc    %[ov], %[ov], %[z]      \n\t"            \
        : [lo] "+r" (vl), [hi] "+r" (vh), [ov] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc)       \
        : "cc"                                           \
    )
1008
1009
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1010
#ifndef WOLFSSL_SP_ARM32_UDIV
1011
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1012
 *
1013
 * No division instruction used - does operation bit by bit.
1014
 * Constant time.
1015
 *
1016
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
1017
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
1018
 * @param  [in]  d   SP integer digit. Number to divide by.
1019
 * @return  The division result.
1020
 */
1021
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1022
                                          sp_int_digit d)
1023
{
1024
    sp_int_digit r = 0;
1025
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1026
    static const char debruijn32[32] = {
1027
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1028
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1029
    };
1030
    static const sp_uint32 debruijn32_mul = 0x076be629;
1031
#endif
1032
1033
    __asm__ __volatile__ (
1034
        /* Shift d so that top bit is set. */
1035
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1036
        "ldr  r4, %[m]\n\t"
1037
        "mov  r5, %[d]\n\t"
1038
        "orr  r5, r5, r5, lsr #1\n\t"
1039
        "orr  r5, r5, r5, lsr #2\n\t"
1040
        "orr  r5, r5, r5, lsr #4\n\t"
1041
        "orr  r5, r5, r5, lsr #8\n\t"
1042
        "orr  r5, r5, r5, lsr #16\n\t"
1043
        "add  r5, r5, #1\n\t"
1044
        "mul  r5, r5, r4\n\t"
1045
        "lsr  r5, r5, #27\n\t"
1046
        "ldrb r5, [%[t], r5]\n\t"
1047
#else
1048
        "clz  r5, %[d]\n\t"
1049
#endif
1050
        "rsb  r6, r5, #31\n\t"
1051
        "lsl  %[d], %[d], r5\n\t"
1052
        "lsl  %[hi], %[hi], r5\n\t"
1053
        "lsr  r9, %[lo], r6\n\t"
1054
        "lsl  %[lo], %[lo], r5\n\t"
1055
        "orr  %[hi], %[hi], r9, lsr #1\n\t"
1056
1057
        "lsr  r5, %[d], #1\n\t"
1058
        "add  r5, r5, #1\n\t"
1059
        "mov  r6, %[lo]\n\t"
1060
        "mov  r9, %[hi]\n\t"
1061
        /* Do top 32 */
1062
        "subs r8, r5, r9\n\t"
1063
        "sbc  r8, r8, r8\n\t"
1064
        "add  %[r], %[r], %[r]\n\t"
1065
        "sub  %[r], %[r], r8\n\t"
1066
        "and  r8, r8, r5\n\t"
1067
        "subs r9, r9, r8\n\t"
1068
        /* Next 30 bits */
1069
        "mov  r4, #29\n\t"
1070
        "\n1:\n\t"
1071
        "movs r6, r6, lsl #1\n\t"
1072
        "adc  r9, r9, r9\n\t"
1073
        "subs r8, r5, r9\n\t"
1074
        "sbc  r8, r8, r8\n\t"
1075
        "add  %[r], %[r], %[r]\n\t"
1076
        "sub  %[r], %[r], r8\n\t"
1077
        "and  r8, r8, r5\n\t"
1078
        "subs r9, r9, r8\n\t"
1079
        "subs r4, r4, #1\n\t"
1080
        "bpl  1b\n\t"
1081
1082
        "add  %[r], %[r], %[r]\n\t"
1083
        "add  %[r], %[r], #1\n\t"
1084
1085
        /* Handle difference has hi word > 0. */
1086
        "umull  r4, r5, %[r], %[d]\n\t"
1087
        "subs r4, %[lo], r4\n\t"
1088
        "sbc  r5, %[hi], r5\n\t"
1089
        "add  %[r], %[r], r5\n\t"
1090
        "umull  r4, r5, %[r], %[d]\n\t"
1091
        "subs r4, %[lo], r4\n\t"
1092
        "sbc  r5, %[hi], r5\n\t"
1093
        "add  %[r], %[r], r5\n\t"
1094
1095
        /* Add 1 to result if bottom half of difference is >= d. */
1096
        "mul  r4, %[r], %[d]\n\t"
1097
        "subs r4, %[lo], r4\n\t"
1098
        "subs r9, %[d], r4\n\t"
1099
        "sbc  r8, r8, r8\n\t"
1100
        "sub  %[r], %[r], r8\n\t"
1101
        "subs r9, r9, #1\n\t"
1102
        "sbc  r8, r8, r8\n\t"
1103
        "sub  %[r], %[r], r8\n\t"
1104
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1105
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1106
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1107
#else
1108
        :
1109
#endif
1110
        : "r4", "r5", "r6", "r8", "r9"
1111
    );
1112
1113
    return r;
1114
}
1115
#else
1116
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1117
 *
1118
 * Using udiv instruction on arm32
1119
 * Constant time.
1120
 *
1121
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
1122
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
1123
 * @param  [in]  d   SP integer digit. Number to divide by.
1124
 * @return  The division result.
1125
 */
1126
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1127
                                          sp_int_digit d)
1128
{
1129
    __asm__ __volatile__ (
1130
        "lsrs r3, %[d], #24\n\t"
1131
  "it eq\n\t"
1132
        "moveq  r3, #8\n\t"
1133
  "it ne\n\t"
1134
        "movne  r3, #0\n\t"
1135
        "rsb  r4, r3, #31\n\t"
1136
        "lsl  %[d], %[d], r3\n\t"
1137
        "lsl  %[hi], %[hi], r3\n\t"
1138
        "lsr  r5, %[lo], r4\n\t"
1139
        "lsl  %[lo], %[lo], r3\n\t"
1140
        "orr  %[hi], %[hi], r5, lsr #1\n\t"
1141
1142
        "lsr  r5, %[d], 16\n\t"
1143
        "add  r5, r5, 1\n\t"
1144
1145
        "udiv r3, %[hi], r5\n\t"
1146
        "lsl  r6, r3, 16\n\t"
1147
        "umull  r4, r3, %[d], r6\n\t"
1148
        "subs %[lo], %[lo], r4\n\t"
1149
        "sbc  %[hi], %[hi], r3\n\t"
1150
1151
        "udiv r3, %[hi], r5\n\t"
1152
        "lsl  r3, r3, 16\n\t"
1153
        "add  r6, r6, r3\n\t"
1154
        "umull  r4, r3, %[d], r3\n\t"
1155
        "subs %[lo], %[lo], r4\n\t"
1156
        "sbc  %[hi], %[hi], r3\n\t"
1157
1158
        "lsr  r3, %[lo], 16\n\t"
1159
        "orr  r3, r3, %[hi], lsl 16\n\t"
1160
1161
        "udiv r3, r3, r5\n\t"
1162
        "add  r6, r6, r3\n\t"
1163
        "umull  r4, r3, %[d], r3\n\t"
1164
        "subs %[lo], %[lo], r4\n\t"
1165
        "sbc  %[hi], %[hi], r3\n\t"
1166
1167
        "lsr  r3, %[lo], 16\n\t"
1168
        "orr  r3, r3, %[hi], lsl 16\n\t"
1169
1170
        "udiv r3, r3, r5\n\t"
1171
        "add  r6, r6, r3\n\t"
1172
        "mul  r4, %[d], r3\n\t"
1173
        "sub  %[lo], %[lo], r4\n\t"
1174
1175
        "udiv r3, %[lo], %[d]\n\t"
1176
        "add  %[hi], r6, r3\n\t"
1177
1178
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1179
        :
1180
        : "r3", "r4", "r5", "r6"
1181
    );
1182
1183
    return hi;
1184
}
1185
#endif
1186
1187
#define SP_ASM_DIV_WORD
1188
#endif
1189
1190
#define SP_INT_ASM_AVAILABLE
1191
1192
    #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1193
1194
    #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1195
/*
1196
 * CPU: ARM Thumb (like Cortex-M0)
1197
 */
1198
1199
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1200
 * usage of register 'r7'.
1201
 */
1202
1203
#if defined(__clang__)
1204
1205
/* Form the double size product of va and vb from four 16x16 bit partial
 * products. The low digit is placed in vl and the high digit in vh.
 * r4, r5 and r6 are used as working registers.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        /* low(a) * low(b): starts the low digit */      \
        "uxth   r6, %[x]                \n\t"            \
        "uxth   %[lo], %[y]             \n\t"            \
        "muls   %[lo], r6               \n\t"            \
        /* low(a) * high(b): split across both digits */ \
        "lsrs   r4, %[y], #16           \n\t"            \
        "muls   r6, r4                  \n\t"            \
        "lsrs   %[hi], r6, #16          \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "movs   r5, #0                  \n\t"            \
        "adcs   %[hi], r5               \n\t"            \
        /* high(a) * high(b): added to the high digit */ \
        "lsrs   r6, %[x], #16           \n\t"            \
        "muls   r4, r6                  \n\t"            \
        "adds   %[hi], %[hi], r4        \n\t"            \
        /* high(a) * low(b): split across both digits */ \
        "uxth   r4, %[y]                \n\t"            \
        "muls   r6, r4                  \n\t"            \
        "lsrs   r4, r6, #16             \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "adcs   %[hi], r4               \n\t"            \
        : [hi] "+l" (vh), [lo] "+l" (vl)                 \
        : [x] "l" (va), [y] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
1235
/* Form the double size product of va and vb from four 16x16 bit partial
 * products and store it as a three digit value: vo | vh | vl.
 * The top digit, vo, is set to zero.
 * r6 and r7 are used as working registers.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* low(a) * low(b): starts the low digit */      \
        "uxth   r6, %[x]                \n\t"            \
        "uxth   %[lo], %[y]             \n\t"            \
        "muls   %[lo], r6               \n\t"            \
        /* low(a) * high(b): split across both digits */ \
        "lsrs   r7, %[y], #16           \n\t"            \
        "muls   r6, r7                  \n\t"            \
        "lsrs   %[hi], r6, #16          \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "movs   %[ov], #0               \n\t"            \
        "adcs   %[hi], %[ov]            \n\t"            \
        /* high(a) * high(b): added to the high digit */ \
        "lsrs   r6, %[x], #16           \n\t"            \
        "muls   r7, r6                  \n\t"            \
        "adds   %[hi], %[hi], r7        \n\t"            \
        /* high(a) * low(b): split across both digits */ \
        "uxth   r7, %[y]                \n\t"            \
        "muls   r6, r7                  \n\t"            \
        "lsrs   r7, r6, #16             \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "adcs   %[hi], r7               \n\t"            \
        : [lo] "+l" (vl), [hi] "+l" (vh), [ov] "+l" (vo) \
        : [x] "l" (va), [y] "l" (vb)                     \
        : "r6", "r7", "cc"                               \
    )
1265
#ifndef WOLFSSL_SP_SMALL
1266
/* Multiply va by vb, using four 16x16 bit partial products, and accumulate
 * the double size product into the three digit value: vo | vh | vl.
 * Each partial product is added in turn with the carry propagated up to vo.
 * r5 is kept at zero for carry propagation; r6 and r7 are working registers.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* low(a) * low(b): added to the low digit */    \
        "uxth   r6, %[x]                \n\t"            \
        "uxth   r7, %[y]                \n\t"            \
        "muls   r7, r6                  \n\t"            \
        "adds   %[lo], %[lo], r7        \n\t"            \
        "movs   r5, #0                  \n\t"            \
        "adcs   %[hi], r5               \n\t"            \
        "adcs   %[ov], r5               \n\t"            \
        /* low(a) * high(b): split across both digits */ \
        "lsrs   r7, %[y], #16           \n\t"            \
        "muls   r6, r7                  \n\t"            \
        "lsrs   r7, r6, #16             \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "adcs   %[hi], r7               \n\t"            \
        "adcs   %[ov], r5               \n\t"            \
        /* high(a) * high(b): added to the high digit */ \
        "lsrs   r6, %[x], #16           \n\t"            \
        "lsrs   r7, %[y], #16           \n\t"            \
        "muls   r7, r6                  \n\t"            \
        "adds   %[hi], %[hi], r7        \n\t"            \
        "adcs   %[ov], r5               \n\t"            \
        /* high(a) * low(b): split across both digits */ \
        "uxth   r7, %[y]                \n\t"            \
        "muls   r6, r7                  \n\t"            \
        "lsrs   r7, r6, #16             \n\t"            \
        "lsls   r6, r6, #16             \n\t"            \
        "adds   %[lo], %[lo], r6        \n\t"            \
        "adcs   %[hi], r7               \n\t"            \
        "adcs   %[ov], r5               \n\t"            \
        : [lo] "+l" (vl), [hi] "+l" (vh), [ov] "+l" (vo) \
        : [x] "l" (va), [y] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
1303
#else
1304
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant in the #else branch. It adds the same four 16x16-bit partial
 * products (al*bl, al*bh, ah*bh, ah*bl) but uses only r5 and r6 as scratch.
 * r5 is therefore both a temporary and the zero addend, and is re-zeroed
 * with "movs r5, #0" each time a zero is needed for carry propagation.
 * The sequence depends on that movs leaving the carry produced by the
 * preceding adds/adcs intact.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6 and the condition flags.
 */
1305
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1306
    __asm__ __volatile__ (                               \
1307
        /* al * bl */                                    \
1308
        "uxth r6, %[a]    \n\t"            \
1309
        "uxth r5, %[b]    \n\t"            \
1310
        "muls r5, r6      \n\t"            \
1311
        "adds %[l], %[l], r5    \n\t"            \
1312
        "movs r5, #0      \n\t"            \
1313
        "adcs %[h], r5    \n\t"            \
1314
        "adcs %[o], r5    \n\t"            \
1315
        /* al * bh */                                    \
1316
        "lsrs r5, %[b], #16   \n\t"            \
1317
        "muls r6, r5      \n\t"            \
1318
        "lsrs r5, r6, #16   \n\t"            \
1319
        "lsls r6, r6, #16   \n\t"            \
1320
        "adds %[l], %[l], r6    \n\t"            \
1321
        "adcs %[h], r5    \n\t"            \
1322
        "movs r5, #0      \n\t"            \
1323
        "adcs %[o], r5    \n\t"            \
1324
        /* ah * bh */                                    \
1325
        "lsrs r6, %[a], #16   \n\t"            \
1326
        "lsrs r5, %[b], #16   \n\t"            \
1327
        "muls r5, r6      \n\t"            \
1328
        "adds %[h], %[h], r5    \n\t"            \
1329
        "movs r5, #0      \n\t"            \
1330
        "adcs %[o], r5    \n\t"            \
1331
        /* ah * bl */                                    \
1332
        "uxth r5, %[b]    \n\t"            \
1333
        "muls r6, r5      \n\t"            \
1334
        "lsrs r5, r6, #16   \n\t"            \
1335
        "lsls r6, r6, #16   \n\t"            \
1336
        "adds %[l], %[l], r6    \n\t"            \
1337
        "adcs %[h], r5    \n\t"            \
1338
        "movs r5, #0      \n\t"            \
1339
        "adcs %[o], r5    \n\t"            \
1340
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1341
        : [a] "l" (va), [b] "l" (vb)                     \
1342
        : "r5", "r6", "cc"                               \
1343
    )
1344
#endif
1345
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Adds the four 16x16-bit partial products (al*bl, al*bh, ah*bh, ah*bl)
 * into the two-word accumulator. There is no third accumulator word, so a
 * carry out of vh is not recorded.
 * r5 is zeroed once and used as the zero addend; r4 and r6 hold the halves
 * and the partial products.
 *
 * vl, vh  Accumulator words, low and high (read and written).
 * va, vb  Multiplicands (read only).
 * Clobbers r4, r5, r6 and the condition flags.
 */
1346
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1347
    __asm__ __volatile__ (                               \
1348
        /* al * bl */                                    \
1349
        "uxth r6, %[a]    \n\t"            \
1350
        "uxth r4, %[b]    \n\t"            \
1351
        "muls r4, r6      \n\t"            \
1352
        "adds %[l], %[l], r4    \n\t"            \
1353
        "movs r5, #0      \n\t"            \
1354
        "adcs %[h], r5    \n\t"            \
1355
        /* al * bh */                                    \
1356
        "lsrs r4, %[b], #16   \n\t"            \
1357
        "muls r6, r4      \n\t"            \
1358
        "lsrs r4, r6, #16   \n\t"            \
1359
        "lsls r6, r6, #16   \n\t"            \
1360
        "adds %[l], %[l], r6    \n\t"            \
1361
        "adcs %[h], r4    \n\t"            \
1362
        /* ah * bh */                                    \
1363
        "lsrs r6, %[a], #16   \n\t"            \
1364
        "lsrs r4, %[b], #16   \n\t"            \
1365
        "muls r4, r6      \n\t"            \
1366
        "adds %[h], %[h], r4    \n\t"            \
1367
        /* ah * bl */                                    \
1368
        "uxth r4, %[b]    \n\t"            \
1369
        "muls r6, r4      \n\t"            \
1370
        "lsrs r4, r6, #16   \n\t"            \
1371
        "lsls r6, r6, #16   \n\t"            \
1372
        "adds %[l], %[l], r6    \n\t"            \
1373
        "adcs %[h], r4    \n\t"            \
1374
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1375
        : [a] "l" (va), [b] "l" (vb)                     \
1376
        : "r4", "r5", "r6", "cc"                         \
1377
    )
1378
#ifndef WOLFSSL_SP_SMALL
1379
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Each of the four 16x16-bit partial products (al*bl, al*bh, ah*bh, ah*bl)
 * is computed once and then added into the accumulator two times, each
 * addition with its own adds/adcs carry chain ending in vo.
 * r5 is zeroed once and reused as the zero addend; r6 and r7 hold the
 * halves and the partial products.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6, r7 and the condition flags.
 */
1380
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1381
    __asm__ __volatile__ (                               \
1382
        /* al * bl */                                    \
1383
        "uxth r6, %[a]    \n\t"            \
1384
        "uxth r7, %[b]    \n\t"            \
1385
        "muls r7, r6      \n\t"            \
1386
        "adds %[l], %[l], r7    \n\t"            \
1387
        "movs r5, #0      \n\t"            \
1388
        "adcs %[h], r5    \n\t"            \
1389
        "adcs %[o], r5    \n\t"            \
1390
        "adds %[l], %[l], r7    \n\t"            \
1391
        "adcs %[h], r5    \n\t"            \
1392
        "adcs %[o], r5    \n\t"            \
1393
        /* al * bh */                                    \
1394
        "lsrs r7, %[b], #16   \n\t"            \
1395
        "muls r6, r7      \n\t"            \
1396
        "lsrs r7, r6, #16   \n\t"            \
1397
        "lsls r6, r6, #16   \n\t"            \
1398
        "adds %[l], %[l], r6    \n\t"            \
1399
        "adcs %[h], r7    \n\t"            \
1400
        "adcs %[o], r5    \n\t"            \
1401
        "adds %[l], %[l], r6    \n\t"            \
1402
        "adcs %[h], r7    \n\t"            \
1403
        "adcs %[o], r5    \n\t"            \
1404
        /* ah * bh */                                    \
1405
        "lsrs r6, %[a], #16   \n\t"            \
1406
        "lsrs r7, %[b], #16   \n\t"            \
1407
        "muls r7, r6      \n\t"            \
1408
        "adds %[h], %[h], r7    \n\t"            \
1409
        "adcs %[o], r5    \n\t"            \
1410
        "adds %[h], %[h], r7    \n\t"            \
1411
        "adcs %[o], r5    \n\t"            \
1412
        /* ah * bl */                                    \
1413
        "uxth r7, %[b]    \n\t"            \
1414
        "muls r6, r7      \n\t"            \
1415
        "lsrs r7, r6, #16   \n\t"            \
1416
        "lsls r6, r6, #16   \n\t"            \
1417
        "adds %[l], %[l], r6    \n\t"            \
1418
        "adcs %[h], r7    \n\t"            \
1419
        "adcs %[o], r5    \n\t"            \
1420
        "adds %[l], %[l], r6    \n\t"            \
1421
        "adcs %[h], r7    \n\t"            \
1422
        "adcs %[o], r5    \n\t"            \
1423
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1424
        : [a] "l" (va), [b] "l" (vb)                     \
1425
        : "r5", "r6", "r7", "cc"                         \
1426
    )
1427
#else
1428
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant in the #else branch. Each of the four 16x16-bit partial products
 * (al*bl, al*bh, ah*bh, ah*bl) is computed once and added into the
 * accumulator two times, using only r5 and r6 as low scratch registers.
 *
 * Both scratch registers stay live across the doubled additions, so the
 * register holding va is borrowed as the zero addend for carry propagation.
 * Its value is parked in r8 on entry, reloaded before the high half of va
 * is needed, and reloaded again at the end so the input operand holds its
 * original value when the asm statement finishes.
 *
 * The copies to and from r8 use the plain "mov": the flag-setting register
 * move ("movs Rd, Rm") only accepts low registers in the 16-bit Thumb
 * encoding, while "mov" accepts high registers and leaves the flags alone.
 *
 * NOTE(review): va's register is overwritten while vb is still being read,
 * so va and vb must not share a register - confirm the compiler cannot
 * allocate one register for both when the two expressions are equal.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (value preserved on exit).
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "mov  r8, %[a]    \n\t"            \
        /* al * bl */                                    \
        "uxth r6, %[a]    \n\t"            \
        "uxth r5, %[b]    \n\t"            \
        "muls r5, r6      \n\t"            \
        "adds %[l], %[l], r5    \n\t"            \
        "movs %[a], #0    \n\t"            \
        "adcs %[h], %[a]    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        "adds %[l], %[l], r5    \n\t"            \
        "adcs %[h], %[a]    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16   \n\t"            \
        "muls r6, r5      \n\t"            \
        "lsrs r5, r6, #16   \n\t"            \
        "lsls r6, r6, #16   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        /* ah * bh */                                    \
        "mov  %[a], r8    \n\t"            \
        "lsrs r6, %[a], #16   \n\t"            \
        "lsrs r5, %[b], #16   \n\t"            \
        "muls r5, r6      \n\t"            \
        "adds %[h], %[h], r5    \n\t"            \
        "movs %[a], #0    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        "adds %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        /* ah * bl */                                    \
        "uxth r5, %[b]    \n\t"            \
        "muls r6, r5      \n\t"            \
        "lsrs r5, r6, #16   \n\t"            \
        "lsls r6, r6, #16   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        "adcs %[o], %[a]    \n\t"            \
        /* restore va */                                 \
        "mov  %[a], r8    \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
1480
#endif
1481
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Each of the four 16x16-bit partial products (al*bl, al*bh, ah*bh, ah*bl)
 * is computed once and added into the accumulator two times.
 * Carry into vo is skipped for both additions of al*bl and for the first
 * addition of al*bh; every later addition propagates its carry into vo.
 * r5 is zeroed once and reused as the zero addend; r6 and r7 hold the
 * halves and the partial products.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6, r7 and the condition flags.
 */
1484
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1485
    __asm__ __volatile__ (                               \
1486
        /* al * bl */                                    \
1487
        "uxth r6, %[a]    \n\t"            \
1488
        "uxth r7, %[b]    \n\t"            \
1489
        "muls r7, r6      \n\t"            \
1490
        "adds %[l], %[l], r7    \n\t"            \
1491
        "movs r5, #0      \n\t"            \
1492
        "adcs %[h], r5    \n\t"            \
1493
        "adds %[l], %[l], r7    \n\t"            \
1494
        "adcs %[h], r5    \n\t"            \
1495
        /* al * bh */                                    \
1496
        "lsrs r7, %[b], #16   \n\t"            \
1497
        "muls r6, r7      \n\t"            \
1498
        "lsrs r7, r6, #16   \n\t"            \
1499
        "lsls r6, r6, #16   \n\t"            \
1500
        "adds %[l], %[l], r6    \n\t"            \
1501
        "adcs %[h], r7    \n\t"            \
1502
        "adds %[l], %[l], r6    \n\t"            \
1503
        "adcs %[h], r7    \n\t"            \
1504
        "adcs %[o], r5    \n\t"            \
1505
        /* ah * bh */                                    \
1506
        "lsrs r6, %[a], #16   \n\t"            \
1507
        "lsrs r7, %[b], #16   \n\t"            \
1508
        "muls r7, r6      \n\t"            \
1509
        "adds %[h], %[h], r7    \n\t"            \
1510
        "adcs %[o], r5    \n\t"            \
1511
        "adds %[h], %[h], r7    \n\t"            \
1512
        "adcs %[o], r5    \n\t"            \
1513
        /* ah * bl */                                    \
1514
        "uxth r7, %[b]    \n\t"            \
1515
        "muls r6, r7      \n\t"            \
1516
        "lsrs r7, r6, #16   \n\t"            \
1517
        "lsls r6, r6, #16   \n\t"            \
1518
        "adds %[l], %[l], r6    \n\t"            \
1519
        "adcs %[h], r7    \n\t"            \
1520
        "adcs %[o], r5    \n\t"            \
1521
        "adds %[l], %[l], r6    \n\t"            \
1522
        "adcs %[h], r7    \n\t"            \
1523
        "adcs %[o], r5    \n\t"            \
1524
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1525
        : [a] "l" (va), [b] "l" (vb)                     \
1526
        : "r5", "r6", "r7", "cc"                         \
1527
    )
1528
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves: ah (lsrs #16) and al (uxth). The halves
 * are copied into vh and vl, which are then squared in place, so the
 * incoming values of vl and vh are overwritten before they are read.
 * The cross term al*ah is computed once; shifting it right by 15 and left
 * by 17 yields the parts of 2*al*ah*2^16 that are added to vh and vl.
 *
 * vl, vh  Result words, low and high (declared read-write).
 * va      Value to square (read only).
 * Clobbers r5, r6 and the condition flags.
 */
1529
#define SP_ASM_SQR(vl, vh, va)                           \
1530
    __asm__ __volatile__ (                               \
1531
        "lsrs r5, %[a], #16   \n\t"            \
1532
        "uxth r6, %[a]    \n\t"            \
1533
        "mov  %[l], r6    \n\t"            \
1534
        "mov  %[h], r5    \n\t"            \
1535
        /* al * al */                                    \
1536
        "muls %[l], %[l]    \n\t"            \
1537
        /* ah * ah */                                    \
1538
        "muls %[h], %[h]    \n\t"            \
1539
        /* 2 * al * ah */                                \
1540
        "muls r6, r5      \n\t"            \
1541
        "lsrs r5, r6, #15   \n\t"            \
1542
        "lsls r6, r6, #17   \n\t"            \
1543
        "adds %[l], %[l], r6    \n\t"            \
1544
        "adcs %[h], r5    \n\t"            \
1545
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1546
        : [a] "l" (va)                                   \
1547
        : "r5", "r6", "cc"                               \
1548
    )
1549
/* Square va and add double size result into: vo | vh | vl
 *
 * al*al is added to vl and ah*ah to vh in one adds/adcs chain, with the
 * carry propagated into vo. The halves of va are then extracted again and
 * the cross term al*ah is computed once; shifting it right by 15 and left
 * by 17 yields the parts of 2*al*ah*2^16 that are added to vh and vl, again
 * with the carry propagated into vo.
 * r5 is the zero addend; r4 and r6 hold the halves and the products.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va          Value to square (read only).
 * Clobbers r4, r5, r6 and the condition flags.
 */
1550
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1551
    __asm__ __volatile__ (                               \
1552
        "lsrs r4, %[a], #16   \n\t"            \
1553
        "uxth r6, %[a]    \n\t"            \
1554
        /* al * al */                                    \
1555
        "muls r6, r6      \n\t"            \
1556
        /* ah * ah */                                    \
1557
        "muls r4, r4      \n\t"            \
1558
        "adds %[l], %[l], r6    \n\t"            \
1559
        "adcs %[h], r4    \n\t"            \
1560
        "movs r5, #0      \n\t"            \
1561
        "adcs %[o], r5    \n\t"            \
1562
        "lsrs r4, %[a], #16   \n\t"            \
1563
        "uxth r6, %[a]    \n\t"            \
1564
        /* 2 * al * ah */                                \
1565
        "muls r6, r4      \n\t"            \
1566
        "lsrs r4, r6, #15   \n\t"            \
1567
        "lsls r6, r6, #17   \n\t"            \
1568
        "adds %[l], %[l], r6    \n\t"            \
1569
        "adcs %[h], r4    \n\t"            \
1570
        "adcs %[o], r5    \n\t"            \
1571
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1572
        : [a] "l" (va)                                   \
1573
        : "r4", "r5", "r6", "cc"                         \
1574
    )
1575
/* Square va and add double size result into: vh | vl
 *
 * al*al is added to vl and ah*ah to vh in one adds/adcs chain. The halves
 * of va are then extracted again and the cross term al*ah is computed once;
 * shifting it right by 15 and left by 17 yields the parts of 2*al*ah*2^16
 * that are added to vh and vl.
 * There is no third accumulator word, so a carry out of vh is not recorded.
 *
 * vl, vh  Accumulator words, low and high (read and written).
 * va      Value to square (read only).
 * Clobbers r6, r7 and the condition flags.
 */
1576
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1577
    __asm__ __volatile__ (                               \
1578
        "lsrs r7, %[a], #16   \n\t"            \
1579
        "uxth r6, %[a]    \n\t"            \
1580
        /* al * al */                                    \
1581
        "muls r6, r6      \n\t"            \
1582
        /* ah * ah */                                    \
1583
        "muls r7, r7      \n\t"            \
1584
        "adds %[l], %[l], r6    \n\t"            \
1585
        "adcs %[h], r7    \n\t"            \
1586
        "lsrs r7, %[a], #16   \n\t"            \
1587
        "uxth r6, %[a]    \n\t"            \
1588
        /* 2 * al * ah */                                \
1589
        "muls r6, r7      \n\t"            \
1590
        "lsrs r7, r6, #15   \n\t"            \
1591
        "lsls r6, r6, #17   \n\t"            \
1592
        "adds %[l], %[l], r6    \n\t"            \
1593
        "adcs %[h], r7    \n\t"            \
1594
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1595
        : [a] "l" (va)                                   \
1596
        : "r6", "r7", "cc"                               \
1597
    )
1598
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh by adding a
 * zeroed r5 with carry.
 *
 * vl, vh  Accumulator words, low and high (read and written).
 * va      Word to add (read only).
 * Clobbers r5 and the condition flags.
 */
1599
#define SP_ASM_ADDC(vl, vh, va)                          \
1600
    __asm__ __volatile__ (                               \
1601
        "adds %[l], %[l], %[a]  \n\t"            \
1602
        "movs r5, #0      \n\t"            \
1603
        "adcs %[h], r5    \n\t"            \
1604
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1605
        : [a] "l" (va)                                   \
1606
        : "r5", "cc"                                     \
1607
    )
1608
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by
 * subtracting a zeroed r5 with carry (sbcs).
 *
 * vl, vh  Accumulator words, low and high (read and written).
 * va      Word to subtract (read only).
 * Clobbers r5 and the condition flags.
 */
1609
#define SP_ASM_SUBC(vl, vh, va)                          \
1610
    __asm__ __volatile__ (                               \
1611
        "subs %[l], %[l], %[a]  \n\t"            \
1612
        "movs r5, #0      \n\t"            \
1613
        "sbcs %[h], r5    \n\t"            \
1614
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1615
        : [a] "l" (va)                                   \
1616
        : "r5", "cc"                                     \
1617
    )
1618
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added to the three-word accumulator
 * with one adds/adcs/adcs chain, and the same chain is then repeated, so
 * the value is added twice. A carry out of vo is not recorded.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb, vc  Words to add, low to high (read only).
 * Uses no scratch registers; clobbers only the condition flags.
 */
1619
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1620
    __asm__ __volatile__ (                               \
1621
        "adds %[l], %[l], %[a]  \n\t"            \
1622
        "adcs %[h], %[b]    \n\t"            \
1623
        "adcs %[o], %[c]    \n\t"            \
1624
        "adds %[l], %[l], %[a]  \n\t"            \
1625
        "adcs %[h], %[b]    \n\t"            \
1626
        "adcs %[o], %[c]    \n\t"            \
1627
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1628
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
1629
        : "cc"                                           \
1630
    )
1631
1632
#elif defined(WOLFSSL_KEIL)
1633
1634
/* Multiply va by vb and store double size result in: vh | vl
 *
 * WOLFSSL_KEIL variant, written with three-operand instruction forms.
 * The operands are split into 16-bit halves (uxth for the low half,
 * lsrs #16 for the high half). vl is first set to al*bl and vh to the top
 * half of al*bh, so the incoming values of vl and vh are overwritten
 * before they are read; ah*bh and ah*bl are then added in.
 * r5 is the zero addend; r4 and r6 hold the halves and partial products.
 *
 * vl, vh  Result words, low and high (declared read-write).
 * va, vb  Multiplicands (read only).
 * Clobbers r4, r5, r6 and the condition flags.
 */
1635
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1636
    __asm__ __volatile__ (                               \
1637
        /* al * bl */                                    \
1638
        "uxth r6, %[a]    \n\t"            \
1639
        "uxth %[l], %[b]    \n\t"            \
1640
        "muls %[l], r6, %[l]    \n\t"            \
1641
        /* al * bh */                                    \
1642
        "lsrs r4, %[b], #16   \n\t"            \
1643
        "muls r6, r4, r6    \n\t"            \
1644
        "lsrs %[h], r6, #16   \n\t"            \
1645
        "lsls r6, r6, #16   \n\t"            \
1646
        "adds %[l], %[l], r6    \n\t"            \
1647
        "movs r5, #0      \n\t"            \
1648
        "adcs %[h], %[h], r5    \n\t"            \
1649
        /* ah * bh */                                    \
1650
        "lsrs r6, %[a], #16   \n\t"            \
1651
        "muls r4, r6, r4    \n\t"            \
1652
        "adds %[h], %[h], r4    \n\t"            \
1653
        /* ah * bl */                                    \
1654
        "uxth r4, %[b]    \n\t"            \
1655
        "muls r6, r4, r6    \n\t"            \
1656
        "lsrs r4, r6, #16   \n\t"            \
1657
        "lsls r6, r6, #16   \n\t"            \
1658
        "adds %[l], %[l], r6    \n\t"            \
1659
        "adcs %[h], %[h], r4    \n\t"            \
1660
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1661
        : [a] "l" (va), [b] "l" (vb)                     \
1662
        : "r4", "r5", "r6", "cc"                         \
1663
    )
1664
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * WOLFSSL_KEIL variant, written with three-operand instruction forms.
 * Same partial-product scheme as SP_ASM_MUL, but vo is set to zero and
 * doubles as the zero addend for the carry into vh; nothing else is
 * written to vo, so it is zero on exit. The incoming values of vl, vh and
 * vo are overwritten before they are read.
 * r6 and r7 hold the halves and partial products.
 *
 * vl, vh, vo  Result words, low to high (declared read-write).
 * va, vb      Multiplicands (read only).
 * Clobbers r6, r7 and the condition flags.
 */
1665
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1666
    __asm__ __volatile__ (                               \
1667
        /* al * bl */                                    \
1668
        "uxth r6, %[a]    \n\t"            \
1669
        "uxth %[l], %[b]    \n\t"            \
1670
        "muls %[l], r6, %[l]    \n\t"            \
1671
        /* al * bh */                                    \
1672
        "lsrs r7, %[b], #16   \n\t"            \
1673
        "muls r6, r7, r6    \n\t"            \
1674
        "lsrs %[h], r6, #16   \n\t"            \
1675
        "lsls r6, r6, #16   \n\t"            \
1676
        "adds %[l], %[l], r6    \n\t"            \
1677
        "movs %[o], #0    \n\t"            \
1678
        "adcs %[h], %[h], %[o]  \n\t"            \
1679
        /* ah * bh */                                    \
1680
        "lsrs r6, %[a], #16   \n\t"            \
1681
        "muls r7, r6, r7    \n\t"            \
1682
        "adds %[h], %[h], r7    \n\t"            \
1683
        /* ah * bl */                                    \
1684
        "uxth r7, %[b]    \n\t"            \
1685
        "muls r6, r7, r6    \n\t"            \
1686
        "lsrs r7, r6, #16   \n\t"            \
1687
        "lsls r6, r6, #16   \n\t"            \
1688
        "adds %[l], %[l], r6    \n\t"            \
1689
        "adcs %[h], %[h], r7    \n\t"            \
1690
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1691
        : [a] "l" (va), [b] "l" (vb)                     \
1692
        : "r6", "r7", "cc"                               \
1693
    )
1694
#ifndef WOLFSSL_SP_SMALL
1695
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant for builds without WOLFSSL_SP_SMALL, written with
 * three-operand instruction forms.
 * The operands are split into 16-bit halves and the four partial products
 * al*bl, al*bh, ah*bh and ah*bl are added into the accumulator one after
 * another, each with an adds/adcs carry chain that ends in vo.
 * r5 is zeroed once and reused as the zero addend; r6 and r7 hold the
 * halves and the partial products.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6, r7 and the condition flags.
 */
1696
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1697
    __asm__ __volatile__ (                               \
1698
        /* al * bl */                                    \
1699
        "uxth r6, %[a]    \n\t"            \
1700
        "uxth r7, %[b]    \n\t"            \
1701
        "muls r7, r6, r7    \n\t"            \
1702
        "adds %[l], %[l], r7    \n\t"            \
1703
        "movs r5, #0      \n\t"            \
1704
        "adcs %[h], %[h], r5    \n\t"            \
1705
        "adcs %[o], %[o], r5    \n\t"            \
1706
        /* al * bh */                                    \
1707
        "lsrs r7, %[b], #16   \n\t"            \
1708
        "muls r6, r7, r6    \n\t"            \
1709
        "lsrs r7, r6, #16   \n\t"            \
1710
        "lsls r6, r6, #16   \n\t"            \
1711
        "adds %[l], %[l], r6    \n\t"            \
1712
        "adcs %[h], %[h], r7    \n\t"            \
1713
        "adcs %[o], %[o], r5    \n\t"            \
1714
        /* ah * bh */                                    \
1715
        "lsrs r6, %[a], #16   \n\t"            \
1716
        "lsrs r7, %[b], #16   \n\t"            \
1717
        "muls r7, r6, r7    \n\t"            \
1718
        "adds %[h], %[h], r7    \n\t"            \
1719
        "adcs %[o], %[o], r5    \n\t"            \
1720
        /* ah * bl */                                    \
1721
        "uxth r7, %[b]    \n\t"            \
1722
        "muls r6, r7, r6    \n\t"            \
1723
        "lsrs r7, r6, #16   \n\t"            \
1724
        "lsls r6, r6, #16   \n\t"            \
1725
        "adds %[l], %[l], r6    \n\t"            \
1726
        "adcs %[h], %[h], r7    \n\t"            \
1727
        "adcs %[o], %[o], r5    \n\t"            \
1728
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1729
        : [a] "l" (va), [b] "l" (vb)                     \
1730
        : "r5", "r6", "r7", "cc"                         \
1731
    )
1732
#else
1733
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant for WOLFSSL_SP_SMALL builds, written with
 * three-operand instruction forms.
 * It adds the same four 16x16-bit partial products (al*bl, al*bh, ah*bh,
 * ah*bl) as the version above but uses only r5 and r6 as scratch. r5 is
 * therefore both a temporary and the zero addend, and is re-zeroed with
 * "movs r5, #0" each time a zero is needed for carry propagation. The
 * sequence depends on that movs leaving the carry produced by the
 * preceding adds/adcs intact.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6 and the condition flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1734
    __asm__ __volatile__ (                               \
1735
        /* al * bl */                                    \
1736
        "uxth   r6, %[a]                \n\t"            \
1737
        "uxth   r5, %[b]                \n\t"            \
1738
        "muls   r5, r6, r5              \n\t"            \
1739
        "adds   %[l], %[l], r5          \n\t"            \
1740
        "movs   r5, #0                  \n\t"            \
1741
        "adcs   %[h], %[h], r5          \n\t"            \
1742
        "adcs   %[o], %[o], r5          \n\t"            \
1743
        /* al * bh */                                    \
1744
        "lsrs   r5, %[b], #16           \n\t"            \
1745
        "muls   r6, r5, r6              \n\t"            \
1746
        "lsrs   r5, r6, #16             \n\t"            \
1747
        "lsls   r6, r6, #16             \n\t"            \
1748
        "adds   %[l], %[l], r6          \n\t"            \
1749
        "adcs   %[h], %[h], r5          \n\t"            \
1750
        "movs   r5, #0                  \n\t"            \
1751
        "adcs   %[o], %[o], r5          \n\t"            \
1752
        /* ah * bh */                                    \
1753
        "lsrs   r6, %[a], #16           \n\t"            \
1754
        "lsrs   r5, %[b], #16           \n\t"            \
1755
        "muls   r5, r6, r5              \n\t"            \
1756
        "adds   %[h], %[h], r5          \n\t"            \
1757
        "movs   r5, #0                  \n\t"            \
1758
        "adcs   %[o], %[o], r5          \n\t"            \
1759
        /* ah * bl */                                    \
1760
        "uxth   r5, %[b]                \n\t"            \
1761
        "muls   r6, r5, r6              \n\t"            \
1762
        "lsrs   r5, r6, #16             \n\t"            \
1763
        "lsls   r6, r6, #16             \n\t"            \
1764
        "adds   %[l], %[l], r6          \n\t"            \
1765
        "adcs   %[h], %[h], r5          \n\t"            \
1766
        "movs   r5, #0                  \n\t"            \
1767
        "adcs   %[o], %[o], r5          \n\t"            \
1768
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1769
        : [a] "l" (va), [b] "l" (vb)                     \
1770
        : "r5", "r6", "cc"                               \
1771
    )
1772
#endif
1773
/* Multiply va by vb and add double size result into: vh | vl
 *
 * WOLFSSL_KEIL variant, written with three-operand instruction forms.
 * Adds the four 16x16-bit partial products (al*bl, al*bh, ah*bh, ah*bl)
 * into the two-word accumulator. There is no third accumulator word, so a
 * carry out of vh is not recorded.
 * r5 is zeroed once and used as the zero addend; r4 and r6 hold the halves
 * and the partial products.
 *
 * vl, vh  Accumulator words, low and high (read and written).
 * va, vb  Multiplicands (read only).
 * Clobbers r4, r5, r6 and the condition flags.
 */
1774
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1775
    __asm__ __volatile__ (                               \
1776
        /* al * bl */                                    \
1777
        "uxth r6, %[a]    \n\t"            \
1778
        "uxth r4, %[b]    \n\t"            \
1779
        "muls r4, r6, r4    \n\t"            \
1780
        "adds %[l], %[l], r4    \n\t"            \
1781
        "movs r5, #0      \n\t"            \
1782
        "adcs %[h], %[h], r5    \n\t"            \
1783
        /* al * bh */                                    \
1784
        "lsrs r4, %[b], #16   \n\t"            \
1785
        "muls r6, r4, r6    \n\t"            \
1786
        "lsrs r4, r6, #16   \n\t"            \
1787
        "lsls r6, r6, #16   \n\t"            \
1788
        "adds %[l], %[l], r6    \n\t"            \
1789
        "adcs %[h], %[h], r4    \n\t"            \
1790
        /* ah * bh */                                    \
1791
        "lsrs r6, %[a], #16   \n\t"            \
1792
        "lsrs r4, %[b], #16   \n\t"            \
1793
        "muls r4, r6, r4    \n\t"            \
1794
        "adds %[h], %[h], r4    \n\t"            \
1795
        /* ah * bl */                                    \
1796
        "uxth r4, %[b]    \n\t"            \
1797
        "muls r6, r4, r6    \n\t"            \
1798
        "lsrs r4, r6, #16   \n\t"            \
1799
        "lsls r6, r6, #16   \n\t"            \
1800
        "adds %[l], %[l], r6    \n\t"            \
1801
        "adcs %[h], %[h], r4    \n\t"            \
1802
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1803
        : [a] "l" (va), [b] "l" (vb)                     \
1804
        : "r4", "r5", "r6", "cc"                         \
1805
    )
1806
#ifndef WOLFSSL_SP_SMALL
1807
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant for builds without WOLFSSL_SP_SMALL, written with
 * three-operand instruction forms.
 * Each of the four 16x16-bit partial products (al*bl, al*bh, ah*bh, ah*bl)
 * is computed once and then added into the accumulator two times, each
 * addition with its own adds/adcs carry chain ending in vo.
 * r5 is zeroed once and reused as the zero addend; r6 and r7 hold the
 * halves and the partial products.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (read only).
 * Clobbers r5, r6, r7 and the condition flags.
 */
1808
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1809
    __asm__ __volatile__ (                               \
1810
        /* al * bl */                                    \
1811
        "uxth r6, %[a]    \n\t"            \
1812
        "uxth r7, %[b]    \n\t"            \
1813
        "muls r7, r6, r7    \n\t"            \
1814
        "adds %[l], %[l], r7    \n\t"            \
1815
        "movs r5, #0      \n\t"            \
1816
        "adcs %[h], %[h], r5    \n\t"            \
1817
        "adcs %[o], %[o], r5    \n\t"            \
1818
        "adds %[l], %[l], r7    \n\t"            \
1819
        "adcs %[h], %[h], r5    \n\t"            \
1820
        "adcs %[o], %[o], r5    \n\t"            \
1821
        /* al * bh */                                    \
1822
        "lsrs r7, %[b], #16   \n\t"            \
1823
        "muls r6, r7, r6    \n\t"            \
1824
        "lsrs r7, r6, #16   \n\t"            \
1825
        "lsls r6, r6, #16   \n\t"            \
1826
        "adds %[l], %[l], r6    \n\t"            \
1827
        "adcs %[h], %[h], r7    \n\t"            \
1828
        "adcs %[o], %[o], r5    \n\t"            \
1829
        "adds %[l], %[l], r6    \n\t"            \
1830
        "adcs %[h], %[h], r7    \n\t"            \
1831
        "adcs %[o], %[o], r5    \n\t"            \
1832
        /* ah * bh */                                    \
1833
        "lsrs r6, %[a], #16   \n\t"            \
1834
        "lsrs r7, %[b], #16   \n\t"            \
1835
        "muls r7, r6, r7    \n\t"            \
1836
        "adds %[h], %[h], r7    \n\t"            \
1837
        "adcs %[o], %[o], r5    \n\t"            \
1838
        "adds %[h], %[h], r7    \n\t"            \
1839
        "adcs %[o], %[o], r5    \n\t"            \
1840
        /* ah * bl */                                    \
1841
        "uxth r7, %[b]    \n\t"            \
1842
        "muls r6, r7, r6    \n\t"            \
1843
        "lsrs r7, r6, #16   \n\t"            \
1844
        "lsls r6, r6, #16   \n\t"            \
1845
        "adds %[l], %[l], r6    \n\t"            \
1846
        "adcs %[h], %[h], r7    \n\t"            \
1847
        "adcs %[o], %[o], r5    \n\t"            \
1848
        "adds %[l], %[l], r6    \n\t"            \
1849
        "adcs %[h], %[h], r7    \n\t"            \
1850
        "adcs %[o], %[o], r5    \n\t"            \
1851
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1852
        : [a] "l" (va), [b] "l" (vb)                     \
1853
        : "r5", "r6", "r7", "cc"                         \
1854
    )
1855
#else
1856
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * WOLFSSL_KEIL variant for WOLFSSL_SP_SMALL builds, written with
 * three-operand instruction forms. Each of the four 16x16-bit partial
 * products (al*bl, al*bh, ah*bh, ah*bl) is computed once and added into the
 * accumulator two times, using only r5 and r6 as low scratch registers.
 *
 * Both scratch registers stay live across the doubled additions, so the
 * register holding va is borrowed as the zero addend for carry propagation.
 * Its value is parked in r8 on entry, reloaded before the high half of va
 * is needed, and reloaded again at the end so the input operand holds its
 * original value when the asm statement finishes.
 *
 * The copies to and from r8 use the plain "mov": the flag-setting register
 * move ("movs Rd, Rm") only accepts low registers in the 16-bit Thumb
 * encoding, while "mov" accepts high registers and leaves the flags alone.
 *
 * NOTE(review): va's register is overwritten while vb is still being read,
 * so va and vb must not share a register - confirm the compiler cannot
 * allocate one register for both when the two expressions are equal.
 *
 * vl, vh, vo  Accumulator words, low to high (read and written).
 * va, vb      Multiplicands (value preserved on exit).
 * Clobbers r5, r6, r8 and the condition flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "mov  r8, %[a]    \n\t"            \
        /* al * bl */                                    \
        "uxth r6, %[a]    \n\t"            \
        "uxth r5, %[b]    \n\t"            \
        "muls r5, r6, r5    \n\t"            \
        "adds %[l], %[l], r5    \n\t"            \
        "movs %[a], #0    \n\t"            \
        "adcs %[h], %[h], %[a]  \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        "adds %[l], %[l], r5    \n\t"            \
        "adcs %[h], %[h], %[a]  \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        /* al * bh */                                    \
        "lsrs r5, %[b], #16   \n\t"            \
        "muls r6, r5, r6    \n\t"            \
        "lsrs r5, r6, #16   \n\t"            \
        "lsls r6, r6, #16   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        /* ah * bh */                                    \
        "mov  %[a], r8    \n\t"            \
        "lsrs r6, %[a], #16   \n\t"            \
        "lsrs r5, %[b], #16   \n\t"            \
        "muls r5, r6, r5    \n\t"            \
        "adds %[h], %[h], r5    \n\t"            \
        "movs %[a], #0    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        "adds %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        /* ah * bl */                                    \
        "uxth r5, %[b]    \n\t"            \
        "muls r6, r5, r6    \n\t"            \
        "lsrs r5, r6, #16   \n\t"            \
        "lsls r6, r6, #16   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], %[h], r5    \n\t"            \
        "adcs %[o], %[o], %[a]  \n\t"            \
        /* restore va */                                 \
        "mov  %[a], r8    \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
1908
#endif
1909
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1910
 * Assumes first add will not overflow vh | vl
1911
 *
 * The product is built from the four partial products of the 16-bit halves
 * of va (ah | al) and vb (bh | bl); each partial product is added in twice.
 * Carry is not propagated into vo after the two al * bl additions nor after
 * the first al * bh addition; every other addition is followed by an
 * add-with-carry of zero (held in r5) into vo.
 *
 * vl, vh, vo: accumulator words, read and written.
 * va, vb:     inputs, only read.
 * Clobbers r5, r6, r7 and the condition flags.
 */
1912
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1913
    __asm__ __volatile__ (                               \
1914
        /* al * bl */                                    \
1915
        "uxth r6, %[a]    \n\t"            \
1916
        "uxth r7, %[b]    \n\t"            \
1917
        "muls r7, r6, r7    \n\t"            \
1918
        "adds %[l], %[l], r7    \n\t"            \
1919
        "movs r5, #0      \n\t"            \
1920
        "adcs %[h], %[h], r5    \n\t"            \
1921
        "adds %[l], %[l], r7    \n\t"            \
1922
        "adcs %[h], %[h], r5    \n\t"            \
1923
        /* al * bh */                                    \
1924
        "lsrs r7, %[b], #16   \n\t"            \
1925
        "muls r6, r7, r6    \n\t"            \
1926
        "lsrs r7, r6, #16   \n\t"            \
1927
        "lsls r6, r6, #16   \n\t"            \
1928
        "adds %[l], %[l], r6    \n\t"            \
1929
        "adcs %[h], %[h], r7    \n\t"            \
1930
        "adds %[l], %[l], r6    \n\t"            \
1931
        "adcs %[h], %[h], r7    \n\t"            \
1932
        "adcs %[o], %[o], r5    \n\t"            \
1933
        /* ah * bh */                                    \
1934
        "lsrs r6, %[a], #16   \n\t"            \
1935
        "lsrs r7, %[b], #16   \n\t"            \
1936
        "muls r7, r6, r7    \n\t"            \
1937
        "adds %[h], %[h], r7    \n\t"            \
1938
        "adcs %[o], %[o], r5    \n\t"            \
1939
        "adds %[h], %[h], r7    \n\t"            \
1940
        "adcs %[o], %[o], r5    \n\t"            \
1941
        /* ah * bl */                                    \
1942
        "uxth r7, %[b]    \n\t"            \
1943
        "muls r6, r7, r6    \n\t"            \
1944
        "lsrs r7, r6, #16   \n\t"            \
1945
        "lsls r6, r6, #16   \n\t"            \
1946
        "adds %[l], %[l], r6    \n\t"            \
1947
        "adcs %[h], %[h], r7    \n\t"            \
1948
        "adcs %[o], %[o], r5    \n\t"            \
1949
        "adds %[l], %[l], r6    \n\t"            \
1950
        "adcs %[h], %[h], r7    \n\t"            \
1951
        "adcs %[o], %[o], r5    \n\t"            \
1952
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1953
        : [a] "l" (va), [b] "l" (vb)                     \
1954
        : "r5", "r6", "r7", "cc"                         \
1955
    )
1956
/* Square va and store double size result in: vh | vl
 *
 * va is split into its 16-bit halves: ah (r5) and al (r6). vl is set to
 * al * al and vh to ah * ah. The cross product al * ah is then doubled and
 * aligned to bit 16 in one step: it is shifted left by 17 into the low word
 * and right by 15 into the high word, and added with carry into vh | vl.
 *
 * vl, vh: results (declared read-write, but both are overwritten before
 *         they are read).
 * va:     input, only read.
 * Clobbers r5, r6 and the condition flags.
 */
1957
#define SP_ASM_SQR(vl, vh, va)                           \
1958
    __asm__ __volatile__ (                               \
1959
        "lsrs r5, %[a], #16   \n\t"            \
1960
        "uxth r6, %[a]    \n\t"            \
1961
        "mov  %[l], r6    \n\t"            \
1962
        "mov  %[h], r5    \n\t"            \
1963
        /* al * al */                                    \
1964
        "muls %[l], %[l], %[l]  \n\t"            \
1965
        /* ah * ah */                                    \
1966
        "muls %[h], %[h], %[h]  \n\t"            \
1967
        /* 2 * al * ah */                                \
1968
        "muls r6, r5, r6    \n\t"            \
1969
        "lsrs r5, r6, #15   \n\t"            \
1970
        "lsls r6, r6, #17   \n\t"            \
1971
        "adds %[l], %[l], r6    \n\t"            \
1972
        "adcs %[h], %[h], r5    \n\t"            \
1973
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1974
        : [a] "l" (va)                                   \
1975
        : "r5", "r6", "cc"                               \
1976
    )
1977
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves ah (r4) and al (r6). al * al is added to
 * vl and ah * ah to vh, with the carry propagated into vo by adding zero
 * (r5) with carry. The halves are then reloaded, the cross product al * ah
 * is doubled and aligned to bit 16 (left shift by 17 for the low word,
 * right shift by 15 for the high word) and added into vo | vh | vl.
 *
 * vl, vh, vo: accumulator words, read and written.
 * va:         input, only read.
 * Clobbers r4, r5, r6 and the condition flags.
 */
1978
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1979
    __asm__ __volatile__ (                               \
1980
        "lsrs r4, %[a], #16   \n\t"            \
1981
        "uxth r6, %[a]    \n\t"            \
1982
        /* al * al */                                    \
1983
        "muls r6, r6, r6    \n\t"            \
1984
        /* ah * ah */                                    \
1985
        "muls r4, r4, r4    \n\t"            \
1986
        "adds %[l], %[l], r6    \n\t"            \
1987
        "adcs %[h], %[h], r4    \n\t"            \
1988
        "movs r5, #0      \n\t"            \
1989
        "adcs %[o], %[o], r5    \n\t"            \
1990
        "lsrs r4, %[a], #16   \n\t"            \
1991
        "uxth r6, %[a]    \n\t"            \
1992
        /* 2 * al * ah */                                \
1993
        "muls r6, r4, r6    \n\t"            \
1994
        "lsrs r4, r6, #15   \n\t"            \
1995
        "lsls r6, r6, #17   \n\t"            \
1996
        "adds %[l], %[l], r6    \n\t"            \
1997
        "adcs %[h], %[h], r4    \n\t"            \
1998
        "adcs %[o], %[o], r5    \n\t"            \
1999
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2000
        : [a] "l" (va)                                   \
2001
        : "r4", "r5", "r6", "cc"                         \
2002
    )
2003
/* Square va and add double size result into: vh | vl
 *
 * Two-word form of the square-and-add: there is no third accumulator word,
 * so any carry out of vh is dropped.
 * va is split into 16-bit halves ah (r7) and al (r6). al * al is added to
 * vl and ah * ah (plus carry) to vh. The halves are reloaded, and the cross
 * product al * ah is doubled and aligned to bit 16 (left shift by 17 for
 * the low word, right shift by 15 for the high word) before being added.
 *
 * vl, vh: accumulator words, read and written.
 * va:     input, only read.
 * Clobbers r6, r7 and the condition flags.
 */
2004
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2005
    __asm__ __volatile__ (                               \
2006
        "lsrs r7, %[a], #16   \n\t"            \
2007
        "uxth r6, %[a]    \n\t"            \
2008
        /* al * al */                                    \
2009
        "muls r6, r6, r6    \n\t"            \
2010
        /* ah * ah */                                    \
2011
        "muls r7, r7, r7    \n\t"            \
2012
        "adds %[l], %[l], r6    \n\t"            \
2013
        "adcs %[h], %[h], r7    \n\t"            \
2014
        "lsrs r7, %[a], #16   \n\t"            \
2015
        "uxth r6, %[a]    \n\t"            \
2016
        /* 2 * al * ah */                                \
2017
        "muls r6, r7, r6    \n\t"            \
2018
        "lsrs r7, r6, #15   \n\t"            \
2019
        "lsls r6, r6, #17   \n\t"            \
2020
        "adds %[l], %[l], r6    \n\t"            \
2021
        "adcs %[h], %[h], r7    \n\t"            \
2022
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2023
        : [a] "l" (va)                                   \
2024
        : "r6", "r7", "cc"                               \
2025
    )
2026
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh by an
 * add-with-carry of zero (r5). The zero is loaded between the two
 * additions, so the sequence relies on that load leaving the carry flag
 * from the first addition intact.
 *
 * vl, vh: two-word accumulator, read and written.
 * va:     input, only read.
 * Clobbers r5 and the condition flags.
 */
2027
#define SP_ASM_ADDC(vl, vh, va)                          \
2028
    __asm__ __volatile__ (                               \
2029
        "adds %[l], %[l], %[a]  \n\t"            \
2030
        "movs r5, #0      \n\t"            \
2031
        "adcs %[h], %[h], r5    \n\t"            \
2032
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2033
        : [a] "l" (va)                                   \
2034
        : "r5", "cc"                                     \
2035
    )
2036
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by a
 * subtract-with-carry of zero (r5). The zero is loaded between the two
 * subtractions, so the sequence relies on that load leaving the carry flag
 * from the first subtraction intact.
 *
 * vl, vh: two-word value, read and written.
 * va:     input, only read.
 * Clobbers r5 and the condition flags.
 */
2037
#define SP_ASM_SUBC(vl, vh, va)                          \
2038
    __asm__ __volatile__ (                               \
2039
        "subs %[l], %[l], %[a]  \n\t"            \
2040
        "movs r5, #0      \n\t"            \
2041
        "sbcs %[h], %[h], r5    \n\t"            \
2042
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2043
        : [a] "l" (va)                                   \
2044
        : "r5", "cc"                                     \
2045
    )
2046
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added, word by word with carry
 * propagation from vl up to vo, and then the same addition is performed a
 * second time. Any carry out of vo is dropped.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb, vc: three-word addend (va is the low word), only read.
 * Clobbers only the condition flags; no scratch registers are used.
 */
2047
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2048
    __asm__ __volatile__ (                               \
2049
        "adds %[l], %[l], %[a]  \n\t"            \
2050
        "adcs %[h], %[h], %[b]  \n\t"            \
2051
        "adcs %[o], %[o], %[c]  \n\t"            \
2052
        "adds %[l], %[l], %[a]  \n\t"            \
2053
        "adcs %[h], %[h], %[b]  \n\t"            \
2054
        "adcs %[o], %[o], %[c]  \n\t"            \
2055
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2056
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2057
        : "cc"                                           \
2058
    )
2059
2060
#elif defined(__GNUC__)
2061
2062
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Variant from the __GNUC__ branch; mnemonics are written without the
 * flag-setting "s" suffix.
 * The product is assembled from the four partial products of the 16-bit
 * halves of va (ah | al) and vb (bh | bl):
 *   al * bl  - stored in vl
 *   al * bh  - upper 16 bits stored in vh, lower 16 bits shifted up and
 *              added to vl, carry added into vh
 *   ah * bh  - added to vh
 *   ah * bl  - lower 16 bits shifted up and added to vl, upper 16 bits
 *              added with carry to vh
 *
 * vl, vh: results (declared read-write, but both are overwritten before
 *         they are read).
 * va, vb: inputs, only read.
 * Clobbers r4, r5, r6 and the condition flags.
 */
2063
#define SP_ASM_MUL(vl, vh, va, vb)                       \
2064
    __asm__ __volatile__ (                               \
2065
        /* al * bl */                                    \
2066
        "uxth r6, %[a]    \n\t"            \
2067
        "uxth %[l], %[b]    \n\t"            \
2068
        "mul  %[l], r6    \n\t"            \
2069
        /* al * bh */                                    \
2070
        "lsr  r4, %[b], #16   \n\t"            \
2071
        "mul  r6, r4      \n\t"            \
2072
        "lsr  %[h], r6, #16   \n\t"            \
2073
        "lsl  r6, r6, #16   \n\t"            \
2074
        "add  %[l], %[l], r6    \n\t"            \
2075
        "mov  r5, #0      \n\t"            \
2076
        "adc  %[h], r5    \n\t"            \
2077
        /* ah * bh */                                    \
2078
        "lsr  r6, %[a], #16   \n\t"            \
2079
        "mul  r4, r6      \n\t"            \
2080
        "add  %[h], %[h], r4    \n\t"            \
2081
        /* ah * bl */                                    \
2082
        "uxth r4, %[b]    \n\t"            \
2083
        "mul  r6, r4      \n\t"            \
2084
        "lsr  r4, r6, #16   \n\t"            \
2085
        "lsl  r6, r6, #16   \n\t"            \
2086
        "add  %[l], %[l], r6    \n\t"            \
2087
        "adc  %[h], r4    \n\t"            \
2088
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2089
        : [a] "l" (va), [b] "l" (vb)                     \
2090
        : "r4", "r5", "r6", "cc"                         \
2091
    )
2092
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same partial-product scheme as a plain double-word multiply (al * bl,
 * al * bh, ah * bh, ah * bl on the 16-bit halves of va and vb), but the
 * third word vo is also initialised: it is set to zero and doubles as the
 * zero operand for the add-with-carry into vh. No carry is ever added into
 * vo, so it is zero on exit.
 *
 * vl, vh, vo: results (declared read-write, but all three are overwritten
 *             before they are read).
 * va, vb:     inputs, only read.
 * Clobbers r6, r7 and the condition flags.
 */
2093
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
2094
    __asm__ __volatile__ (                               \
2095
        /* al * bl */                                    \
2096
        "uxth r6, %[a]    \n\t"            \
2097
        "uxth %[l], %[b]    \n\t"            \
2098
        "mul  %[l], r6    \n\t"            \
2099
        /* al * bh */                                    \
2100
        "lsr  r7, %[b], #16   \n\t"            \
2101
        "mul  r6, r7      \n\t"            \
2102
        "lsr  %[h], r6, #16   \n\t"            \
2103
        "lsl  r6, r6, #16   \n\t"            \
2104
        "add  %[l], %[l], r6    \n\t"            \
2105
        "mov  %[o], #0    \n\t"            \
2106
        "adc  %[h], %[o]    \n\t"            \
2107
        /* ah * bh */                                    \
2108
        "lsr  r6, %[a], #16   \n\t"            \
2109
        "mul  r7, r6      \n\t"            \
2110
        "add  %[h], %[h], r7    \n\t"            \
2111
        /* ah * bl */                                    \
2112
        "uxth r7, %[b]    \n\t"            \
2113
        "mul  r6, r7      \n\t"            \
2114
        "lsr  r7, r6, #16   \n\t"            \
2115
        "lsl  r6, r6, #16   \n\t"            \
2116
        "add  %[l], %[l], r6    \n\t"            \
2117
        "adc  %[h], r7    \n\t"            \
2118
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2119
        : [a] "l" (va), [b] "l" (vb)                     \
2120
        : "r6", "r7", "cc"                               \
2121
    )
2122
#ifndef WOLFSSL_SP_SMALL
2123
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Version used when WOLFSSL_SP_SMALL is not defined.
 * The product is built from the four partial products of the 16-bit halves
 * of va (ah | al) and vb (bh | bl). Each partial product is added into the
 * accumulator at its bit offset and the carry is propagated all the way
 * into vo by adding zero (kept in r5 throughout) with carry.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb:     inputs, only read.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2124
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2125
    __asm__ __volatile__ (                               \
2126
        /* al * bl */                                    \
2127
        "uxth r6, %[a]    \n\t"            \
2128
        "uxth r7, %[b]    \n\t"            \
2129
        "mul  r7, r6      \n\t"            \
2130
        "add  %[l], %[l], r7    \n\t"            \
2131
        "mov  r5, #0      \n\t"            \
2132
        "adc  %[h], r5    \n\t"            \
2133
        "adc  %[o], r5    \n\t"            \
2134
        /* al * bh */                                    \
2135
        "lsr  r7, %[b], #16   \n\t"            \
2136
        "mul  r6, r7      \n\t"            \
2137
        "lsr  r7, r6, #16   \n\t"            \
2138
        "lsl  r6, r6, #16   \n\t"            \
2139
        "add  %[l], %[l], r6    \n\t"            \
2140
        "adc  %[h], r7    \n\t"            \
2141
        "adc  %[o], r5    \n\t"            \
2142
        /* ah * bh */                                    \
2143
        "lsr  r6, %[a], #16   \n\t"            \
2144
        "lsr  r7, %[b], #16   \n\t"            \
2145
        "mul  r7, r6      \n\t"            \
2146
        "add  %[h], %[h], r7    \n\t"            \
2147
        "adc  %[o], r5    \n\t"            \
2148
        /* ah * bl */                                    \
2149
        "uxth r7, %[b]    \n\t"            \
2150
        "mul  r6, r7      \n\t"            \
2151
        "lsr  r7, r6, #16   \n\t"            \
2152
        "lsl  r6, r6, #16   \n\t"            \
2153
        "add  %[l], %[l], r6    \n\t"            \
2154
        "adc  %[h], r7    \n\t"            \
2155
        "adc  %[o], r5    \n\t"            \
2156
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2157
        : [a] "l" (va), [b] "l" (vb)                     \
2158
        : "r5", "r6", "r7", "cc"                         \
2159
    )
2160
#else
2161
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Version used when WOLFSSL_SP_SMALL is defined. It computes the same four
 * partial products of the 16-bit halves of va and vb as the non-small
 * version but uses only two scratch registers. r5 serves both as a
 * temporary and as the zero operand, so it is reloaded with 0 before each
 * add-with-carry into vo; those reloads sit between carry-producing and
 * carry-consuming instructions and so rely on the load leaving the carry
 * flag intact.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb:     inputs, only read.
 * Clobbers r5, r6 and the condition flags.
 */
2162
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2163
    __asm__ __volatile__ (                               \
2164
        /* al * bl */                                    \
2165
        "uxth   r6, %[a]                \n\t"            \
2166
        "uxth   r5, %[b]                \n\t"            \
2167
        "mul    r5, r6                  \n\t"            \
2168
        "add    %[l], %[l], r5          \n\t"            \
2169
        "mov    r5, #0                  \n\t"            \
2170
        "adc    %[h], r5                \n\t"            \
2171
        "adc    %[o], r5                \n\t"            \
2172
        /* al * bh */                                    \
2173
        "lsr    r5, %[b], #16           \n\t"            \
2174
        "mul    r6, r5                  \n\t"            \
2175
        "lsr    r5, r6, #16             \n\t"            \
2176
        "lsl    r6, r6, #16             \n\t"            \
2177
        "add    %[l], %[l], r6          \n\t"            \
2178
        "adc    %[h], r5                \n\t"            \
2179
        "mov    r5, #0                  \n\t"            \
2180
        "adc    %[o], r5                \n\t"            \
2181
        /* ah * bh */                                    \
2182
        "lsr    r6, %[a], #16           \n\t"            \
2183
        "lsr    r5, %[b], #16           \n\t"            \
2184
        "mul    r5, r6                  \n\t"            \
2185
        "add    %[h], %[h], r5          \n\t"            \
2186
        "mov    r5, #0                  \n\t"            \
2187
        "adc    %[o], r5                \n\t"            \
2188
        /* ah * bl */                                    \
2189
        "uxth   r5, %[b]                \n\t"            \
2190
        "mul    r6, r5                  \n\t"            \
2191
        "lsr    r5, r6, #16             \n\t"            \
2192
        "lsl    r6, r6, #16             \n\t"            \
2193
        "add    %[l], %[l], r6          \n\t"            \
2194
        "adc    %[h], r5                \n\t"            \
2195
        "mov    r5, #0                  \n\t"            \
2196
        "adc    %[o], r5                \n\t"            \
2197
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2198
        : [a] "l" (va), [b] "l" (vb)                     \
2199
        : "r5", "r6", "cc"                               \
2200
    )
2201
#endif
2202
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word form of multiply-and-add: there is no third accumulator word,
 * so any carry out of vh is dropped.
 * The product is built from the four partial products of the 16-bit halves
 * of va (ah | al) and vb (bh | bl); each is added into vh | vl at its bit
 * offset, with the carry out of vl added into vh.
 *
 * vl, vh: two-word accumulator, read and written.
 * va, vb: inputs, only read.
 * Clobbers r4, r5, r6 and the condition flags.
 */
2203
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2204
    __asm__ __volatile__ (                               \
2205
        /* al * bl */                                    \
2206
        "uxth r6, %[a]    \n\t"            \
2207
        "uxth r4, %[b]    \n\t"            \
2208
        "mul  r4, r6      \n\t"            \
2209
        "add  %[l], %[l], r4    \n\t"            \
2210
        "mov  r5, #0      \n\t"            \
2211
        "adc  %[h], r5    \n\t"            \
2212
        /* al * bh */                                    \
2213
        "lsr  r4, %[b], #16   \n\t"            \
2214
        "mul  r6, r4      \n\t"            \
2215
        "lsr  r4, r6, #16   \n\t"            \
2216
        "lsl  r6, r6, #16   \n\t"            \
2217
        "add  %[l], %[l], r6    \n\t"            \
2218
        "adc  %[h], r4    \n\t"            \
2219
        /* ah * bh */                                    \
2220
        "lsr  r6, %[a], #16   \n\t"            \
2221
        "lsr  r4, %[b], #16   \n\t"            \
2222
        "mul  r4, r6      \n\t"            \
2223
        "add  %[h], %[h], r4    \n\t"            \
2224
        /* ah * bl */                                    \
2225
        "uxth r4, %[b]    \n\t"            \
2226
        "mul  r6, r4      \n\t"            \
2227
        "lsr  r4, r6, #16   \n\t"            \
2228
        "lsl  r6, r6, #16   \n\t"            \
2229
        "add  %[l], %[l], r6    \n\t"            \
2230
        "adc  %[h], r4    \n\t"            \
2231
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2232
        : [a] "l" (va), [b] "l" (vb)                     \
2233
        : "r4", "r5", "r6", "cc"                         \
2234
    )
2235
#ifndef WOLFSSL_SP_SMALL
2236
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Version used when WOLFSSL_SP_SMALL is not defined.
 * The product is built from the four partial products of the 16-bit halves
 * of va (ah | al) and vb (bh | bl). Each partial product is added into the
 * accumulator twice, and after every one of those additions the carry is
 * propagated into vo by adding zero (kept in r5 throughout) with carry.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb:     inputs, only read.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2237
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2238
    __asm__ __volatile__ (                               \
2239
        /* al * bl */                                    \
2240
        "uxth r6, %[a]    \n\t"            \
2241
        "uxth r7, %[b]    \n\t"            \
2242
        "mul  r7, r6      \n\t"            \
2243
        "add  %[l], %[l], r7    \n\t"            \
2244
        "mov  r5, #0      \n\t"            \
2245
        "adc  %[h], r5    \n\t"            \
2246
        "adc  %[o], r5    \n\t"            \
2247
        "add  %[l], %[l], r7    \n\t"            \
2248
        "adc  %[h], r5    \n\t"            \
2249
        "adc  %[o], r5    \n\t"            \
2250
        /* al * bh */                                    \
2251
        "lsr  r7, %[b], #16   \n\t"            \
2252
        "mul  r6, r7      \n\t"            \
2253
        "lsr  r7, r6, #16   \n\t"            \
2254
        "lsl  r6, r6, #16   \n\t"            \
2255
        "add  %[l], %[l], r6    \n\t"            \
2256
        "adc  %[h], r7    \n\t"            \
2257
        "adc  %[o], r5    \n\t"            \
2258
        "add  %[l], %[l], r6    \n\t"            \
2259
        "adc  %[h], r7    \n\t"            \
2260
        "adc  %[o], r5    \n\t"            \
2261
        /* ah * bh */                                    \
2262
        "lsr  r6, %[a], #16   \n\t"            \
2263
        "lsr  r7, %[b], #16   \n\t"            \
2264
        "mul  r7, r6      \n\t"            \
2265
        "add  %[h], %[h], r7    \n\t"            \
2266
        "adc  %[o], r5    \n\t"            \
2267
        "add  %[h], %[h], r7    \n\t"            \
2268
        "adc  %[o], r5    \n\t"            \
2269
        /* ah * bl */                                    \
2270
        "uxth r7, %[b]    \n\t"            \
2271
        "mul  r6, r7      \n\t"            \
2272
        "lsr  r7, r6, #16   \n\t"            \
2273
        "lsl  r6, r6, #16   \n\t"            \
2274
        "add  %[l], %[l], r6    \n\t"            \
2275
        "adc  %[h], r7    \n\t"            \
2276
        "adc  %[o], r5    \n\t"            \
2277
        "add  %[l], %[l], r6    \n\t"            \
2278
        "adc  %[h], r7    \n\t"            \
2279
        "adc  %[o], r5    \n\t"            \
2280
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2281
        : [a] "l" (va), [b] "l" (vb)                     \
2282
        : "r5", "r6", "r7", "cc"                         \
2283
    )
2284
#else
2285
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Version used when WOLFSSL_SP_SMALL is defined. It computes the same four
 * partial products of the 16-bit halves of va and vb as the non-small
 * version, each added in twice with carry propagated into vo, but uses
 * only r5 and r6 as low scratch registers. To get a zero operand for the
 * add-with-carry instructions, va is saved in r8 and the register holding
 * %[a] is overwritten with 0; it is reloaded from r8 before the ah * bh
 * step, zeroed again, and finally restored from r8 on exit.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb:     inputs; va's register is temporarily modified (see above).
 * Clobbers r5, r6, r8 and the condition flags.
 *
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the asm. GCC's extended asm rules say input-only operands must
 * not be modified, and if the compiler ever placed va and vb in the same
 * register the zeroing would presumably corrupt %[b] as well - confirm
 * this is safe for every call site.
 */
2286
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2287
    __asm__ __volatile__ (                               \
2288
        "mov    r8, %[a]                \n\t"            \
2289
        /* al * bl */                                    \
2290
        "uxth   r6, %[a]                \n\t"            \
2291
        "uxth   r5, %[b]                \n\t"            \
2292
        "mul    r5, r6                  \n\t"            \
2293
        "add    %[l], %[l], r5          \n\t"            \
2294
        "mov    %[a], #0                \n\t"            \
2295
        "adc    %[h], %[a]              \n\t"            \
2296
        "adc    %[o], %[a]              \n\t"            \
2297
        "add    %[l], %[l], r5          \n\t"            \
2298
        "adc    %[h], %[a]              \n\t"            \
2299
        "adc    %[o], %[a]              \n\t"            \
2300
        /* al * bh */                                    \
2301
        "lsr    r5, %[b], #16           \n\t"            \
2302
        "mul    r6, r5                  \n\t"            \
2303
        "lsr    r5, r6, #16             \n\t"            \
2304
        "lsl    r6, r6, #16             \n\t"            \
2305
        "add    %[l], %[l], r6          \n\t"            \
2306
        "adc    %[h], r5                \n\t"            \
2307
        "adc    %[o], %[a]              \n\t"            \
2308
        "add    %[l], %[l], r6          \n\t"            \
2309
        "adc    %[h], r5                \n\t"            \
2310
        "adc    %[o], %[a]              \n\t"            \
2311
        /* ah * bh */                                    \
2312
        "mov    %[a], r8                \n\t"            \
2313
        "lsr    r6, %[a], #16           \n\t"            \
2314
        "lsr    r5, %[b], #16           \n\t"            \
2315
        "mul    r5, r6                  \n\t"            \
2316
        "add    %[h], %[h], r5          \n\t"            \
2317
        "mov    %[a], #0                \n\t"            \
2318
        "adc    %[o], %[a]              \n\t"            \
2319
        "add    %[h], %[h], r5          \n\t"            \
2320
        "adc    %[o], %[a]              \n\t"            \
2321
        /* ah * bl */                                    \
2322
        "uxth   r5, %[b]                \n\t"            \
2323
        "mul    r6, r5                  \n\t"            \
2324
        "lsr    r5, r6, #16             \n\t"            \
2325
        "lsl    r6, r6, #16             \n\t"            \
2326
        "add    %[l], %[l], r6          \n\t"            \
2327
        "adc    %[h], r5                \n\t"            \
2328
        "adc    %[o], %[a]              \n\t"            \
2329
        "add    %[l], %[l], r6          \n\t"            \
2330
        "adc    %[h], r5                \n\t"            \
2331
        "adc    %[o], %[a]              \n\t"            \
2332
        "mov    %[a], r8                \n\t"            \
2333
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2334
        : [a] "l" (va), [b] "l" (vb)                     \
2335
        : "r5", "r6", "r8", "cc"                         \
2336
    )
2337
#endif
2338
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2339
 * Assumes first add will not overflow vh | vl
2340
 *
 * The product is built from the four partial products of the 16-bit halves
 * of va (ah | al) and vb (bh | bl); each partial product is added in twice.
 * Carry is not propagated into vo after the two al * bl additions nor after
 * the first al * bh addition; every other addition is followed by an
 * add-with-carry of zero (held in r5) into vo.
 *
 * vl, vh, vo: accumulator words, read and written.
 * va, vb:     inputs, only read.
 * Clobbers r5, r6, r7 and the condition flags.
 */
2341
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2342
    __asm__ __volatile__ (                               \
2343
        /* al * bl */                                    \
2344
        "uxth r6, %[a]    \n\t"            \
2345
        "uxth r7, %[b]    \n\t"            \
2346
        "mul  r7, r6      \n\t"            \
2347
        "add  %[l], %[l], r7    \n\t"            \
2348
        "mov  r5, #0      \n\t"            \
2349
        "adc  %[h], r5    \n\t"            \
2350
        "add  %[l], %[l], r7    \n\t"            \
2351
        "adc  %[h], r5    \n\t"            \
2352
        /* al * bh */                                    \
2353
        "lsr  r7, %[b], #16   \n\t"            \
2354
        "mul  r6, r7      \n\t"            \
2355
        "lsr  r7, r6, #16   \n\t"            \
2356
        "lsl  r6, r6, #16   \n\t"            \
2357
        "add  %[l], %[l], r6    \n\t"            \
2358
        "adc  %[h], r7    \n\t"            \
2359
        "add  %[l], %[l], r6    \n\t"            \
2360
        "adc  %[h], r7    \n\t"            \
2361
        "adc  %[o], r5    \n\t"            \
2362
        /* ah * bh */                                    \
2363
        "lsr  r6, %[a], #16   \n\t"            \
2364
        "lsr  r7, %[b], #16   \n\t"            \
2365
        "mul  r7, r6      \n\t"            \
2366
        "add  %[h], %[h], r7    \n\t"            \
2367
        "adc  %[o], r5    \n\t"            \
2368
        "add  %[h], %[h], r7    \n\t"            \
2369
        "adc  %[o], r5    \n\t"            \
2370
        /* ah * bl */                                    \
2371
        "uxth r7, %[b]    \n\t"            \
2372
        "mul  r6, r7      \n\t"            \
2373
        "lsr  r7, r6, #16   \n\t"            \
2374
        "lsl  r6, r6, #16   \n\t"            \
2375
        "add  %[l], %[l], r6    \n\t"            \
2376
        "adc  %[h], r7    \n\t"            \
2377
        "adc  %[o], r5    \n\t"            \
2378
        "add  %[l], %[l], r6    \n\t"            \
2379
        "adc  %[h], r7    \n\t"            \
2380
        "adc  %[o], r5    \n\t"            \
2381
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2382
        : [a] "l" (va), [b] "l" (vb)                     \
2383
        : "r5", "r6", "r7", "cc"                         \
2384
    )
2385
/* Square va and store double size result in: vh | vl
 *
 * va is split into its 16-bit halves: ah (r5) and al (r6). vl is set to
 * al * al and vh to ah * ah. The cross product al * ah is then doubled and
 * aligned to bit 16 in one step: it is shifted left by 17 into the low word
 * and right by 15 into the high word, and added with carry into vh | vl.
 *
 * vl, vh: results (declared read-write, but both are overwritten before
 *         they are read).
 * va:     input, only read.
 * Clobbers r5, r6 and the condition flags.
 */
2386
#define SP_ASM_SQR(vl, vh, va)                           \
2387
    __asm__ __volatile__ (                               \
2388
        "lsr  r5, %[a], #16   \n\t"            \
2389
        "uxth r6, %[a]    \n\t"            \
2390
        "mov  %[l], r6    \n\t"            \
2391
        "mov  %[h], r5    \n\t"            \
2392
        /* al * al */                                    \
2393
        "mul  %[l], %[l]    \n\t"            \
2394
        /* ah * ah */                                    \
2395
        "mul  %[h], %[h]    \n\t"            \
2396
        /* 2 * al * ah */                                \
2397
        "mul  r6, r5      \n\t"            \
2398
        "lsr  r5, r6, #15   \n\t"            \
2399
        "lsl  r6, r6, #17   \n\t"            \
2400
        "add  %[l], %[l], r6    \n\t"            \
2401
        "adc  %[h], r5    \n\t"            \
2402
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2403
        : [a] "l" (va)                                   \
2404
        : "r5", "r6", "cc"                               \
2405
    )
2406
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves ah (r4) and al (r6). al * al is added to
 * vl and ah * ah to vh, with the carry propagated into vo by adding zero
 * (r5) with carry. The halves are then reloaded, the cross product al * ah
 * is doubled and aligned to bit 16 (left shift by 17 for the low word,
 * right shift by 15 for the high word) and added into vo | vh | vl.
 *
 * vl, vh, vo: accumulator words, read and written.
 * va:         input, only read.
 * Clobbers r4, r5, r6 and the condition flags.
 */
2407
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2408
    __asm__ __volatile__ (                               \
2409
        "lsr  r4, %[a], #16   \n\t"            \
2410
        "uxth r6, %[a]    \n\t"            \
2411
        /* al * al */                                    \
2412
        "mul  r6, r6      \n\t"            \
2413
        /* ah * ah */                                    \
2414
        "mul  r4, r4      \n\t"            \
2415
        "add  %[l], %[l], r6    \n\t"            \
2416
        "adc  %[h], r4    \n\t"            \
2417
        "mov  r5, #0      \n\t"            \
2418
        "adc  %[o], r5    \n\t"            \
2419
        "lsr  r4, %[a], #16   \n\t"            \
2420
        "uxth r6, %[a]    \n\t"            \
2421
        /* 2 * al * ah */                                \
2422
        "mul  r6, r4      \n\t"            \
2423
        "lsr  r4, r6, #15   \n\t"            \
2424
        "lsl  r6, r6, #17   \n\t"            \
2425
        "add  %[l], %[l], r6    \n\t"            \
2426
        "adc  %[h], r4    \n\t"            \
2427
        "adc  %[o], r5    \n\t"            \
2428
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2429
        : [a] "l" (va)                                   \
2430
        : "r4", "r5", "r6", "cc"                         \
2431
    )
2432
/* Square va and add double size result into: vh | vl
 *
 * Two-word form of the square-and-add: there is no third accumulator word,
 * so any carry out of vh is dropped.
 * va is split into 16-bit halves ah (r7) and al (r6). al * al is added to
 * vl and ah * ah (plus carry) to vh. The halves are reloaded, and the cross
 * product al * ah is doubled and aligned to bit 16 (left shift by 17 for
 * the low word, right shift by 15 for the high word) before being added.
 *
 * vl, vh: accumulator words, read and written.
 * va:     input, only read.
 * Clobbers r6, r7 and the condition flags.
 */
2433
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2434
    __asm__ __volatile__ (                               \
2435
        "lsr  r7, %[a], #16   \n\t"            \
2436
        "uxth r6, %[a]    \n\t"            \
2437
        /* al * al */                                    \
2438
        "mul  r6, r6      \n\t"            \
2439
        /* ah * ah */                                    \
2440
        "mul  r7, r7      \n\t"            \
2441
        "add  %[l], %[l], r6    \n\t"            \
2442
        "adc  %[h], r7    \n\t"            \
2443
        "lsr  r7, %[a], #16   \n\t"            \
2444
        "uxth r6, %[a]    \n\t"            \
2445
        /* 2 * al * ah */                                \
2446
        "mul  r6, r7      \n\t"            \
2447
        "lsr  r7, r6, #15   \n\t"            \
2448
        "lsl  r6, r6, #17   \n\t"            \
2449
        "add  %[l], %[l], r6    \n\t"            \
2450
        "adc  %[h], r7    \n\t"            \
2451
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2452
        : [a] "l" (va)                                   \
2453
        : "r6", "r7", "cc"                               \
2454
    )
2455
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh by an
 * add-with-carry of zero (r5). The zero is loaded between the two
 * additions, so the sequence relies on that load leaving the carry flag
 * from the first addition intact.
 *
 * vl, vh: two-word accumulator, read and written.
 * va:     input, only read.
 * Clobbers r5 and the condition flags.
 */
2456
#define SP_ASM_ADDC(vl, vh, va)                          \
2457
    __asm__ __volatile__ (                               \
2458
        "add  %[l], %[l], %[a]  \n\t"            \
2459
        "mov  r5, #0      \n\t"            \
2460
        "adc  %[h], r5    \n\t"            \
2461
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2462
        : [a] "l" (va)                                   \
2463
        : "r5", "cc"                                     \
2464
    )
2465
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by a
 * subtract-with-carry of zero (r5). The zero is loaded between the two
 * subtractions, so the sequence relies on that load leaving the carry flag
 * from the first subtraction intact.
 *
 * vl, vh: two-word value, read and written.
 * va:     input, only read.
 * Clobbers r5 and the condition flags.
 */
2466
#define SP_ASM_SUBC(vl, vh, va)                          \
2467
    __asm__ __volatile__ (                               \
2468
        "sub  %[l], %[l], %[a]  \n\t"            \
2469
        "mov  r5, #0      \n\t"            \
2470
        "sbc  %[h], r5    \n\t"            \
2471
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2472
        : [a] "l" (va)                                   \
2473
        : "r5", "cc"                                     \
2474
    )
2475
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added, word by word with carry
 * propagation from vl up to vo, and then the same addition is performed a
 * second time. Any carry out of vo is dropped.
 *
 * vl, vh, vo: three-word accumulator, read and written.
 * va, vb, vc: three-word addend (va is the low word), only read.
 * Clobbers only the condition flags; no scratch registers are used.
 */
2476
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2477
    __asm__ __volatile__ (                               \
2478
        "add  %[l], %[l], %[a]  \n\t"            \
2479
        "adc  %[h], %[b]    \n\t"            \
2480
        "adc  %[o], %[c]    \n\t"            \
2481
        "add  %[l], %[l], %[a]  \n\t"            \
2482
        "adc  %[h], %[b]    \n\t"            \
2483
        "adc  %[o], %[c]    \n\t"            \
2484
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2485
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2486
        : "cc"                                           \
2487
    )
2488
2489
#endif
2490
2491
#ifdef WOLFSSL_SP_DIV_WORD_HALF
2492
/* Divide a two digit number by a digit number and return. (hi | lo) / d
2493
 *
2494
 * No division instruction used - does operation bit by bit.
2495
 * Constant time.
2496
 *
2497
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
2498
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
2499
 * @param  [in]  d   SP integer digit. Number to divide by.
2500
 * @return  The division result.
2501
 */
2502
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2503
                                          sp_int_digit d)
2504
{
2505
    __asm__ __volatile__ (
2506
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2507
        "lsrs r3, %[d], #24\n\t"
2508
#else
2509
        "lsr  r3, %[d], #24\n\t"
2510
#endif
2511
        "beq  2%=f\n\t"
2512
  "\n1%=:\n\t"
2513
        "movs r3, #0\n\t"
2514
        "b  3%=f\n\t"
2515
  "\n2%=:\n\t"
2516
        "mov  r3, #8\n\t"
2517
  "\n3%=:\n\t"
2518
        "movs r4, #31\n\t"
2519
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2520
        "subs r4, r4, r3\n\t"
2521
#else
2522
        "sub  r4, r4, r3\n\t"
2523
#endif
2524
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2525
        "lsls %[d], %[d], r3\n\t"
2526
#else
2527
        "lsl  %[d], %[d], r3\n\t"
2528
#endif
2529
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2530
        "lsls %[hi], %[hi], r3\n\t"
2531
#else
2532
        "lsl  %[hi], %[hi], r3\n\t"
2533
#endif
2534
        "mov  r5, %[lo]\n\t"
2535
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2536
        "lsrs r5, r5, r4\n\t"
2537
#else
2538
        "lsr  r5, r5, r4\n\t"
2539
#endif
2540
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2541
        "lsls %[lo], %[lo], r3\n\t"
2542
#else
2543
        "lsl  %[lo], %[lo], r3\n\t"
2544
#endif
2545
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2546
        "lsrs r5, r5, #1\n\t"
2547
#else
2548
        "lsr  r5, r5, #1\n\t"
2549
#endif
2550
#if defined(WOLFSSL_KEIL)
2551
        "orrs %[hi], %[hi], r5\n\t"
2552
#elif defined(__clang__)
2553
        "orrs %[hi], r5\n\t"
2554
#else
2555
        "orr  %[hi], r5\n\t"
2556
#endif
2557
2558
        "movs   r3, #0\n\t"
2559
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2560
        "lsrs   r5, %[d], #1\n\t"
2561
#else
2562
        "lsr    r5, %[d], #1\n\t"
2563
#endif
2564
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2565
        "adds   r5, r5, #1\n\t"
2566
#else
2567
        "add    r5, r5, #1\n\t"
2568
#endif
2569
        "mov    r8, %[lo]\n\t"
2570
        "mov    r9, %[hi]\n\t"
2571
        /* Do top 32 */
2572
        "movs   r6, r5\n\t"
2573
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2574
        "subs   r6, r6, %[hi]\n\t"
2575
#else
2576
        "sub    r6, r6, %[hi]\n\t"
2577
#endif
2578
#ifdef WOLFSSL_KEIL
2579
        "sbcs   r6, r6, r6\n\t"
2580
#elif defined(__clang__)
2581
        "sbcs   r6, r6\n\t"
2582
#else
2583
        "sbc    r6, r6\n\t"
2584
#endif
2585
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2586
        "adds   r3, r3, r3\n\t"
2587
#else
2588
        "add    r3, r3, r3\n\t"
2589
#endif
2590
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2591
        "subs   r3, r3, r6\n\t"
2592
#else
2593
        "sub    r3, r3, r6\n\t"
2594
#endif
2595
#ifdef WOLFSSL_KEIL
2596
        "ands   r6, r6, r5\n\t"
2597
#elif defined(__clang__)
2598
        "ands   r6, r5\n\t"
2599
#else
2600
        "and    r6, r5\n\t"
2601
#endif
2602
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2603
        "subs   %[hi], %[hi], r6\n\t"
2604
#else
2605
        "sub    %[hi], %[hi], r6\n\t"
2606
#endif
2607
        "movs   r4, #29\n\t"
2608
        "\n"
2609
    "L_sp_div_word_loop%=:\n\t"
2610
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2611
        "lsls   %[lo], %[lo], #1\n\t"
2612
#else
2613
        "lsl    %[lo], %[lo], #1\n\t"
2614
#endif
2615
#ifdef WOLFSSL_KEIL
2616
        "adcs   %[hi], %[hi], %[hi]\n\t"
2617
#elif defined(__clang__)
2618
        "adcs   %[hi], %[hi]\n\t"
2619
#else
2620
        "adc    %[hi], %[hi]\n\t"
2621
#endif
2622
        "movs   r6, r5\n\t"
2623
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2624
        "subs   r6, r6, %[hi]\n\t"
2625
#else
2626
        "sub    r6, r6, %[hi]\n\t"
2627
#endif
2628
#ifdef WOLFSSL_KEIL
2629
        "sbcs   r6, r6, r6\n\t"
2630
#elif defined(__clang__)
2631
        "sbcs   r6, r6\n\t"
2632
#else
2633
        "sbc    r6, r6\n\t"
2634
#endif
2635
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2636
        "adds   r3, r3, r3\n\t"
2637
#else
2638
        "add    r3, r3, r3\n\t"
2639
#endif
2640
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2641
        "subs   r3, r3, r6\n\t"
2642
#else
2643
        "sub    r3, r3, r6\n\t"
2644
#endif
2645
#ifdef WOLFSSL_KEIL
2646
        "ands   r6, r6, r5\n\t"
2647
#elif defined(__clang__)
2648
        "ands   r6, r5\n\t"
2649
#else
2650
        "and    r6, r5\n\t"
2651
#endif
2652
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2653
        "subs   %[hi], %[hi], r6\n\t"
2654
#else
2655
        "sub    %[hi], %[hi], r6\n\t"
2656
#endif
2657
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2658
        "subs   r4, r4, #1\n\t"
2659
#else
2660
        "sub    r4, r4, #1\n\t"
2661
#endif
2662
        "bpl    L_sp_div_word_loop%=\n\t"
2663
        "movs   r7, #0\n\t"
2664
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2665
        "adds   r3, r3, r3\n\t"
2666
#else
2667
        "add    r3, r3, r3\n\t"
2668
#endif
2669
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2670
        "adds   r3, r3, #1\n\t"
2671
#else
2672
        "add    r3, r3, #1\n\t"
2673
#endif
2674
        /* r * d - Start */
2675
        "uxth   %[hi], r3\n\t"
2676
        "uxth   r4, %[d]\n\t"
2677
#ifdef WOLFSSL_KEIL
2678
        "muls   r4, %[hi], r4\n\t"
2679
#elif defined(__clang__)
2680
        "muls   r4, %[hi]\n\t"
2681
#else
2682
        "mul    r4, %[hi]\n\t"
2683
#endif
2684
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2685
        "lsrs   r6, %[d], #16\n\t"
2686
#else
2687
        "lsr    r6, %[d], #16\n\t"
2688
#endif
2689
#ifdef WOLFSSL_KEIL
2690
        "muls   %[hi], r6, %[hi]\n\t"
2691
#elif defined(__clang__)
2692
        "muls   %[hi], r6\n\t"
2693
#else
2694
        "mul    %[hi], r6\n\t"
2695
#endif
2696
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2697
        "lsrs   r5, %[hi], #16\n\t"
2698
#else
2699
        "lsr    r5, %[hi], #16\n\t"
2700
#endif
2701
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2702
        "lsls   %[hi], %[hi], #16\n\t"
2703
#else
2704
        "lsl    %[hi], %[hi], #16\n\t"
2705
#endif
2706
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2707
        "adds   r4, r4, %[hi]\n\t"
2708
#else
2709
        "add    r4, r4, %[hi]\n\t"
2710
#endif
2711
#ifdef WOLFSSL_KEIL
2712
        "adcs   r5, r5, r7\n\t"
2713
#elif defined(__clang__)
2714
        "adcs   r5, r7\n\t"
2715
#else
2716
        "adc    r5, r7\n\t"
2717
#endif
2718
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2719
        "lsrs   %[hi], r3, #16\n\t"
2720
#else
2721
        "lsr    %[hi], r3, #16\n\t"
2722
#endif
2723
#ifdef WOLFSSL_KEIL
2724
        "muls   r6, %[hi], r6\n\t"
2725
#elif defined(__clang__)
2726
        "muls   r6, %[hi]\n\t"
2727
#else
2728
        "mul    r6, %[hi]\n\t"
2729
#endif
2730
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2731
        "adds   r5, r5, r6\n\t"
2732
#else
2733
        "add    r5, r5, r6\n\t"
2734
#endif
2735
        "uxth   r6, %[d]\n\t"
2736
#ifdef WOLFSSL_KEIL
2737
        "muls   %[hi], r6, %[hi]\n\t"
2738
#elif defined(__clang__)
2739
        "muls   %[hi], r6\n\t"
2740
#else
2741
        "mul    %[hi], r6\n\t"
2742
#endif
2743
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2744
        "lsrs   r6, %[hi], #16\n\t"
2745
#else
2746
        "lsr    r6, %[hi], #16\n\t"
2747
#endif
2748
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2749
        "lsls   %[hi], %[hi], #16\n\t"
2750
#else
2751
        "lsl    %[hi], %[hi], #16\n\t"
2752
#endif
2753
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2754
        "adds   r4, r4, %[hi]\n\t"
2755
#else
2756
        "add    r4, r4, %[hi]\n\t"
2757
#endif
2758
#ifdef WOLFSSL_KEIL
2759
        "adcs   r5, r5, r6\n\t"
2760
#elif defined(__clang__)
2761
        "adcs   r5, r6\n\t"
2762
#else
2763
        "adc    r5, r6\n\t"
2764
#endif
2765
        /* r * d - Done */
2766
        "mov    %[hi], r8\n\t"
2767
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2768
        "subs   %[hi], %[hi], r4\n\t"
2769
#else
2770
        "sub    %[hi], %[hi], r4\n\t"
2771
#endif
2772
        "movs   r4, %[hi]\n\t"
2773
        "mov    %[hi], r9\n\t"
2774
#ifdef WOLFSSL_KEIL
2775
        "sbcs   %[hi], %[hi], r5\n\t"
2776
#elif defined(__clang__)
2777
        "sbcs   %[hi], r5\n\t"
2778
#else
2779
        "sbc    %[hi], r5\n\t"
2780
#endif
2781
        "movs   r5, %[hi]\n\t"
2782
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2783
        "adds   r3, r3, r5\n\t"
2784
#else
2785
        "add    r3, r3, r5\n\t"
2786
#endif
2787
        /* r * d - Start */
2788
        "uxth   %[hi], r3\n\t"
2789
        "uxth   r4, %[d]\n\t"
2790
#ifdef WOLFSSL_KEIL
2791
        "muls   r4, %[hi], r4\n\t"
2792
#elif defined(__clang__)
2793
        "muls   r4, %[hi]\n\t"
2794
#else
2795
        "mul    r4, %[hi]\n\t"
2796
#endif
2797
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2798
        "lsrs   r6, %[d], #16\n\t"
2799
#else
2800
        "lsr    r6, %[d], #16\n\t"
2801
#endif
2802
#ifdef WOLFSSL_KEIL
2803
        "muls   %[hi], r6, %[hi]\n\t"
2804
#elif defined(__clang__)
2805
        "muls   %[hi], r6\n\t"
2806
#else
2807
        "mul    %[hi], r6\n\t"
2808
#endif
2809
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2810
        "lsrs   r5, %[hi], #16\n\t"
2811
#else
2812
        "lsr    r5, %[hi], #16\n\t"
2813
#endif
2814
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2815
        "lsls   %[hi], %[hi], #16\n\t"
2816
#else
2817
        "lsl    %[hi], %[hi], #16\n\t"
2818
#endif
2819
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2820
        "adds   r4, r4, %[hi]\n\t"
2821
#else
2822
        "add    r4, r4, %[hi]\n\t"
2823
#endif
2824
#ifdef WOLFSSL_KEIL
2825
        "adcs   r5, r5, r7\n\t"
2826
#elif defined(__clang__)
2827
        "adcs   r5, r7\n\t"
2828
#else
2829
        "adc    r5, r7\n\t"
2830
#endif
2831
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2832
        "lsrs   %[hi], r3, #16\n\t"
2833
#else
2834
        "lsr    %[hi], r3, #16\n\t"
2835
#endif
2836
#ifdef WOLFSSL_KEIL
2837
        "muls   r6, %[hi], r6\n\t"
2838
#elif defined(__clang__)
2839
        "muls   r6, %[hi]\n\t"
2840
#else
2841
        "mul    r6, %[hi]\n\t"
2842
#endif
2843
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2844
        "adds   r5, r5, r6\n\t"
2845
#else
2846
        "add    r5, r5, r6\n\t"
2847
#endif
2848
        "uxth   r6, %[d]\n\t"
2849
#ifdef WOLFSSL_KEIL
2850
        "muls   %[hi], r6, %[hi]\n\t"
2851
#elif defined(__clang__)
2852
        "muls   %[hi], r6\n\t"
2853
#else
2854
        "mul    %[hi], r6\n\t"
2855
#endif
2856
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2857
        "lsrs   r6, %[hi], #16\n\t"
2858
#else
2859
        "lsr    r6, %[hi], #16\n\t"
2860
#endif
2861
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2862
        "lsls   %[hi], %[hi], #16\n\t"
2863
#else
2864
        "lsl    %[hi], %[hi], #16\n\t"
2865
#endif
2866
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2867
        "adds   r4, r4, %[hi]\n\t"
2868
#else
2869
        "add    r4, r4, %[hi]\n\t"
2870
#endif
2871
#ifdef WOLFSSL_KEIL
2872
        "adcs   r5, r5, r6\n\t"
2873
#elif defined(__clang__)
2874
        "adcs   r5, r6\n\t"
2875
#else
2876
        "adc    r5, r6\n\t"
2877
#endif
2878
        /* r * d - Done */
2879
        "mov    %[hi], r8\n\t"
2880
        "mov    r6, r9\n\t"
2881
#ifdef WOLFSSL_KEIL
2882
        "subs   r4, %[hi], r4\n\t"
2883
#else
2884
#ifdef __clang__
2885
        "subs   r4, %[hi], r4\n\t"
2886
#else
2887
        "sub    r4, %[hi], r4\n\t"
2888
#endif
2889
#endif
2890
#ifdef WOLFSSL_KEIL
2891
        "sbcs   r6, r6, r5\n\t"
2892
#elif defined(__clang__)
2893
        "sbcs   r6, r5\n\t"
2894
#else
2895
        "sbc    r6, r5\n\t"
2896
#endif
2897
        "movs   r5, r6\n\t"
2898
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2899
        "adds   r3, r3, r5\n\t"
2900
#else
2901
        "add    r3, r3, r5\n\t"
2902
#endif
2903
        /* r * d - Start */
2904
        "uxth   %[hi], r3\n\t"
2905
        "uxth   r4, %[d]\n\t"
2906
#ifdef WOLFSSL_KEIL
2907
        "muls   r4, %[hi], r4\n\t"
2908
#elif defined(__clang__)
2909
        "muls   r4, %[hi]\n\t"
2910
#else
2911
        "mul    r4, %[hi]\n\t"
2912
#endif
2913
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2914
        "lsrs   r6, %[d], #16\n\t"
2915
#else
2916
        "lsr    r6, %[d], #16\n\t"
2917
#endif
2918
#ifdef WOLFSSL_KEIL
2919
        "muls   %[hi], r6, %[hi]\n\t"
2920
#elif defined(__clang__)
2921
        "muls   %[hi], r6\n\t"
2922
#else
2923
        "mul    %[hi], r6\n\t"
2924
#endif
2925
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2926
        "lsrs   r5, %[hi], #16\n\t"
2927
#else
2928
        "lsr    r5, %[hi], #16\n\t"
2929
#endif
2930
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2931
        "lsls   %[hi], %[hi], #16\n\t"
2932
#else
2933
        "lsl    %[hi], %[hi], #16\n\t"
2934
#endif
2935
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2936
        "adds   r4, r4, %[hi]\n\t"
2937
#else
2938
        "add    r4, r4, %[hi]\n\t"
2939
#endif
2940
#ifdef WOLFSSL_KEIL
2941
        "adcs   r5, r5, r7\n\t"
2942
#elif defined(__clang__)
2943
        "adcs   r5, r7\n\t"
2944
#else
2945
        "adc    r5, r7\n\t"
2946
#endif
2947
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2948
        "lsrs   %[hi], r3, #16\n\t"
2949
#else
2950
        "lsr    %[hi], r3, #16\n\t"
2951
#endif
2952
#ifdef WOLFSSL_KEIL
2953
        "muls   r6, %[hi], r6\n\t"
2954
#elif defined(__clang__)
2955
        "muls   r6, %[hi]\n\t"
2956
#else
2957
        "mul    r6, %[hi]\n\t"
2958
#endif
2959
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2960
        "adds   r5, r5, r6\n\t"
2961
#else
2962
        "add    r5, r5, r6\n\t"
2963
#endif
2964
        "uxth   r6, %[d]\n\t"
2965
#ifdef WOLFSSL_KEIL
2966
        "muls   %[hi], r6, %[hi]\n\t"
2967
#elif defined(__clang__)
2968
        "muls   %[hi], r6\n\t"
2969
#else
2970
        "mul    %[hi], r6\n\t"
2971
#endif
2972
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2973
        "lsrs   r6, %[hi], #16\n\t"
2974
#else
2975
        "lsr    r6, %[hi], #16\n\t"
2976
#endif
2977
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2978
        "lsls   %[hi], %[hi], #16\n\t"
2979
#else
2980
        "lsl    %[hi], %[hi], #16\n\t"
2981
#endif
2982
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2983
        "adds   r4, r4, %[hi]\n\t"
2984
#else
2985
        "add    r4, r4, %[hi]\n\t"
2986
#endif
2987
#ifdef WOLFSSL_KEIL
2988
        "adcs   r5, r5, r6\n\t"
2989
#elif defined(__clang__)
2990
        "adcs   r5, r6\n\t"
2991
#else
2992
        "adc    r5, r6\n\t"
2993
#endif
2994
        /* r * d - Done */
2995
        "mov    %[hi], r8\n\t"
2996
        "mov    r6, r9\n\t"
2997
#ifdef WOLFSSL_KEIL
2998
        "subs   r4, %[hi], r4\n\t"
2999
#else
3000
#ifdef __clang__
3001
        "subs   r4, %[hi], r4\n\t"
3002
#else
3003
        "sub    r4, %[hi], r4\n\t"
3004
#endif
3005
#endif
3006
#ifdef WOLFSSL_KEIL
3007
        "sbcs   r6, r6, r5\n\t"
3008
#elif defined(__clang__)
3009
        "sbcs   r6, r5\n\t"
3010
#else
3011
        "sbc    r6, r5\n\t"
3012
#endif
3013
        "movs   r5, r6\n\t"
3014
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3015
        "adds   r3, r3, r5\n\t"
3016
#else
3017
        "add    r3, r3, r5\n\t"
3018
#endif
3019
        "movs   r6, %[d]\n\t"
3020
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3021
        "subs   r6, r6, r4\n\t"
3022
#else
3023
        "sub    r6, r6, r4\n\t"
3024
#endif
3025
#ifdef WOLFSSL_KEIL
3026
        "sbcs   r6, r6, r6\n\t"
3027
#elif defined(__clang__)
3028
        "sbcs   r6, r6\n\t"
3029
#else
3030
        "sbc    r6, r6\n\t"
3031
#endif
3032
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3033
        "subs   r3, r3, r6\n\t"
3034
#else
3035
        "sub    r3, r3, r6\n\t"
3036
#endif
3037
        "movs   %[hi], r3\n\t"
3038
        : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3039
        :
3040
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"
3041
    );
3042
    return (uint32_t)(size_t)hi;
3043
}
3044
3045
#define SP_ASM_DIV_WORD
3046
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
3047
3048
#define SP_INT_ASM_AVAILABLE
3049
3050
    #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3051
3052
    #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3053
/*
3054
 * CPU: PPC64
3055
 */
3056
3057
/* Multiply va by vb and store double size result in: vh | vl
 *
 * PPC64 version. mulld produces the low word of the product and mulhdu the
 * high word (unsigned).
 * No scratch registers are used. A "memory" clobber is declared although the
 * instructions shown only operate on registers.
 */
3058
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3059
    __asm__ __volatile__ (                               \
3060
        "mulld  %[l], %[a], %[b]  \n\t"            \
3061
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3062
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3063
        : [a] "r" (va), [b] "r" (vb)                     \
3064
        : "memory"                                       \
3065
    )
3066
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * PPC64 version. The high word is computed first (mulhdu), then the low word
 * (mulld), and finally vo is loaded with 0.
 * vo is a write-only operand; it is written after both multiplies have read
 * va and vb. Nothing is declared as clobbered.
 */
3067
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3068
    __asm__ __volatile__ (                               \
3069
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3070
        "mulld  %[l], %[a], %[b]  \n\t"            \
3071
        "li %[o], 0     \n\t"            \
3072
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3073
        : [a] "r" (va), [b] "r" (vb)                     \
3074
        :                                                \
3075
    )
3076
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * PPC64 version. The product is computed into fixed scratch registers
 * 16 (low word) and 17 (high word). It is then accumulated with addc/adde,
 * and addze adds the final carry into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3077
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3078
    __asm__ __volatile__ (                               \
3079
        "mulld  16, %[a], %[b]    \n\t"            \
3080
        "mulhdu 17, %[a], %[b]    \n\t"            \
3081
        "addc %[l], %[l], 16    \n\t"            \
3082
        "adde %[h], %[h], 17    \n\t"            \
3083
        "addze  %[o], %[o]    \n\t"            \
3084
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3085
        : [a] "r" (va), [b] "r" (vb)                     \
3086
        : "16", "17", "cc"                               \
3087
    )
3088
/* Multiply va by vb and add double size result into: vh | vl
 *
 * PPC64 version. The product is computed into fixed scratch registers
 * 16 (low word) and 17 (high word) and accumulated with addc/adde.
 * There is no third word: any carry out of vh is not captured.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3089
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3090
    __asm__ __volatile__ (                               \
3091
        "mulld  16, %[a], %[b]    \n\t"            \
3092
        "mulhdu 17, %[a], %[b]    \n\t"            \
3093
        "addc %[l], %[l], 16    \n\t"            \
3094
        "adde %[h], %[h], 17    \n\t"            \
3095
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3096
        : [a] "r" (va), [b] "r" (vb)                     \
3097
        : "16", "17", "cc"                               \
3098
    )
3099
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * PPC64 version. The product is computed once into fixed scratch registers
 * 16 (low word) and 17 (high word). The addc/adde/addze sequence is then
 * executed twice, so the carry of each accumulation is added into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3100
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3101
    __asm__ __volatile__ (                               \
3102
        "mulld  16, %[a], %[b]    \n\t"            \
3103
        "mulhdu 17, %[a], %[b]    \n\t"            \
3104
        "addc %[l], %[l], 16    \n\t"            \
3105
        "adde %[h], %[h], 17    \n\t"            \
3106
        "addze  %[o], %[o]    \n\t"            \
3107
        "addc %[l], %[l], 16    \n\t"            \
3108
        "adde %[h], %[h], 17    \n\t"            \
3109
        "addze  %[o], %[o]    \n\t"            \
3110
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3111
        : [a] "r" (va), [b] "r" (vb)                     \
3112
        : "16", "17", "cc"                               \
3113
    )
3114
/* Multiply va by vb and add double size result twice into: vo | vh | vl
3115
 * Assumes first add will not overflow vh | vl
3116
 *
 * PPC64 version. The product is computed once into fixed scratch registers
 * 16 (low word) and 17 (high word) and accumulated twice with addc/adde.
 * Only the carry of the second accumulation is added into vo (single addze),
 * which is why the first add must not overflow.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3117
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3118
    __asm__ __volatile__ (                               \
3119
        "mulld  16, %[a], %[b]    \n\t"            \
3120
        "mulhdu 17, %[a], %[b]    \n\t"            \
3121
        "addc %[l], %[l], 16    \n\t"            \
3122
        "adde %[h], %[h], 17    \n\t"            \
3123
        "addc %[l], %[l], 16    \n\t"            \
3124
        "adde %[h], %[h], 17    \n\t"            \
3125
        "addze  %[o], %[o]    \n\t"            \
3126
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3127
        : [a] "r" (va), [b] "r" (vb)                     \
3128
        : "16", "17", "cc"                               \
3129
    )
3130
/* Square va and store double size result in: vh | vl
 *
 * PPC64 version. mulld produces the low word of va * va and mulhdu the high
 * word (unsigned).
 * No scratch registers are used. A "memory" clobber is declared although the
 * instructions shown only operate on registers.
 */
3131
#define SP_ASM_SQR(vl, vh, va)                           \
3132
    __asm__ __volatile__ (                               \
3133
        "mulld  %[l], %[a], %[a]  \n\t"            \
3134
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3135
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3136
        : [a] "r" (va)                                   \
3137
        : "memory"                                       \
3138
    )
3139
/* Square va and add double size result into: vo | vh | vl
 *
 * PPC64 version. The square is computed into fixed scratch registers
 * 16 (low word) and 17 (high word). It is then accumulated with addc/adde,
 * and addze adds the final carry into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3140
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3141
    __asm__ __volatile__ (                               \
3142
        "mulld  16, %[a], %[a]    \n\t"            \
3143
        "mulhdu 17, %[a], %[a]    \n\t"            \
3144
        "addc %[l], %[l], 16    \n\t"            \
3145
        "adde %[h], %[h], 17    \n\t"            \
3146
        "addze  %[o], %[o]    \n\t"            \
3147
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3148
        : [a] "r" (va)                                   \
3149
        : "16", "17", "cc"                               \
3150
    )
3151
/* Square va and add double size result into: vh | vl
 *
 * PPC64 version. The square is computed into fixed scratch registers
 * 16 (low word) and 17 (high word) and accumulated with addc/adde.
 * There is no third word: any carry out of vh is not captured.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3152
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3153
    __asm__ __volatile__ (                               \
3154
        "mulld  16, %[a], %[a]    \n\t"            \
3155
        "mulhdu 17, %[a], %[a]    \n\t"            \
3156
        "addc %[l], %[l], 16    \n\t"            \
3157
        "adde %[h], %[h], 17    \n\t"            \
3158
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3159
        : [a] "r" (va)                                   \
3160
        : "16", "17", "cc"                               \
3161
    )
3162
/* Add va into: vh | vl
 *
 * PPC64 version. addc adds va to the low word and records the carry;
 * addze then adds just that carry into the high word.
 * No scratch registers are used; "cc" is declared as clobbered.
 */
3163
#define SP_ASM_ADDC(vl, vh, va)                          \
3164
    __asm__ __volatile__ (                               \
3165
        "addc %[l], %[l], %[a]  \n\t"            \
3166
        "addze  %[h], %[h]    \n\t"            \
3167
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3168
        : [a] "r" (va)                                   \
3169
        : "cc"                                           \
3170
    )
3171
/* Sub va from: vh | vl
 *
 * PPC64 version. subfc computes vl - va (operands are "subtract from" order)
 * and records the carry; register 16 is zeroed and subfe then subtracts it
 * from vh with the carry applied, so only the borrow reaches the high word.
 * Register 16 and "cc" are declared as clobbered.
 */
3172
#define SP_ASM_SUBC(vl, vh, va)                          \
3173
    __asm__ __volatile__ (                               \
3174
        "subfc  %[l], %[a], %[l]  \n\t"            \
3175
        "li    16, 0      \n\t"            \
3176
        "subfe %[h], 16, %[h]   \n\t"            \
3177
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3178
        : [a] "r" (va)                                   \
3179
        : "16", "cc"                                     \
3180
    )
3181
/* Add two times vc | vb | va into vo | vh | vl
 *
 * PPC64 version. The three-word value is not doubled first; instead the same
 * addc/adde/adde chain is executed twice, each time rippling the carry from
 * vl through vh into vo.
 * No scratch registers are used; "cc" is declared as clobbered.
 */
3182
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3183
    __asm__ __volatile__ (                               \
3184
        "addc %[l], %[l], %[a]  \n\t"            \
3185
        "adde %[h], %[h], %[b]  \n\t"            \
3186
        "adde %[o], %[o], %[c]  \n\t"            \
3187
        "addc %[l], %[l], %[a]  \n\t"            \
3188
        "adde %[h], %[h], %[b]  \n\t"            \
3189
        "adde %[o], %[o], %[c]  \n\t"            \
3190
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3191
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3192
        : "cc"                                           \
3193
    )
3194
3195
#define SP_INT_ASM_AVAILABLE
3196
3197
    #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3198
3199
    #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3200
/*
3201
 * CPU: PPC 32-bit
3202
 */
3203
3204
/* Multiply va by vb and store double size result in: vh | vl
 *
 * PPC 32-bit version. mullw produces the low word of the product and mulhwu
 * the high word (unsigned).
 * No scratch registers are used. A "memory" clobber is declared although the
 * instructions shown only operate on registers.
 */
3205
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3206
    __asm__ __volatile__ (                               \
3207
        "mullw  %[l], %[a], %[b]  \n\t"            \
3208
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3209
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3210
        : [a] "r" (va), [b] "r" (vb)                     \
3211
        : "memory"                                       \
3212
    )
3213
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * PPC 32-bit version. The high word is computed first (mulhwu), then the low
 * word (mullw), and finally vo is loaded with 0.
 * vo is a write-only operand; it is written after both multiplies have read
 * va and vb. Nothing is declared as clobbered.
 */
3214
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3215
    __asm__ __volatile__ (                               \
3216
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3217
        "mullw  %[l], %[a], %[b]  \n\t"            \
3218
        "li %[o], 0     \n\t"            \
3219
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3220
        : [a] "r" (va), [b] "r" (vb)                     \
3221
        :                                                \
3222
    )
3223
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * PPC 32-bit version. The product is computed into fixed scratch registers
 * 16 (low word) and 17 (high word). It is then accumulated with addc/adde,
 * and addze adds the final carry into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3224
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3225
    __asm__ __volatile__ (                               \
3226
        "mullw  16, %[a], %[b]    \n\t"            \
3227
        "mulhwu 17, %[a], %[b]    \n\t"            \
3228
        "addc %[l], %[l], 16    \n\t"            \
3229
        "adde %[h], %[h], 17    \n\t"            \
3230
        "addze  %[o], %[o]    \n\t"            \
3231
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3232
        : [a] "r" (va), [b] "r" (vb)                     \
3233
        : "16", "17", "cc"                               \
3234
    )
3235
/* Multiply va by vb and add double size result into: vh | vl
 *
 * PPC 32-bit version. The product is computed into fixed scratch registers
 * 16 (low word) and 17 (high word) and accumulated with addc/adde.
 * There is no third word: any carry out of vh is not captured.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3236
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3237
    __asm__ __volatile__ (                               \
3238
        "mullw  16, %[a], %[b]    \n\t"            \
3239
        "mulhwu 17, %[a], %[b]    \n\t"            \
3240
        "addc %[l], %[l], 16    \n\t"            \
3241
        "adde %[h], %[h], 17    \n\t"            \
3242
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3243
        : [a] "r" (va), [b] "r" (vb)                     \
3244
        : "16", "17", "cc"                               \
3245
    )
3246
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * PPC 32-bit version. The product is computed once into fixed scratch
 * registers 16 (low word) and 17 (high word). The addc/adde/addze sequence
 * is then executed twice, so the carry of each accumulation is added into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3247
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3248
    __asm__ __volatile__ (                               \
3249
        "mullw  16, %[a], %[b]    \n\t"            \
3250
        "mulhwu 17, %[a], %[b]    \n\t"            \
3251
        "addc %[l], %[l], 16    \n\t"            \
3252
        "adde %[h], %[h], 17    \n\t"            \
3253
        "addze  %[o], %[o]    \n\t"            \
3254
        "addc %[l], %[l], 16    \n\t"            \
3255
        "adde %[h], %[h], 17    \n\t"            \
3256
        "addze  %[o], %[o]    \n\t"            \
3257
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3258
        : [a] "r" (va), [b] "r" (vb)                     \
3259
        : "16", "17", "cc"                               \
3260
    )
3261
/* Multiply va by vb and add double size result twice into: vo | vh | vl
3262
 * Assumes first add will not overflow vh | vl
3263
 *
 * PPC 32-bit version. The product is computed once into fixed scratch
 * registers 16 (low word) and 17 (high word) and accumulated twice with
 * addc/adde. Only the carry of the second accumulation is added into vo
 * (single addze), which is why the first add must not overflow.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3264
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3265
    __asm__ __volatile__ (                               \
3266
        "mullw  16, %[a], %[b]    \n\t"            \
3267
        "mulhwu 17, %[a], %[b]    \n\t"            \
3268
        "addc %[l], %[l], 16    \n\t"            \
3269
        "adde %[h], %[h], 17    \n\t"            \
3270
        "addc %[l], %[l], 16    \n\t"            \
3271
        "adde %[h], %[h], 17    \n\t"            \
3272
        "addze  %[o], %[o]    \n\t"            \
3273
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3274
        : [a] "r" (va), [b] "r" (vb)                     \
3275
        : "16", "17", "cc"                               \
3276
    )
3277
/* Square va and store double size result in: vh | vl
 *
 * PPC 32-bit version. mullw produces the low word of va * va and mulhwu the
 * high word (unsigned).
 * No scratch registers are used. A "memory" clobber is declared although the
 * instructions shown only operate on registers.
 */
3278
#define SP_ASM_SQR(vl, vh, va)                           \
3279
    __asm__ __volatile__ (                               \
3280
        "mullw  %[l], %[a], %[a]  \n\t"            \
3281
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3282
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3283
        : [a] "r" (va)                                   \
3284
        : "memory"                                       \
3285
    )
3286
/* Square va and add double size result into: vo | vh | vl
 *
 * PPC 32-bit version. The square is computed into fixed scratch registers
 * 16 (low word) and 17 (high word). It is then accumulated with addc/adde,
 * and addze adds the final carry into vo.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3287
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3288
    __asm__ __volatile__ (                               \
3289
        "mullw  16, %[a], %[a]    \n\t"            \
3290
        "mulhwu 17, %[a], %[a]    \n\t"            \
3291
        "addc %[l], %[l], 16    \n\t"            \
3292
        "adde %[h], %[h], 17    \n\t"            \
3293
        "addze  %[o], %[o]    \n\t"            \
3294
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3295
        : [a] "r" (va)                                   \
3296
        : "16", "17", "cc"                               \
3297
    )
3298
/* Square va and add double size result into: vh | vl
 *
 * PPC 32-bit version. The square is computed into fixed scratch registers
 * 16 (low word) and 17 (high word) and accumulated with addc/adde.
 * There is no third word: any carry out of vh is not captured.
 * Registers 16 and 17 and "cc" are declared as clobbered.
 */
3299
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3300
    __asm__ __volatile__ (                               \
3301
        "mullw  16, %[a], %[a]    \n\t"            \
3302
        "mulhwu 17, %[a], %[a]    \n\t"            \
3303
        "addc %[l], %[l], 16    \n\t"            \
3304
        "adde %[h], %[h], 17    \n\t"            \
3305
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3306
        : [a] "r" (va)                                   \
3307
        : "16", "17", "cc"                               \
3308
    )
3309
/* Add va into: vh | vl
 *
 * PPC 32-bit version. addc adds va to the low word and records the carry;
 * addze then adds just that carry into the high word.
 * No scratch registers are used; "cc" is declared as clobbered.
 */
3310
#define SP_ASM_ADDC(vl, vh, va)                          \
3311
    __asm__ __volatile__ (                               \
3312
        "addc %[l], %[l], %[a]  \n\t"            \
3313
        "addze  %[h], %[h]    \n\t"            \
3314
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3315
        : [a] "r" (va)                                   \
3316
        : "cc"                                           \
3317
    )
3318
/* Sub va from: vh | vl
 *
 * PPC 32-bit version. subfc computes vl - va (operands are "subtract from"
 * order) and records the carry; register 16 is zeroed and subfe then
 * subtracts it from vh with the carry applied, so only the borrow reaches the
 * high word.
 * Register 16 and "cc" are declared as clobbered.
 */
3319
#define SP_ASM_SUBC(vl, vh, va)                          \
3320
    __asm__ __volatile__ (                               \
3321
        "subfc  %[l], %[a], %[l]  \n\t"            \
3322
        "li 16, 0     \n\t"            \
3323
        "subfe  %[h], 16, %[h]    \n\t"            \
3324
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3325
        : [a] "r" (va)                                   \
3326
        : "16", "cc"                                     \
3327
    )
3328
/* Add two times vc | vb | va into vo | vh | vl
 *
 * PPC 32-bit version. The three-word value is not doubled first; instead the
 * same addc/adde/adde chain is executed twice, each time rippling the carry
 * from vl through vh into vo.
 * No scratch registers are used; "cc" is declared as clobbered.
 */
3329
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3330
    __asm__ __volatile__ (                               \
3331
        "addc %[l], %[l], %[a]  \n\t"            \
3332
        "adde %[h], %[h], %[b]  \n\t"            \
3333
        "adde %[o], %[o], %[c]  \n\t"            \
3334
        "addc %[l], %[l], %[a]  \n\t"            \
3335
        "adde %[h], %[h], %[b]  \n\t"            \
3336
        "adde %[o], %[o], %[c]  \n\t"            \
3337
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3338
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3339
        : "cc"                                           \
3340
    )
3341
3342
#define SP_INT_ASM_AVAILABLE
3343
3344
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 64 */
3345
3346
    #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
3347
/*
3348
 * CPU: MIPS 64-bit
3349
 */
3350
3351
/* Multiply va by vb and store double size result in: vh | vl
 *
 * dmultu performs the unsigned multiply; mflo then copies the low word of the
 * product to vl and mfhi copies the high word to vh. The instructions only
 * write vl and vh, although both are declared as read-write ("+r") operands.
 *
 * va, vb: read-only register operands ("r").
 * Clobber list: "memory", "$lo", "$hi".
 */
3352
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3353
    __asm__ __volatile__ (                               \
3354
        "dmultu %[a], %[b]    \n\t"            \
3355
        "mflo %[l]      \n\t"            \
3356
        "mfhi %[h]      \n\t"            \
3357
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3358
        : [a] "r" (va), [b] "r" (vb)                     \
3359
        : "memory", "$lo", "$hi"                         \
3360
    )
3361
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The low word of the product goes to vl (mflo), the high word to vh (mfhi),
 * and vo is set to zero by copying register $0 into it.
 *
 * vl, vh: declared read-write ("+r"); only written by the instructions.
 * vo:     write-only register operand ("=r").
 * va, vb: read-only register operands ("r").
 * Clobber list: "$lo", "$hi".
 */
3362
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3363
    __asm__ __volatile__ (                               \
3364
        "dmultu %[a], %[b]    \n\t"            \
3365
        "mflo %[l]      \n\t"            \
3366
        "mfhi %[h]      \n\t"            \
3367
        "move %[o], $0    \n\t"            \
3368
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3369
        : [a] "r" (va), [b] "r" (vb)                     \
3370
        : "$lo", "$hi"                                   \
3371
    )
3372
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is read into scratch registers $10 (low word) and $11 (high
 * word). After each addition sltu compares the sum with the value just added,
 * leaving 1 in $12 when the addition wrapped (the carry):
 *   vl += $10; carry added into vh; carry out of that added into vo;
 *   vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3373
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3374
    __asm__ __volatile__ (                               \
3375
        "dmultu %[a], %[b]    \n\t"            \
3376
        "mflo $10     \n\t"            \
3377
        "mfhi $11     \n\t"            \
3378
        "daddu  %[l], %[l], $10   \n\t"            \
3379
        "sltu $12, %[l], $10    \n\t"            \
3380
        "daddu  %[h], %[h], $12   \n\t"            \
3381
        "sltu $12, %[h], $12    \n\t"            \
3382
        "daddu  %[o], %[o], $12   \n\t"            \
3383
        "daddu  %[h], %[h], $11   \n\t"            \
3384
        "sltu $12, %[h], $11    \n\t"            \
3385
        "daddu  %[o], %[o], $12   \n\t"            \
3386
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3387
        : [a] "r" (va), [b] "r" (vb)                     \
3388
        : "$10", "$11", "$12", "$lo", "$hi"              \
3389
    )
3390
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is read into $10 (low word) and $11 (high word). vl += $10,
 * sltu leaves the carry of that addition in $12, then vh receives $11 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va, vb: read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3391
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3392
    __asm__ __volatile__ (                               \
3393
        "dmultu %[a], %[b]    \n\t"            \
3394
        "mflo $10     \n\t"            \
3395
        "mfhi $11     \n\t"            \
3396
        "daddu  %[l], %[l], $10   \n\t"            \
3397
        "sltu $12, %[l], $10    \n\t"            \
3398
        "daddu  %[h], %[h], $11   \n\t"            \
3399
        "daddu  %[h], %[h], $12   \n\t"            \
3400
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3401
        : [a] "r" (va), [b] "r" (vb)                     \
3402
        : "$10", "$11", "$12", "$lo", "$hi"              \
3403
    )
3404
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once ($10 = low word, $11 = high word) and the same
 * eight-instruction add sequence is issued twice. In each pass:
 *   vl += $10; carry (sltu into $12) added into vh; carry out of that added
 *   into vo; vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3405
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3406
    __asm__ __volatile__ (                               \
3407
        "dmultu %[a], %[b]    \n\t"            \
3408
        "mflo $10     \n\t"            \
3409
        "mfhi $11     \n\t"            \
3410
        "daddu  %[l], %[l], $10   \n\t"            \
3411
        "sltu $12, %[l], $10    \n\t"            \
3412
        "daddu  %[h], %[h], $12   \n\t"            \
3413
        "sltu $12, %[h], $12    \n\t"            \
3414
        "daddu  %[o], %[o], $12   \n\t"            \
3415
        "daddu  %[h], %[h], $11   \n\t"            \
3416
        "sltu $12, %[h], $11    \n\t"            \
3417
        "daddu  %[o], %[o], $12   \n\t"            \
3418
        "daddu  %[l], %[l], $10   \n\t"            \
3419
        "sltu $12, %[l], $10    \n\t"            \
3420
        "daddu  %[h], %[h], $12   \n\t"            \
3421
        "sltu $12, %[h], $12    \n\t"            \
3422
        "daddu  %[o], %[o], $12   \n\t"            \
3423
        "daddu  %[h], %[h], $11   \n\t"            \
3424
        "sltu $12, %[h], $11    \n\t"            \
3425
        "daddu  %[o], %[o], $12   \n\t"            \
3426
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3427
        : [a] "r" (va), [b] "r" (vb)                     \
3428
        : "$10", "$11", "$12", "$lo", "$hi"              \
3429
    )
3430
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once ($10 = low word, $11 = high word).
 * First pass:  vl += $10, then vh += $11 and vh += carry of the low add;
 *              nothing is added to vo.
 * Second pass: vl += $10; carry (sltu into $12) added into vh; carry out of
 *              that added into vo; vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3433
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3434
    __asm__ __volatile__ (                               \
3435
        "dmultu %[a], %[b]    \n\t"            \
3436
        "mflo $10     \n\t"            \
3437
        "mfhi $11     \n\t"            \
3438
        "daddu  %[l], %[l], $10   \n\t"            \
3439
        "sltu $12, %[l], $10    \n\t"            \
3440
        "daddu  %[h], %[h], $11   \n\t"            \
3441
        "daddu  %[h], %[h], $12   \n\t"            \
3442
        "daddu  %[l], %[l], $10   \n\t"            \
3443
        "sltu $12, %[l], $10    \n\t"            \
3444
        "daddu  %[h], %[h], $12   \n\t"            \
3445
        "sltu $12, %[h], $12    \n\t"            \
3446
        "daddu  %[o], %[o], $12   \n\t"            \
3447
        "daddu  %[h], %[h], $11   \n\t"            \
3448
        "sltu $12, %[h], $11    \n\t"            \
3449
        "daddu  %[o], %[o], $12   \n\t"            \
3450
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3451
        : [a] "r" (va), [b] "r" (vb)                     \
3452
        : "$10", "$11", "$12", "$lo", "$hi"              \
3453
    )
3454
/* Square va and store double size result in: vh | vl
 *
 * dmultu multiplies va by itself; mflo copies the low word of the product to
 * vl and mfhi copies the high word to vh. The instructions only write vl and
 * vh, although both are declared as read-write ("+r") operands.
 *
 * va: read-only register operand ("r").
 * Clobber list: "memory", "$lo", "$hi".
 */
3455
#define SP_ASM_SQR(vl, vh, va)                           \
3456
    __asm__ __volatile__ (                               \
3457
        "dmultu %[a], %[a]    \n\t"            \
3458
        "mflo %[l]      \n\t"            \
3459
        "mfhi %[h]      \n\t"            \
3460
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3461
        : [a] "r" (va)                                   \
3462
        : "memory", "$lo", "$hi"                         \
3463
    )
3464
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is read into scratch registers $10 (low word) and $11 (high
 * word). After each addition sltu leaves the carry in $12:
 *   vl += $10; carry added into vh; carry out of that added into vo;
 *   vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va:         read-only register operand ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3465
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3466
    __asm__ __volatile__ (                               \
3467
        "dmultu %[a], %[a]    \n\t"            \
3468
        "mflo $10     \n\t"            \
3469
        "mfhi $11     \n\t"            \
3470
        "daddu  %[l], %[l], $10   \n\t"            \
3471
        "sltu $12, %[l], $10    \n\t"            \
3472
        "daddu  %[h], %[h], $12   \n\t"            \
3473
        "sltu $12, %[h], $12    \n\t"            \
3474
        "daddu  %[o], %[o], $12   \n\t"            \
3475
        "daddu  %[h], %[h], $11   \n\t"            \
3476
        "sltu $12, %[h], $11    \n\t"            \
3477
        "daddu  %[o], %[o], $12   \n\t"            \
3478
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3479
        : [a] "r" (va)                                   \
3480
        : "$10", "$11", "$12", "$lo", "$hi"              \
3481
    )
3482
/* Square va and add double size result into: vh | vl
 *
 * The square is read into $10 (low word) and $11 (high word). vl += $10,
 * sltu leaves the carry of that addition in $12, then vh receives $11 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$10", "$11", "$12", "$lo", "$hi".
 */
3483
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3484
    __asm__ __volatile__ (                               \
3485
        "dmultu %[a], %[a]    \n\t"            \
3486
        "mflo $10     \n\t"            \
3487
        "mfhi $11     \n\t"            \
3488
        "daddu  %[l], %[l], $10   \n\t"            \
3489
        "sltu $12, %[l], $10    \n\t"            \
3490
        "daddu  %[h], %[h], $11   \n\t"            \
3491
        "daddu  %[h], %[h], $12   \n\t"            \
3492
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3493
        : [a] "r" (va)                                   \
3494
        : "$10", "$11", "$12", "$lo", "$hi"              \
3495
    )
3496
/* Add va into: vh | vl
 *
 * vl += va; sltu sets $12 to 1 when the new vl is less than va (the addition
 * wrapped), and that carry is added to vh. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$12".
 */
3497
#define SP_ASM_ADDC(vl, vh, va)                          \
3498
    __asm__ __volatile__ (                               \
3499
        "daddu  %[l], %[l], %[a]  \n\t"            \
3500
        "sltu $12, %[l], %[a]   \n\t"            \
3501
        "daddu  %[h], %[h], $12   \n\t"            \
3502
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3503
        : [a] "r" (va)                                   \
3504
        : "$12"                                          \
3505
    )
3506
/* Sub va from: vh | vl
 *
 * The original vl is saved in $12 and vl becomes $12 - va. sltu then sets $12
 * to 1 when the saved value is less than the new vl (the subtraction wrapped),
 * and that borrow is subtracted from vh.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$12".
 */
3507
#define SP_ASM_SUBC(vl, vh, va)                          \
3508
    __asm__ __volatile__ (                               \
3509
        "move $12, %[l]   \n\t"            \
3510
        "dsubu  %[l], $12, %[a]   \n\t"            \
3511
        "sltu $12, $12, %[l]    \n\t"            \
3512
        "dsubu  %[h], %[h], $12   \n\t"            \
3513
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3514
        : [a] "r" (va)                                   \
3515
        : "$12"                                          \
3516
    )
3517
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine-instruction sequence is issued twice. In each pass:
 *   vl += va; carry (sltu into $12) added into vh; carry out of that added
 *   into vo; vh += vb; vo += vc; carry from adding vb added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb, vc: read-only register operands ("r").
 * Clobber list: "$12".
 */
3518
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3519
    __asm__ __volatile__ (                               \
3520
        "daddu  %[l], %[l], %[a]  \n\t"            \
3521
        "sltu $12, %[l], %[a]   \n\t"            \
3522
        "daddu  %[h], %[h], $12   \n\t"            \
3523
        "sltu $12, %[h], $12    \n\t"            \
3524
        "daddu  %[o], %[o], $12   \n\t"            \
3525
        "daddu  %[h], %[h], %[b]  \n\t"            \
3526
        "sltu $12, %[h], %[b]   \n\t"            \
3527
        "daddu  %[o], %[o], %[c]  \n\t"            \
3528
        "daddu  %[o], %[o], $12   \n\t"            \
3529
        "daddu  %[l], %[l], %[a]  \n\t"            \
3530
        "sltu $12, %[l], %[a]   \n\t"            \
3531
        "daddu  %[h], %[h], $12   \n\t"            \
3532
        "sltu $12, %[h], $12    \n\t"            \
3533
        "daddu  %[o], %[o], $12   \n\t"            \
3534
        "daddu  %[h], %[h], %[b]  \n\t"            \
3535
        "sltu $12, %[h], %[b]   \n\t"            \
3536
        "daddu  %[o], %[o], %[c]  \n\t"            \
3537
        "daddu  %[o], %[o], $12   \n\t"            \
3538
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3539
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3540
        : "$12"                                          \
3541
    )
3542
3543
#define SP_INT_ASM_AVAILABLE
3544
3545
    #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
3546
3547
    #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
3548
/*
3549
 * CPU: MIPS 32-bit
3550
 */
3551
3552
/* Multiply va by vb and store double size result in: vh | vl
 *
 * multu performs the unsigned multiply; mflo then copies the low word of the
 * product to vl and mfhi copies the high word to vh. The instructions only
 * write vl and vh, although both are declared as read-write ("+r") operands.
 *
 * va, vb: read-only register operands ("r").
 * Clobber list: "memory", "%lo", "%hi".
 */
3553
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3554
    __asm__ __volatile__ (                               \
3555
        "multu  %[a], %[b]    \n\t"            \
3556
        "mflo %[l]      \n\t"            \
3557
        "mfhi %[h]      \n\t"            \
3558
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3559
        : [a] "r" (va), [b] "r" (vb)                     \
3560
        : "memory", "%lo", "%hi"                         \
3561
    )
3562
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The low word of the product goes to vl (mflo), the high word to vh (mfhi),
 * and vo is set to zero by copying register $0 into it.
 *
 * vl, vh: declared read-write ("+r"); only written by the instructions.
 * vo:     write-only register operand ("=r").
 * va, vb: read-only register operands ("r").
 * Clobber list: "%lo", "%hi".
 */
3563
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3564
    __asm__ __volatile__ (                               \
3565
        "multu  %[a], %[b]    \n\t"            \
3566
        "mflo %[l]      \n\t"            \
3567
        "mfhi %[h]      \n\t"            \
3568
        "move %[o], $0    \n\t"            \
3569
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3570
        : [a] "r" (va), [b] "r" (vb)                     \
3571
        : "%lo", "%hi"                                   \
3572
    )
3573
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is read into scratch registers $10 (low word) and $11 (high
 * word). After each addition sltu compares the sum with the value just added,
 * leaving 1 in $12 when the addition wrapped (the carry):
 *   vl += $10; carry added into vh; carry out of that added into vo;
 *   vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3574
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3575
    __asm__ __volatile__ (                               \
3576
        "multu  %[a], %[b]    \n\t"            \
3577
        "mflo $10     \n\t"            \
3578
        "mfhi $11     \n\t"            \
3579
        "addu %[l], %[l], $10   \n\t"            \
3580
        "sltu $12, %[l], $10    \n\t"            \
3581
        "addu %[h], %[h], $12   \n\t"            \
3582
        "sltu $12, %[h], $12    \n\t"            \
3583
        "addu %[o], %[o], $12   \n\t"            \
3584
        "addu %[h], %[h], $11   \n\t"            \
3585
        "sltu $12, %[h], $11    \n\t"            \
3586
        "addu %[o], %[o], $12   \n\t"            \
3587
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3588
        : [a] "r" (va), [b] "r" (vb)                     \
3589
        : "$10", "$11", "$12", "%lo", "%hi"              \
3590
    )
3591
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is read into $10 (low word) and $11 (high word). vl += $10,
 * sltu leaves the carry of that addition in $12, then vh receives $11 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va, vb: read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3592
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3593
    __asm__ __volatile__ (                               \
3594
        "multu  %[a], %[b]    \n\t"            \
3595
        "mflo $10     \n\t"            \
3596
        "mfhi $11     \n\t"            \
3597
        "addu %[l], %[l], $10   \n\t"            \
3598
        "sltu $12, %[l], $10    \n\t"            \
3599
        "addu %[h], %[h], $11   \n\t"            \
3600
        "addu %[h], %[h], $12   \n\t"            \
3601
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3602
        : [a] "r" (va), [b] "r" (vb)                     \
3603
        : "$10", "$11", "$12", "%lo", "%hi"              \
3604
    )
3605
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once ($10 = low word, $11 = high word) and the same
 * eight-instruction add sequence is issued twice. In each pass:
 *   vl += $10; carry (sltu into $12) added into vh; carry out of that added
 *   into vo; vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3606
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3607
    __asm__ __volatile__ (                               \
3608
        "multu  %[a], %[b]    \n\t"            \
3609
        "mflo $10     \n\t"            \
3610
        "mfhi $11     \n\t"            \
3611
        "addu %[l], %[l], $10   \n\t"            \
3612
        "sltu $12, %[l], $10    \n\t"            \
3613
        "addu %[h], %[h], $12   \n\t"            \
3614
        "sltu $12, %[h], $12    \n\t"            \
3615
        "addu %[o], %[o], $12   \n\t"            \
3616
        "addu %[h], %[h], $11   \n\t"            \
3617
        "sltu $12, %[h], $11    \n\t"            \
3618
        "addu %[o], %[o], $12   \n\t"            \
3619
        "addu %[l], %[l], $10   \n\t"            \
3620
        "sltu $12, %[l], $10    \n\t"            \
3621
        "addu %[h], %[h], $12   \n\t"            \
3622
        "sltu $12, %[h], $12    \n\t"            \
3623
        "addu %[o], %[o], $12   \n\t"            \
3624
        "addu %[h], %[h], $11   \n\t"            \
3625
        "sltu $12, %[h], $11    \n\t"            \
3626
        "addu %[o], %[o], $12   \n\t"            \
3627
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3628
        : [a] "r" (va), [b] "r" (vb)                     \
3629
        : "$10", "$11", "$12", "%lo", "%hi"              \
3630
    )
3631
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once ($10 = low word, $11 = high word).
 * First pass:  vl += $10, then vh += $11 and vh += carry of the low add;
 *              nothing is added to vo.
 * Second pass: vl += $10; carry (sltu into $12) added into vh; carry out of
 *              that added into vo; vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3634
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3635
    __asm__ __volatile__ (                               \
3636
        "multu  %[a], %[b]    \n\t"            \
3637
        "mflo $10     \n\t"            \
3638
        "mfhi $11     \n\t"            \
3639
        "addu %[l], %[l], $10   \n\t"            \
3640
        "sltu $12, %[l], $10    \n\t"            \
3641
        "addu %[h], %[h], $11   \n\t"            \
3642
        "addu %[h], %[h], $12   \n\t"            \
3643
        "addu %[l], %[l], $10   \n\t"            \
3644
        "sltu $12, %[l], $10    \n\t"            \
3645
        "addu %[h], %[h], $12   \n\t"            \
3646
        "sltu $12, %[h], $12    \n\t"            \
3647
        "addu %[o], %[o], $12   \n\t"            \
3648
        "addu %[h], %[h], $11   \n\t"            \
3649
        "sltu $12, %[h], $11    \n\t"            \
3650
        "addu %[o], %[o], $12   \n\t"            \
3651
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3652
        : [a] "r" (va), [b] "r" (vb)                     \
3653
        : "$10", "$11", "$12", "%lo", "%hi"              \
3654
    )
3655
/* Square va and store double size result in: vh | vl
 *
 * multu multiplies va by itself; mflo copies the low word of the product to
 * vl and mfhi copies the high word to vh. The instructions only write vl and
 * vh, although both are declared as read-write ("+r") operands.
 *
 * va: read-only register operand ("r").
 * Clobber list: "memory", "%lo", "%hi".
 */
3656
#define SP_ASM_SQR(vl, vh, va)                           \
3657
    __asm__ __volatile__ (                               \
3658
        "multu  %[a], %[a]    \n\t"            \
3659
        "mflo %[l]      \n\t"            \
3660
        "mfhi %[h]      \n\t"            \
3661
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3662
        : [a] "r" (va)                                   \
3663
        : "memory", "%lo", "%hi"                         \
3664
    )
3665
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is read into scratch registers $10 (low word) and $11 (high
 * word). After each addition sltu leaves the carry in $12:
 *   vl += $10; carry added into vh; carry out of that added into vo;
 *   vh += $11; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va:         read-only register operand ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3666
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3667
    __asm__ __volatile__ (                               \
3668
        "multu  %[a], %[a]    \n\t"            \
3669
        "mflo $10     \n\t"            \
3670
        "mfhi $11     \n\t"            \
3671
        "addu %[l], %[l], $10   \n\t"            \
3672
        "sltu $12, %[l], $10    \n\t"            \
3673
        "addu %[h], %[h], $12   \n\t"            \
3674
        "sltu $12, %[h], $12    \n\t"            \
3675
        "addu %[o], %[o], $12   \n\t"            \
3676
        "addu %[h], %[h], $11   \n\t"            \
3677
        "sltu $12, %[h], $11    \n\t"            \
3678
        "addu %[o], %[o], $12   \n\t"            \
3679
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3680
        : [a] "r" (va)                                   \
3681
        : "$10", "$11", "$12", "%lo", "%hi"              \
3682
    )
3683
/* Square va and add double size result into: vh | vl
 *
 * The square is read into $10 (low word) and $11 (high word). vl += $10,
 * sltu leaves the carry of that addition in $12, then vh receives $11 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$10", "$11", "$12", "%lo", "%hi".
 */
3684
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3685
    __asm__ __volatile__ (                               \
3686
        "multu  %[a], %[a]    \n\t"            \
3687
        "mflo $10     \n\t"            \
3688
        "mfhi $11     \n\t"            \
3689
        "addu %[l], %[l], $10   \n\t"            \
3690
        "sltu $12, %[l], $10    \n\t"            \
3691
        "addu %[h], %[h], $11   \n\t"            \
3692
        "addu %[h], %[h], $12   \n\t"            \
3693
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3694
        : [a] "r" (va)                                   \
3695
        : "$10", "$11", "$12", "%lo", "%hi"              \
3696
    )
3697
/* Add va into: vh | vl
 *
 * vl += va; sltu sets $12 to 1 when the new vl is less than va (the addition
 * wrapped), and that carry is added to vh. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$12".
 */
3698
#define SP_ASM_ADDC(vl, vh, va)                          \
3699
    __asm__ __volatile__ (                               \
3700
        "addu %[l], %[l], %[a]  \n\t"            \
3701
        "sltu $12, %[l], %[a]   \n\t"            \
3702
        "addu %[h], %[h], $12   \n\t"            \
3703
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3704
        : [a] "r" (va)                                   \
3705
        : "$12"                                          \
3706
    )
3707
/* Sub va from: vh | vl
 *
 * The original vl is saved in $12 and vl becomes $12 - va. sltu then sets $12
 * to 1 when the saved value is less than the new vl (the subtraction wrapped),
 * and that borrow is subtracted from vh.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "$12".
 */
3708
#define SP_ASM_SUBC(vl, vh, va)                          \
3709
    __asm__ __volatile__ (                               \
3710
        "move $12, %[l]   \n\t"            \
3711
        "subu %[l], $12, %[a]   \n\t"            \
3712
        "sltu $12, $12, %[l]    \n\t"            \
3713
        "subu %[h], %[h], $12   \n\t"            \
3714
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3715
        : [a] "r" (va)                                   \
3716
        : "$12"                                          \
3717
    )
3718
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine-instruction sequence is issued twice. In each pass:
 *   vl += va; carry (sltu into $12) added into vh; carry out of that added
 *   into vo; vh += vb; vo += vc; carry from adding vb added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb, vc: read-only register operands ("r").
 * Clobber list: "$12".
 */
3719
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3720
    __asm__ __volatile__ (                               \
3721
        "addu %[l], %[l], %[a]  \n\t"            \
3722
        "sltu $12, %[l], %[a]   \n\t"            \
3723
        "addu %[h], %[h], $12   \n\t"            \
3724
        "sltu $12, %[h], $12    \n\t"            \
3725
        "addu %[o], %[o], $12   \n\t"            \
3726
        "addu %[h], %[h], %[b]  \n\t"            \
3727
        "sltu $12, %[h], %[b]   \n\t"            \
3728
        "addu %[o], %[o], %[c]  \n\t"            \
3729
        "addu %[o], %[o], $12   \n\t"            \
3730
        "addu %[l], %[l], %[a]  \n\t"            \
3731
        "sltu $12, %[l], %[a]   \n\t"            \
3732
        "addu %[h], %[h], $12   \n\t"            \
3733
        "sltu $12, %[h], $12    \n\t"            \
3734
        "addu %[o], %[o], $12   \n\t"            \
3735
        "addu %[h], %[h], %[b]  \n\t"            \
3736
        "sltu $12, %[h], %[b]   \n\t"            \
3737
        "addu %[o], %[o], %[c]  \n\t"            \
3738
        "addu %[o], %[o], $12   \n\t"            \
3739
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3740
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3741
        : "$12"                                          \
3742
    )
3743
3744
#define SP_INT_ASM_AVAILABLE
3745
3746
    #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
3747
3748
    #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
3749
/*
3750
 * CPU: RISCV 64-bit
3751
 */
3752
3753
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low word of the product to vl; mulhu writes the high word of
 * the unsigned product to vh. The instructions only write vl and vh, although
 * both are declared as read-write ("+r") operands.
 *
 * va, vb: read-only register operands ("r").
 * Clobber list: "memory".
 */
3754
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3755
    __asm__ __volatile__ (                               \
3756
        "mul  %[l], %[a], %[b]  \n\t"            \
3757
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3758
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3759
        : [a] "r" (va), [b] "r" (vb)                     \
3760
        : "memory"                                       \
3761
    )
3762
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * mulhu writes the high word of the unsigned product to vh, mul writes the
 * low word to vl, and vo is set to zero by adding the zero register to
 * itself.
 *
 * vl, vh: declared read-write ("+r"); only written by the instructions.
 * vo:     write-only register operand ("=r").
 * va, vb: read-only register operands ("r").
 * Clobber list: empty.
 */
3763
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3764
    __asm__ __volatile__ (                               \
3765
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3766
        "mul  %[l], %[a], %[b]  \n\t"            \
3767
        "add  %[o], zero, zero  \n\t"            \
3768
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3769
        : [a] "r" (va), [b] "r" (vb)                     \
3770
        :                                                \
3771
    )
3772
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is computed into scratch registers a5 (low word, mul) and a6
 * (high word, mulhu). After each addition sltu compares the sum with the
 * value just added, leaving 1 in a7 when the addition wrapped (the carry):
 *   vl += a5; carry added into vh; carry out of that added into vo;
 *   vh += a6; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3773
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3774
    __asm__ __volatile__ (                               \
3775
        "mul  a5, %[a], %[b]    \n\t"            \
3776
        "mulhu  a6, %[a], %[b]    \n\t"            \
3777
        "add  %[l], %[l], a5    \n\t"            \
3778
        "sltu a7, %[l], a5    \n\t"            \
3779
        "add  %[h], %[h], a7    \n\t"            \
3780
        "sltu a7, %[h], a7    \n\t"            \
3781
        "add  %[o], %[o], a7    \n\t"            \
3782
        "add  %[h], %[h], a6    \n\t"            \
3783
        "sltu a7, %[h], a6    \n\t"            \
3784
        "add  %[o], %[o], a7    \n\t"            \
3785
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3786
        : [a] "r" (va), [b] "r" (vb)                     \
3787
        : "a5", "a6", "a7"                               \
3788
    )
3789
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is computed into a5 (low word) and a6 (high word). vl += a5,
 * sltu leaves the carry of that addition in a7, then vh receives a6 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va, vb: read-only register operands ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3790
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3791
    __asm__ __volatile__ (                               \
3792
        "mul  a5, %[a], %[b]    \n\t"            \
3793
        "mulhu  a6, %[a], %[b]    \n\t"            \
3794
        "add  %[l], %[l], a5    \n\t"            \
3795
        "sltu a7, %[l], a5    \n\t"            \
3796
        "add  %[h], %[h], a6    \n\t"            \
3797
        "add  %[h], %[h], a7    \n\t"            \
3798
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3799
        : [a] "r" (va), [b] "r" (vb)                     \
3800
        : "a5", "a6", "a7"                               \
3801
    )
3802
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once (a5 = low word, a6 = high word) and the same
 * eight-instruction add sequence is issued twice. In each pass:
 *   vl += a5; carry (sltu into a7) added into vh; carry out of that added
 *   into vo; vh += a6; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3803
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3804
    __asm__ __volatile__ (                               \
3805
        "mul  a5, %[a], %[b]    \n\t"            \
3806
        "mulhu  a6, %[a], %[b]    \n\t"            \
3807
        "add  %[l], %[l], a5    \n\t"            \
3808
        "sltu a7, %[l], a5    \n\t"            \
3809
        "add  %[h], %[h], a7    \n\t"            \
3810
        "sltu a7, %[h], a7    \n\t"            \
3811
        "add  %[o], %[o], a7    \n\t"            \
3812
        "add  %[h], %[h], a6    \n\t"            \
3813
        "sltu a7, %[h], a6    \n\t"            \
3814
        "add  %[o], %[o], a7    \n\t"            \
3815
        "add  %[l], %[l], a5    \n\t"            \
3816
        "sltu a7, %[l], a5    \n\t"            \
3817
        "add  %[h], %[h], a7    \n\t"            \
3818
        "sltu a7, %[h], a7    \n\t"            \
3819
        "add  %[o], %[o], a7    \n\t"            \
3820
        "add  %[h], %[h], a6    \n\t"            \
3821
        "sltu a7, %[h], a6    \n\t"            \
3822
        "add  %[o], %[o], a7    \n\t"            \
3823
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3824
        : [a] "r" (va), [b] "r" (vb)                     \
3825
        : "a5", "a6", "a7"                               \
3826
    )
3827
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once (a5 = low word, a6 = high word).
 * First pass:  vl += a5, then vh += a6 and vh += carry of the low add;
 *              nothing is added to vo.
 * Second pass: vl += a5; carry (sltu into a7) added into vh; carry out of
 *              that added into vo; vh += a6; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb:     read-only register operands ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3830
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3831
    __asm__ __volatile__ (                               \
3832
        "mul  a5, %[a], %[b]    \n\t"            \
3833
        "mulhu  a6, %[a], %[b]    \n\t"            \
3834
        "add  %[l], %[l], a5    \n\t"            \
3835
        "sltu a7, %[l], a5    \n\t"            \
3836
        "add  %[h], %[h], a6    \n\t"            \
3837
        "add  %[h], %[h], a7    \n\t"            \
3838
        "add  %[l], %[l], a5    \n\t"            \
3839
        "sltu a7, %[l], a5    \n\t"            \
3840
        "add  %[h], %[h], a7    \n\t"            \
3841
        "sltu a7, %[h], a7    \n\t"            \
3842
        "add  %[o], %[o], a7    \n\t"            \
3843
        "add  %[h], %[h], a6    \n\t"            \
3844
        "sltu a7, %[h], a6    \n\t"            \
3845
        "add  %[o], %[o], a7    \n\t"            \
3846
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3847
        : [a] "r" (va), [b] "r" (vb)                     \
3848
        : "a5", "a6", "a7"                               \
3849
    )
3850
/* Square va and store double size result in: vh | vl
 *
 * mul writes the low word of va * va to vl; mulhu writes the high word of the
 * unsigned product to vh. The instructions only write vl and vh, although
 * both are declared as read-write ("+r") operands.
 *
 * va: read-only register operand ("r").
 * Clobber list: "memory".
 */
3851
#define SP_ASM_SQR(vl, vh, va)                           \
3852
    __asm__ __volatile__ (                               \
3853
        "mul  %[l], %[a], %[a]  \n\t"            \
3854
        "mulhu  %[h], %[a], %[a]  \n\t"            \
3855
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3856
        : [a] "r" (va)                                   \
3857
        : "memory"                                       \
3858
    )
3859
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is computed into scratch registers a5 (low word) and a6 (high
 * word). After each addition sltu leaves the carry in a7:
 *   vl += a5; carry added into vh; carry out of that added into vo;
 *   vh += a6; carry added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va:         read-only register operand ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3860
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3861
    __asm__ __volatile__ (                               \
3862
        "mul  a5, %[a], %[a]    \n\t"            \
3863
        "mulhu  a6, %[a], %[a]    \n\t"            \
3864
        "add  %[l], %[l], a5    \n\t"            \
3865
        "sltu a7, %[l], a5    \n\t"            \
3866
        "add  %[h], %[h], a7    \n\t"            \
3867
        "sltu a7, %[h], a7    \n\t"            \
3868
        "add  %[o], %[o], a7    \n\t"            \
3869
        "add  %[h], %[h], a6    \n\t"            \
3870
        "sltu a7, %[h], a6    \n\t"            \
3871
        "add  %[o], %[o], a7    \n\t"            \
3872
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3873
        : [a] "r" (va)                                   \
3874
        : "a5", "a6", "a7"                               \
3875
    )
3876
/* Square va and add double size result into: vh | vl
 *
 * The square is computed into a5 (low word) and a6 (high word). vl += a5,
 * sltu leaves the carry of that addition in a7, then vh receives a6 and the
 * carry. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "a5", "a6", "a7".
 */
3877
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3878
    __asm__ __volatile__ (                               \
3879
        "mul  a5, %[a], %[a]    \n\t"            \
3880
        "mulhu  a6, %[a], %[a]    \n\t"            \
3881
        "add  %[l], %[l], a5    \n\t"            \
3882
        "sltu a7, %[l], a5    \n\t"            \
3883
        "add  %[h], %[h], a6    \n\t"            \
3884
        "add  %[h], %[h], a7    \n\t"            \
3885
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3886
        : [a] "r" (va)                                   \
3887
        : "a5", "a6", "a7"                               \
3888
    )
3889
/* Add va into: vh | vl
 *
 * vl += va; sltu sets a7 to 1 when the new vl is less than va (the addition
 * wrapped), and that carry is added to vh. No carry out of vh is computed.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "a7".
 */
3890
#define SP_ASM_ADDC(vl, vh, va)                          \
3891
    __asm__ __volatile__ (                               \
3892
        "add  %[l], %[l], %[a]  \n\t"            \
3893
        "sltu a7, %[l], %[a]    \n\t"            \
3894
        "add  %[h], %[h], a7    \n\t"            \
3895
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3896
        : [a] "r" (va)                                   \
3897
        : "a7"                                           \
3898
    )
3899
/* Sub va from: vh | vl
 *
 * The original vl is copied to a7 (add with the zero register) and vl becomes
 * a7 - va. sltu then sets a7 to 1 when the saved value is less than the new
 * vl (the subtraction wrapped), and that borrow is subtracted from vh.
 *
 * vl, vh: read-write register operands ("+r").
 * va:     read-only register operand ("r").
 * Clobber list: "a7".
 */
3900
#define SP_ASM_SUBC(vl, vh, va)                          \
3901
    __asm__ __volatile__ (                               \
3902
        "add  a7, %[l], zero    \n\t"            \
3903
        "sub  %[l], a7, %[a]    \n\t"            \
3904
        "sltu a7, a7, %[l]    \n\t"            \
3905
        "sub  %[h], %[h], a7    \n\t"            \
3906
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3907
        : [a] "r" (va)                                   \
3908
        : "a7"                                           \
3909
    )
3910
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine-instruction sequence is issued twice. In each pass:
 *   vl += va; carry (sltu into a7) added into vh; carry out of that added
 *   into vo; vh += vb; vo += vc; carry from adding vb added into vo.
 *
 * vl, vh, vo: read-write register operands ("+r").
 * va, vb, vc: read-only register operands ("r").
 * Clobber list: "a7".
 */
3911
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3912
    __asm__ __volatile__ (                               \
3913
        "add  %[l], %[l], %[a]  \n\t"            \
3914
        "sltu a7, %[l], %[a]    \n\t"            \
3915
        "add  %[h], %[h], a7    \n\t"            \
3916
        "sltu a7, %[h], a7    \n\t"            \
3917
        "add  %[o], %[o], a7    \n\t"            \
3918
        "add  %[h], %[h], %[b]  \n\t"            \
3919
        "sltu a7, %[h], %[b]    \n\t"            \
3920
        "add  %[o], %[o], %[c]  \n\t"            \
3921
        "add  %[o], %[o], a7    \n\t"            \
3922
        "add  %[l], %[l], %[a]  \n\t"            \
3923
        "sltu a7, %[l], %[a]    \n\t"            \
3924
        "add  %[h], %[h], a7    \n\t"            \
3925
        "sltu a7, %[h], a7    \n\t"            \
3926
        "add  %[o], %[o], a7    \n\t"            \
3927
        "add  %[h], %[h], %[b]  \n\t"            \
3928
        "sltu a7, %[h], %[b]    \n\t"            \
3929
        "add  %[o], %[o], %[c]  \n\t"            \
3930
        "add  %[o], %[o], a7    \n\t"            \
3931
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3932
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3933
        : "a7"                                           \
3934
    )
3935
3936
#define SP_INT_ASM_AVAILABLE
3937
3938
    #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
3939
3940
    #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
3941
/*
3942
 * CPU: RISCV 32-bit
3943
 */
3944
3945
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low word of the product to vl and mulhu writes the high
 * word of the unsigned product to vh. No scratch registers are used.
 */
3946
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3947
    __asm__ __volatile__ (                               \
3948
        "mul  %[l], %[a], %[b]  \n\t"            \
3949
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3950
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3951
        : [a] "r" (va), [b] "r" (vb)                     \
3952
        : "memory"                                       \
3953
    )
3954
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * mulhu and mul write the high and low words of the product to vh and vl.
 * vo is set to zero by adding the zero register to itself; it is written
 * last, after both multiplies have read va and vb.
 */
3955
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3956
    __asm__ __volatile__ (                               \
3957
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3958
        "mul  %[l], %[a], %[b]  \n\t"            \
3959
        "add  %[o], zero, zero  \n\t"            \
3960
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3961
        : [a] "r" (va), [b] "r" (vb)                     \
3962
        :                                                \
3963
    )
3964
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in a5 (low word) and a6 (high word). After each add
 * the carry is recovered into a7 with sltu (sum < addend) and added to the
 * next word up, ending in vo. a5, a6 and a7 are listed as clobbered.
 */
3965
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3966
    __asm__ __volatile__ (                               \
3967
        "mul  a5, %[a], %[b]    \n\t"            \
3968
        "mulhu  a6, %[a], %[b]    \n\t"            \
3969
        "add  %[l], %[l], a5    \n\t"            \
3970
        "sltu a7, %[l], a5    \n\t"            \
3971
        "add  %[h], %[h], a7    \n\t"            \
3972
        "sltu a7, %[h], a7    \n\t"            \
3973
        "add  %[o], %[o], a7    \n\t"            \
3974
        "add  %[h], %[h], a6    \n\t"            \
3975
        "sltu a7, %[h], a6    \n\t"            \
3976
        "add  %[o], %[o], a7    \n\t"            \
3977
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3978
        : [a] "r" (va), [b] "r" (vb)                     \
3979
        : "a5", "a6", "a7"                               \
3980
    )
3981
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in a5 (low word) and a6 (high word). The carry out
 * of the low word add is recovered into a7 with sltu and added to vh along
 * with a6. Any carry out of vh is not captured. a5, a6 and a7 are listed as
 * clobbered.
 */
3982
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3983
    __asm__ __volatile__ (                               \
3984
        "mul  a5, %[a], %[b]    \n\t"            \
3985
        "mulhu  a6, %[a], %[b]    \n\t"            \
3986
        "add  %[l], %[l], a5    \n\t"            \
3987
        "sltu a7, %[l], a5    \n\t"            \
3988
        "add  %[h], %[h], a6    \n\t"            \
3989
        "add  %[h], %[h], a7    \n\t"            \
3990
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3991
        : [a] "r" (va), [b] "r" (vb)                     \
3992
        : "a5", "a6", "a7"                               \
3993
    )
3994
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into a5 (low word) and a6 (high word) and
 * the same accumulate sequence is then issued twice. After each add the
 * carry is recovered into a7 with sltu and added to the next word up,
 * ending in vo. a5, a6 and a7 are listed as clobbered.
 */
3995
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3996
    __asm__ __volatile__ (                               \
3997
        "mul  a5, %[a], %[b]    \n\t"            \
3998
        "mulhu  a6, %[a], %[b]    \n\t"            \
3999
        "add  %[l], %[l], a5    \n\t"            \
4000
        "sltu a7, %[l], a5    \n\t"            \
4001
        "add  %[h], %[h], a7    \n\t"            \
4002
        "sltu a7, %[h], a7    \n\t"            \
4003
        "add  %[o], %[o], a7    \n\t"            \
4004
        "add  %[h], %[h], a6    \n\t"            \
4005
        "sltu a7, %[h], a6    \n\t"            \
4006
        "add  %[o], %[o], a7    \n\t"            \
4007
        "add  %[l], %[l], a5    \n\t"            \
4008
        "sltu a7, %[l], a5    \n\t"            \
4009
        "add  %[h], %[h], a7    \n\t"            \
4010
        "sltu a7, %[h], a7    \n\t"            \
4011
        "add  %[o], %[o], a7    \n\t"            \
4012
        "add  %[h], %[h], a6    \n\t"            \
4013
        "sltu a7, %[h], a6    \n\t"            \
4014
        "add  %[o], %[o], a7    \n\t"            \
4015
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4016
        : [a] "r" (va), [b] "r" (vb)                     \
4017
        : "a5", "a6", "a7"                               \
4018
    )
4019
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into a5 (low word) and a6 (high word). The
 * first accumulation adds it into vh | vl only, with no carry taken into vo.
 * The second accumulation recovers each carry into a7 with sltu and
 * propagates it up into vo. a5, a6 and a7 are listed as clobbered.
 */
4022
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4023
    __asm__ __volatile__ (                               \
4024
        "mul  a5, %[a], %[b]    \n\t"            \
4025
        "mulhu  a6, %[a], %[b]    \n\t"            \
4026
        "add  %[l], %[l], a5    \n\t"            \
4027
        "sltu a7, %[l], a5    \n\t"            \
4028
        "add  %[h], %[h], a6    \n\t"            \
4029
        "add  %[h], %[h], a7    \n\t"            \
4030
        "add  %[l], %[l], a5    \n\t"            \
4031
        "sltu a7, %[l], a5    \n\t"            \
4032
        "add  %[h], %[h], a7    \n\t"            \
4033
        "sltu a7, %[h], a7    \n\t"            \
4034
        "add  %[o], %[o], a7    \n\t"            \
4035
        "add  %[h], %[h], a6    \n\t"            \
4036
        "sltu a7, %[h], a6    \n\t"            \
4037
        "add  %[o], %[o], a7    \n\t"            \
4038
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4039
        : [a] "r" (va), [b] "r" (vb)                     \
4040
        : "a5", "a6", "a7"                               \
4041
    )
4042
/* Square va and store double size result in: vh | vl
 *
 * mul writes the low word of va * va to vl and mulhu writes the high word
 * of the unsigned product to vh. No scratch registers are used.
 */
4043
#define SP_ASM_SQR(vl, vh, va)                           \
4044
    __asm__ __volatile__ (                               \
4045
        "mul  %[l], %[a], %[a]  \n\t"            \
4046
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4047
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4048
        : [a] "r" (va)                                   \
4049
        : "memory"                                       \
4050
    )
4051
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in a5 (low word) and a6 (high word). After each add
 * the carry is recovered into a7 with sltu (sum < addend) and added to the
 * next word up, ending in vo. a5, a6 and a7 are listed as clobbered.
 */
4052
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4053
    __asm__ __volatile__ (                               \
4054
        "mul  a5, %[a], %[a]    \n\t"            \
4055
        "mulhu  a6, %[a], %[a]    \n\t"            \
4056
        "add  %[l], %[l], a5    \n\t"            \
4057
        "sltu a7, %[l], a5    \n\t"            \
4058
        "add  %[h], %[h], a7    \n\t"            \
4059
        "sltu a7, %[h], a7    \n\t"            \
4060
        "add  %[o], %[o], a7    \n\t"            \
4061
        "add  %[h], %[h], a6    \n\t"            \
4062
        "sltu a7, %[h], a6    \n\t"            \
4063
        "add  %[o], %[o], a7    \n\t"            \
4064
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4065
        : [a] "r" (va)                                   \
4066
        : "a5", "a6", "a7"                               \
4067
    )
4068
/* Square va and add double size result into: vh | vl
 *
 * The square is formed in a5 (low word) and a6 (high word). The carry out
 * of the low word add is recovered into a7 with sltu and added to vh along
 * with a6. Any carry out of vh is not captured. a5, a6 and a7 are listed as
 * clobbered.
 */
4069
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4070
    __asm__ __volatile__ (                               \
4071
        "mul  a5, %[a], %[a]    \n\t"            \
4072
        "mulhu  a6, %[a], %[a]    \n\t"            \
4073
        "add  %[l], %[l], a5    \n\t"            \
4074
        "sltu a7, %[l], a5    \n\t"            \
4075
        "add  %[h], %[h], a6    \n\t"            \
4076
        "add  %[h], %[h], a7    \n\t"            \
4077
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4078
        : [a] "r" (va)                                   \
4079
        : "a5", "a6", "a7"                               \
4080
    )
4081
/* Add va into: vh | vl
 *
 * va is added to vl, the carry is recovered into a7 with sltu
 * (sum < addend) and then added to vh. a7 is listed as clobbered.
 */
4082
#define SP_ASM_ADDC(vl, vh, va)                          \
4083
    __asm__ __volatile__ (                               \
4084
        "add  %[l], %[l], %[a]  \n\t"            \
4085
        "sltu a7, %[l], %[a]    \n\t"            \
4086
        "add  %[h], %[h], a7    \n\t"            \
4087
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4088
        : [a] "r" (va)                                   \
4089
        : "a7"                                           \
4090
    )
4091
/* Sub va from: vh | vl
 *
 * The original low word is saved in a7 and va is subtracted from it. The
 * borrow is recovered with sltu (original low word < new low word) and is
 * then subtracted from vh. a7 is scratch and is listed as clobbered.
 */
4092
#define SP_ASM_SUBC(vl, vh, va)                          \
4093
    __asm__ __volatile__ (                               \
4094
        "add  a7, %[l], zero    \n\t"            \
4095
        "sub  %[l], a7, %[a]    \n\t"            \
4096
        "sltu a7, a7, %[l]    \n\t"            \
4097
        "sub  %[h], %[h], a7    \n\t"            \
4098
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4099
        : [a] "r" (va)                                   \
4100
        : "a7"                                           \
4101
    )
4102
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same nine instruction sequence is issued twice. In each pass va is
 * added to vl and the carry (sltu: sum < addend) is pushed through vh into
 * vo, then vb is added to vh and vc plus the resulting carry is added to vo.
 * a7 holds the carry and is listed as clobbered.
 */
4103
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4104
    __asm__ __volatile__ (                               \
4105
        "add  %[l], %[l], %[a]  \n\t"            \
4106
        "sltu a7, %[l], %[a]    \n\t"            \
4107
        "add  %[h], %[h], a7    \n\t"            \
4108
        "sltu a7, %[h], a7    \n\t"            \
4109
        "add  %[o], %[o], a7    \n\t"            \
4110
        "add  %[h], %[h], %[b]  \n\t"            \
4111
        "sltu a7, %[h], %[b]    \n\t"            \
4112
        "add  %[o], %[o], %[c]  \n\t"            \
4113
        "add  %[o], %[o], a7    \n\t"            \
4114
        "add  %[l], %[l], %[a]  \n\t"            \
4115
        "sltu a7, %[l], %[a]    \n\t"            \
4116
        "add  %[h], %[h], a7    \n\t"            \
4117
        "sltu a7, %[h], a7    \n\t"            \
4118
        "add  %[o], %[o], a7    \n\t"            \
4119
        "add  %[h], %[h], %[b]  \n\t"            \
4120
        "sltu a7, %[h], %[b]    \n\t"            \
4121
        "add  %[o], %[o], %[c]  \n\t"            \
4122
        "add  %[o], %[o], a7    \n\t"            \
4123
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4124
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4125
        : "a7"                                           \
4126
    )
4127
4128
#define SP_INT_ASM_AVAILABLE
4129
4130
    #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4131
4132
    #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4133
/*
4134
 * CPU: IBM s390x
4135
 */
4136
4137
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is copied into r1 and mlgr multiplies it by vb. The result is then
 * copied out of r1 (low word) into vl and out of r0 (high word) into vh.
 * r0 and r1 are listed as clobbered.
 */
4138
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4139
    __asm__ __volatile__ (                               \
4140
        "lgr  %%r1, %[a]    \n\t"            \
4141
        "mlgr %%r0, %[b]    \n\t"            \
4142
        "lgr  %[l], %%r1    \n\t"            \
4143
        "lgr  %[h], %%r0    \n\t"            \
4144
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4145
        : [a] "r" (va), [b] "r" (vb)                     \
4146
        : "memory", "r0", "r1"                           \
4147
    )
4148
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * va is copied into r1 and mlgr multiplies it by vb. vo is loaded with 0
 * and the result is copied out of r1 (low word) into vl and out of r0
 * (high word) into vh. r0 and r1 are listed as clobbered.
 */
4149
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4150
    __asm__ __volatile__ (                               \
4151
        "lgr  %%r1, %[a]    \n\t"            \
4152
        "mlgr %%r0, %[b]    \n\t"            \
4153
        "lghi %[o], 0     \n\t"            \
4154
        "lgr  %[l], %%r1    \n\t"            \
4155
        "lgr  %[h], %%r0    \n\t"            \
4156
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4157
        : [a] "r" (va), [b] "r" (vb)                     \
4158
        : "r0", "r1"                                     \
4159
    )
4160
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * r10 is loaded with 0 so that the final add with carry contributes only
 * the carry to vo. va is copied into r1 and multiplied by vb with mlgr; the
 * product in r1 (low word) and r0 (high word) is then added into vl with
 * algr and into vh with alcgr (add with carry). r0, r1, r10 and the
 * condition code are listed as clobbered.
 */
4161
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4162
    __asm__ __volatile__ (                               \
4163
        "lghi %%r10, 0  \n\t"                    \
4164
        "lgr  %%r1, %[a]    \n\t"            \
4165
        "mlgr %%r0, %[b]    \n\t"            \
4166
        "algr %[l], %%r1  \n\t"                    \
4167
        "alcgr  %[h], %%r0  \n\t"                    \
4168
        "alcgr  %[o], %%r10 \n\t"                    \
4169
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4170
        : [a] "r" (va), [b] "r" (vb)                     \
4171
        : "r0", "r1", "r10", "cc"                        \
4172
    )
4173
/* Multiply va by vb and add double size result into: vh | vl
 *
 * va is copied into r1 and multiplied by vb with mlgr. The product in r1
 * (low word) and r0 (high word) is added into vl with algr and into vh with
 * alcgr (add with carry). Any carry out of vh is not captured. r0, r1 and
 * the condition code are listed as clobbered.
 */
4174
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4175
    __asm__ __volatile__ (                               \
4176
        "lgr  %%r1, %[a]    \n\t"            \
4177
        "mlgr %%r0, %[b]    \n\t"            \
4178
        "algr %[l], %%r1  \n\t"                    \
4179
        "alcgr  %[h], %%r0  \n\t"                    \
4180
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4181
        : [a] "r" (va), [b] "r" (vb)                     \
4182
        : "r0", "r1", "cc"                               \
4183
    )
4184
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once with mlgr into r1 (low word) and r0 (high
 * word). The algr / alcgr / alcgr chain that adds it into vl, vh and vo is
 * then issued twice; r10 holds 0 so the last add of each chain contributes
 * only the carry to vo. r0, r1, r10 and the condition code are listed as
 * clobbered.
 */
4185
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4186
    __asm__ __volatile__ (                               \
4187
        "lghi %%r10, 0  \n\t"                    \
4188
        "lgr  %%r1, %[a]    \n\t"            \
4189
        "mlgr %%r0, %[b]    \n\t"            \
4190
        "algr %[l], %%r1  \n\t"                    \
4191
        "alcgr  %[h], %%r0  \n\t"                    \
4192
        "alcgr  %[o], %%r10 \n\t"                    \
4193
        "algr %[l], %%r1  \n\t"                    \
4194
        "alcgr  %[h], %%r0  \n\t"                    \
4195
        "alcgr  %[o], %%r10 \n\t"                    \
4196
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4197
        : [a] "r" (va), [b] "r" (vb)                     \
4198
        : "r0", "r1", "r10", "cc"                        \
4199
    )
4200
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once with mlgr into r1 (low word) and r0 (high
 * word). The first algr / alcgr pair adds it into vh | vl with no carry
 * taken into vo. The second pair is followed by an alcgr of r10 (0) so that
 * only the carry is added to vo. r0, r1, r10 and the condition code are
 * listed as clobbered.
 */
4203
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4204
    __asm__ __volatile__ (                               \
4205
        "lghi %%r10, 0  \n\t"                    \
4206
        "lgr  %%r1, %[a]    \n\t"            \
4207
        "mlgr %%r0, %[b]    \n\t"            \
4208
        "algr %[l], %%r1  \n\t"                    \
4209
        "alcgr  %[h], %%r0  \n\t"                    \
4210
        "algr %[l], %%r1  \n\t"                    \
4211
        "alcgr  %[h], %%r0  \n\t"                    \
4212
        "alcgr  %[o], %%r10 \n\t"                    \
4213
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4214
        : [a] "r" (va), [b] "r" (vb)                     \
4215
        : "r0", "r1", "r10", "cc"                        \
4216
    )
4217
/* Square va and store double size result in: vh | vl
 *
 * va is copied into r1 and mlgr multiplies r1 by itself. The result is then
 * copied out of r1 (low word) into vl and out of r0 (high word) into vh.
 * r0 and r1 are listed as clobbered.
 */
4218
#define SP_ASM_SQR(vl, vh, va)                           \
4219
    __asm__ __volatile__ (                               \
4220
        "lgr  %%r1, %[a]    \n\t"            \
4221
        "mlgr %%r0, %%r1    \n\t"            \
4222
        "lgr  %[l], %%r1    \n\t"            \
4223
        "lgr  %[h], %%r0    \n\t"            \
4224
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4225
        : [a] "r" (va)                                   \
4226
        : "memory", "r0", "r1"                           \
4227
    )
4228
/* Square va and add double size result into: vo | vh | vl
 *
 * r10 is loaded with 0 so that the final add with carry contributes only
 * the carry to vo. va is copied into r1 and squared with mlgr; the result
 * in r1 (low word) and r0 (high word) is added into vl with algr and into
 * vh with alcgr (add with carry). r0, r1, r10 and the condition code are
 * listed as clobbered.
 */
4229
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4230
    __asm__ __volatile__ (                               \
4231
        "lghi %%r10, 0  \n\t"                    \
4232
        "lgr  %%r1, %[a]    \n\t"            \
4233
        "mlgr %%r0, %%r1    \n\t"            \
4234
        "algr %[l], %%r1  \n\t"                    \
4235
        "alcgr  %[h], %%r0  \n\t"                    \
4236
        "alcgr  %[o], %%r10 \n\t"                    \
4237
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4238
        : [a] "r" (va)                                   \
4239
        : "r0", "r1", "r10", "cc"                        \
4240
    )
4241
/* Square va and add double size result into: vh | vl
 *
 * va is copied into r1 and squared with mlgr. The result in r1 (low word)
 * and r0 (high word) is added into vl with algr and into vh with alcgr (add
 * with carry). Any carry out of vh is not captured. r0, r1 and the
 * condition code are listed as clobbered.
 */
4242
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4243
    __asm__ __volatile__ (                               \
4244
        "lgr  %%r1, %[a]    \n\t"            \
4245
        "mlgr %%r0, %%r1    \n\t"            \
4246
        "algr %[l], %%r1  \n\t"                    \
4247
        "alcgr  %[h], %%r0  \n\t"                    \
4248
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4249
        : [a] "r" (va)                                   \
4250
        : "r0", "r1", "cc"                               \
4251
    )
4252
/* Add va into: vh | vl
 *
 * va is added to vl with algr, then r10 (loaded with 0) is added to vh with
 * alcgr so that only the carry from the low word reaches vh. r10 and the
 * condition code are listed as clobbered.
 */
4253
#define SP_ASM_ADDC(vl, vh, va)                          \
4254
    __asm__ __volatile__ (                               \
4255
        "lghi %%r10, 0  \n\t"                    \
4256
        "algr %[l], %[a]  \n\t"                    \
4257
        "alcgr  %[h], %%r10 \n\t"                    \
4258
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4259
        : [a] "r" (va)                                   \
4260
        : "r10", "cc"                                    \
4261
    )
4262
/* Sub va from: vh | vl
 *
 * va is subtracted from vl with slgr, then r10 (loaded with 0) is
 * subtracted from vh with slbgr (subtract with borrow) so that only the
 * borrow from the low word reaches vh. r10 and the condition code are
 * listed as clobbered.
 */
4263
#define SP_ASM_SUBC(vl, vh, va)                          \
4264
    __asm__ __volatile__ (                               \
4265
        "lghi %%r10, 0  \n\t"                    \
4266
        "slgr %[l], %[a]  \n\t"                    \
4267
        "slbgr  %[h], %%r10 \n\t"                    \
4268
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4269
        : [a] "r" (va)                                   \
4270
        : "r10", "cc"                                    \
4271
    )
4272
/* Add two times vc | vb | va into vo | vh | vl
 *
 * One algr / alcgr / alcgr chain adds va to vl, vb to vh and vc to vo with
 * the carry passed from word to word. The chain is issued twice. No scratch
 * registers are used; only the condition code is listed as clobbered.
 */
4273
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4274
    __asm__ __volatile__ (                               \
4275
        "algr %[l], %[a]  \n\t"                    \
4276
        "alcgr  %[h], %[b]  \n\t"                    \
4277
        "alcgr  %[o], %[c]  \n\t"                    \
4278
        "algr %[l], %[a]  \n\t"                    \
4279
        "alcgr  %[h], %[b]  \n\t"                    \
4280
        "alcgr  %[o], %[c]  \n\t"                    \
4281
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4282
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4283
        : "cc"                                           \
4284
    )
4285
4286
#define SP_INT_ASM_AVAILABLE
4287
4288
    #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
4289
4290
/* When one of the CPU specific sections above defined SP_INT_ASM_AVAILABLE:
 *  - define SQR_MUL_ASM unless SP_INT_NO_ASM is defined.
 *  - fall back to SP_ASM_ADDC for SP_ASM_ADDC_REG when the CPU section did
 *    not supply its own SP_ASM_ADDC_REG.
 */
#ifdef SP_INT_ASM_AVAILABLE
4291
    #ifndef SP_INT_NO_ASM
4292
        #define SQR_MUL_ASM
4293
    #endif
4294
    #ifndef SP_ASM_ADDC_REG
4295
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
4296
    #endif /* SP_ASM_ADDC_REG */
4297
#endif /* SP_INT_ASM_AVAILABLE */
4298
4299
#endif /* !WOLFSSL_NO_ASM */
4300
4301
4302
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
    !defined(NO_DSA) || !defined(NO_DH) || \
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
#ifndef WC_NO_CACHE_RESISTANT
    /* Address masks for constant time operations.
     *
     * Entry 0 has no bits set and entry 1 has every bit set.
     */
    const size_t sp_off_on_addr[2] = { (size_t)0, ~(size_t)0 };
#endif /* !WC_NO_CACHE_RESISTANT */
#endif
4315
4316
4317
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
4318
4319
#ifdef __cplusplus
4320
extern "C" {
4321
#endif
4322
4323
/* Modular exponentiation implementations using Single Precision. */
4324
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
4325
    sp_int* res);
4326
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
4327
    sp_int* res);
4328
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
4329
    sp_int* res);
4330
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
4331
    sp_int* res);
4332
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
4333
    sp_int* res);
4334
4335
#ifdef __cplusplus
4336
} /* extern "C" */
4337
#endif
4338
4339
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
4340
4341
4342
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
4343
static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp);
4344
#endif
4345
4346
/* Set the multi-precision number to zero.
4347
 *
4348
 * Assumes a is not NULL.
4349
 *
4350
 * @param  [out]  a  SP integer to set to zero.
4351
 */
4352
static void _sp_zero(sp_int* a)
4353
206M
{
4354
206M
    a->used = 0;
4355
206M
    a->dp[0] = 0;
4356
#ifdef WOLFSSL_SP_INT_NEGATIVE
4357
    a->sign = MP_ZPOS;
4358
#endif
4359
206M
}
4360
4361
/* Initialize the multi-precision number to be zero.
4362
 *
4363
 * @param  [out]  a  SP integer.
4364
 *
4365
 * @return  MP_OKAY on success.
4366
 * @return  MP_VAL when a is NULL.
4367
 */
4368
int sp_init(sp_int* a)
4369
44.3M
{
4370
44.3M
    int err = MP_OKAY;
4371
4372
44.3M
    if (a == NULL) {
4373
0
        err = MP_VAL;
4374
0
    }
4375
44.3M
    if (err == MP_OKAY) {
4376
    #ifdef HAVE_WOLF_BIGINT
4377
        wc_bigint_init(&a->raw);
4378
    #endif
4379
44.3M
        _sp_zero(a);
4380
44.3M
        a->size = SP_INT_DIGITS;
4381
44.3M
    }
4382
4383
44.3M
    return err;
4384
44.3M
}
4385
4386
/* Initialize the multi-precision number to be zero and have a maximum size.
4387
 *
4388
 * @param  [out]  a     SP integer.
4389
 * @param  [in]   size  Number of words to say are available.
4390
 *
4391
 * @return  MP_OKAY on success.
4392
 * @return  MP_VAL when a is NULL.
4393
 */
4394
int sp_init_size(sp_int* a, int size)
4395
41.4M
{
4396
41.4M
    int err = sp_init(a);
4397
4398
41.4M
    if (err == MP_OKAY) {
4399
41.4M
        a->size = size;
4400
41.4M
    }
4401
4402
41.4M
    return err;
4403
41.4M
}
4404
4405
#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
4406
/* Initialize up to six multi-precision numbers to be zero.
4407
 *
4408
 * @param  [out]  n1  SP integer.
4409
 * @param  [out]  n2  SP integer.
4410
 * @param  [out]  n3  SP integer.
4411
 * @param  [out]  n4  SP integer.
4412
 * @param  [out]  n5  SP integer.
4413
 * @param  [out]  n6  SP integer.
4414
 *
4415
 * @return  MP_OKAY on success.
4416
 */
4417
int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
4418
                  sp_int* n6)
4419
34.2M
{
4420
34.2M
    if (n1 != NULL) {
4421
    #ifdef HAVE_WOLF_BIGINT
4422
        wc_bigint_init(&n1->raw);
4423
    #endif
4424
34.2M
        _sp_zero(n1);
4425
34.2M
        n1->dp[0] = 0;
4426
34.2M
        n1->size = SP_INT_DIGITS;
4427
    #ifdef HAVE_WOLF_BIGINT
4428
        wc_bigint_init(&n1->raw);
4429
    #endif
4430
34.2M
    }
4431
34.2M
    if (n2 != NULL) {
4432
    #ifdef HAVE_WOLF_BIGINT
4433
        wc_bigint_init(&n2->raw);
4434
    #endif
4435
34.2M
        _sp_zero(n2);
4436
34.2M
        n2->dp[0] = 0;
4437
34.2M
        n2->size = SP_INT_DIGITS;
4438
    #ifdef HAVE_WOLF_BIGINT
4439
        wc_bigint_init(&n2->raw);
4440
    #endif
4441
34.2M
    }
4442
34.2M
    if (n3 != NULL) {
4443
    #ifdef HAVE_WOLF_BIGINT
4444
        wc_bigint_init(&n3->raw);
4445
    #endif
4446
1.07M
        _sp_zero(n3);
4447
1.07M
        n3->dp[0] = 0;
4448
1.07M
        n3->size = SP_INT_DIGITS;
4449
    #ifdef HAVE_WOLF_BIGINT
4450
        wc_bigint_init(&n3->raw);
4451
    #endif
4452
1.07M
    }
4453
34.2M
    if (n4 != NULL) {
4454
    #ifdef HAVE_WOLF_BIGINT
4455
        wc_bigint_init(&n4->raw);
4456
    #endif
4457
213k
        _sp_zero(n4);
4458
213k
        n4->dp[0] = 0;
4459
213k
        n4->size = SP_INT_DIGITS;
4460
    #ifdef HAVE_WOLF_BIGINT
4461
        wc_bigint_init(&n4->raw);
4462
    #endif
4463
213k
    }
4464
34.2M
    if (n5 != NULL) {
4465
    #ifdef HAVE_WOLF_BIGINT
4466
        wc_bigint_init(&n5->raw);
4467
    #endif
4468
30.0k
        _sp_zero(n5);
4469
30.0k
        n5->dp[0] = 0;
4470
30.0k
        n5->size = SP_INT_DIGITS;
4471
    #ifdef HAVE_WOLF_BIGINT
4472
        wc_bigint_init(&n5->raw);
4473
    #endif
4474
30.0k
    }
4475
34.2M
    if (n6 != NULL) {
4476
    #ifdef HAVE_WOLF_BIGINT
4477
        wc_bigint_init(&n6->raw);
4478
    #endif
4479
30.0k
        _sp_zero(n6);
4480
30.0k
        n6->dp[0] = 0;
4481
30.0k
        n6->size = SP_INT_DIGITS;
4482
    #ifdef HAVE_WOLF_BIGINT
4483
        wc_bigint_init(&n6->raw);
4484
    #endif
4485
30.0k
    }
4486
4487
34.2M
    return MP_OKAY;
4488
34.2M
}
4489
#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
4490
4491
/* Free the memory allocated in the multi-precision number.
4492
 *
4493
 * @param  [in]  a  SP integer.
4494
 */
4495
void sp_free(sp_int* a)
4496
79.8M
{
4497
79.8M
    if (a != NULL) {
4498
    #ifdef HAVE_WOLF_BIGINT
4499
        wc_bigint_free(&a->raw);
4500
    #endif
4501
79.8M
    }
4502
79.8M
}
4503
4504
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
4505
/* Grow multi-precision number to be able to hold l digits.
4506
 * This function does nothing as the number of digits is fixed.
4507
 *
4508
 * @param  [in,out]  a  SP integer.
4509
 * @param  [in]      l  Number of digits to grow to.
4510
 *
4511
 * @return  MP_OKAY on success
4512
 * @return  MP_MEM if the number of digits requested is more than available.
4513
 */
4514
int sp_grow(sp_int* a, int l)
4515
139k
{
4516
139k
    int err = MP_OKAY;
4517
4518
139k
    if (a == NULL) {
4519
0
        err = MP_VAL;
4520
0
    }
4521
139k
    if ((err == MP_OKAY) && (l > a->size)) {
4522
4
        err = MP_MEM;
4523
4
    }
4524
139k
    if (err == MP_OKAY) {
4525
139k
        int i;
4526
4527
491k
        for (i = a->used; i < l; i++) {
4528
352k
            a->dp[i] = 0;
4529
352k
        }
4530
139k
    }
4531
4532
139k
    return err;
4533
139k
}
4534
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
4535
4536
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(HAVE_ECC)
4537
/* Set the multi-precision number to zero.
4538
 *
4539
 * @param  [out]  a  SP integer to set to zero.
4540
 */
4541
void sp_zero(sp_int* a)
4542
622
{
4543
622
    if (a != NULL) {
4544
622
        _sp_zero(a);
4545
622
    }
4546
622
}
4547
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
4548
4549
/* Clear the data from the multi-precision number and set to zero.
4550
 *
4551
 * @param  [out]  a  SP integer.
4552
 */
4553
void sp_clear(sp_int* a)
4554
79.6M
{
4555
79.6M
    if (a != NULL) {
4556
79.6M
        int i;
4557
4558
774M
        for (i = 0; i < a->used; i++) {
4559
694M
            a->dp[i] = 0;
4560
694M
        }
4561
79.6M
        _sp_zero(a);
4562
79.6M
        sp_free(a);
4563
79.6M
    }
4564
79.6M
}
4565
4566
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
4567
/* Ensure the data in the multi-precision number is zeroed.
4568
 *
4569
 * Use when security sensitive data needs to be wiped.
4570
 *
4571
 * @param  [in]  a  SP integer.
4572
 */
4573
void sp_forcezero(sp_int* a)
4574
218k
{
4575
218k
    if (a != NULL) {
4576
        /* Ensure all data zeroized - data not zeroed when used decreases. */
4577
218k
        ForceZero(a->dp, a->used * sizeof(sp_int_digit));
4578
218k
        _sp_zero(a);
4579
    #ifdef HAVE_WOLF_BIGINT
4580
        wc_bigint_zero(&a->raw);
4581
    #endif
4582
218k
        sp_free(a);
4583
218k
    }
4584
218k
}
4585
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
4586
4587
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4588
    !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
4589
/* Copy value of multi-precision number a into r.
4590
 *
4591
 * @param  [in]   a  SP integer - source.
4592
 * @param  [out]  r  SP integer - destination.
4593
 *
4594
 * @return  MP_OKAY on success.
4595
 */
4596
int sp_copy(const sp_int* a, sp_int* r)
4597
338M
{
4598
338M
    int err = MP_OKAY;
4599
4600
338M
    if ((a == NULL) || (r == NULL)) {
4601
67
        err = MP_VAL;
4602
67
    }
4603
338M
    else if (a != r) {
4604
228M
        XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
4605
228M
        if (a->used == 0)
4606
10.5M
            r->dp[0] = 0;
4607
228M
        r->used = a->used;
4608
#ifdef WOLFSSL_SP_INT_NEGATIVE
4609
        r->sign = a->sign;
4610
#endif
4611
228M
    }
4612
4613
338M
    return err;
4614
338M
}
4615
#endif
4616
4617
#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
/* Initialize r and then give it the value of a.
 *
 * @param  [out]  r  SP integer - destination.
 * @param  [in]   a  SP integer - source.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL.
 */
int sp_init_copy(sp_int* r, sp_int* a)
{
    int ret = sp_init(r);

    /* Only copy into a destination that was initialized. */
    if (ret != MP_OKAY) {
        return ret;
    }
    return sp_copy(a, r);
}
#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
4637
4638
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4639
    !defined(NO_DH) || !defined(NO_DSA)
4640
/* Exchange the values in a and b.
4641
 *
4642
 * @param  [in,out]  a  SP integer to swap.
4643
 * @param  [in,out]  b  SP integer to swap.
4644
 *
4645
 * @return  MP_OKAY on success.
4646
 * @return  MP_VAL when a or b is NULL.
4647
 * @return  MP_MEM when dynamic memory allocation fails.
4648
 */
4649
int sp_exch(sp_int* a, sp_int* b)
4650
129
{
4651
129
    int err = MP_OKAY;
4652
129
    DECL_SP_INT(t, (a != NULL) ? a->used : 1);
4653
4654
129
    if ((a == NULL) || (b == NULL)) {
4655
0
        err = MP_VAL;
4656
0
    }
4657
129
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
4658
0
        err = MP_VAL;
4659
0
    }
4660
4661
129
    ALLOC_SP_INT(t, a->used, err, NULL);
4662
129
    if (err == MP_OKAY) {
4663
96
        int asize = a->size;
4664
96
        int bsize = b->size;
4665
96
        XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
4666
96
        XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
4667
96
        XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
4668
96
        a->size = asize;
4669
96
        b->size = bsize;
4670
96
    }
4671
4672
129
    FREE_SP_INT(t, NULL);
4673
129
    return err;
4674
129
}
4675
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
4676
        * !NO_DSA */
4677
4678
#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
    !defined(WC_NO_CACHE_RESISTANT)
/* Conditionally swap the values of a and b without branching on m.
 *
 * A mask is calculated as 0 - m: all bits set when m is 1 and no bits set
 * when m is 0. The masked XOR of the two numbers is placed in a temporary
 * and XORed back into both, which exchanges them when the mask is set and
 * leaves them unchanged otherwise. The used count, the sign (when
 * WOLFSSL_SP_INT_NEGATIVE is defined) and the first c digits are processed.
 *
 * No NULL checks are made on a or b.
 *
 * @param  [in,out]  a  SP integer to swap.
 * @param  [in,out]  b  SP integer to swap.
 * @param  [in]      c  Number of digits of a and b to process.
 * @param  [in]      m  1 to swap, 0 to leave the numbers as they are.
 *
 * @return  MP_OKAY on success.
 * @return  Error set by ALLOC_SP_INT when the temporary cannot be obtained
 *          (presumably MP_MEM - confirm against the macro).
 */
int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m)
{
    int i;
    int err = MP_OKAY;
    sp_int_digit mask = (sp_int_digit)0 - m;
    DECL_SP_INT(t, c);

    ALLOC_SP_INT(t, c, err, NULL);
    if (err == MP_OKAY) {
        /* t = (a ^ b) & mask */
        t->used = (int)((a->used ^ b->used) & mask);
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        t->sign = (int)((a->sign ^ b->sign) & mask);
    #endif
        for (i = 0; i < c; i++) {
            t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
        }

        /* a ^= t */
        a->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        a->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            a->dp[i] ^= t->dp[i];
        }

        /* b ^= t */
        b->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* XOR with the masked difference, as is done for a. XORing b's sign
         * with itself would always clear it. */
        b->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            b->dp[i] ^= t->dp[i];
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
4716
4717
#ifdef WOLFSSL_SP_INT_NEGATIVE
4718
/* Calculate the absolute value of the multi-precision number.
4719
 *
4720
 * @param  [in]   a  SP integer to calculate absolute value of.
4721
 * @param  [out]  r  SP integer to hold result.
4722
 *
4723
 * @return  MP_OKAY on success.
4724
 * @return  MP_VAL when a or r is NULL.
4725
 */
4726
int sp_abs(sp_int* a, sp_int* r)
4727
{
4728
    int err;
4729
4730
    err = sp_copy(a, r);
4731
    if (r != NULL) {
4732
        r->sign = MP_ZPOS;
4733
    }
4734
4735
    return err;
4736
}
4737
#endif /* WOLFSSL_SP_INT_NEGATIVE */
4738
4739
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4740
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4741
/* Compare absolute value of two multi-precision numbers.
4742
 *
4743
 * @param  [in]  a  SP integer.
4744
 * @param  [in]  b  SP integer.
4745
 *
4746
 * @return  MP_GT when a is greater than b.
4747
 * @return  MP_LT when a is less than b.
4748
 * @return  MP_EQ when a is equals b.
4749
 */
4750
static int _sp_cmp_abs(sp_int* a, sp_int* b)
4751
628M
{
4752
628M
    int ret = MP_EQ;
4753
4754
628M
    if (a->used > b->used) {
4755
80.2M
        ret = MP_GT;
4756
80.2M
    }
4757
548M
    else if (a->used < b->used) {
4758
33.3M
        ret = MP_LT;
4759
33.3M
    }
4760
514M
    else {
4761
514M
        int i;
4762
4763
683M
        for (i = a->used - 1; i >= 0; i--) {
4764
673M
            if (a->dp[i] > b->dp[i]) {
4765
122M
                ret = MP_GT;
4766
122M
                break;
4767
122M
            }
4768
551M
            else if (a->dp[i] < b->dp[i]) {
4769
382M
                ret = MP_LT;
4770
382M
                break;
4771
382M
            }
4772
673M
        }
4773
514M
    }
4774
4775
628M
    return ret;
4776
628M
}
4777
#endif
4778
4779
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
4780
/* Compare absolute value of two multi-precision numbers.
4781
 *
4782
 * @param  [in]  a  SP integer.
4783
 * @param  [in]  b  SP integer.
4784
 *
4785
 * @return  MP_GT when a is greater than b.
4786
 * @return  MP_LT when a is less than b.
4787
 * @return  MP_EQ when a is equals b.
4788
 */
4789
int sp_cmp_mag(sp_int* a, sp_int* b)
4790
0
{
4791
0
    int ret;
4792
4793
0
    if (a == b) {
4794
0
        ret = MP_EQ;
4795
0
    }
4796
0
    else if (a == NULL) {
4797
0
        ret = MP_LT;
4798
0
    }
4799
0
    else if (b == NULL) {
4800
0
        ret = MP_GT;
4801
0
    }
4802
0
    else
4803
0
    {
4804
0
        ret = _sp_cmp_abs(a, b);
4805
0
    }
4806
4807
0
    return ret;
4808
0
}
4809
#endif
4810
4811
#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
4812
    defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
4813
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4814
/* Compare two multi-precision numbers.
4815
 *
4816
 * Assumes a and b are not NULL.
4817
 *
4818
 * @param  [in]  a  SP integer.
4819
 * @param  [in]  a  SP integer.
4820
 *
4821
 * @return  MP_GT when a is greater than b.
4822
 * @return  MP_LT when a is less than b.
4823
 * @return  MP_EQ when a is equals b.
4824
 */
4825
static int _sp_cmp(sp_int* a, sp_int* b)
4826
48.3M
{
4827
48.3M
    int ret;
4828
4829
#ifdef WOLFSSL_SP_INT_NEGATIVE
4830
    if (a->sign == b->sign) {
4831
#endif
4832
48.3M
        ret = _sp_cmp_abs(a, b);
4833
#ifdef WOLFSSL_SP_INT_NEGATIVE
4834
        if (a->sign == MP_NEG) {
4835
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
4836
             * Swapping MP_GT and MP_LT results.
4837
             */
4838
            ret = -ret;
4839
        }
4840
    }
4841
    else if (a->sign > b->sign) {
4842
        ret = MP_LT;
4843
    }
4844
    else /* (a->sign < b->sign) */ {
4845
        ret = MP_GT;
4846
    }
4847
#endif
4848
4849
48.3M
    return ret;
4850
48.3M
}
4851
#endif
4852
4853
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4854
    !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
4855
    defined(WOLFSSL_SP_MATH_ALL)
4856
/* Compare two multi-precision numbers.
4857
 *
4858
 * Pointers are compared such that NULL is less than not NULL.
4859
 *
4860
 * @param  [in]  a  SP integer.
4861
 * @param  [in]  a  SP integer.
4862
 *
4863
 * @return  MP_GT when a is greater than b.
4864
 * @return  MP_LT when a is less than b.
4865
 * @return  MP_EQ when a is equals b.
4866
 */
4867
int sp_cmp(sp_int* a, sp_int* b)
4868
29.6M
{
4869
29.6M
    int ret;
4870
4871
29.6M
    if (a == b) {
4872
2.79k
        ret = MP_EQ;
4873
2.79k
    }
4874
29.6M
    else if (a == NULL) {
4875
0
        ret = MP_LT;
4876
0
    }
4877
29.6M
    else if (b == NULL) {
4878
0
        ret = MP_GT;
4879
0
    }
4880
29.6M
    else
4881
29.6M
    {
4882
29.6M
        ret = _sp_cmp(a, b);
4883
29.6M
    }
4884
4885
29.6M
    return ret;
4886
29.6M
}
4887
#endif
4888
4889
/*************************
4890
 * Bit check/set functions
4891
 *************************/
4892
4893
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (defined(WOLFSSL_SP_MATH_ALL) && \
4894
    defined(HAVE_ECC))
4895
/* Check if a bit is set
4896
 *
4897
 * When a is NULL, result is 0.
4898
 *
4899
 * @param  [in]  a  SP integer.
4900
 * @param  [in]  b  Bit position to check.
4901
 *
4902
 * @return  0 when bit is not set.
4903
 * @return  1 when bit is set.
4904
 */
4905
int sp_is_bit_set(sp_int* a, unsigned int b)
4906
8.84M
{
4907
8.84M
    int ret = 0;
4908
8.84M
    int i = (int)(b >> SP_WORD_SHIFT);
4909
8.84M
    int s = (int)(b & SP_WORD_MASK);
4910
4911
8.84M
    if ((a != NULL) && (i < a->used)) {
4912
8.84M
        ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
4913
8.84M
    }
4914
4915
8.84M
    return ret;
4916
8.84M
}
4917
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
4918
4919
/* Count the number of bits in the multi-precision number.
4920
 *
4921
 * When a is not NULL, result is 0.
4922
 *
4923
 * @param  [in]  a  SP integer.
4924
 *
4925
 * @return  The number of bits in the number.
4926
 */
4927
int sp_count_bits(const sp_int* a)
4928
508M
{
4929
508M
    int r = 0;
4930
4931
508M
    if (a != NULL) {
4932
508M
        r = a->used - 1;
4933
508M
        while ((r >= 0) && (a->dp[r] == 0)) {
4934
73.5k
            r--;
4935
73.5k
        }
4936
508M
        if (r < 0) {
4937
116k
            r = 0;
4938
116k
        }
4939
508M
        else {
4940
508M
            sp_int_digit d;
4941
4942
508M
            d = a->dp[r];
4943
508M
            r *= SP_WORD_SIZE;
4944
508M
            if (d > SP_HALF_MAX) {
4945
292M
                r += SP_WORD_SIZE;
4946
715M
                while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
4947
423M
                    r--;
4948
423M
                    d <<= 1;
4949
423M
                }
4950
292M
            }
4951
216M
            else {
4952
4.47G
                while (d != 0) {
4953
4.26G
                    r++;
4954
4.26G
                    d >>= 1;
4955
4.26G
                }
4956
216M
            }
4957
508M
        }
4958
508M
    }
4959
4960
508M
    return r;
4961
508M
}
4962
4963
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
4964
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
4965
    (defined(HAVE_ECC) && defined(FP_ECC)) || \
4966
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
4967
4968
/* Number of entries in array of number of least significant zero bits. */
4969
#define SP_LNZ_CNT      16
4970
/* Number of bits the array checks. */
4971
171k
#define SP_LNZ_BITS     4
4972
/* Mask to apply to check with array. */
4973
320k
#define SP_LNZ_MASK     0xf
4974
/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
4975
static const int sp_lnz[SP_LNZ_CNT] = {
4976
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4977
};
4978
4979
/* Count the number of least significant zero bits.
4980
 *
4981
 * When a is not NULL, result is 0.
4982
 *
4983
 * @param  [in]   a  SP integer to use.
4984
 *
4985
 * @return  Number of leas significant zero bits.
4986
 */
4987
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
4988
static
4989
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
4990
int sp_cnt_lsb(sp_int* a)
4991
149k
{
4992
149k
    int bc = 0;
4993
4994
149k
    if ((a != NULL) && (!sp_iszero(a))) {
4995
149k
        int i;
4996
149k
        int j;
4997
149k
        int cnt = 0;
4998
4999
220k
        for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
5000
70.9k
        }
5001
5002
320k
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
5003
320k
            bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
5004
320k
            if (bc != 4) {
5005
149k
                bc += cnt + j;
5006
149k
                break;
5007
149k
            }
5008
320k
        }
5009
149k
    }
5010
5011
149k
    return bc;
5012
149k
}
5013
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH || (HAVE_ECC && FP_ECC) ||
        * (!NO_RSA && WOLFSSL_KEY_GEN) */
5014
5015
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5016
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
5017
/* Determine if the most significant byte of the encoded multi-precision number
5018
 * has the top bit set.
5019
 *
5020
 * When A is NULL, result is 0.
5021
 *
5022
 * @param  [in]  a  SP integer.
5023
 *
5024
 * @return  1 when the top bit of top byte is set.
5025
 * @return  0 when the top bit of top byte is not set.
5026
 */
5027
int sp_leading_bit(sp_int* a)
5028
27.8k
{
5029
27.8k
    int bit = 0;
5030
5031
27.8k
    if ((a != NULL) && (a->used > 0)) {
5032
27.7k
        sp_int_digit d = a->dp[a->used - 1];
5033
27.7k
    #if SP_WORD_SIZE > 8
5034
151k
        while (d > (sp_int_digit)0xff) {
5035
123k
            d >>= 8;
5036
123k
        }
5037
27.7k
    #endif
5038
27.7k
        bit = (int)(d >> 7);
5039
27.7k
    }
5040
5041
27.8k
    return bit;
5042
27.8k
}
5043
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
5044
5045
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5046
    defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
5047
    !defined(NO_RSA)
5048
/* Set a bit of a: a |= 1 << i
5049
 * The field 'used' is updated in a.
5050
 *
5051
 * @param  [in,out]  a  SP integer to set bit into.
5052
 * @param  [in]      i  Index of bit to set.
5053
 *
5054
 * @return  MP_OKAY on success.
5055
 * @return  MP_VAL when a is NULL or index is too large.
5056
 */
5057
int sp_set_bit(sp_int* a, int i)
5058
337k
{
5059
337k
    int err = MP_OKAY;
5060
337k
    int w = (int)(i >> SP_WORD_SHIFT);
5061
5062
337k
    if ((a == NULL) || (w >= a->size)) {
5063
172
        err = MP_VAL;
5064
172
    }
5065
337k
    else {
5066
337k
        int s = (int)(i & (SP_WORD_SIZE - 1));
5067
337k
        int j;
5068
5069
4.01M
        for (j = a->used; j <= w; j++) {
5070
3.67M
            a->dp[j] = 0;
5071
3.67M
        }
5072
337k
        a->dp[w] |= (sp_int_digit)1 << s;
5073
337k
        if (a->used <= w) {
5074
337k
            a->used = w + 1;
5075
337k
        }
5076
337k
    }
5077
337k
    return err;
5078
337k
}
5079
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
5080
        * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
5081
5082
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5083
    defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
5084
/* Exponentiate 2 to the power of e: a = 2^e
5085
 * This is done by setting the 'e'th bit.
5086
 *
5087
 * @param  [out]  a  SP integer to hold result.
5088
 * @param  [in]   e  Exponent.
5089
 *
5090
 * @return  MP_OKAY on success.
5091
 * @return  MP_VAL when a is NULL or 2^exponent is too large.
5092
 */
5093
int sp_2expt(sp_int* a, int e)
5094
467
{
5095
467
    int err = MP_OKAY;
5096
5097
467
    if (a == NULL) {
5098
0
        err = MP_VAL;
5099
0
    }
5100
467
    if (err == MP_OKAY) {
5101
467
        _sp_zero(a);
5102
467
        err = sp_set_bit(a, e);
5103
467
    }
5104
5105
467
    return err;
5106
467
}
5107
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5108
        * WOLFSSL_KEY_GEN || !NO_DH */
5109
5110
/**********************
5111
 * Digit/Long functions
5112
 **********************/
5113
5114
/* Set the multi-precision number to be the value of the digit.
5115
 *
5116
 * @param  [out]  a  SP integer to become number.
5117
 * @param  [in]   d  Digit to be set.
5118
 *
5119
 * @return  MP_OKAY on success.
5120
 * @return  MP_VAL when a is NULL.
5121
 */
5122
int sp_set(sp_int* a, sp_int_digit d)
5123
5.35M
{
5124
5.35M
    int err = MP_OKAY;
5125
5126
5.35M
    if (a == NULL) {
5127
0
        err = MP_VAL;
5128
0
    }
5129
5.35M
    if (err == MP_OKAY) {
5130
        /* gcc-11 reports out-of-bounds array access if the byte array backing
5131
         * the sp_int* is smaller than sizeof(sp_int), as occurs when
5132
         * WOLFSSL_SP_SMALL.
5133
         */
5134
5.35M
        PRAGMA_GCC_DIAG_PUSH;
5135
5.35M
        PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"");
5136
5.35M
        a->dp[0] = d;
5137
5.35M
        a->used = d > 0;
5138
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5139
        a->sign = MP_ZPOS;
5140
    #endif
5141
5.35M
        PRAGMA_GCC_DIAG_POP;
5142
5.35M
    }
5143
5144
5.35M
    return err;
5145
5.35M
}
5146
5147
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
5148
/* Set a number into the multi-precision number.
5149
 *
5150
 * Number may be larger than the size of a digit.
5151
 *
5152
 * @param  [out]  a  SP integer to set.
5153
 * @param  [in]   n  Long value to set.
5154
 *
5155
 * @return  MP_OKAY on success.
5156
 * @return  MP_VAL when a is NULL.
5157
 */
5158
int sp_set_int(sp_int* a, unsigned long n)
5159
228
{
5160
228
    int err = MP_OKAY;
5161
5162
228
    if (a == NULL) {
5163
0
        err = MP_VAL;
5164
0
    }
5165
5166
228
    if (err == MP_OKAY) {
5167
    #if SP_WORD_SIZE < SP_ULONG_BITS
5168
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
5169
    #endif
5170
228
            a->dp[0] = (sp_int_digit)n;
5171
228
            a->used = (n != 0);
5172
    #if SP_WORD_SIZE < SP_ULONG_BITS
5173
        }
5174
        else {
5175
            int i;
5176
5177
            for (i = 0; n > 0; i++,n >>= SP_WORD_SIZE) {
5178
                a->dp[i] = (sp_int_digit)n;
5179
            }
5180
            a->used = i;
5181
        }
5182
    #endif
5183
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5184
        a->sign = MP_ZPOS;
5185
    #endif
5186
228
    }
5187
5188
228
    return err;
5189
228
}
5190
#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA || OPENSSL_EXTRA */
5191
5192
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5193
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_DH))
5194
/* Compare a one digit number with a multi-precision number.
5195
 *
5196
 * When a is NULL, MP_LT is returned.
5197
 *
5198
 * @param  [in]  a  SP integer to compare.
5199
 * @param  [in]  d  Digit to compare with.
5200
 *
5201
 * @return  MP_GT when a is greater than d.
5202
 * @return  MP_LT when a is less than d.
5203
 * @return  MP_EQ when a is equals d.
5204
 */
5205
int sp_cmp_d(sp_int* a, sp_int_digit d)
5206
8.79M
{
5207
8.79M
    int ret = MP_EQ;
5208
5209
8.79M
    if (a == NULL) {
5210
0
        ret = MP_LT;
5211
0
    }
5212
8.79M
    else
5213
#ifdef WOLFSSL_SP_INT_NEGATIVE
5214
    if (a->sign == MP_NEG) {
5215
        ret = MP_LT;
5216
    }
5217
    else
5218
#endif
5219
8.79M
    {
5220
        /* special case for zero*/
5221
8.79M
        if (a->used == 0) {
5222
2.72M
            if (d == 0) {
5223
745
                ret = MP_EQ;
5224
745
            }
5225
2.72M
            else {
5226
2.72M
                ret = MP_LT;
5227
2.72M
            }
5228
2.72M
        }
5229
6.06M
        else if (a->used > 1) {
5230
818k
            ret = MP_GT;
5231
818k
        }
5232
5.24M
        else {
5233
5.24M
            if (a->dp[0] > d) {
5234
1.84M
                ret = MP_GT;
5235
1.84M
            }
5236
3.40M
            else if (a->dp[0] < d) {
5237
313k
                ret = MP_LT;
5238
313k
            }
5239
5.24M
        }
5240
8.79M
    }
5241
5242
8.79M
    return ret;
5243
8.79M
}
5244
#endif
5245
5246
/* Feature selection macros: each is defined when the build configuration
 * needs the corresponding single-digit/helper operation compiled in. They are
 * tested by the #if guards around the functions that follow. */

/* Public sp_add_d() is needed. Also compiles in the internal _sp_add_d(). */
#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
5247
    !defined(NO_DSA) || \
5248
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5249
    defined(OPENSSL_EXTRA)
5250
#define WOLFSSL_SP_ADD_D
5251
#endif
5252
/* Public sp_sub_d() is needed. Also compiles in the internal _sp_sub_d(). */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5253
    !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
5254
#define WOLFSSL_SP_SUB_D
5255
#endif
5256
/* Reading of radix 10 strings is needed. Compiles in _sp_add_d(). */
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
5257
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
5258
#define WOLFSSL_SP_READ_RADIX_10
5259
#endif
5260
/* Modular inversion is needed. Compiles in _sp_sub_d(). */
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
5261
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5262
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
5263
#define WOLFSSL_SP_INVMOD
5264
#endif
5265
/* Constant-time Montgomery modular inversion is needed (presumably for ECC -
 * confirm against its use). Compiles in _sp_sub_d(). */
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
5266
#define WOLFSSL_SP_INVMOD_MONT_CT
5267
#endif
5268
/* Prime generation is needed. Compiles in _sp_sub_d(). */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5269
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
5270
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
5271
#define WOLFSSL_SP_PRIME_GEN
5272
#endif
5273
5274
#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
5275
    defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
5276
/* Add a one digit number to the multi-precision number.
5277
 *
5278
 * @param  [in]   a  SP integer be added to.
5279
 * @param  [in]   d  Digit to add.
5280
 * @param  [out]  r  SP integer to store result in.
5281
 *
5282
 * @return  MP_OKAY on success.
5283
 * @return  MP_VAL when result is too large for fixed size dp array.
5284
 */
5285
static int _sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
5286
9.77M
{
5287
9.77M
    int err = MP_OKAY;
5288
9.77M
    int i = 0;
5289
9.77M
    sp_int_digit t;
5290
5291
9.77M
    r->used = a->used;
5292
9.77M
    if (a->used == 0) {
5293
119k
        r->used = d > 0;
5294
119k
    }
5295
9.77M
    t = a->dp[0] + d;
5296
9.77M
    if (t < a->dp[0]) {
5297
51.4k
        for (++i; i < a->used; i++) {
5298
50.6k
            r->dp[i] = a->dp[i] + 1;
5299
50.6k
            if (r->dp[i] != 0) {
5300
26.8k
               break;
5301
26.8k
            }
5302
50.6k
        }
5303
27.6k
        if (i == a->used) {
5304
828
            if (i < r->size) {
5305
828
                r->used++;
5306
828
                r->dp[i] = 1;
5307
828
            }
5308
0
            else {
5309
0
                err = MP_VAL;
5310
0
            }
5311
828
        }
5312
27.6k
    }
5313
9.77M
    if (err == MP_OKAY) {
5314
9.77M
        r->dp[0] = t;
5315
9.77M
        if (r != a) {
5316
6.58k
            for (++i; i < a->used; i++) {
5317
5.93k
                r->dp[i] = a->dp[i];
5318
5.93k
            }
5319
651
        }
5320
9.77M
    }
5321
5322
9.77M
    return err;
5323
9.77M
}
5324
#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
5325
        * defined(WOLFSSL_SP_READ_RADIX_10) */
5326
5327
#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
5328
    defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
5329
    defined(WOLFSSL_SP_INVMOD_MONT_CT) || defined(WOLFSSL_SP_PRIME_GEN)
5330
/* Sub a one digit number from the multi-precision number.
5331
 *
5332
 * returns MP_OKAY always.
5333
 * @param  [in]   a  SP integer be subtracted from.
5334
 * @param  [in]   d  Digit to subtract.
5335
 * @param  [out]  r  SP integer to store result in.
5336
 */
5337
static void _sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
5338
134k
{
5339
134k
    int i = 0;
5340
134k
    sp_int_digit t;
5341
5342
134k
    r->used = a->used;
5343
134k
    if (a->used == 0) {
5344
74
        r->dp[0] = 0;
5345
74
    }
5346
134k
    else {
5347
134k
        t = a->dp[0] - d;
5348
134k
        if (t > a->dp[0]) {
5349
76.5k
            for (++i; i < a->used; i++) {
5350
76.4k
                r->dp[i] = a->dp[i] - 1;
5351
76.4k
                if (r->dp[i] != SP_DIGIT_MAX) {
5352
13.0k
                   break;
5353
13.0k
                }
5354
76.4k
            }
5355
13.1k
        }
5356
134k
        r->dp[0] = t;
5357
134k
        if (r != a) {
5358
821k
            for (++i; i < a->used; i++) {
5359
732k
                r->dp[i] = a->dp[i];
5360
732k
            }
5361
89.5k
        }
5362
134k
        sp_clamp(r);
5363
134k
    }
5364
134k
}
5365
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
5366
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
5367
        * WOLFSSL_SP_PRIME_GEN */
5368
5369
#ifdef WOLFSSL_SP_ADD_D
5370
/* Add a one digit number to the multi-precision number.
5371
 *
5372
 * @param  [in]   a  SP integer be added to.
5373
 * @param  [in]   d  Digit to add.
5374
 * @param  [out]  r  SP integer to store result in.
5375
 *
5376
 * @return  MP_OKAY on success.
5377
 * @return  MP_VAL when result is too large for fixed size dp array.
5378
 */
5379
int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
5380
5.29k
{
5381
5.29k
    int err = MP_OKAY;
5382
5383
    /* Check validity of parameters. */
5384
5.29k
    if ((a == NULL) || (r == NULL)) {
5385
0
        err = MP_VAL;
5386
0
    }
5387
5.29k
    else
5388
5.29k
    {
5389
5.29k
    #ifndef WOLFSSL_SP_INT_NEGATIVE
5390
        /* Positive only so just use internal function. */
5391
5.29k
        err = _sp_add_d(a, d, r);
5392
    #else
5393
        if (a->sign == MP_ZPOS) {
5394
            /* Positive so use interal function. */
5395
            r->sign = MP_ZPOS;
5396
            err = _sp_add_d(a, d, r);
5397
        }
5398
        else if ((a->used > 1) || (a->dp[0] > d)) {
5399
            /* Negative value bigger than digit so subtract digit. */
5400
            r->sign = MP_NEG;
5401
            _sp_sub_d(a, d, r);
5402
        }
5403
        else {
5404
            /* Negative value smaller or equal to digit. */
5405
            r->sign = MP_ZPOS;
5406
            /* Subtract negative value from digit. */
5407
            r->dp[0] = d - a->dp[0];
5408
            /* Result is a digit equal to or greater than zero. */
5409
            r->used = ((r->dp[0] == 0) ? 0 : 1);
5410
        }
5411
    #endif
5412
5.29k
    }
5413
5414
5.29k
    return err;
5415
5.29k
}
5416
#endif /* WOLFSSL_SP_ADD_D */
5417
5418
#ifdef WOLFSSL_SP_SUB_D
5419
/* Sub a one digit number from the multi-precision number.
5420
 *
5421
 * @param  [in]   a  SP integer be subtracted from.
5422
 * @param  [in]   d  Digit to subtract.
5423
 * @param  [out]  r  SP integer to store result in.
5424
 *
5425
 * @return  MP_OKAY on success.
5426
 * @return  MP_VAL when a or r is NULL.
5427
 */
5428
int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
5429
11.4k
{
5430
11.4k
    int err = MP_OKAY;
5431
5432
    /* Check validity of parameters. */
5433
11.4k
    if ((a == NULL) || (r == NULL)) {
5434
0
        err = MP_VAL;
5435
0
    }
5436
11.4k
    else {
5437
11.4k
    #ifndef WOLFSSL_SP_INT_NEGATIVE
5438
        /* Positive only so just use internal function. */
5439
11.4k
        _sp_sub_d(a, d, r);
5440
    #else
5441
        if (a->sign == MP_NEG) {
5442
            /* Subtracting from negative use interal add. */
5443
            r->sign = MP_NEG;
5444
            err = _sp_add_d(a, d, r);
5445
        }
5446
        else if ((a->used > 1) || (a->dp[0] >= d)) {
5447
            /* Positive number greater than digit so add digit. */
5448
            r->sign = MP_ZPOS;
5449
            _sp_sub_d(a, d, r);
5450
        }
5451
        else {
5452
            /* Negative value smaller than digit. */
5453
            r->sign = MP_NEG;
5454
            /* Subtract positive value from digit. */
5455
            r->dp[0] = d - a->dp[0];
5456
            /* Result is a digit equal to or greater than zero. */
5457
            r->used = 1;
5458
        }
5459
    #endif
5460
11.4k
    }
5461
5462
11.4k
    return err;
5463
11.4k
}
5464
#endif /* WOLFSSL_SP_SUB_D */
5465
5466
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5467
    defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
5468
    !defined(NO_DH) || defined(HAVE_ECC) || \
5469
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5470
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
5471
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
5472
/* Multiply a by digit n and put result into r shifting up o digits.
5473
 *   r = (a * n) << (o * SP_WORD_SIZE)
5474
 *
5475
 * @param  [in]   a  SP integer to be multiplied.
5476
 * @param  [in]   n  Number (SP digit) to multiply by.
5477
 * @param  [out]  r  SP integer result.
5478
 * @param  [in]   o  Number of digits to move result up by.
5479
 * @return  MP_OKAY on success.
5480
 * @return  MP_VAL when result is too large for sp_int.
5481
 */
5482
static int _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
5483
9.76M
{
5484
9.76M
    int err = MP_OKAY;
5485
9.76M
    int i;
5486
#ifndef SQR_MUL_ASM
5487
    sp_int_word t = 0;
5488
#else
5489
9.76M
    sp_int_digit l = 0;
5490
9.76M
    sp_int_digit h = 0;
5491
9.76M
#endif
5492
5493
#ifdef WOLFSSL_SP_SMALL
5494
    for (i = 0; i < o; i++) {
5495
        r->dp[i] = 0;
5496
    }
5497
#else
5498
    /* Don't use the offset. Only when doing small code size div. */
5499
9.76M
    (void)o;
5500
9.76M
#endif
5501
5502
638M
    for (i = 0; i < a->used; i++, o++) {
5503
    #ifndef SQR_MUL_ASM
5504
        t += (sp_int_word)a->dp[i] * n;
5505
        r->dp[o] = (sp_int_digit)t;
5506
        t >>= SP_WORD_SIZE;
5507
    #else
5508
629M
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], n);
5509
629M
        r->dp[o] = l;
5510
629M
        l = h;
5511
629M
        h = 0;
5512
629M
    #endif
5513
629M
    }
5514
5515
#ifndef SQR_MUL_ASM
5516
    if (t > 0)
5517
#else
5518
9.76M
    if (l > 0)
5519
1.26M
#endif
5520
1.26M
    {
5521
1.26M
        if (o == r->size) {
5522
295
            err = MP_VAL;
5523
295
        }
5524
1.26M
        else {
5525
        #ifndef SQR_MUL_ASM
5526
            r->dp[o++] = (sp_int_digit)t;
5527
        #else
5528
1.26M
            r->dp[o++] = l;
5529
1.26M
        #endif
5530
1.26M
        }
5531
1.26M
    }
5532
9.76M
    r->used = o;
5533
9.76M
    sp_clamp(r);
5534
5535
9.76M
    return err;
5536
9.76M
}
5537
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5538
        *  WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
5539
5540
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5541
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
5542
/* Multiply a by digit n and put result into r. r = a * n
5543
 *
5544
 * @param  [in]   a  SP integer to multiply.
5545
 * @param  [in]   n  Digit to multiply by.
5546
 * @param  [out]  r  SP integer to hold result.
5547
 *
5548
 * @return  MP_OKAY on success.
5549
 * @return  MP_VAL when a or b is NULL, or a has maximum number of digits used.
5550
 */
5551
int sp_mul_d(sp_int* a, sp_int_digit d, sp_int* r)
5552
277
{
5553
277
    int err = MP_OKAY;
5554
5555
277
    if ((a == NULL) || (r == NULL)) {
5556
0
        err = MP_VAL;
5557
0
    }
5558
277
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
5559
2
        err = MP_VAL;
5560
2
    }
5561
5562
277
    if (err == MP_OKAY) {
5563
275
        err = _sp_mul_d(a, d, r, 0);
5564
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5565
        if (d == 0) {
5566
            r->sign = MP_ZPOS;
5567
        }
5568
        else {
5569
            r->sign = a->sign;
5570
        }
5571
    #endif
5572
275
    }
5573
5574
277
    return err;
5575
277
}
5576
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5577
        * (WOLFSSL_KEY_GEN && !NO_RSA) */
5578
5579
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d.
 *
 * WOLFSSL_SP_DIV_D and WOLFSSL_SP_MOD_D are tested by the #if guards that
 * follow: either one compiles in sp_div_word() and, when WOLFSSL_SP_SMALL is
 * not defined, the divide-by-constant helpers. */
5580
/* Division of an SP integer by a digit is needed. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5581
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
5582
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
5583
#define WOLFSSL_SP_DIV_D
5584
#endif
5585
/* Remainder of an SP integer divided by a digit is needed. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5586
    !defined(NO_DH) || \
5587
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
5588
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
5589
#define WOLFSSL_SP_MOD_D
5590
#endif
5591
5592
#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5593
     (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5594
      !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
5595
    defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
5596
#ifndef SP_ASM_DIV_WORD
5597
/* Divide a two digit number by a digit number and return. (hi | lo) / d
5598
 *
5599
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
5600
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
5601
 * @param  [in]  d   SP integer digit. Number to divide by.
5602
 * @return  The division result.
5603
 */
5604
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
5605
                                          sp_int_digit d)
5606
{
5607
#ifdef WOLFSSL_SP_DIV_WORD_HALF
5608
    sp_int_digit r;
5609
5610
    if (hi != 0) {
5611
        sp_int_digit divsz = d >> SP_HALF_SIZE;
5612
        sp_int_digit r2;
5613
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
5614
        sp_int_word trial;
5615
5616
        r = hi / divsz;
5617
        if (r > SP_HALF_MAX) {
5618
            r = SP_HALF_MAX;
5619
        }
5620
        r <<= SP_HALF_SIZE;
5621
        trial = r * (sp_int_word)d;
5622
        while (trial > w) {
5623
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
5624
            trial -= (sp_int_word)d << SP_HALF_SIZE;
5625
        }
5626
        w -= trial;
5627
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divsz;
5628
        trial = r2 * (sp_int_word)d;
5629
        while (trial > w) {
5630
            r2--;
5631
            trial -= d;
5632
        }
5633
        w -= trial;
5634
        r += r2;
5635
        r2 = ((sp_int_digit)w) / d;
5636
        r += r2;
5637
    }
5638
    else {
5639
        r = lo / d;
5640
    }
5641
5642
    return r;
5643
#else
5644
    sp_int_word w;
5645
    sp_int_digit r;
5646
5647
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
5648
    w /= d;
5649
    r = (sp_int_digit)w;
5650
5651
    return r;
5652
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
5653
}
5654
#endif /* !SP_ASM_DIV_WORD */
5655
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
5656
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
5657
5658
#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
5659
    !defined(WOLFSSL_SP_SMALL)
5660
5661
/* Multipliers for dividing by a small constant with a multiply instead of a
 * divide, one pair per supported digit size:
 *   SP_DIV_3_CONST  = floor((2^SP_WORD_SIZE - 1) / 3)
 *   SP_DIV_10_CONST = floor((2^SP_WORD_SIZE - 1) / 10)
 * SP_DIV_3_CONST is used by _sp_div_3(); SP_DIV_10_CONST is presumably used
 * the same way by _sp_div_10() - confirm against its body. */
#if SP_WORD_SIZE == 64
5662
3.88M
    #define SP_DIV_3_CONST      0x5555555555555555L
5663
594M
    #define SP_DIV_10_CONST     0x1999999999999999L
5664
#elif SP_WORD_SIZE == 32
5665
    #define SP_DIV_3_CONST      0x55555555
5666
    #define SP_DIV_10_CONST     0x19999999
5667
#elif SP_WORD_SIZE == 16
5668
    #define SP_DIV_3_CONST      0x5555
5669
    #define SP_DIV_10_CONST     0x1999
5670
#elif SP_WORD_SIZE == 8
5671
    #define SP_DIV_3_CONST      0x55
5672
    #define SP_DIV_10_CONST     0x19
5673
#endif
5674
5675
/* Divide by 3: r = a / 3 and rem = a % 3
5676
 *
5677
 * @param  [in]   a    SP integer to be divided.
5678
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5679
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5680
 */
5681
static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem)
5682
154k
{
5683
154k
    int i;
5684
#ifndef SQR_MUL_ASM
5685
    sp_int_word t;
5686
    sp_int_digit tt;
5687
#else
5688
154k
    sp_int_digit l = 0;
5689
154k
    sp_int_digit tt = 0;
5690
154k
    sp_int_digit t;
5691
154k
#endif
5692
154k
    sp_int_digit tr = 0;
5693
154k
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
5694
154k
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
5695
5696
154k
    if (r == NULL) {
5697
2.09M
        for (i = a->used - 1; i >= 0; i--) {
5698
    #ifndef SQR_MUL_ASM
5699
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5700
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
5701
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
5702
    #else
5703
1.94M
            t = SP_DIV_3_CONST;
5704
1.94M
            SP_ASM_MUL(l, tt, a->dp[i], t);
5705
1.94M
            tt += tr * SP_DIV_3_CONST;
5706
1.94M
            tr = a->dp[i] - (tt * 3);
5707
1.94M
    #endif
5708
1.94M
            tr = sp_rem6[tr];
5709
1.94M
        }
5710
154k
        *rem = tr;
5711
154k
    }
5712
190
    else {
5713
1.19k
        for (i = a->used - 1; i >= 0; i--) {
5714
    #ifndef SQR_MUL_ASM
5715
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5716
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
5717
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
5718
    #else
5719
1.00k
            t = SP_DIV_3_CONST;
5720
1.00k
            SP_ASM_MUL(l, tt, a->dp[i], t);
5721
1.00k
            tt += tr * SP_DIV_3_CONST;
5722
1.00k
            tr = a->dp[i] - (tt * 3);
5723
1.00k
    #endif
5724
1.00k
            tt += sp_r6[tr];
5725
1.00k
            tr = sp_rem6[tr];
5726
1.00k
            r->dp[i] = tt;
5727
1.00k
        }
5728
190
        r->used = a->used;
5729
190
        sp_clamp(r);
5730
190
        if (rem != NULL) {
5731
116
            *rem = tr;
5732
116
        }
5733
190
    }
5734
154k
}
5735
5736
/* Divide by 10: r = a / 10 and rem = a % 10
5737
 *
5738
 * @param  [in]   a    SP integer to be divided.
5739
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5740
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5741
 */
5742
static void _sp_div_10(sp_int* a, sp_int* r, sp_int_digit* rem)
5743
9.65M
{
5744
9.65M
    int i;
5745
#ifndef SQR_MUL_ASM
5746
    sp_int_word t;
5747
    sp_int_digit tt;
5748
#else
5749
9.65M
    sp_int_digit l = 0;
5750
9.65M
    sp_int_digit tt = 0;
5751
9.65M
    sp_int_digit t;
5752
9.65M
#endif
5753
9.65M
    sp_int_digit tr = 0;
5754
5755
9.65M
    if (r == NULL) {
5756
1.03k
        for (i = a->used - 1; i >= 0; i--) {
5757
    #ifndef SQR_MUL_ASM
5758
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5759
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
5760
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
5761
    #else
5762
759
            t = SP_DIV_10_CONST;
5763
759
            SP_ASM_MUL(l, tt, a->dp[i], t);
5764
759
            tt += tr * SP_DIV_10_CONST;
5765
759
            tr = a->dp[i] - (tt * 10);
5766
759
    #endif
5767
759
            tr = tr % 10;
5768
759
        }
5769
278
        *rem = tr;
5770
278
    }
5771
9.65M
    else {
5772
306M
        for (i = a->used - 1; i >= 0; i--) {
5773
    #ifndef SQR_MUL_ASM
5774
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5775
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
5776
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
5777
    #else
5778
297M
            t = SP_DIV_10_CONST;
5779
297M
            SP_ASM_MUL(l, tt, a->dp[i], t);
5780
297M
            tt += tr * SP_DIV_10_CONST;
5781
297M
            tr = a->dp[i] - (tt * 10);
5782
297M
    #endif
5783
297M
            tt += tr / 10;
5784
297M
            tr = tr % 10;
5785
297M
            r->dp[i] = tt;
5786
297M
        }
5787
9.65M
        r->used = a->used;
5788
9.65M
        sp_clamp(r);
5789
9.65M
        if (rem != NULL) {
5790
9.65M
            *rem = tr;
5791
9.65M
        }
5792
9.65M
    }
5793
9.65M
}
5794
#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
5795
5796
#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
5797
/* Divide by small number: r = a / d and rem = a % d
5798
 *
5799
 * @param  [in]   a    SP integer to be divided.
5800
 * @param  [in]   d    Digit to divide by.
5801
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5802
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5803
 */
5804
static void _sp_div_small(sp_int* a, sp_int_digit d, sp_int* r,
5805
                         sp_int_digit* rem)
5806
8.33M
{
5807
8.33M
    int i;
5808
#ifndef SQR_MUL_ASM
5809
    sp_int_word t;
5810
    sp_int_digit tt;
5811
#else
5812
8.33M
    sp_int_digit l = 0;
5813
8.33M
    sp_int_digit tt = 0;
5814
8.33M
#endif
5815
8.33M
    sp_int_digit tr = 0;
5816
8.33M
    sp_int_digit m;
5817
5818
8.33M
    if (r == NULL) {
5819
8.33M
        m = SP_DIGIT_MAX / d;
5820
71.2M
        for (i = a->used - 1; i >= 0; i--) {
5821
    #ifndef SQR_MUL_ASM
5822
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5823
            tt = (t * m) >> SP_WORD_SIZE;
5824
            tr = (sp_int_digit)(t - tt * d);
5825
    #else
5826
62.9M
            SP_ASM_MUL(l, tt, a->dp[i], m);
5827
62.9M
            tt += tr * m;
5828
62.9M
            tr = a->dp[i] - (tt * d);
5829
62.9M
    #endif
5830
62.9M
            tr = tr % d;
5831
62.9M
        }
5832
8.33M
        *rem = tr;
5833
8.33M
    }
5834
137
    else {
5835
137
        m = SP_DIGIT_MAX / d;
5836
1.62k
        for (i = a->used - 1; i >= 0; i--) {
5837
    #ifndef SQR_MUL_ASM
5838
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5839
            tt = (t * m) >> SP_WORD_SIZE;
5840
            tr = (sp_int_digit)(t - tt * d);
5841
    #else
5842
1.48k
            SP_ASM_MUL(l, tt, a->dp[i], m);
5843
1.48k
            tt += tr * m;
5844
1.48k
            tr = a->dp[i] - (tt * d);
5845
1.48k
    #endif
5846
1.48k
            tt += tr / d;
5847
1.48k
            tr = tr % d;
5848
1.48k
            r->dp[i] = tt;
5849
1.48k
        }
5850
137
        r->used = a->used;
5851
137
        sp_clamp(r);
5852
137
        if (rem != NULL) {
5853
80
            *rem = tr;
5854
80
        }
5855
137
    }
5856
8.33M
}
5857
#endif
5858
5859
#ifdef WOLFSSL_SP_DIV_D
5860
/* Divide a multi-precision number by a digit size number and calculate
5861
 * remainder.
5862
 *   r = a / d; rem = a % d
5863
 *
5864
 * @param  [in]   a    SP integer to be divided.
5865
 * @param  [in]   d    Digit to divide by.
5866
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5867
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
5868
 *
5869
 * @return  MP_OKAY on success.
5870
 * @return  MP_VAL when a is NULL or d is 0.
5871
 */
5872
int sp_div_d(sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
5873
4.69M
{
5874
4.69M
    int err = MP_OKAY;
5875
5876
4.69M
    if ((a == NULL) || (d == 0)) {
5877
21
        err = MP_VAL;
5878
21
    }
5879
5880
4.69M
    if (err == MP_OKAY) {
5881
4.69M
    #if !defined(WOLFSSL_SP_SMALL)
5882
4.69M
        if (d == 3) {
5883
96
            _sp_div_3(a, r, rem);
5884
96
        }
5885
4.69M
        else if (d == 10) {
5886
4.69M
            _sp_div_10(a, r, rem);
5887
4.69M
        }
5888
160
        else
5889
160
    #endif
5890
160
        if (d <= SP_HALF_MAX) {
5891
79
            _sp_div_small(a, d, r, rem);
5892
79
        }
5893
81
        else
5894
81
        {
5895
81
            int i;
5896
        #ifndef SQR_MUL_ASM
5897
            sp_int_word w = 0;
5898
        #else
5899
81
            sp_int_digit l;
5900
81
            sp_int_digit h = 0;
5901
81
        #endif
5902
81
            sp_int_digit t;
5903
5904
944
            for (i = a->used - 1; i >= 0; i--) {
5905
            #ifndef SQR_MUL_ASM
5906
                t = sp_div_word((sp_int_digit)w, a->dp[i], d);
5907
                w = (w << SP_WORD_SIZE) | a->dp[i];
5908
                w -= (sp_int_word)t * d;
5909
            #else
5910
863
                l = a->dp[i];
5911
863
                t = sp_div_word(h, l, d);
5912
863
                h = l - t * d;
5913
863
            #endif
5914
863
                if (r != NULL) {
5915
863
                    r->dp[i] = t;
5916
863
                }
5917
863
            }
5918
81
            if (r != NULL) {
5919
81
                r->used = a->used;
5920
81
                sp_clamp(r);
5921
81
            }
5922
5923
81
            if (rem != NULL) {
5924
            #ifndef SQR_MUL_ASM
5925
                *rem = (sp_int_digit)w;
5926
            #else
5927
46
                *rem = h;
5928
46
            #endif
5929
46
            }
5930
81
        }
5931
5932
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5933
        if (r != NULL) {
5934
            r->sign = a->sign;
5935
        }
5936
    #endif
5937
4.69M
    }
5938
5939
4.69M
    return err;
5940
4.69M
}
5941
#endif /* WOLFSSL_SP_DIV_D */
5942
5943
#ifdef WOLFSSL_SP_MOD_D
5944
/* Calculate a modulo the digit d into r: r = a mod d
5945
 *
5946
 * @param  [in]   a  SP integer to reduce.
5947
 * @param  [in]   d  Digit to that is the modulus.
5948
 * @param  [out]  r  Digit that is the result..
5949
 *
5950
 * @return  MP_OKAY on success.
5951
 * @return  MP_VAL when a is NULL or d is 0.
5952
 */
5953
#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
5954
    !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
5955
static
5956
#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
5957
int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
5958
6.39M
{
5959
6.39M
    int err = MP_OKAY;
5960
5961
6.39M
    if ((a == NULL) || (r == NULL) || (d == 0)) {
5962
22
        err = MP_VAL;
5963
22
    }
5964
5965
#if 0
5966
    sp_print(a, "a");
5967
    sp_print_digit(d, "m");
5968
#endif
5969
5970
6.39M
    if (err == MP_OKAY) {
5971
        /* Check whether d is a power of 2. */
5972
6.39M
        if ((d & (d - 1)) == 0) {
5973
98.5k
            if (a->used == 0) {
5974
23
                *r = 0;
5975
23
            }
5976
98.5k
            else {
5977
98.5k
                *r = a->dp[0] & (d - 1);
5978
98.5k
            }
5979
98.5k
        }
5980
6.29M
    #if !defined(WOLFSSL_SP_SMALL)
5981
6.29M
        else if (d == 3) {
5982
97.0k
            _sp_div_3(a, NULL, r);
5983
97.0k
        }
5984
6.19M
        else if (d == 10) {
5985
179
            _sp_div_10(a, NULL, r);
5986
179
        }
5987
6.19M
    #endif
5988
6.19M
        else if (d <= SP_HALF_MAX) {
5989
6.18M
            _sp_div_small(a, d, NULL, r);
5990
6.18M
        }
5991
8.04k
        else {
5992
8.04k
            int i;
5993
        #ifndef SQR_MUL_ASM
5994
            sp_int_word w = 0;
5995
        #else
5996
8.04k
            sp_int_digit l;
5997
8.04k
            sp_int_digit h = 0;
5998
8.04k
        #endif
5999
8.04k
            sp_int_digit t;
6000
6001
17.3k
            for (i = a->used - 1; i >= 0; i--) {
6002
            #ifndef SQR_MUL_ASM
6003
                t = sp_div_word((sp_int_digit)w, a->dp[i], d);
6004
                w = (w << SP_WORD_SIZE) | a->dp[i];
6005
                w -= (sp_int_word)t * d;
6006
            #else
6007
9.27k
                l = a->dp[i];
6008
9.27k
                t = sp_div_word(h, l, d);
6009
9.27k
                h = l - t * d;
6010
9.27k
            #endif
6011
9.27k
            }
6012
6013
        #ifndef SQR_MUL_ASM
6014
            *r = (sp_int_digit)w;
6015
        #else
6016
8.04k
            *r = h;
6017
8.04k
        #endif
6018
8.04k
        }
6019
6020
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6021
        if (a->sign == MP_NEG) {
6022
            *r = d - *r;
6023
        }
6024
    #endif
6025
6.39M
    }
6026
6027
#if 0
6028
    sp_print_digit(*r, "rmod");
6029
#endif
6030
6031
6.39M
    return err;
6032
6.39M
}
6033
#endif /* WOLFSSL_SP_MOD_D */
6034
6035
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6036
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
6037
 *
6038
 * r = a / 2 (mod m) - constant time (a < m and positive)
6039
 *
6040
 * @param  [in]   a  SP integer to divide.
6041
 * @param  [in]   m  SP integer that is modulus.
6042
 * @param  [out]  r  SP integer to hold result.
6043
 *
6044
 * @return  MP_OKAY on success.
6045
 * @return  MP_VAL when a, m or r is NULL.
6046
 */
6047
int sp_div_2_mod_ct(sp_int* a, sp_int* m, sp_int* r)
6048
33.0M
{
6049
33.0M
    int err = MP_OKAY;
6050
6051
33.0M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
6052
0
        err = MP_VAL;
6053
0
    }
6054
33.0M
    if ((err == MP_OKAY) && (r->size < m->used + 1)) {
6055
3
        err = MP_VAL;
6056
3
    }
6057
6058
33.0M
    if (err == MP_OKAY) {
6059
    #ifndef SQR_MUL_ASM
6060
        sp_int_word  w = 0;
6061
    #else
6062
33.0M
        sp_int_digit l = 0;
6063
33.0M
        sp_int_digit h = 0;
6064
33.0M
        sp_int_digit t;
6065
33.0M
    #endif
6066
33.0M
        sp_int_digit mask;
6067
33.0M
        int i;
6068
6069
    #if 0
6070
        sp_print(a, "a");
6071
        sp_print(m, "m");
6072
    #endif
6073
6074
33.0M
        mask = (sp_int_digit)0 - (a->dp[0] & 1);
6075
338M
        for (i = 0; i < m->used; i++) {
6076
305M
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6077
6078
        #ifndef SQR_MUL_ASM
6079
            w         += m->dp[i] & mask;
6080
            w         += a->dp[i] & mask_a;
6081
            r->dp[i]   = (sp_int_digit)w;
6082
            w        >>= DIGIT_BIT;
6083
        #else
6084
305M
            t        = m->dp[i] & mask;
6085
305M
            SP_ASM_ADDC(l, h, t);
6086
305M
            t        = a->dp[i] & mask_a;
6087
305M
            SP_ASM_ADDC(l, h, t);
6088
305M
            r->dp[i] = l;
6089
305M
            l        = h;
6090
305M
            h        = 0;
6091
305M
        #endif
6092
305M
        }
6093
    #ifndef SQR_MUL_ASM
6094
        r->dp[i] = (sp_int_digit)w;
6095
    #else
6096
33.0M
        r->dp[i] = l;
6097
33.0M
    #endif
6098
33.0M
        r->used = i + 1;
6099
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6100
        r->sign = MP_ZPOS;
6101
    #endif
6102
33.0M
        sp_clamp(r);
6103
33.0M
        sp_div_2(r, r);
6104
6105
    #if 0
6106
        sp_print(r, "rd2");
6107
    #endif
6108
33.0M
    }
6109
6110
33.0M
    return err;
6111
33.0M
}
6112
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6113
6114
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
6115
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6116
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
6117
/* Divides a by 2 and stores in r: r = a >> 1
6118
 *
6119
 * @param  [in]   a  SP integer to divide.
6120
 * @param  [out]  r  SP integer to hold result.
6121
 *
6122
 * @return  MP_OKAY on success.
6123
 * @return  MP_VAL when a or r is NULL.
6124
 */
6125
#if !(defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
6126
static
6127
#endif
6128
int sp_div_2(sp_int* a, sp_int* r)
6129
128M
{
6130
128M
    int err = MP_OKAY;
6131
6132
128M
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6133
    /* Only when a public API. */
6134
128M
    if ((a == NULL) || (r == NULL)) {
6135
0
        err = MP_VAL;
6136
0
    }
6137
128M
#endif
6138
6139
128M
    if (err == MP_OKAY) {
6140
128M
        int i;
6141
6142
128M
        r->used = a->used;
6143
1.79G
        for (i = 0; i < a->used - 1; i++) {
6144
1.66G
            r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
6145
1.66G
        }
6146
128M
        r->dp[i] = a->dp[i] >> 1;
6147
128M
        r->used = i + 1;
6148
128M
        sp_clamp(r);
6149
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6150
        r->sign = a->sign;
6151
    #endif
6152
128M
    }
6153
6154
128M
    return err;
6155
128M
}
6156
#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
6157
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6158
6159
/************************
6160
 * Add/Subtract Functions
6161
 ************************/
6162
6163
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
6164
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
6165
 *
6166
 * @param  [in]   a  SP integer to add to.
6167
 * @param  [in]   b  SP integer to add.
6168
 * @param  [out]  r  SP integer to store result in.
6169
 * @param  [in]   o  Number of digits to offset b.
6170
 *
6171
 * @return  MP_OKAY on success.
6172
 */
6173
static int _sp_add_off(sp_int* a, sp_int* b, sp_int* r, int o)
6174
39.3M
{
6175
39.3M
    int i;
6176
39.3M
    int j;
6177
#ifndef SQR_MUL_ASM
6178
    sp_int_word t = 0;
6179
#else
6180
39.3M
    sp_int_digit l = 0;
6181
39.3M
    sp_int_digit h = 0;
6182
39.3M
    sp_int_digit t = 0;
6183
39.3M
#endif
6184
6185
#ifdef SP_MATH_NEED_ADD_OFF
6186
    for (i = 0; (i < o) && (i < a->used); i++) {
6187
        r->dp[i] = a->dp[i];
6188
    }
6189
    for (; i < o; i++) {
6190
        r->dp[i] = 0;
6191
    }
6192
#else
6193
39.3M
    i = 0;
6194
39.3M
    (void)o;
6195
39.3M
#endif
6196
6197
770M
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
6198
    #ifndef SQR_MUL_ASM
6199
        t += a->dp[i];
6200
        t += b->dp[j];
6201
        r->dp[i] = (sp_int_digit)t;
6202
        t >>= SP_WORD_SIZE;
6203
    #else
6204
731M
        t = a->dp[i];
6205
731M
        SP_ASM_ADDC(l, h, t);
6206
731M
        t = b->dp[j];
6207
731M
        SP_ASM_ADDC(l, h, t);
6208
731M
        r->dp[i] = l;
6209
731M
        l = h;
6210
731M
        h = 0;
6211
731M
    #endif
6212
731M
    }
6213
39.6M
    for (; i < a->used; i++) {
6214
    #ifndef SQR_MUL_ASM
6215
        t += a->dp[i];
6216
        r->dp[i] = (sp_int_digit)t;
6217
        t >>= SP_WORD_SIZE;
6218
    #else
6219
214k
        t = a->dp[i];
6220
214k
        SP_ASM_ADDC(l, h, t);
6221
214k
        r->dp[i] = l;
6222
214k
        l = h;
6223
214k
        h = 0;
6224
214k
    #endif
6225
214k
    }
6226
64.9M
    for (; j < b->used; i++, j++) {
6227
    #ifndef SQR_MUL_ASM
6228
        t += b->dp[j];
6229
        r->dp[i] = (sp_int_digit)t;
6230
        t >>= SP_WORD_SIZE;
6231
    #else
6232
25.5M
        t = b->dp[j];
6233
25.5M
        SP_ASM_ADDC(l, h, t);
6234
25.5M
        r->dp[i] = l;
6235
25.5M
        l = h;
6236
25.5M
        h = 0;
6237
25.5M
    #endif
6238
25.5M
    }
6239
39.3M
    r->used = i;
6240
#ifndef SQR_MUL_ASM
6241
    if (t != 0) {
6242
       r->dp[i] = (sp_int_digit)t;
6243
       r->used++;
6244
    }
6245
#else
6246
39.3M
    if (l != 0) {
6247
15.5M
       r->dp[i] = l;
6248
15.5M
       r->used++;
6249
15.5M
    }
6250
39.3M
#endif
6251
6252
39.3M
    sp_clamp(r);
6253
6254
39.3M
    return MP_OKAY;
6255
39.3M
}
6256
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
6257
6258
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
6259
    !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
6260
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
6261
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
6262
 * a must be greater than b.
6263
 *
6264
 * @param  [in]   a  SP integer to subtract from.
6265
 * @param  [in]   b  SP integer to subtract.
6266
 * @param  [out]  r  SP integer to store result in.
6267
 * @param  [in]   o  Number of digits to offset b.
6268
 *
6269
 * @return  MP_OKAY on success.
6270
 */
6271
static int _sp_sub_off(sp_int* a, sp_int* b, sp_int* r, int o)
6272
189M
{
6273
189M
    int i;
6274
189M
    int j;
6275
#ifndef SQR_MUL_ASM
6276
    sp_int_sword t = 0;
6277
#else
6278
189M
    sp_int_digit l = 0;
6279
189M
    sp_int_digit h = 0;
6280
189M
    sp_int_digit t = 0;
6281
189M
#endif
6282
6283
189M
    for (i = 0; (i < o) && (i < a->used); i++) {
6284
398k
        r->dp[i] = a->dp[i];
6285
398k
    }
6286
2.37G
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
6287
    #ifndef SQR_MUL_ASM
6288
        t += a->dp[i];
6289
        t -= b->dp[j];
6290
        r->dp[i] = (sp_int_digit)t;
6291
        t >>= SP_WORD_SIZE;
6292
    #else
6293
2.18G
        t = a->dp[i];
6294
2.18G
        SP_ASM_ADDC(l, h, t);
6295
2.18G
        t = b->dp[j];
6296
2.18G
        SP_ASM_SUBC(l, h, t);
6297
2.18G
        r->dp[i] = l;
6298
2.18G
        l = h;
6299
2.18G
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6300
2.18G
    #endif
6301
2.18G
    }
6302
285M
    for (; i < a->used; i++) {
6303
    #ifndef SQR_MUL_ASM
6304
        t += a->dp[i];
6305
        r->dp[i] = (sp_int_digit)t;
6306
        t >>= SP_WORD_SIZE;
6307
    #else
6308
95.4M
        t = a->dp[i];
6309
95.4M
        SP_ASM_ADDC(l, h, t);
6310
95.4M
        r->dp[i] = l;
6311
95.4M
        l = h;
6312
95.4M
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6313
95.4M
    #endif
6314
95.4M
    }
6315
189M
    r->used = i;
6316
189M
    sp_clamp(r);
6317
6318
189M
    return MP_OKAY;
6319
189M
}
6320
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
6321
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6322
6323
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
6324
/* Add b to a into r: r = a + b
6325
 *
6326
 * @param  [in]   a  SP integer to add to.
6327
 * @param  [in]   b  SP integer to add.
6328
 * @param  [out]  r  SP integer to store result in.
6329
 *
6330
 * @return  MP_OKAY on success.
6331
 * @return  MP_VAL when a, b, or r is NULL.
6332
 */
6333
int sp_add(sp_int* a, sp_int* b, sp_int* r)
6334
3.83M
{
6335
3.83M
    int err = MP_OKAY;
6336
6337
3.83M
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
6338
0
        err = MP_VAL;
6339
0
    }
6340
3.83M
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
6341
5
        err = MP_VAL;
6342
5
    }
6343
3.83M
    if (err == MP_OKAY) {
6344
3.83M
    #ifndef WOLFSSL_SP_INT_NEGATIVE
6345
3.83M
        err = _sp_add_off(a, b, r, 0);
6346
    #else
6347
        if (a->sign == b->sign) {
6348
            r->sign = a->sign;
6349
            err = _sp_add_off(a, b, r, 0);
6350
        }
6351
        else if (_sp_cmp_abs(a, b) != MP_LT) {
6352
            err = _sp_sub_off(a, b, r, 0);
6353
            if (sp_iszero(r)) {
6354
                r->sign = MP_ZPOS;
6355
            }
6356
            else {
6357
                r->sign = a->sign;
6358
            }
6359
        }
6360
        else {
6361
            err = _sp_sub_off(b, a, r, 0);
6362
            if (sp_iszero(r)) {
6363
                r->sign = MP_ZPOS;
6364
            }
6365
            else {
6366
                r->sign = b->sign;
6367
            }
6368
        }
6369
    #endif
6370
3.83M
    }
6371
6372
3.83M
    return err;
6373
3.83M
}
6374
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
6375
6376
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6377
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
6378
/* Subtract b from a into r: r = a - b
6379
 *
6380
 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
6381
 *
6382
 * @param  [in]   a  SP integer to subtract from.
6383
 * @param  [in]   b  SP integer to subtract.
6384
 * @param  [out]  r  SP integer to store result in.
6385
 *
6386
 * @return  MP_OKAY on success.
6387
 * @return  MP_VAL when a, b, or r is NULL.
6388
 */
6389
int sp_sub(sp_int* a, sp_int* b, sp_int* r)
6390
35.4M
{
6391
35.4M
    int err = MP_OKAY;
6392
6393
35.4M
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
6394
0
        err = MP_VAL;
6395
0
    }
6396
35.4M
    else {
6397
35.4M
    #ifndef WOLFSSL_SP_INT_NEGATIVE
6398
35.4M
        err = _sp_sub_off(a, b, r, 0);
6399
    #else
6400
        if (a->sign != b->sign) {
6401
            r->sign = a->sign;
6402
            err = _sp_add_off(a, b, r, 0);
6403
        }
6404
        else if (_sp_cmp_abs(a, b) != MP_LT) {
6405
            err = _sp_sub_off(a, b, r, 0);
6406
            if (sp_iszero(r)) {
6407
                r->sign = MP_ZPOS;
6408
            }
6409
            else {
6410
                r->sign = a->sign;
6411
            }
6412
        }
6413
        else {
6414
            err = _sp_sub_off(b, a, r, 0);
6415
            if (sp_iszero(r)) {
6416
                r->sign = MP_ZPOS;
6417
            }
6418
            else {
6419
                r->sign = 1 - a->sign;
6420
            }
6421
        }
6422
    #endif
6423
35.4M
    }
6424
6425
35.4M
    return err;
6426
35.4M
}
6427
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6428
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
6429
6430
/****************************
6431
 * Add/Subtract mod functions
6432
 ****************************/
6433
6434
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6435
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
6436
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
6437
/* Add two value and reduce: r = (a + b) % m
6438
 *
6439
 * @param  [in]   a  SP integer to add.
6440
 * @param  [in]   b  SP integer to add with.
6441
 * @param  [in]   m  SP integer that is the modulus.
6442
 * @param  [out]  r  SP integer to hold result.
6443
 *
6444
 * @return  MP_OKAY on success.
6445
 * @return  MP_VAL when a, b, m or r is NULL.
6446
 * @return  MP_MEM when dynamic memory allocation fails.
6447
 */
6448
int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6449
4.79k
{
6450
4.79k
    int err = MP_OKAY;
6451
4.79k
    int used = ((a == NULL) || (b == NULL)) ? 1 :
6452
4.79k
                   ((a->used >= b->used) ? a->used + 1 : b->used + 1);
6453
4.79k
    DECL_SP_INT(t, used);
6454
6455
4.79k
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6456
0
        err = MP_VAL;
6457
0
    }
6458
6459
4.79k
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
6460
#if 0
6461
    if (err == MP_OKAY) {
6462
        sp_print(a, "a");
6463
        sp_print(b, "b");
6464
        sp_print(m, "m");
6465
    }
6466
#endif
6467
6468
4.79k
    if (err == MP_OKAY) {
6469
4.73k
        err = sp_add(a, b, t);
6470
4.73k
    }
6471
4.79k
    if (err == MP_OKAY) {
6472
4.73k
        err = sp_mod(t, m, r);
6473
4.73k
    }
6474
6475
#if 0
6476
    if (err == MP_OKAY) {
6477
        sp_print(r, "rma");
6478
    }
6479
#endif
6480
6481
4.79k
    FREE_SP_INT(t, NULL);
6482
4.79k
    return err;
6483
4.79k
}
6484
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
6485
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
6486
6487
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
6488
    defined(HAVE_ECC))
6489
/* Sub b from a and reduce: r = (a - b) % m
6490
 * Result is always positive.
6491
 *
6492
 * @param  [in]   a  SP integer to subtract from
6493
 * @param  [in]   b  SP integer to subtract.
6494
 * @param  [in]   m  SP integer that is the modulus.
6495
 * @param  [out]  r  SP integer to hold result.
6496
 *
6497
 * @return  MP_OKAY on success.
6498
 * @return  MP_VAL when a, b, m or r is NULL.
6499
 * @return  MP_MEM when dynamic memory allocation fails.
6500
 */
6501
int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6502
4.16M
{
6503
4.16M
#ifndef WOLFSSL_SP_INT_NEGATIVE
6504
4.16M
    int err = MP_OKAY;
6505
4.16M
    int used = ((a == NULL) || (b == NULL) || (m == NULL)) ? 1 :
6506
4.16M
                   ((a->used >= m->used) ?
6507
4.16M
                       ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
6508
4.16M
                   ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
6509
4.16M
    DECL_SP_INT_ARRAY(t, used, 2);
6510
6511
4.16M
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6512
0
        err = MP_VAL;
6513
0
    }
6514
6515
#if 0
6516
    if (err == MP_OKAY) {
6517
        sp_print(a, "a");
6518
        sp_print(b, "b");
6519
        sp_print(m, "m");
6520
    }
6521
#endif
6522
6523
4.16M
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
6524
4.16M
    if (err == MP_OKAY) {
6525
4.16M
        if (_sp_cmp(a, m) != MP_LT) {
6526
4.16M
            err = sp_mod(a, m, t[0]);
6527
4.16M
            a = t[0];
6528
4.16M
        }
6529
4.16M
    }
6530
4.16M
    if (err == MP_OKAY) {
6531
4.16M
        if (_sp_cmp(b, m) != MP_LT) {
6532
3.82k
            err = sp_mod(b, m, t[1]);
6533
3.82k
            b = t[1];
6534
3.82k
        }
6535
4.16M
    }
6536
4.16M
    if (err == MP_OKAY) {
6537
4.16M
        if (_sp_cmp(a, b) == MP_LT) {
6538
3.63M
            err = sp_add(a, m, t[0]);
6539
3.63M
            a = t[0];
6540
3.63M
        }
6541
4.16M
    }
6542
4.16M
    if (err == MP_OKAY) {
6543
4.16M
        err = sp_sub(a, b, r);
6544
4.16M
    }
6545
6546
#if 0
6547
    if (err == MP_OKAY) {
6548
        sp_print(r, "rms");
6549
    }
6550
#endif
6551
6552
4.16M
    FREE_SP_INT_ARRAY(t, NULL);
6553
4.16M
    return err;
6554
6555
#else /* WOLFSSL_SP_INT_NEGATIVE */
6556
6557
    int err = MP_OKAY;
6558
    int used = ((a == NULL) || (b == NULL)) ? 1 :
6559
                   ((a->used >= b->used) ? a->used + 1 : b->used + 1);
6560
    DECL_SP_INT(t, used);
6561
6562
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6563
        err = MP_VAL;
6564
    }
6565
6566
#if 0
6567
    if (err == MP_OKAY) {
6568
        sp_print(a, "a");
6569
        sp_print(b, "b");
6570
        sp_print(m, "m");
6571
    }
6572
#endif
6573
6574
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
6575
    if (err == MP_OKAY) {
6576
        err = sp_sub(a, b, t);
6577
    }
6578
    if (err == MP_OKAY) {
6579
        err = sp_mod(t, m, r);
6580
    }
6581
6582
#if 0
6583
    if (err == MP_OKAY) {
6584
        sp_print(r, "rms");
6585
    }
6586
#endif
6587
6588
    FREE_SP_INT(t, NULL);
6589
    return err;
6590
#endif /* WOLFSSL_SP_INT_NEGATIVE */
6591
4.16M
}
6592
#endif /* WOLFSSL_SP_MATH_ALL */
6593
6594
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6595
/* Add two value and reduce: r = (a + b) % m
6596
 *
6597
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
6598
 *
6599
 * Assumes a, b, m and r are not NULL.
6600
 * m and r must not be the same pointer.
6601
 *
6602
 * @param  [in]   a  SP integer to add.
6603
 * @param  [in]   b  SP integer to add with.
6604
 * @param  [in]   m  SP integer that is the modulus.
6605
 * @param  [out]  r  SP integer to hold result.
6606
 *
6607
 * @return  MP_OKAY on success.
6608
 */
6609
int sp_addmod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6610
53.3M
{
6611
53.3M
    int err = MP_OKAY;
6612
#ifndef SQR_MUL_ASM
6613
    sp_int_sword w;
6614
    sp_int_sword s;
6615
#else
6616
53.3M
    sp_int_digit wl;
6617
53.3M
    sp_int_digit wh;
6618
53.3M
    sp_int_digit sl;
6619
53.3M
    sp_int_digit sh;
6620
53.3M
    sp_int_digit t;
6621
53.3M
#endif
6622
53.3M
    sp_int_digit mask;
6623
53.3M
    int i;
6624
6625
53.3M
    if (r->size < m->used) {
6626
0
        err = MP_VAL;
6627
0
    }
6628
53.3M
    if ((err == MP_OKAY) && (r == m)) {
6629
0
        err = MP_VAL;
6630
0
    }
6631
6632
53.3M
    if (err == MP_OKAY) {
6633
#if 0
6634
        sp_print(a, "a");
6635
        sp_print(b, "b");
6636
        sp_print(m, "m");
6637
#endif
6638
6639
        /* Add a to b into r. Do the subtract of modulus but don't store result.
6640
         * When subtract result is negative, the overflow will be negative.
6641
         * Only need to subtract mod when result is positive - overflow is
6642
         * positive.
6643
         */
6644
    #ifndef SQR_MUL_ASM
6645
        w = 0;
6646
        s = 0;
6647
    #else
6648
53.3M
        wl = 0;
6649
53.3M
        wh = 0;
6650
53.3M
        sl = 0;
6651
53.3M
        sh = 0;
6652
53.3M
    #endif
6653
377M
        for (i = 0; i < m->used; i++) {
6654
            /* Values past 'used' are not initialized. */
6655
323M
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6656
323M
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
6657
6658
        #ifndef SQR_MUL_ASM
6659
            w         += a->dp[i] & mask_a;
6660
            w         += b->dp[i] & mask_b;
6661
            r->dp[i]   = (sp_int_digit)w;
6662
            s         += (sp_int_digit)w;
6663
            s         -= m->dp[i];
6664
            s        >>= DIGIT_BIT;
6665
            w        >>= DIGIT_BIT;
6666
        #else
6667
323M
            t = a->dp[i] & mask_a;
6668
323M
            SP_ASM_ADDC(wl, wh, t);
6669
323M
            t = b->dp[i] & mask_b;
6670
323M
            SP_ASM_ADDC(wl, wh, t);
6671
323M
            r->dp[i] = wl;
6672
323M
            SP_ASM_ADDC(sl, sh, wl);
6673
323M
            t = m->dp[i];
6674
323M
            SP_ASM_SUBC(sl, sh, t);
6675
323M
            sl = sh;
6676
323M
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE-1));
6677
323M
            wl = wh;
6678
323M
            wh = 0;
6679
323M
        #endif
6680
323M
        }
6681
    #ifndef SQR_MUL_ASM
6682
        s += (sp_int_digit)w;
6683
        /* s will be positive when subtracting modulus is needed. */
6684
        mask = (sp_int_digit)0 - (s >= 0);
6685
    #else
6686
53.3M
        SP_ASM_ADDC(sl, sh, wl);
6687
        /* s will be positive when subtracting modulus is needed. */
6688
53.3M
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
6689
53.3M
    #endif
6690
6691
        /* Constant time, conditionally, subtract modulus from sum. */
6692
    #ifndef SQR_MUL_ASM
6693
        w = 0;
6694
    #else
6695
53.3M
        wl = 0;
6696
53.3M
        wh = 0;
6697
53.3M
    #endif
6698
377M
        for (i = 0; i < m->used; i++) {
6699
        #ifndef SQR_MUL_ASM
6700
            w         += r->dp[i];
6701
            w         -= m->dp[i] & mask;
6702
            r->dp[i]   = (sp_int_digit)w;
6703
            w        >>= DIGIT_BIT;
6704
        #else
6705
323M
            t = r->dp[i];
6706
323M
            SP_ASM_ADDC(wl, wh, t);
6707
323M
            t = m->dp[i] & mask;
6708
323M
            SP_ASM_SUBC(wl, wh, t);
6709
323M
            r->dp[i] = wl;
6710
323M
            wl = wh;
6711
323M
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
6712
323M
        #endif
6713
323M
        }
6714
        /* Result will always have digits equal to or less than those in
6715
         * modulus. */
6716
53.3M
        r->used = i;
6717
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6718
        r->sign = MP_ZPOS;
6719
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
6720
53.3M
        sp_clamp(r);
6721
6722
#if 0
6723
        sp_print(r, "rma");
6724
#endif
6725
53.3M
    }
6726
6727
53.3M
    return err;
6728
53.3M
}
6729
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6730
6731
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6732
/* Sub b from a and reduce: r = (a - b) % m
6733
 * Result is always positive.
6734
 *
6735
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
6736
 *
6737
 * Assumes a, b, m and r are not NULL.
6738
 * m and r must not be the same pointer.
6739
 *
6740
 * @param  [in]   a  SP integer to subtract from
6741
 * @param  [in]   b  SP integer to subtract.
6742
 * @param  [in]   m  SP integer that is the modulus.
6743
 * @param  [out]  r  SP integer to hold result.
6744
 *
6745
 * @return  MP_OKAY on success.
6746
 */
6747
int sp_submod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6748
69.5M
{
6749
69.5M
    int err = MP_OKAY;
6750
#ifndef SQR_MUL_ASM
6751
    sp_int_sword w;
6752
#else
6753
69.5M
    sp_int_digit l;
6754
69.5M
    sp_int_digit h;
6755
69.5M
    sp_int_digit t;
6756
69.5M
#endif
6757
69.5M
    sp_int_digit mask;
6758
69.5M
    int i;
6759
6760
69.5M
    if (r->size < m->used + 1) {
6761
1
        err = MP_VAL;
6762
1
    }
6763
69.5M
    if ((err == MP_OKAY) && (r == m)) {
6764
0
        err = MP_VAL;
6765
0
    }
6766
6767
69.5M
    if (err == MP_OKAY) {
6768
#if 0
6769
        sp_print(a, "a");
6770
        sp_print(b, "b");
6771
        sp_print(m, "m");
6772
#endif
6773
6774
        /* In constant time, subtract b from a putting result in r. */
6775
    #ifndef SQR_MUL_ASM
6776
        w = 0;
6777
    #else
6778
69.5M
        l = 0;
6779
69.5M
        h = 0;
6780
69.5M
    #endif
6781
491M
        for (i = 0; i < m->used; i++) {
6782
            /* Values past 'used' are not initialized. */
6783
421M
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6784
421M
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
6785
6786
        #ifndef SQR_MUL_ASM
6787
            w         += a->dp[i] & mask_a;
6788
            w         -= b->dp[i] & mask_b;
6789
            r->dp[i]   = (sp_int_digit)w;
6790
            w        >>= DIGIT_BIT;
6791
        #else
6792
421M
            t = a->dp[i] & mask_a;
6793
421M
            SP_ASM_ADDC(l, h, t);
6794
421M
            t = b->dp[i] & mask_b;
6795
421M
            SP_ASM_SUBC(l, h, t);
6796
421M
            r->dp[i] = l;
6797
421M
            l = h;
6798
421M
            h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6799
421M
        #endif
6800
421M
        }
6801
        /* When w is negative then we need to add modulus to make result
6802
         * positive. */
6803
    #ifndef SQR_MUL_ASM
6804
        mask = (sp_int_digit)0 - (w < 0);
6805
    #else
6806
69.5M
        mask = h;
6807
69.5M
    #endif
6808
        /* Constant time, conditionally, add modulus to difference. */
6809
    #ifndef SQR_MUL_ASM
6810
        w = 0;
6811
    #else
6812
69.5M
        l = 0;
6813
69.5M
        h = 0;
6814
69.5M
    #endif
6815
491M
        for (i = 0; i < m->used; i++) {
6816
        #ifndef SQR_MUL_ASM
6817
            w         += r->dp[i];
6818
            w         += m->dp[i] & mask;
6819
            r->dp[i]   = (sp_int_digit)w;
6820
            w        >>= DIGIT_BIT;
6821
        #else
6822
421M
            t = r->dp[i];
6823
421M
            SP_ASM_ADDC(l, h, t);
6824
421M
            t = m->dp[i] & mask;
6825
421M
            SP_ASM_ADDC(l, h, t);
6826
421M
            r->dp[i] = l;
6827
421M
            l = h;
6828
421M
            h = 0;
6829
421M
        #endif
6830
421M
        }
6831
69.5M
        r->used = i;
6832
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6833
        r->sign = MP_ZPOS;
6834
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
6835
69.5M
        sp_clamp(r);
6836
6837
#if 0
6838
        sp_print(r, "rms");
6839
#endif
6840
69.5M
    }
6841
6842
69.5M
    return err;
6843
69.5M
}
6844
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6845
6846
/********************
6847
 * Shifting functions
6848
 ********************/
6849
6850
#if !defined(NO_DH) || defined(HAVE_ECC) || (defined(WC_RSA_BLINDING) && \
6851
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
6852
/* Left shift the multi-precision number by a number of digits.
6853
 *
6854
 * @param  [in,out]  a  SP integer to shift.
6855
 * @param  [in]      s  Number of digits to shift.
6856
 *
6857
 * @return  MP_OKAY on success.
6858
 * @return  MP_VAL when a is NULL or the result is too big to fit in an SP.
6859
 */
6860
int sp_lshd(sp_int* a, int s)
6861
8
{
6862
8
    int err = MP_OKAY;
6863
6864
8
    if (a == NULL) {
6865
0
        err = MP_VAL;
6866
0
    }
6867
8
    if ((err == MP_OKAY) && (a->used + s > a->size)) {
6868
0
        err = MP_VAL;
6869
0
    }
6870
8
    if (err == MP_OKAY) {
6871
8
        XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
6872
8
        a->used += s;
6873
8
        XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
6874
8
        sp_clamp(a);
6875
8
    }
6876
6877
8
    return err;
6878
8
}
6879
#endif
6880
6881
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6882
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6883
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
6884
/* Left shift the multi-precision number by n bits.
6885
 * Bits may be larger than the word size.
6886
 *
6887
 * @param  [in,out]  a  SP integer to shift.
6888
 * @param  [in]      n  Number of bits to shift left.
6889
 *
6890
 * @return  MP_OKAY on success.
6891
 */
6892
static int sp_lshb(sp_int* a, int n)
6893
8.73M
{
6894
8.73M
    int err = MP_OKAY;
6895
6896
8.73M
    if (a->used != 0) {
6897
8.73M
        int s = n >> SP_WORD_SHIFT;
6898
8.73M
        int i;
6899
6900
8.73M
        if (a->used + s >= a->size) {
6901
3
            err = MP_VAL;
6902
3
        }
6903
8.73M
        if (err == MP_OKAY) {
6904
8.73M
            n &= SP_WORD_MASK;
6905
8.73M
            if (n != 0) {
6906
8.60M
                sp_int_digit v;
6907
6908
8.60M
                v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
6909
8.60M
                a->dp[a->used - 1 + s] = a->dp[a->used - 1] << n;
6910
83.0M
                for (i = a->used - 2; i >= 0; i--) {
6911
74.3M
                    a->dp[i + 1 + s] |= a->dp[i] >> (SP_WORD_SIZE - n);
6912
74.3M
                    a->dp[i     + s] = a->dp[i] << n;
6913
74.3M
                }
6914
8.60M
                if (v != 0) {
6915
3.78M
                    a->dp[a->used + s] = v;
6916
3.78M
                    a->used++;
6917
3.78M
                }
6918
8.60M
            }
6919
127k
            else if (s > 0) {
6920
74
                for (i = a->used - 1; i >= 0; i--) {
6921
37
                    a->dp[i + s] = a->dp[i];
6922
37
                }
6923
37
            }
6924
8.73M
            a->used += s;
6925
8.73M
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
6926
8.73M
        }
6927
8.73M
    }
6928
6929
8.73M
    return err;
6930
8.73M
}
6931
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6932
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6933
6934
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6935
    !defined(NO_DH) || defined(HAVE_ECC) || \
6936
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
6937
/* Shift a right by n digits into r: r = a >> (n * SP_WORD_SIZE)
6938
 *
6939
 * @param  [in]   a  SP integer to shift.
6940
 * @param  [in]   n  Number of digits to shift.
6941
 * @param  [out]  r  SP integer to store result in.
6942
 */
6943
void sp_rshd(sp_int* a, int c)
6944
178
{
6945
178
    if (a != NULL) {
6946
178
        int i;
6947
178
        int j;
6948
6949
178
        if (c >= a->used) {
6950
77
            _sp_zero(a);
6951
77
        }
6952
101
        else {
6953
1.87k
            for (i = c, j = 0; i < a->used; i++, j++) {
6954
1.77k
                a->dp[j] = a->dp[i];
6955
1.77k
            }
6956
101
            a->used -= c;
6957
101
        }
6958
178
    }
6959
178
}
6960
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
6961
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6962
6963
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6964
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6965
    defined(WOLFSSL_HAVE_SP_DH)
6966
/* Shift a right by n bits into r: r = a >> n
6967
 *
6968
 * @param  [in]   a  SP integer to shift.
6969
 * @param  [in]   n  Number of bits to shift.
6970
 * @param  [out]  r  SP integer to store result in.
6971
 */
6972
void sp_rshb(sp_int* a, int n, sp_int* r)
6973
171M
{
6974
171M
    int i = n >> SP_WORD_SHIFT;
6975
6976
171M
    if (i >= a->used) {
6977
1.53M
        _sp_zero(r);
6978
1.53M
    }
6979
169M
    else {
6980
169M
        int j;
6981
6982
169M
        n &= SP_WORD_SIZE - 1;
6983
169M
        if (n == 0) {
6984
208M
            for (j = 0; i < a->used; i++, j++)
6985
180M
                r->dp[j] = a->dp[i];
6986
28.1M
            r->used = j;
6987
28.1M
        }
6988
141M
        else if (n > 0) {
6989
822M
            for (j = 0; i < a->used-1; i++, j++)
6990
681M
                r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
6991
141M
            r->dp[j] = a->dp[i] >> n;
6992
141M
            r->used = j + 1;
6993
141M
            sp_clamp(r);
6994
141M
        }
6995
#ifdef WOLFSSL_SP_INT_NEGATIVE
6996
        if (sp_iszero(r)) {
6997
            r->sign = MP_ZPOS;
6998
        }
6999
        else {
7000
            r->sign = a->sign;
7001
        }
7002
#endif
7003
169M
    }
7004
171M
}
7005
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7006
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
7007
7008
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7009
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7010
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
7011
/* Divide a by d and return the quotient in r and the remainder in rem.
7012
 *   r = a / d; rem = a % d
7013
 *
7014
 * @param  [in]   a    SP integer to be divided.
7015
 * @param  [in]   d    SP integer to divide by.
7016
 * @param  [out]  r    SP integer that is the quotient.
7017
 * @param  [out]  rem  SP integer that is the remainder.
7018
 *
7019
 * @return  MP_OKAY on success.
7020
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
7021
 * @return  MP_MEM when dynamic memory allocation fails.
7022
 */
7023
#ifndef WOLFSSL_SP_MATH_ALL
7024
static
7025
#endif
7026
int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
7027
8.30M
{
7028
8.30M
    int err = MP_OKAY;
7029
8.30M
    int ret;
7030
8.30M
    int done = 0;
7031
8.30M
    int i;
7032
8.30M
    int s = 0;
7033
8.30M
    sp_int_digit dt;
7034
8.30M
    sp_int_digit t;
7035
8.30M
    sp_int* sa = NULL;
7036
8.30M
    sp_int* sd = NULL;
7037
8.30M
    sp_int* tr = NULL;
7038
8.30M
    sp_int* trial = NULL;
7039
#ifdef WOLFSSL_SP_INT_NEGATIVE
7040
    int aSign = MP_ZPOS;
7041
    int dSign = MP_ZPOS;
7042
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7043
8.30M
    DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4);
7044
7045
8.30M
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
7046
0
        err = MP_VAL;
7047
0
    }
7048
8.30M
    if ((err == MP_OKAY) && sp_iszero(d)) {
7049
228
        err = MP_VAL;
7050
228
    }
7051
8.30M
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
7052
0
        err = MP_VAL;
7053
0
    }
7054
8.30M
    if ((err == MP_OKAY) && (rem != NULL)) {
7055
8.30M
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
7056
0
            err = MP_VAL;
7057
0
        }
7058
8.30M
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
7059
0
            err = MP_VAL;
7060
0
        }
7061
8.30M
    }
7062
    /* May need to shift number being divided left into a new word. */
7063
8.30M
    if ((err == MP_OKAY) && (a->used == SP_INT_DIGITS)) {
7064
41
        int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
7065
41
        if ((bits != SP_WORD_SIZE) &&
7066
41
                (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
7067
7
            err = MP_VAL;
7068
7
        }
7069
41
    }
7070
7071
#if 0
7072
    if (err == MP_OKAY) {
7073
        sp_print(a, "a");
7074
        sp_print(d, "b");
7075
    }
7076
#endif
7077
7078
8.30M
    if (err == MP_OKAY) {
7079
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7080
        aSign = a->sign;
7081
        dSign = d->sign;
7082
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
7083
7084
8.30M
        ret = _sp_cmp_abs(a, d);
7085
8.30M
        if (ret == MP_LT) {
7086
2.89M
            if (rem != NULL) {
7087
2.89M
                sp_copy(a, rem);
7088
2.89M
            }
7089
2.89M
            if (r != NULL) {
7090
33
                sp_set(r, 0);
7091
33
            }
7092
2.89M
            done = 1;
7093
2.89M
        }
7094
5.41M
        else if (ret == MP_EQ) {
7095
4.15M
            if (rem != NULL) {
7096
4.15M
                sp_set(rem, 0);
7097
4.15M
            }
7098
4.15M
            if (r != NULL) {
7099
41
                sp_set(r, 1);
7100
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7101
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7102
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
7103
41
            }
7104
4.15M
            done = 1;
7105
4.15M
        }
7106
1.25M
        else if (sp_count_bits(a) == sp_count_bits(d)) {
7107
            /* a is greater than d but same bit length */
7108
7.97k
            if (rem != NULL) {
7109
7.96k
                _sp_sub_off(a, d, rem, 0);
7110
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7111
                rem->sign = aSign;
7112
            #endif
7113
7.96k
            }
7114
7.97k
            if (r != NULL) {
7115
14
                sp_set(r, 1);
7116
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7117
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7118
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
7119
14
            }
7120
7.97k
            done = 1;
7121
7.97k
        }
7122
8.30M
    }
7123
7124
8.30M
    if ((!done) && (err == MP_OKAY)) {
7125
1.24M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7126
1.24M
    !defined(WOLFSSL_SP_NO_MALLOC)
7127
1.24M
        int cnt = 4;
7128
1.24M
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
7129
1.24M
            cnt--;
7130
1.24M
        }
7131
1.24M
        if ((r != NULL) && (r != d)) {
7132
536
            cnt--;
7133
536
        }
7134
        /* Macro always has code associated with it and checks err first. */
7135
1.24M
        ALLOC_SP_INT_ARRAY(td, a->used + 1, cnt, err, NULL);
7136
#else
7137
        ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL);
7138
#endif
7139
1.24M
    }
7140
7141
8.30M
    if ((!done) && (err == MP_OKAY)) {
7142
1.24M
        sd    = td[0];
7143
1.24M
        trial = td[1];
7144
1.24M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7145
1.24M
    !defined(WOLFSSL_SP_NO_MALLOC)
7146
1.24M
        i = 2;
7147
1.24M
        sa    = ((rem != NULL) && (rem != d) && (rem->size > a->used)) ? rem :
7148
1.24M
            td[i++];
7149
1.24M
        tr    = ((r != NULL) && (r != d)) ? r : td[i];
7150
#else
7151
        sa    = td[2];
7152
        tr    = td[3];
7153
#endif
7154
7155
1.24M
        sp_init_size(sd, d->used + 1);
7156
1.24M
        sp_init_size(trial, a->used + 1);
7157
1.24M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7158
1.24M
    !defined(WOLFSSL_SP_NO_MALLOC)
7159
1.24M
        if (sa != rem) {
7160
1.27k
            sp_init_size(sa, a->used + 1);
7161
1.27k
        }
7162
1.24M
        if (tr != r) {
7163
1.24M
            sp_init_size(tr, a->used - d->used + 2);
7164
1.24M
        }
7165
#else
7166
        sp_init_size(sa, a->used + 1);
7167
        sp_init_size(tr, a->used - d->used + 2);
7168
#endif
7169
7170
1.24M
        s = sp_count_bits(d);
7171
1.24M
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
7172
1.24M
        sp_copy(a, sa);
7173
1.24M
        if (s != SP_WORD_SIZE) {
7174
690k
            err = sp_lshb(sa, s);
7175
690k
            if (err == MP_OKAY) {
7176
690k
                sp_copy(d, sd);
7177
690k
                d = sd;
7178
690k
                err = sp_lshb(sd, s);
7179
690k
            }
7180
690k
        }
7181
1.24M
    }
7182
8.30M
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
7183
#ifdef WOLFSSL_SP_SMALL
7184
        int c;
7185
#else
7186
1.24M
        int j;
7187
1.24M
        int o;
7188
    #ifndef SQR_MUL_ASM
7189
        sp_int_sword sw;
7190
    #else
7191
1.24M
        sp_int_digit sl;
7192
1.24M
        sp_int_digit sh;
7193
1.24M
        sp_int_digit st;
7194
1.24M
    #endif
7195
1.24M
#endif /* WOLFSSL_SP_SMALL */
7196
#ifdef WOLFSSL_SP_INT_NEGATIVE
7197
        sa->sign = MP_ZPOS;
7198
        sd->sign = MP_ZPOS;
7199
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7200
7201
1.24M
        tr->used = sa->used - d->used + 1;
7202
1.24M
        sp_clear(tr);
7203
1.24M
        tr->used = sa->used - d->used + 1;
7204
1.24M
        dt = d->dp[d->used-1];
7205
7206
1.24M
        for (i = d->used - 1; i > 0; i--) {
7207
971k
            if (sa->dp[sa->used - d->used + i] != d->dp[i]) {
7208
970k
                break;
7209
970k
            }
7210
971k
        }
7211
1.24M
        if (sa->dp[sa->used - d->used + i] >= d->dp[i]) {
7212
2.91k
            i = sa->used;
7213
2.91k
            _sp_sub_off(sa, d, sa, sa->used - d->used);
7214
            /* Keep the same used so that 0 zeros will be put in. */
7215
2.91k
            sa->used = i;
7216
2.91k
            if (r != NULL) {
7217
29
                tr->dp[sa->used - d->used] = 1;
7218
29
            }
7219
2.91k
        }
7220
6.78M
        for (i = sa->used - 1; i >= d->used; i--) {
7221
5.53M
            if (sa->dp[i] == dt) {
7222
1.63k
                t = SP_DIGIT_MAX;
7223
1.63k
            }
7224
5.53M
            else {
7225
5.53M
                t = sp_div_word(sa->dp[i], sa->dp[i-1], dt);
7226
5.53M
            }
7227
7228
#ifdef WOLFSSL_SP_SMALL
7229
            do {
7230
                err = _sp_mul_d(d, t, trial, i - d->used);
7231
                if (err != MP_OKAY) {
7232
                    break;
7233
                }
7234
                c = _sp_cmp_abs(trial, sa);
7235
                if (c == MP_GT) {
7236
                    t--;
7237
                }
7238
            }
7239
            while (c == MP_GT);
7240
7241
            if (err != MP_OKAY) {
7242
                break;
7243
            }
7244
7245
            _sp_sub_off(sa, trial, sa, 0);
7246
            tr->dp[i - d->used] += t;
7247
            if (tr->dp[i - d->used] < t) {
7248
                tr->dp[i + 1 - d->used]++;
7249
            }
7250
#else
7251
5.53M
            o = i - d->used;
7252
6.54M
            do {
7253
            #ifndef SQR_MUL_ASM
7254
                sp_int_word tw = 0;
7255
            #else
7256
6.54M
                sp_int_digit tl = 0;
7257
6.54M
                sp_int_digit th = 0;
7258
6.54M
            #endif
7259
98.4M
                for (j = 0; j < d->used; j++) {
7260
                #ifndef SQR_MUL_ASM
7261
                    tw += (sp_int_word)d->dp[j] * t;
7262
                    trial->dp[j] = (sp_int_digit)tw;
7263
                    tw >>= SP_WORD_SIZE;
7264
                #else
7265
91.9M
                    SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
7266
91.9M
                    trial->dp[j] = tl;
7267
91.9M
                    tl = th;
7268
91.9M
                    th = 0;
7269
91.9M
                #endif
7270
91.9M
                }
7271
              #ifndef SQR_MUL_ASM
7272
                trial->dp[j] = (sp_int_digit)tw;
7273
              #else
7274
6.54M
                trial->dp[j] = tl;
7275
6.54M
              #endif
7276
7277
10.7M
                for (j = d->used; j > 0; j--) {
7278
10.6M
                    if (trial->dp[j] != sa->dp[j + o]) {
7279
6.38M
                        break;
7280
6.38M
                    }
7281
10.6M
                }
7282
6.54M
                if (trial->dp[j] > sa->dp[j + o]) {
7283
1.01M
                    t--;
7284
1.01M
                }
7285
6.54M
            }
7286
6.54M
            while (trial->dp[j] > sa->dp[j + o]);
7287
7288
        #ifndef SQR_MUL_ASM
7289
            sw = 0;
7290
        #else
7291
5.53M
            sl = 0;
7292
5.53M
            sh = 0;
7293
5.53M
        #endif
7294
80.2M
            for (j = 0; j <= d->used; j++) {
7295
            #ifndef SQR_MUL_ASM
7296
                sw += sa->dp[j + o];
7297
                sw -= trial->dp[j];
7298
                sa->dp[j + o] = (sp_int_digit)sw;
7299
                sw >>= SP_WORD_SIZE;
7300
            #else
7301
74.6M
                st = sa->dp[j + o];
7302
74.6M
                SP_ASM_ADDC(sl, sh, st);
7303
74.6M
                st = trial->dp[j];
7304
74.6M
                SP_ASM_SUBC(sl, sh, st);
7305
74.6M
                sa->dp[j + o] = sl;
7306
74.6M
                sl = sh;
7307
74.6M
                sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
7308
74.6M
            #endif
7309
74.6M
            }
7310
7311
5.53M
            tr->dp[o] = t;
7312
5.53M
#endif /* WOLFSSL_SP_SMALL */
7313
5.53M
        }
7314
1.24M
        sa->used = i + 1;
7315
7316
1.24M
        if ((err == MP_OKAY) && (rem != NULL)) {
7317
#ifdef WOLFSSL_SP_INT_NEGATIVE
7318
            sa->sign = (sa->used == 0) ? MP_ZPOS : aSign;
7319
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7320
1.24M
            if (s != SP_WORD_SIZE) {
7321
689k
                sp_rshb(sa, s, sa);
7322
689k
            }
7323
1.24M
            sp_copy(sa, rem);
7324
1.24M
            sp_clamp(rem);
7325
#ifdef WOLFSSL_SP_INT_NEGATIVE
7326
            if (sp_iszero(rem)) {
7327
                rem->sign = MP_ZPOS;
7328
            }
7329
#endif
7330
1.24M
        }
7331
1.24M
        if ((err == MP_OKAY) && (r != NULL)) {
7332
534
            sp_copy(tr, r);
7333
534
            sp_clamp(r);
7334
#ifdef WOLFSSL_SP_INT_NEGATIVE
7335
            if (sp_iszero(r)) {
7336
                r->sign = MP_ZPOS;
7337
            }
7338
            else {
7339
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7340
            }
7341
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7342
534
        }
7343
1.24M
    }
7344
7345
#if 0
7346
    if (err == MP_OKAY) {
7347
        if (rem != NULL) {
7348
            sp_print(rem, "rdr");
7349
        }
7350
        if (r != NULL) {
7351
            sp_print(r, "rdw");
7352
        }
7353
    }
7354
#endif
7355
7356
8.30M
    FREE_SP_INT_ARRAY(td, NULL);
7357
8.30M
    return err;
7358
8.30M
}
7359
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
7360
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7361
7362
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7363
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7364
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
7365
#ifndef FREESCALE_LTC_TFM
7366
/* Calculate the remainder of dividing a by m: r = a mod m.
7367
 *
7368
 * @param  [in]   a  SP integer to reduce.
7369
 * @param  [in]   m  SP integer that is the modulus.
7370
 * @param  [out]  r  SP integer to store result in.
7371
 *
7372
 * @return  MP_OKAY on success.
7373
 * @return  MP_VAL when a, m or r is NULL or m is 0.
7374
 */
7375
int sp_mod(sp_int* a, sp_int* m, sp_int* r)
7376
14.4M
{
7377
14.4M
    int err = MP_OKAY;
7378
#ifdef WOLFSSL_SP_INT_NEGATIVE
7379
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
7380
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7381
7382
14.4M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
7383
0
        err = MP_VAL;
7384
0
    }
7385
7386
14.4M
#ifndef WOLFSSL_SP_INT_NEGATIVE
7387
14.4M
    if (err == MP_OKAY) {
7388
14.4M
        err = sp_div(a, m, NULL, r);
7389
14.4M
    }
7390
#else
7391
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
7392
    if (err == MP_OKAY) {
7393
        sp_init_size(t, a->used + 1);
7394
        err = sp_div(a, m, NULL, t);
7395
    }
7396
    if (err == MP_OKAY) {
7397
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
7398
            err = sp_add(t, m, r);
7399
        }
7400
        else {
7401
            err = sp_copy(t, r);
7402
        }
7403
    }
7404
7405
    FREE_SP_INT(t, NULL);
7406
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7407
7408
14.4M
    return err;
7409
14.4M
}
7410
#endif /* !FREESCALE_LTC_TFM */
7411
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
7412
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7413
7414
/* START SP_MUL implementations. */
7415
/* This code is generated.
7416
 * To generate:
7417
 *   cd scripts/sp/sp_int
7418
 *   ./gen.sh
7419
 * File sp_mul.c contains code.
7420
 */
7421
7422
#ifdef SQR_MUL_ASM
7423
/* Multiply a by b into r where a and b have same no. digits. r = a * b
7424
 *
7425
 * Optimised code for when number of digits in a and b are the same.
7426
 *
7427
 * @param  [in]   a    SP integer to mulitply.
7428
 * @param  [in]   b    SP integer to mulitply by.
7429
 * @param  [out]  r    SP integer to hod reult.
7430
 *
7431
 * @return  MP_OKAY otherwise.
7432
 * @return  MP_MEM when dynamic memory allocation fails.
7433
 */
7434
static int _sp_mul_nxn(sp_int* a, sp_int* b, sp_int* r)
7435
38.8M
{
7436
38.8M
    int err = MP_OKAY;
7437
38.8M
    int i;
7438
38.8M
    int j;
7439
38.8M
    int k;
7440
38.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7441
38.8M
    sp_int_digit* t = NULL;
7442
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7443
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7444
    sp_int_digit t[a->used * 2];
7445
#else
7446
    sp_int_digit t[SP_INT_DIGITS];
7447
#endif
7448
7449
38.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7450
38.8M
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
7451
38.8M
        DYNAMIC_TYPE_BIGINT);
7452
38.8M
    if (t == NULL) {
7453
974
        err = MP_MEM;
7454
974
    }
7455
38.8M
#endif
7456
38.8M
    if (err == MP_OKAY) {
7457
38.8M
        sp_int_digit l, h, o;
7458
38.8M
        sp_int_digit* dp;
7459
7460
38.8M
        h = 0;
7461
38.8M
        l = 0;
7462
38.8M
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7463
38.8M
        t[0] = h;
7464
38.8M
        h = 0;
7465
38.8M
        o = 0;
7466
350M
        for (k = 1; k <= a->used - 1; k++) {
7467
311M
            j = k;
7468
311M
            dp = a->dp;
7469
2.14G
            for (; j >= 0; dp++, j--) {
7470
1.83G
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
7471
1.83G
            }
7472
311M
            t[k] = l;
7473
311M
            l = h;
7474
311M
            h = o;
7475
311M
            o = 0;
7476
311M
        }
7477
350M
        for (; k <= (a->used - 1) * 2; k++) {
7478
311M
            i = k - (b->used - 1);
7479
311M
            dp = &b->dp[b->used - 1];
7480
1.83G
            for (; i < a->used; i++, dp--) {
7481
1.52G
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
7482
1.52G
            }
7483
311M
            t[k] = l;
7484
311M
            l = h;
7485
311M
            h = o;
7486
311M
            o = 0;
7487
311M
        }
7488
38.8M
        t[k] = l;
7489
38.8M
        r->used = k + 1;
7490
38.8M
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7491
38.8M
        sp_clamp(r);
7492
38.8M
    }
7493
7494
38.8M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7495
38.8M
    if (t != NULL) {
7496
38.8M
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7497
38.8M
    }
7498
38.8M
#endif
7499
38.8M
    return err;
7500
38.8M
}
7501
7502
/* Multiply a by b into r. r = a * b
7503
 *
7504
 * @param  [in]   a    SP integer to mulitply.
7505
 * @param  [in]   b    SP integer to mulitply by.
7506
 * @param  [out]  r    SP integer to hod reult.
7507
 *
7508
 * @return  MP_OKAY otherwise.
7509
 * @return  MP_MEM when dynamic memory allocation fails.
7510
 */
7511
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
7512
2.55M
{
7513
2.55M
    int err = MP_OKAY;
7514
2.55M
    int i;
7515
2.55M
    int j;
7516
2.55M
    int k;
7517
2.55M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7518
2.55M
    sp_int_digit* t = NULL;
7519
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7520
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7521
    sp_int_digit t[a->used + b->used];
7522
#else
7523
    sp_int_digit t[SP_INT_DIGITS];
7524
#endif
7525
7526
2.55M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7527
2.55M
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
7528
2.55M
        DYNAMIC_TYPE_BIGINT);
7529
2.55M
    if (t == NULL) {
7530
207
        err = MP_MEM;
7531
207
    }
7532
2.55M
#endif
7533
2.55M
    if (err == MP_OKAY) {
7534
2.55M
        sp_int_digit l;
7535
2.55M
        sp_int_digit h;
7536
2.55M
        sp_int_digit o;
7537
7538
2.55M
        h = 0;
7539
2.55M
        l = 0;
7540
2.55M
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7541
2.55M
        t[0] = h;
7542
2.55M
        h = 0;
7543
2.55M
        o = 0;
7544
12.7M
        for (k = 1; k <= b->used - 1; k++) {
7545
10.1M
            i = 0;
7546
10.1M
            j = k;
7547
39.4M
            for (; (i < a->used) && (j >= 0); i++, j--) {
7548
29.2M
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
7549
29.2M
            }
7550
10.1M
            t[k] = l;
7551
10.1M
            l = h;
7552
10.1M
            h = o;
7553
10.1M
            o = 0;
7554
10.1M
        }
7555
10.7M
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
7556
8.22M
            j = b->used - 1;
7557
8.22M
            i = k - j;
7558
34.2M
            for (; (i < a->used) && (j >= 0); i++, j--) {
7559
26.0M
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
7560
26.0M
            }
7561
8.22M
            t[k] = l;
7562
8.22M
            l = h;
7563
8.22M
            h = o;
7564
8.22M
            o = 0;
7565
8.22M
        }
7566
2.55M
        t[k] = l;
7567
2.55M
        r->used = k + 1;
7568
2.55M
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7569
2.55M
        sp_clamp(r);
7570
2.55M
    }
7571
7572
2.55M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7573
2.55M
    if (t != NULL) {
7574
2.55M
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7575
2.55M
    }
7576
2.55M
#endif
7577
2.55M
    return err;
7578
2.55M
}
7579
#else
7580
/* Multiply a by b into r. r = a * b
7581
 *
7582
 * @param  [in]   a    SP integer to mulitply.
7583
 * @param  [in]   b    SP integer to mulitply by.
7584
 * @param  [out]  r    SP integer to hod reult.
7585
 *
7586
 * @return  MP_OKAY otherwise.
7587
 * @return  MP_MEM when dynamic memory allocation fails.
7588
 */
7589
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
7590
{
7591
    int err = MP_OKAY;
7592
    int i;
7593
    int j;
7594
    int k;
7595
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7596
    sp_int_digit* t = NULL;
7597
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7598
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7599
    sp_int_digit t[a->used + b->used];
7600
#else
7601
    sp_int_digit t[SP_INT_DIGITS];
7602
#endif
7603
7604
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7605
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
7606
        DYNAMIC_TYPE_BIGINT);
7607
    if (t == NULL) {
7608
        err = MP_MEM;
7609
    }
7610
#endif
7611
    if (err == MP_OKAY) {
7612
        sp_int_word w;
7613
        sp_int_word l;
7614
        sp_int_word h;
7615
    #ifdef SP_WORD_OVERFLOW
7616
        sp_int_word o;
7617
    #endif
7618
7619
        w = (sp_int_word)a->dp[0] * b->dp[0];
7620
        t[0] = (sp_int_digit)w;
7621
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
7622
        h = 0;
7623
    #ifdef SP_WORD_OVERFLOW
7624
        o = 0;
7625
    #endif
7626
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
7627
            i = k - (b->used - 1);
7628
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
7629
            j = k - i;
7630
            for (; (i < a->used) && (j >= 0); i++, j--) {
7631
                w = (sp_int_word)a->dp[i] * b->dp[j];
7632
                l += (sp_int_digit)w;
7633
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
7634
            #ifdef SP_WORD_OVERFLOW
7635
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
7636
                l &= SP_MASK;
7637
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
7638
                h &= SP_MASK;
7639
            #endif
7640
            }
7641
            t[k] = (sp_int_digit)l;
7642
            l >>= SP_WORD_SIZE;
7643
            l += (sp_int_digit)h;
7644
            h >>= SP_WORD_SIZE;
7645
        #ifdef SP_WORD_OVERFLOW
7646
            h += o & SP_MASK;
7647
            o >>= SP_WORD_SIZE;
7648
        #endif
7649
        }
7650
        t[k] = (sp_int_digit)l;
7651
        r->used = k + 1;
7652
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7653
        sp_clamp(r);
7654
    }
7655
7656
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7657
    if (t != NULL) {
7658
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7659
    }
7660
#endif
7661
    return err;
7662
}
7663
#endif
7664
7665
#ifndef WOLFSSL_SP_SMALL
7666
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
7667
#if SP_WORD_SIZE == 64
7668
#ifndef SQR_MUL_ASM
7669
/* Multiply a by b and store in r: r = a * b
7670
 *
7671
 * Long-hand implementation.
7672
 *
7673
 * @param  [in]   a  SP integer to multiply.
7674
 * @param  [in]   b  SP integer to multiply.
7675
 * @param  [out]  r  SP integer result.
7676
 *
7677
 * @return  MP_OKAY on success.
7678
 * @return  MP_MEM when dynamic memory allocation fails.
7679
 */
7680
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
7681
{
7682
    int err = MP_OKAY;
7683
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7684
    sp_int_word* w = NULL;
7685
#else
7686
    sp_int_word w[16];
7687
#endif
7688
    sp_int_digit* da = a->dp;
7689
    sp_int_digit* db = b->dp;
7690
7691
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7692
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
7693
        DYNAMIC_TYPE_BIGINT);
7694
    if (w == NULL) {
7695
        err = MP_MEM;
7696
    }
7697
#endif
7698
7699
    if (err == MP_OKAY) {
7700
        w[0] = (sp_int_word)da[0] * db[0];
7701
        w[1] = (sp_int_word)da[0] * db[1];
7702
        w[2] = (sp_int_word)da[1] * db[0];
7703
        w[3] = (sp_int_word)da[0] * db[2];
7704
        w[4] = (sp_int_word)da[1] * db[1];
7705
        w[5] = (sp_int_word)da[2] * db[0];
7706
        w[6] = (sp_int_word)da[0] * db[3];
7707
        w[7] = (sp_int_word)da[1] * db[2];
7708
        w[8] = (sp_int_word)da[2] * db[1];
7709
        w[9] = (sp_int_word)da[3] * db[0];
7710
        w[10] = (sp_int_word)da[1] * db[3];
7711
        w[11] = (sp_int_word)da[2] * db[2];
7712
        w[12] = (sp_int_word)da[3] * db[1];
7713
        w[13] = (sp_int_word)da[2] * db[3];
7714
        w[14] = (sp_int_word)da[3] * db[2];
7715
        w[15] = (sp_int_word)da[3] * db[3];
7716
7717
        r->dp[0] = w[0];
7718
        w[0] >>= SP_WORD_SIZE;
7719
        w[0] += (sp_int_digit)w[1];
7720
        w[0] += (sp_int_digit)w[2];
7721
        r->dp[1] = w[0];
7722
        w[0] >>= SP_WORD_SIZE;
7723
        w[1] >>= SP_WORD_SIZE;
7724
        w[0] += (sp_int_digit)w[1];
7725
        w[2] >>= SP_WORD_SIZE;
7726
        w[0] += (sp_int_digit)w[2];
7727
        w[0] += (sp_int_digit)w[3];
7728
        w[0] += (sp_int_digit)w[4];
7729
        w[0] += (sp_int_digit)w[5];
7730
        r->dp[2] = w[0];
7731
        w[0] >>= SP_WORD_SIZE;
7732
        w[3] >>= SP_WORD_SIZE;
7733
        w[0] += (sp_int_digit)w[3];
7734
        w[4] >>= SP_WORD_SIZE;
7735
        w[0] += (sp_int_digit)w[4];
7736
        w[5] >>= SP_WORD_SIZE;
7737
        w[0] += (sp_int_digit)w[5];
7738
        w[0] += (sp_int_digit)w[6];
7739
        w[0] += (sp_int_digit)w[7];
7740
        w[0] += (sp_int_digit)w[8];
7741
        w[0] += (sp_int_digit)w[9];
7742
        r->dp[3] = w[0];
7743
        w[0] >>= SP_WORD_SIZE;
7744
        w[6] >>= SP_WORD_SIZE;
7745
        w[0] += (sp_int_digit)w[6];
7746
        w[7] >>= SP_WORD_SIZE;
7747
        w[0] += (sp_int_digit)w[7];
7748
        w[8] >>= SP_WORD_SIZE;
7749
        w[0] += (sp_int_digit)w[8];
7750
        w[9] >>= SP_WORD_SIZE;
7751
        w[0] += (sp_int_digit)w[9];
7752
        w[0] += (sp_int_digit)w[10];
7753
        w[0] += (sp_int_digit)w[11];
7754
        w[0] += (sp_int_digit)w[12];
7755
        r->dp[4] = w[0];
7756
        w[0] >>= SP_WORD_SIZE;
7757
        w[10] >>= SP_WORD_SIZE;
7758
        w[0] += (sp_int_digit)w[10];
7759
        w[11] >>= SP_WORD_SIZE;
7760
        w[0] += (sp_int_digit)w[11];
7761
        w[12] >>= SP_WORD_SIZE;
7762
        w[0] += (sp_int_digit)w[12];
7763
        w[0] += (sp_int_digit)w[13];
7764
        w[0] += (sp_int_digit)w[14];
7765
        r->dp[5] = w[0];
7766
        w[0] >>= SP_WORD_SIZE;
7767
        w[13] >>= SP_WORD_SIZE;
7768
        w[0] += (sp_int_digit)w[13];
7769
        w[14] >>= SP_WORD_SIZE;
7770
        w[0] += (sp_int_digit)w[14];
7771
        w[0] += (sp_int_digit)w[15];
7772
        r->dp[6] = w[0];
7773
        w[0] >>= SP_WORD_SIZE;
7774
        w[15] >>= SP_WORD_SIZE;
7775
        w[0] += (sp_int_digit)w[15];
7776
        r->dp[7] = w[0];
7777
7778
        r->used = 8;
7779
        sp_clamp(r);
7780
    }
7781
7782
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7783
    if (w != NULL) {
7784
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
7785
    }
7786
#endif
7787
    return err;
7788
}
7789
#else /* SQR_MUL_ASM */
7790
/* Multiply a by b and store in r: r = a * b
7791
 *
7792
 * Comba implementation.
7793
 *
7794
 * @param  [in]   a  SP integer to multiply.
7795
 * @param  [in]   b  SP integer to multiply.
7796
 * @param  [out]  r  SP integer result.
7797
 *
7798
 * @return  MP_OKAY on success.
7799
 * @return  MP_MEM when dynamic memory allocation fails.
7800
 */
7801
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
7802
46.2M
{
7803
46.2M
    sp_int_digit l = 0;
7804
46.2M
    sp_int_digit h = 0;
7805
46.2M
    sp_int_digit o = 0;
7806
46.2M
    sp_int_digit t[4];
7807
7808
46.2M
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7809
46.2M
    t[0] = h;
7810
46.2M
    h = 0;
7811
46.2M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7812
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7813
46.2M
    t[1] = l;
7814
46.2M
    l = h;
7815
46.2M
    h = o;
7816
46.2M
    o = 0;
7817
46.2M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7818
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7819
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7820
46.2M
    t[2] = l;
7821
46.2M
    l = h;
7822
46.2M
    h = o;
7823
46.2M
    o = 0;
7824
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7825
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7826
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7827
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7828
46.2M
    t[3] = l;
7829
46.2M
    l = h;
7830
46.2M
    h = o;
7831
46.2M
    o = 0;
7832
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
7833
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
7834
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
7835
46.2M
    r->dp[4] = l;
7836
46.2M
    l = h;
7837
46.2M
    h = o;
7838
46.2M
    o = 0;
7839
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
7840
46.2M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
7841
46.2M
    r->dp[5] = l;
7842
46.2M
    l = h;
7843
46.2M
    h = o;
7844
46.2M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
7845
46.2M
    r->dp[6] = l;
7846
46.2M
    r->dp[7] = h;
7847
46.2M
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
7848
46.2M
    r->used = 8;
7849
46.2M
    sp_clamp(r);
7850
7851
46.2M
    return MP_OKAY;
7852
46.2M
}
7853
#endif /* SQR_MUL_ASM */
7854
#endif /* SP_WORD_SIZE == 64 */
7855
#if SP_WORD_SIZE == 64
7856
#ifdef SQR_MUL_ASM
7857
/* Multiply a by b and store in r: r = a * b
7858
 *
7859
 * Comba implementation.
7860
 *
7861
 * @param  [in]   a  SP integer to multiply.
7862
 * @param  [in]   b  SP integer to multiply.
7863
 * @param  [out]  r  SP integer result.
7864
 *
7865
 * @return  MP_OKAY on success.
7866
 * @return  MP_MEM when dynamic memory allocation fails.
7867
 */
7868
static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r)
7869
21.6M
{
7870
21.6M
    sp_int_digit l = 0;
7871
21.6M
    sp_int_digit h = 0;
7872
21.6M
    sp_int_digit o = 0;
7873
21.6M
    sp_int_digit t[6];
7874
7875
21.6M
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7876
21.6M
    t[0] = h;
7877
21.6M
    h = 0;
7878
21.6M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7879
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7880
21.6M
    t[1] = l;
7881
21.6M
    l = h;
7882
21.6M
    h = o;
7883
21.6M
    o = 0;
7884
21.6M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7885
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7886
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7887
21.6M
    t[2] = l;
7888
21.6M
    l = h;
7889
21.6M
    h = o;
7890
21.6M
    o = 0;
7891
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7892
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7893
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7894
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7895
21.6M
    t[3] = l;
7896
21.6M
    l = h;
7897
21.6M
    h = o;
7898
21.6M
    o = 0;
7899
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
7900
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
7901
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
7902
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
7903
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
7904
21.6M
    t[4] = l;
7905
21.6M
    l = h;
7906
21.6M
    h = o;
7907
21.6M
    o = 0;
7908
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
7909
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
7910
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
7911
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
7912
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
7913
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
7914
21.6M
    t[5] = l;
7915
21.6M
    l = h;
7916
21.6M
    h = o;
7917
21.6M
    o = 0;
7918
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
7919
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
7920
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
7921
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
7922
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
7923
21.6M
    r->dp[6] = l;
7924
21.6M
    l = h;
7925
21.6M
    h = o;
7926
21.6M
    o = 0;
7927
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
7928
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
7929
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
7930
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
7931
21.6M
    r->dp[7] = l;
7932
21.6M
    l = h;
7933
21.6M
    h = o;
7934
21.6M
    o = 0;
7935
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
7936
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
7937
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
7938
21.6M
    r->dp[8] = l;
7939
21.6M
    l = h;
7940
21.6M
    h = o;
7941
21.6M
    o = 0;
7942
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
7943
21.6M
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
7944
21.6M
    r->dp[9] = l;
7945
21.6M
    l = h;
7946
21.6M
    h = o;
7947
21.6M
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
7948
21.6M
    r->dp[10] = l;
7949
21.6M
    r->dp[11] = h;
7950
21.6M
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
7951
21.6M
    r->used = 12;
7952
21.6M
    sp_clamp(r);
7953
7954
21.6M
    return MP_OKAY;
7955
21.6M
}
7956
#endif /* SQR_MUL_ASM */
7957
#endif /* SP_WORD_SIZE == 64 */
7958
#if SP_WORD_SIZE == 32
7959
#ifdef SQR_MUL_ASM
7960
/* Multiply a by b and store in r: r = a * b
7961
 *
7962
 * Comba implementation.
7963
 *
7964
 * @param  [in]   a  SP integer to multiply.
7965
 * @param  [in]   b  SP integer to multiply.
7966
 * @param  [out]  r  SP integer result.
7967
 *
7968
 * @return  MP_OKAY on success.
7969
 * @return  MP_MEM when dynamic memory allocation fails.
7970
 */
7971
static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r)
7972
{
7973
    sp_int_digit l = 0;
7974
    sp_int_digit h = 0;
7975
    sp_int_digit o = 0;
7976
    sp_int_digit t[8];
7977
7978
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7979
    t[0] = h;
7980
    h = 0;
7981
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7982
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7983
    t[1] = l;
7984
    l = h;
7985
    h = o;
7986
    o = 0;
7987
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7988
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7989
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7990
    t[2] = l;
7991
    l = h;
7992
    h = o;
7993
    o = 0;
7994
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7995
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7996
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7997
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7998
    t[3] = l;
7999
    l = h;
8000
    h = o;
8001
    o = 0;
8002
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8003
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8004
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8005
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8006
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8007
    t[4] = l;
8008
    l = h;
8009
    h = o;
8010
    o = 0;
8011
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8012
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8013
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8014
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8015
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8016
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8017
    t[5] = l;
8018
    l = h;
8019
    h = o;
8020
    o = 0;
8021
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8022
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8023
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8024
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8025
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8026
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8027
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8028
    t[6] = l;
8029
    l = h;
8030
    h = o;
8031
    o = 0;
8032
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8033
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8034
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8035
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8036
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8037
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8038
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8039
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8040
    t[7] = l;
8041
    l = h;
8042
    h = o;
8043
    o = 0;
8044
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8045
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8046
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8047
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8048
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8049
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8050
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8051
    r->dp[8] = l;
8052
    l = h;
8053
    h = o;
8054
    o = 0;
8055
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8056
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8057
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8058
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8059
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8060
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8061
    r->dp[9] = l;
8062
    l = h;
8063
    h = o;
8064
    o = 0;
8065
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8066
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8067
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8068
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8069
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8070
    r->dp[10] = l;
8071
    l = h;
8072
    h = o;
8073
    o = 0;
8074
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8075
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8076
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8077
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8078
    r->dp[11] = l;
8079
    l = h;
8080
    h = o;
8081
    o = 0;
8082
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8083
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8084
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8085
    r->dp[12] = l;
8086
    l = h;
8087
    h = o;
8088
    o = 0;
8089
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8090
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8091
    r->dp[13] = l;
8092
    l = h;
8093
    h = o;
8094
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
8095
    r->dp[14] = l;
8096
    r->dp[15] = h;
8097
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
8098
    r->used = 16;
8099
    sp_clamp(r);
8100
8101
    return MP_OKAY;
8102
}
8103
#endif /* SQR_MUL_ASM */
8104
#endif /* SP_WORD_SIZE == 32 */
8105
#if SP_WORD_SIZE == 32
8106
#ifdef SQR_MUL_ASM
8107
/* Multiply a by b and store in r: r = a * b
8108
 *
8109
 * Comba implementation.
8110
 *
8111
 * @param  [in]   a  SP integer to multiply.
8112
 * @param  [in]   b  SP integer to multiply.
8113
 * @param  [out]  r  SP integer result.
8114
 *
8115
 * @return  MP_OKAY on success.
8116
 * @return  MP_MEM when dynamic memory allocation fails.
8117
 */
8118
static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r)
8119
{
8120
    sp_int_digit l = 0;
8121
    sp_int_digit h = 0;
8122
    sp_int_digit o = 0;
8123
    sp_int_digit t[12];
8124
8125
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8126
    t[0] = h;
8127
    h = 0;
8128
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8129
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8130
    t[1] = l;
8131
    l = h;
8132
    h = o;
8133
    o = 0;
8134
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8135
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8136
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8137
    t[2] = l;
8138
    l = h;
8139
    h = o;
8140
    o = 0;
8141
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8142
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8143
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8144
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8145
    t[3] = l;
8146
    l = h;
8147
    h = o;
8148
    o = 0;
8149
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8150
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8151
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8152
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8153
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8154
    t[4] = l;
8155
    l = h;
8156
    h = o;
8157
    o = 0;
8158
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8159
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8160
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8161
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8162
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8163
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8164
    t[5] = l;
8165
    l = h;
8166
    h = o;
8167
    o = 0;
8168
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8169
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8170
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8171
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8172
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8173
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8174
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8175
    t[6] = l;
8176
    l = h;
8177
    h = o;
8178
    o = 0;
8179
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8180
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8181
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8182
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8183
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8184
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8185
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8186
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8187
    t[7] = l;
8188
    l = h;
8189
    h = o;
8190
    o = 0;
8191
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8192
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8193
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8194
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8195
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8196
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8197
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8198
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8199
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8200
    t[8] = l;
8201
    l = h;
8202
    h = o;
8203
    o = 0;
8204
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8205
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8206
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8207
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8208
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8209
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8210
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8211
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8212
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8213
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8214
    t[9] = l;
8215
    l = h;
8216
    h = o;
8217
    o = 0;
8218
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8219
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8220
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8221
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8222
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8223
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8224
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8225
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8226
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8227
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8228
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8229
    t[10] = l;
8230
    l = h;
8231
    h = o;
8232
    o = 0;
8233
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8234
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8235
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8236
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8237
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8238
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8239
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8240
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8241
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8242
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8243
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8244
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8245
    t[11] = l;
8246
    l = h;
8247
    h = o;
8248
    o = 0;
8249
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8250
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8251
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8252
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8253
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8254
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8255
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8256
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8257
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8258
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8259
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8260
    r->dp[12] = l;
8261
    l = h;
8262
    h = o;
8263
    o = 0;
8264
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8265
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8266
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8267
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8268
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8269
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8270
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8271
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8272
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8273
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8274
    r->dp[13] = l;
8275
    l = h;
8276
    h = o;
8277
    o = 0;
8278
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8279
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8280
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8281
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8282
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8283
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8284
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8285
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8286
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8287
    r->dp[14] = l;
8288
    l = h;
8289
    h = o;
8290
    o = 0;
8291
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
8292
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
8293
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
8294
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
8295
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
8296
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
8297
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
8298
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
8299
    r->dp[15] = l;
8300
    l = h;
8301
    h = o;
8302
    o = 0;
8303
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
8304
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
8305
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
8306
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
8307
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
8308
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
8309
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
8310
    r->dp[16] = l;
8311
    l = h;
8312
    h = o;
8313
    o = 0;
8314
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
8315
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
8316
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
8317
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
8318
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
8319
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
8320
    r->dp[17] = l;
8321
    l = h;
8322
    h = o;
8323
    o = 0;
8324
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
8325
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
8326
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
8327
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
8328
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
8329
    r->dp[18] = l;
8330
    l = h;
8331
    h = o;
8332
    o = 0;
8333
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
8334
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
8335
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
8336
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
8337
    r->dp[19] = l;
8338
    l = h;
8339
    h = o;
8340
    o = 0;
8341
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
8342
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
8343
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
8344
    r->dp[20] = l;
8345
    l = h;
8346
    h = o;
8347
    o = 0;
8348
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
8349
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
8350
    r->dp[21] = l;
8351
    l = h;
8352
    h = o;
8353
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
8354
    r->dp[22] = l;
8355
    r->dp[23] = h;
8356
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
8357
    r->used = 24;
8358
    sp_clamp(r);
8359
8360
    return MP_OKAY;
8361
}
8362
#endif /* SQR_MUL_ASM */
8363
#endif /* SP_WORD_SIZE == 32 */
8364
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
8365
8366
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
8367
    #if SP_INT_DIGITS >= 32
8368
/* Multiply a by b and store in r: r = a * b
8369
 *
8370
 * Comba implementation.
8371
 *
8372
 * @param  [in]   a  SP integer to multiply.
8373
 * @param  [in]   b  SP integer to multiply.
8374
 * @param  [out]  r  SP integer result.
8375
 *
8376
 * @return  MP_OKAY on success.
8377
 * @return  MP_MEM when dynamic memory allocation fails.
8378
 */
8379
static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r)
8380
{
8381
    int err = MP_OKAY;
8382
    sp_int_digit l = 0;
8383
    sp_int_digit h = 0;
8384
    sp_int_digit o = 0;
8385
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8386
    sp_int_digit* t = NULL;
8387
#else
8388
    sp_int_digit t[16];
8389
#endif
8390
8391
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8392
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
8393
         DYNAMIC_TYPE_BIGINT);
8394
     if (t == NULL) {
8395
         err = MP_MEM;
8396
     }
8397
#endif
8398
    if (err == MP_OKAY) {
8399
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8400
        t[0] = h;
8401
        h = 0;
8402
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8403
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8404
        t[1] = l;
8405
        l = h;
8406
        h = o;
8407
        o = 0;
8408
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8409
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8410
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8411
        t[2] = l;
8412
        l = h;
8413
        h = o;
8414
        o = 0;
8415
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8416
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8417
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8418
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8419
        t[3] = l;
8420
        l = h;
8421
        h = o;
8422
        o = 0;
8423
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8424
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8425
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8426
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8427
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8428
        t[4] = l;
8429
        l = h;
8430
        h = o;
8431
        o = 0;
8432
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8433
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8434
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8435
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8436
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8437
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8438
        t[5] = l;
8439
        l = h;
8440
        h = o;
8441
        o = 0;
8442
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8443
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8444
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8445
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8446
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8447
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8448
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8449
        t[6] = l;
8450
        l = h;
8451
        h = o;
8452
        o = 0;
8453
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8454
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8455
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8456
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8457
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8458
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8459
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8460
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8461
        t[7] = l;
8462
        l = h;
8463
        h = o;
8464
        o = 0;
8465
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8466
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8467
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8468
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8469
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8470
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8471
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8472
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8473
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8474
        t[8] = l;
8475
        l = h;
8476
        h = o;
8477
        o = 0;
8478
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8479
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8480
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8481
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8482
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8483
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8484
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8485
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8486
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8487
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8488
        t[9] = l;
8489
        l = h;
8490
        h = o;
8491
        o = 0;
8492
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8493
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8494
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8495
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8496
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8497
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8498
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8499
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8500
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8501
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8502
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8503
        t[10] = l;
8504
        l = h;
8505
        h = o;
8506
        o = 0;
8507
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8508
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8509
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8510
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8511
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8512
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8513
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8514
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8515
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8516
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8517
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8518
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8519
        t[11] = l;
8520
        l = h;
8521
        h = o;
8522
        o = 0;
8523
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
8524
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8525
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8526
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8527
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8528
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8529
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8530
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8531
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8532
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8533
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8534
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8535
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
8536
        t[12] = l;
8537
        l = h;
8538
        h = o;
8539
        o = 0;
8540
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
8541
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
8542
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8543
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8544
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8545
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8546
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8547
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8548
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8549
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8550
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8551
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8552
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
8553
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
8554
        t[13] = l;
8555
        l = h;
8556
        h = o;
8557
        o = 0;
8558
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
8559
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
8560
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
8561
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8562
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8563
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8564
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8565
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8566
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8567
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8568
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8569
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8570
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
8571
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
8572
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
8573
        t[14] = l;
8574
        l = h;
8575
        h = o;
8576
        o = 0;
8577
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
8578
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
8579
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
8580
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
8581
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
8582
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
8583
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
8584
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
8585
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
8586
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
8587
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
8588
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
8589
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
8590
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
8591
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
8592
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
8593
        t[15] = l;
8594
        l = h;
8595
        h = o;
8596
        o = 0;
8597
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
8598
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
8599
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
8600
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
8601
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
8602
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
8603
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
8604
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
8605
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
8606
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
8607
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
8608
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
8609
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
8610
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
8611
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
8612
        r->dp[16] = l;
8613
        l = h;
8614
        h = o;
8615
        o = 0;
8616
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
8617
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
8618
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
8619
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
8620
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
8621
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
8622
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
8623
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
8624
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
8625
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
8626
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
8627
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
8628
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
8629
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
8630
        r->dp[17] = l;
8631
        l = h;
8632
        h = o;
8633
        o = 0;
8634
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
8635
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
8636
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
8637
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
8638
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
8639
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
8640
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
8641
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
8642
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
8643
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
8644
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
8645
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
8646
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
8647
        r->dp[18] = l;
8648
        l = h;
8649
        h = o;
8650
        o = 0;
8651
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
8652
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
8653
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
8654
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
8655
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
8656
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
8657
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
8658
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
8659
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
8660
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
8661
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
8662
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
8663
        r->dp[19] = l;
8664
        l = h;
8665
        h = o;
8666
        o = 0;
8667
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
8668
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
8669
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
8670
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
8671
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
8672
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
8673
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
8674
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
8675
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
8676
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
8677
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
8678
        r->dp[20] = l;
8679
        l = h;
8680
        h = o;
8681
        o = 0;
8682
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
8683
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
8684
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
8685
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
8686
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
8687
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
8688
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
8689
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
8690
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
8691
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
8692
        r->dp[21] = l;
8693
        l = h;
8694
        h = o;
8695
        o = 0;
8696
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
8697
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
8698
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
8699
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
8700
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
8701
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
8702
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
8703
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
8704
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
8705
        r->dp[22] = l;
8706
        l = h;
8707
        h = o;
8708
        o = 0;
8709
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
8710
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
8711
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
8712
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
8713
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
8714
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
8715
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
8716
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
8717
        r->dp[23] = l;
8718
        l = h;
8719
        h = o;
8720
        o = 0;
8721
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
8722
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
8723
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
8724
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
8725
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
8726
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
8727
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
8728
        r->dp[24] = l;
8729
        l = h;
8730
        h = o;
8731
        o = 0;
8732
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
8733
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
8734
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
8735
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
8736
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
8737
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
8738
        r->dp[25] = l;
8739
        l = h;
8740
        h = o;
8741
        o = 0;
8742
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
8743
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
8744
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
8745
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
8746
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
8747
        r->dp[26] = l;
8748
        l = h;
8749
        h = o;
8750
        o = 0;
8751
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
8752
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
8753
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
8754
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
8755
        r->dp[27] = l;
8756
        l = h;
8757
        h = o;
8758
        o = 0;
8759
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
8760
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
8761
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
8762
        r->dp[28] = l;
8763
        l = h;
8764
        h = o;
8765
        o = 0;
8766
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
8767
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
8768
        r->dp[29] = l;
8769
        l = h;
8770
        h = o;
8771
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
8772
        r->dp[30] = l;
8773
        r->dp[31] = h;
8774
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
8775
        r->used = 32;
8776
        sp_clamp(r);
8777
    }
8778
8779
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8780
    if (t != NULL) {
8781
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
8782
    }
8783
#endif
8784
    return err;
8785
}
8786
    #endif /* SP_INT_DIGITS >= 32 */
8787
8788
    #if SP_INT_DIGITS >= 48
8789
/* Multiply a by b and store in r: r = a * b
8790
 *
8791
 * Comba implementation.
8792
 *
8793
 * @param  [in]   a  SP integer to multiply.
8794
 * @param  [in]   b  SP integer to multiply.
8795
 * @param  [out]  r  SP integer result.
8796
 *
8797
 * @return  MP_OKAY on success.
8798
 * @return  MP_MEM when dynamic memory allocation fails.
8799
 */
8800
static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r)
8801
{
8802
    int err = MP_OKAY;
8803
    sp_int_digit l = 0;
8804
    sp_int_digit h = 0;
8805
    sp_int_digit o = 0;
8806
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8807
    sp_int_digit* t = NULL;
8808
#else
8809
    sp_int_digit t[24];
8810
#endif
8811
8812
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8813
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
8814
         DYNAMIC_TYPE_BIGINT);
8815
     if (t == NULL) {
8816
         err = MP_MEM;
8817
     }
8818
#endif
8819
    if (err == MP_OKAY) {
8820
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8821
        t[0] = h;
8822
        h = 0;
8823
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8824
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8825
        t[1] = l;
8826
        l = h;
8827
        h = o;
8828
        o = 0;
8829
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8830
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8831
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8832
        t[2] = l;
8833
        l = h;
8834
        h = o;
8835
        o = 0;
8836
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8837
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8838
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8839
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8840
        t[3] = l;
8841
        l = h;
8842
        h = o;
8843
        o = 0;
8844
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8845
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8846
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8847
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8848
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8849
        t[4] = l;
8850
        l = h;
8851
        h = o;
8852
        o = 0;
8853
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8854
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8855
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8856
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8857
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8858
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8859
        t[5] = l;
8860
        l = h;
8861
        h = o;
8862
        o = 0;
8863
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8864
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8865
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8866
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8867
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8868
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8869
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8870
        t[6] = l;
8871
        l = h;
8872
        h = o;
8873
        o = 0;
8874
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8875
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8876
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8877
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8878
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8879
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8880
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8881
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8882
        t[7] = l;
8883
        l = h;
8884
        h = o;
8885
        o = 0;
8886
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8887
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8888
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8889
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8890
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8891
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8892
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8893
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8894
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8895
        t[8] = l;
8896
        l = h;
8897
        h = o;
8898
        o = 0;
8899
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8900
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8901
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8902
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8903
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8904
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8905
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8906
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8907
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8908
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8909
        t[9] = l;
8910
        l = h;
8911
        h = o;
8912
        o = 0;
8913
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8914
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8915
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8916
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8917
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8918
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8919
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8920
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8921
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8922
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8923
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8924
        t[10] = l;
8925
        l = h;
8926
        h = o;
8927
        o = 0;
8928
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8929
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8930
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8931
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8932
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8933
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8934
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8935
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8936
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8937
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8938
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8939
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8940
        t[11] = l;
8941
        l = h;
8942
        h = o;
8943
        o = 0;
8944
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
8945
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8946
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8947
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8948
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8949
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8950
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8951
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8952
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8953
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8954
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8955
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8956
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
8957
        t[12] = l;
8958
        l = h;
8959
        h = o;
8960
        o = 0;
8961
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
8962
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
8963
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8964
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8965
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8966
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8967
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8968
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8969
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8970
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8971
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8972
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8973
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
8974
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
8975
        t[13] = l;
8976
        l = h;
8977
        h = o;
8978
        o = 0;
8979
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
8980
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
8981
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
8982
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8983
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8984
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8985
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8986
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8987
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8988
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8989
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8990
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8991
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
8992
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
8993
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
8994
        t[14] = l;
8995
        l = h;
8996
        h = o;
8997
        o = 0;
8998
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
8999
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
9000
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
9001
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
9002
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
9003
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
9004
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
9005
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
9006
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
9007
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
9008
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
9009
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
9010
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
9011
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
9012
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
9013
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
9014
        t[15] = l;
9015
        l = h;
9016
        h = o;
9017
        o = 0;
9018
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
9019
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
9020
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
9021
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
9022
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
9023
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
9024
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
9025
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
9026
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
9027
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
9028
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
9029
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
9030
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
9031
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
9032
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
9033
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
9034
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
9035
        t[16] = l;
9036
        l = h;
9037
        h = o;
9038
        o = 0;
9039
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
9040
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
9041
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
9042
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
9043
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
9044
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
9045
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
9046
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
9047
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
9048
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
9049
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
9050
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
9051
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
9052
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
9053
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
9054
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
9055
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
9056
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
9057
        t[17] = l;
9058
        l = h;
9059
        h = o;
9060
        o = 0;
9061
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
9062
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
9063
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
9064
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
9065
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
9066
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
9067
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
9068
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
9069
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
9070
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
9071
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
9072
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
9073
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
9074
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
9075
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
9076
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
9077
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
9078
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
9079
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
9080
        t[18] = l;
9081
        l = h;
9082
        h = o;
9083
        o = 0;
9084
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
9085
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
9086
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
9087
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
9088
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
9089
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
9090
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
9091
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
9092
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
9093
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
9094
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
9095
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
9096
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
9097
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
9098
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
9099
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
9100
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
9101
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
9102
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
9103
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
9104
        t[19] = l;
9105
        l = h;
9106
        h = o;
9107
        o = 0;
9108
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
9109
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
9110
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
9111
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
9112
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
9113
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
9114
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
9115
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
9116
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
9117
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
9118
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
9119
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
9120
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
9121
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
9122
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
9123
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
9124
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
9125
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
9126
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
9127
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
9128
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
9129
        t[20] = l;
9130
        l = h;
9131
        h = o;
9132
        o = 0;
9133
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
9134
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
9135
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
9136
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
9137
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
9138
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
9139
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
9140
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
9141
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
9142
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
9143
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
9144
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
9145
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
9146
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
9147
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
9148
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
9149
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
9150
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
9151
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
9152
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
9153
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
9154
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
9155
        t[21] = l;
9156
        l = h;
9157
        h = o;
9158
        o = 0;
9159
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
9160
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
9161
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
9162
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
9163
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
9164
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
9165
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
9166
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
9167
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
9168
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
9169
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
9170
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
9171
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
9172
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
9173
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
9174
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
9175
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
9176
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
9177
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
9178
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
9179
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
9180
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
9181
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
9182
        t[22] = l;
9183
        l = h;
9184
        h = o;
9185
        o = 0;
9186
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
9187
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
9188
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
9189
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
9190
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
9191
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
9192
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
9193
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
9194
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
9195
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
9196
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
9197
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
9198
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
9199
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
9200
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
9201
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
9202
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
9203
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
9204
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
9205
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
9206
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
9207
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
9208
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
9209
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
9210
        t[23] = l;
9211
        l = h;
9212
        h = o;
9213
        o = 0;
9214
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
9215
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
9216
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
9217
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
9218
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
9219
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
9220
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
9221
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
9222
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
9223
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
9224
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
9225
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
9226
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
9227
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
9228
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
9229
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
9230
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
9231
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
9232
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
9233
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
9234
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
9235
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
9236
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
9237
        r->dp[24] = l;
9238
        l = h;
9239
        h = o;
9240
        o = 0;
9241
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
9242
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
9243
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
9244
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
9245
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
9246
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
9247
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
9248
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
9249
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
9250
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
9251
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
9252
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
9253
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
9254
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
9255
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
9256
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
9257
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
9258
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
9259
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
9260
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
9261
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
9262
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
9263
        r->dp[25] = l;
9264
        l = h;
9265
        h = o;
9266
        o = 0;
9267
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
9268
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
9269
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
9270
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
9271
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
9272
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
9273
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
9274
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
9275
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
9276
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
9277
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
9278
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
9279
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
9280
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
9281
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
9282
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
9283
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
9284
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
9285
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
9286
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
9287
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
9288
        r->dp[26] = l;
9289
        l = h;
9290
        h = o;
9291
        o = 0;
9292
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
9293
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
9294
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
9295
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
9296
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
9297
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
9298
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
9299
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
9300
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
9301
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
9302
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
9303
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
9304
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
9305
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
9306
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
9307
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
9308
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
9309
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
9310
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
9311
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
9312
        r->dp[27] = l;
9313
        l = h;
9314
        h = o;
9315
        o = 0;
9316
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
9317
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
9318
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
9319
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
9320
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
9321
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
9322
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
9323
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
9324
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
9325
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
9326
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
9327
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
9328
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
9329
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
9330
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
9331
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
9332
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
9333
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
9334
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
9335
        r->dp[28] = l;
9336
        l = h;
9337
        h = o;
9338
        o = 0;
9339
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
9340
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
9341
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
9342
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
9343
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
9344
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
9345
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
9346
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
9347
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
9348
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
9349
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
9350
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
9351
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
9352
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
9353
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
9354
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
9355
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
9356
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
9357
        r->dp[29] = l;
9358
        l = h;
9359
        h = o;
9360
        o = 0;
9361
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
9362
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
9363
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
9364
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
9365
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
9366
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
9367
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
9368
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
9369
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
9370
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
9371
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
9372
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
9373
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
9374
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
9375
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
9376
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
9377
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
9378
        r->dp[30] = l;
9379
        l = h;
9380
        h = o;
9381
        o = 0;
9382
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
9383
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
9384
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
9385
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
9386
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
9387
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
9388
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
9389
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
9390
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
9391
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
9392
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
9393
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
9394
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
9395
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
9396
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
9397
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
9398
        r->dp[31] = l;
9399
        l = h;
9400
        h = o;
9401
        o = 0;
9402
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
9403
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
9404
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
9405
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
9406
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
9407
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
9408
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
9409
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
9410
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
9411
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
9412
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
9413
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
9414
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
9415
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
9416
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
9417
        r->dp[32] = l;
9418
        l = h;
9419
        h = o;
9420
        o = 0;
9421
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
9422
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
9423
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
9424
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
9425
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
9426
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
9427
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
9428
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
9429
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
9430
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
9431
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
9432
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
9433
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
9434
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
9435
        r->dp[33] = l;
9436
        l = h;
9437
        h = o;
9438
        o = 0;
9439
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
9440
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
9441
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
9442
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
9443
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
9444
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
9445
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
9446
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
9447
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
9448
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
9449
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
9450
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
9451
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
9452
        r->dp[34] = l;
9453
        l = h;
9454
        h = o;
9455
        o = 0;
9456
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
9457
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
9458
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
9459
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
9460
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
9461
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
9462
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
9463
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
9464
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
9465
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
9466
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
9467
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
9468
        r->dp[35] = l;
9469
        l = h;
9470
        h = o;
9471
        o = 0;
9472
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
9473
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
9474
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
9475
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
9476
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
9477
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
9478
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
9479
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
9480
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
9481
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
9482
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
9483
        r->dp[36] = l;
9484
        l = h;
9485
        h = o;
9486
        o = 0;
9487
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
9488
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
9489
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
9490
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
9491
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
9492
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
9493
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
9494
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
9495
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
9496
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
9497
        r->dp[37] = l;
9498
        l = h;
9499
        h = o;
9500
        o = 0;
9501
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
9502
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
9503
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
9504
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
9505
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
9506
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
9507
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
9508
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
9509
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
9510
        r->dp[38] = l;
9511
        l = h;
9512
        h = o;
9513
        o = 0;
9514
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
9515
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
9516
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
9517
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
9518
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
9519
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
9520
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
9521
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
9522
        r->dp[39] = l;
9523
        l = h;
9524
        h = o;
9525
        o = 0;
9526
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
9527
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
9528
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
9529
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
9530
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
9531
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
9532
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
9533
        r->dp[40] = l;
9534
        l = h;
9535
        h = o;
9536
        o = 0;
9537
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
9538
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
9539
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
9540
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
9541
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
9542
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
9543
        r->dp[41] = l;
9544
        l = h;
9545
        h = o;
9546
        o = 0;
9547
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
9548
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
9549
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
9550
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
9551
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
9552
        r->dp[42] = l;
9553
        l = h;
9554
        h = o;
9555
        o = 0;
9556
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
9557
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
9558
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
9559
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
9560
        r->dp[43] = l;
9561
        l = h;
9562
        h = o;
9563
        o = 0;
9564
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
9565
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
9566
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
9567
        r->dp[44] = l;
9568
        l = h;
9569
        h = o;
9570
        o = 0;
9571
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
9572
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
9573
        r->dp[45] = l;
9574
        l = h;
9575
        h = o;
9576
        SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
9577
        r->dp[46] = l;
9578
        r->dp[47] = h;
9579
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
9580
        r->used = 48;
9581
        sp_clamp(r);
9582
    }
9583
9584
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9585
    if (t != NULL) {
9586
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9587
    }
9588
#endif
9589
    return err;
9590
}
9591
    #endif /* SP_INT_DIGITS >= 48 */
9592
9593
    #if SP_INT_DIGITS >= 64
9594
/* Multiply a by b and store in r: r = a * b
9595
 *
9596
 * Karatsuba implementation.
9597
 *
 * Each operand is handled as two 16-digit halves: digits 0..15 (a0, b0) and
 * digits 16..31 (a1, b1). Three 16-digit multiplications are performed with
 * _sp_mul_16() and recombined as:
 *   r = (z2 << 32 digits) + ((z1 - z0 - z2) << 16 digits) + z0
 * where z0 = a0 * b0, z2 = a1 * b1 and z1 = (a0 + a1) * (b0 + b1).
 *
 * Digits 16..31 of both a and b are read unconditionally, so both operands
 * are expected to hold 32 digits.
 *
9598
 * @param  [in]   a  SP integer to multiply.
9599
 * @param  [in]   b  SP integer to multiply.
9600
 * @param  [out]  r  SP integer result. Set to 64 digits and then clamped.
9601
 *
9602
 * @return  MP_OKAY on success.
9603
 * @return  MP_MEM when dynamic memory allocation fails.
9604
 */
9605
static int _sp_mul_32(sp_int* a, sp_int* b, sp_int* r)
9606
{
9607
    int err = MP_OKAY;
9608
    int i;
9609
    sp_int_digit l;
9610
    sp_int_digit h;
9611
    sp_int* a1;
9612
    sp_int* b1;
9613
    sp_int* z0;
9614
    sp_int* z1;
9615
    sp_int* z2;
9616
    sp_int_digit ca;
9617
    sp_int_digit cb;
    /* t: two temporaries for the 16-digit halves/sums (a1, b1).
     * z: two temporaries for the products (z1, z2); z1 is indexed up to
     *    dp[32] below, hence the size of 33. */
9618
    DECL_SP_INT_ARRAY(t, 16, 2);
9619
    DECL_SP_INT_ARRAY(z, 33, 2);
9620
9621
    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
9622
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
9623
    if (err == MP_OKAY) {
9624
        a1 = t[0];
9625
        b1 = t[1];
9626
        z1 = z[0];
9627
        z2 = z[1];
        /* z0 is computed directly into the result. */
9628
        z0 = r;
9629
9630
        /* a1 = top 16 digits of a, b1 = top 16 digits of b. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
9631
        a1->used = 16;
9632
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
9633
        b1->used = 16;
9634
9635
        /* z2 = a1 * b1 */
9636
        err = _sp_mul_16(a1, b1, z2);
9637
    }
9638
    if (err == MP_OKAY) {
9639
        /* a01 = a0 + a1
         * Sum is stored back into a1; l holds the running digit and h
         * presumably receives the carry from SP_ASM_ADDC (macro defined
         * elsewhere). */
        l = a1->dp[0];
9640
        h = 0;
9641
        SP_ASM_ADDC(l, h, a->dp[0]);
9642
        a1->dp[0] = l;
9643
        l = h;
9644
        h = 0;
9645
        for (i = 1; i < 16; i++) {
9646
            SP_ASM_ADDC(l, h, a1->dp[i]);
9647
            SP_ASM_ADDC(l, h, a->dp[i]);
9648
            a1->dp[i] = l;
9649
            l = h;
9650
            h = 0;
9651
        }
        /* Carry out of the 16-digit sum a0 + a1. */
9652
        ca = l;
9653
        /* b01 = b0 + b1 (stored back into b1) */
9654
        l = b1->dp[0];
9655
        h = 0;
9656
        SP_ASM_ADDC(l, h, b->dp[0]);
9657
        b1->dp[0] = l;
9658
        l = h;
9659
        h = 0;
9660
        for (i = 1; i < 16; i++) {
9661
            SP_ASM_ADDC(l, h, b1->dp[i]);
9662
            SP_ASM_ADDC(l, h, b->dp[i]);
9663
            b1->dp[i] = l;
9664
            l = h;
9665
            h = 0;
9666
        }
        /* Carry out of the 16-digit sum b0 + b1. */
9667
        cb = l;
9668
9669
        /* z0 = a0 * b0
         * a and b are passed whole; presumably _sp_mul_16 only reads the low
         * 16 digits - confirm against its definition. */
9670
        err = _sp_mul_16(a, b, z0);
9671
    }
9672
    if (err == MP_OKAY) {
9673
        /* z1 = (a0 + a1) * (b0 + b1) - low 16 digits of each sum only; the
         * carries ca and cb are applied afterwards. */
9674
        err = _sp_mul_16(a1, b1, z1);
9675
    }
9676
    if (err == MP_OKAY) {
9677
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
9678
        /* r = z0 */
9679
        /* r += (z1 - z0 - z2) << 16 */
        /* Account for the carries of the two sums:
         *   ca * cb goes into digit 32 of z1,
         *   ca * b01 and cb * a01 are added at digit 16 of z1. */
9680
        z1->dp[32] = ca & cb;
9681
        l = 0;
9682
        if (ca) {
9683
            h = 0;
9684
            for (i = 0; i < 16; i++) {
9685
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
9686
                SP_ASM_ADDC(l, h, b1->dp[i]);
9687
                z1->dp[i + 16] = l;
9688
                l = h;
9689
                h = 0;
9690
            }
9691
        }
        /* Carry out of the addition above (0 when ca was not set). */
9692
        z1->dp[32] += l;
9693
        l = 0;
9694
        if (cb) {
9695
            h = 0;
9696
            for (i = 0; i < 16; i++) {
9697
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
9698
                SP_ASM_ADDC(l, h, a1->dp[i]);
9699
                z1->dp[i + 16] = l;
9700
                l = h;
9701
                h = 0;
9702
            }
9703
        }
        /* Carry out of the addition above (0 when cb was not set). */
9704
        z1->dp[32] += l;
9705
        /* z1 = z1 - z0 - z2 */
        /* NOTE(review): the borrow carried in l is combined with z1->dp[i]
         * using a plain '+=' while h restarts at 0. Confirm that a borrow
         * arriving at a digit where z1->dp[i] is 0 is still propagated to
         * the next digit. */
9706
        l = 0;
9707
        h = 0;
9708
        for (i = 0; i < 32; i++) {
9709
            l += z1->dp[i];
9710
            SP_ASM_SUBC(l, h, z0->dp[i]);
9711
            SP_ASM_SUBC(l, h, z2->dp[i]);
9712
            z1->dp[i] = l;
9713
            l = h;
9714
            h = 0;
9715
        }
        /* i == 32 here: apply the final borrow to the top digit of z1. */
9716
        z1->dp[i] += l;
9717
        /* r += z1 << 16 */
9718
        l = 0;
9719
        h = 0;
        /* Digits 16..31 of r already hold the top half of z0: add to them. */
9720
        for (i = 0; i < 16; i++) {
9721
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
9722
            SP_ASM_ADDC(l, h, z1->dp[i]);
9723
            r->dp[i + 16] = l;
9724
            l = h;
9725
            h = 0;
9726
        }
        /* Digits 32..48 of r are not read here: store z1 plus carry. */
9727
        for (; i < 33; i++) {
9728
            SP_ASM_ADDC(l, h, z1->dp[i]);
9729
            r->dp[i + 16] = l;
9730
            l = h;
9731
            h = 0;
9732
        }
9733
        /* r += z2 << 32 */
9734
        l = 0;
9735
        h = 0;
        /* Digits 32..48 of r were written by the previous step: add to them. */
9736
        for (i = 0; i < 17; i++) {
9737
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
9738
            SP_ASM_ADDC(l, h, z2->dp[i]);
9739
            r->dp[i + 32] = l;
9740
            l = h;
9741
            h = 0;
9742
        }
        /* Digits 49..63 of r are not read here: store z2 plus carry. */
9743
        for (; i < 32; i++) {
9744
            SP_ASM_ADDC(l, h, z2->dp[i]);
9745
            r->dp[i + 32] = l;
9746
            l = h;
9747
            h = 0;
9748
        }
        /* Full-width result is 64 digits; sp_clamp() is then applied to r. */
9749
        r->used = 64;
9750
        sp_clamp(r);
9751
    }
9752
9753
    FREE_SP_INT_ARRAY(z, NULL);
9754
    FREE_SP_INT_ARRAY(t, NULL);
9755
    return err;
9756
}
9757
    #endif /* SP_INT_DIGITS >= 64 */
9758
9759
    #if SP_INT_DIGITS >= 96
9760
/* Multiply a by b and store in r: r = a * b
9761
 *
9762
 * Karatsuba implementation.
9763
 *
9764
 * @param  [in]   a  SP integer to multiply.
9765
 * @param  [in]   b  SP integer to multiply.
9766
 * @param  [out]  r  SP integer result.
9767
 *
9768
 * @return  MP_OKAY on success.
9769
 * @return  MP_MEM when dynamic memory allocation fails.
9770
 */
9771
static int _sp_mul_48(sp_int* a, sp_int* b, sp_int* r)
9772
{
9773
    int err = MP_OKAY;
9774
    int i;
9775
    sp_int_digit l;
9776
    sp_int_digit h;
9777
    sp_int* a1;
9778
    sp_int* b1;
9779
    sp_int* z0;
9780
    sp_int* z1;
9781
    sp_int* z2;
9782
    sp_int_digit ca;
9783
    sp_int_digit cb;
9784
    DECL_SP_INT_ARRAY(t, 24, 2);
9785
    DECL_SP_INT_ARRAY(z, 49, 2);
9786
9787
    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
9788
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
9789
    if (err == MP_OKAY) {
9790
        a1 = t[0];
9791
        b1 = t[1];
9792
        z1 = z[0];
9793
        z2 = z[1];
9794
        z0 = r;
9795
9796
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
9797
        a1->used = 24;
9798
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
9799
        b1->used = 24;
9800
9801
        /* z2 = a1 * b1 */
9802
        err = _sp_mul_24(a1, b1, z2);
9803
    }
9804
    if (err == MP_OKAY) {
9805
        l = a1->dp[0];
9806
        h = 0;
9807
        SP_ASM_ADDC(l, h, a->dp[0]);
9808
        a1->dp[0] = l;
9809
        l = h;
9810
        h = 0;
9811
        for (i = 1; i < 24; i++) {
9812
            SP_ASM_ADDC(l, h, a1->dp[i]);
9813
            SP_ASM_ADDC(l, h, a->dp[i]);
9814
            a1->dp[i] = l;
9815
            l = h;
9816
            h = 0;
9817
        }
9818
        ca = l;
9819
        /* b01 = b0 + b1 */
9820
        l = b1->dp[0];
9821
        h = 0;
9822
        SP_ASM_ADDC(l, h, b->dp[0]);
9823
        b1->dp[0] = l;
9824
        l = h;
9825
        h = 0;
9826
        for (i = 1; i < 24; i++) {
9827
            SP_ASM_ADDC(l, h, b1->dp[i]);
9828
            SP_ASM_ADDC(l, h, b->dp[i]);
9829
            b1->dp[i] = l;
9830
            l = h;
9831
            h = 0;
9832
        }
9833
        cb = l;
9834
9835
        /* z0 = a0 * b0 */
9836
        err = _sp_mul_24(a, b, z0);
9837
    }
9838
    if (err == MP_OKAY) {
9839
        /* z1 = (a0 + a1) * (b0 + b1) */
9840
        err = _sp_mul_24(a1, b1, z1);
9841
    }
9842
    if (err == MP_OKAY) {
9843
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
9844
        /* r = z0 */
9845
        /* r += (z1 - z0 - z2) << 24 */
9846
        z1->dp[48] = ca & cb;
9847
        l = 0;
9848
        if (ca) {
9849
            h = 0;
9850
            for (i = 0; i < 24; i++) {
9851
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
9852
                SP_ASM_ADDC(l, h, b1->dp[i]);
9853
                z1->dp[i + 24] = l;
9854
                l = h;
9855
                h = 0;
9856
            }
9857
        }
9858
        z1->dp[48] += l;
9859
        l = 0;
9860
        if (cb) {
9861
            h = 0;
9862
            for (i = 0; i < 24; i++) {
9863
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
9864
                SP_ASM_ADDC(l, h, a1->dp[i]);
9865
                z1->dp[i + 24] = l;
9866
                l = h;
9867
                h = 0;
9868
            }
9869
        }
9870
        z1->dp[48] += l;
9871
        /* z1 = z1 - z0 - z2 */
9872
        l = 0;
9873
        h = 0;
9874
        for (i = 0; i < 48; i++) {
9875
            l += z1->dp[i];
9876
            SP_ASM_SUBC(l, h, z0->dp[i]);
9877
            SP_ASM_SUBC(l, h, z2->dp[i]);
9878
            z1->dp[i] = l;
9879
            l = h;
9880
            h = 0;
9881
        }
9882
        z1->dp[i] += l;
9883
        /* r += z1 << 24 */
9884
        l = 0;
9885
        h = 0;
9886
        for (i = 0; i < 24; i++) {
9887
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
9888
            SP_ASM_ADDC(l, h, z1->dp[i]);
9889
            r->dp[i + 24] = l;
9890
            l = h;
9891
            h = 0;
9892
        }
9893
        for (; i < 49; i++) {
9894
            SP_ASM_ADDC(l, h, z1->dp[i]);
9895
            r->dp[i + 24] = l;
9896
            l = h;
9897
            h = 0;
9898
        }
9899
        /* r += z2 << 48  */
9900
        l = 0;
9901
        h = 0;
9902
        for (i = 0; i < 25; i++) {
9903
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
9904
            SP_ASM_ADDC(l, h, z2->dp[i]);
9905
            r->dp[i + 48] = l;
9906
            l = h;
9907
            h = 0;
9908
        }
9909
        for (; i < 48; i++) {
9910
            SP_ASM_ADDC(l, h, z2->dp[i]);
9911
            r->dp[i + 48] = l;
9912
            l = h;
9913
            h = 0;
9914
        }
9915
        r->used = 96;
9916
        sp_clamp(r);
9917
    }
9918
9919
    FREE_SP_INT_ARRAY(z, NULL);
9920
    FREE_SP_INT_ARRAY(t, NULL);
9921
    return err;
9922
}
9923
    #endif /* SP_INT_DIGITS >= 96 */
9924
9925
    #if SP_INT_DIGITS >= 128
9926
/* Multiply a by b and store in r: r = a * b
9927
 *
9928
 * Karatsuba implementation.
9929
 *
9930
 * @param  [in]   a  SP integer to multiply.
9931
 * @param  [in]   b  SP integer to multiply.
9932
 * @param  [out]  r  SP integer result.
9933
 *
9934
 * @return  MP_OKAY on success.
9935
 * @return  MP_MEM when dynamic memory allocation fails.
9936
 */
9937
static int _sp_mul_64(sp_int* a, sp_int* b, sp_int* r)
9938
{
9939
    int err = MP_OKAY;
9940
    int i;
9941
    sp_int_digit l;
9942
    sp_int_digit h;
9943
    sp_int* a1;
9944
    sp_int* b1;
9945
    sp_int* z0;
9946
    sp_int* z1;
9947
    sp_int* z2;
9948
    sp_int_digit ca;
9949
    sp_int_digit cb;
9950
    DECL_SP_INT_ARRAY(t, 32, 2);
9951
    DECL_SP_INT_ARRAY(z, 65, 2);
9952
9953
    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
9954
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
9955
    if (err == MP_OKAY) {
9956
        a1 = t[0];
9957
        b1 = t[1];
9958
        z1 = z[0];
9959
        z2 = z[1];
9960
        z0 = r;
9961
9962
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
9963
        a1->used = 32;
9964
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
9965
        b1->used = 32;
9966
9967
        /* z2 = a1 * b1 */
9968
        err = _sp_mul_32(a1, b1, z2);
9969
    }
9970
    if (err == MP_OKAY) {
9971
        l = a1->dp[0];
9972
        h = 0;
9973
        SP_ASM_ADDC(l, h, a->dp[0]);
9974
        a1->dp[0] = l;
9975
        l = h;
9976
        h = 0;
9977
        for (i = 1; i < 32; i++) {
9978
            SP_ASM_ADDC(l, h, a1->dp[i]);
9979
            SP_ASM_ADDC(l, h, a->dp[i]);
9980
            a1->dp[i] = l;
9981
            l = h;
9982
            h = 0;
9983
        }
9984
        ca = l;
9985
        /* b01 = b0 + b1 */
9986
        l = b1->dp[0];
9987
        h = 0;
9988
        SP_ASM_ADDC(l, h, b->dp[0]);
9989
        b1->dp[0] = l;
9990
        l = h;
9991
        h = 0;
9992
        for (i = 1; i < 32; i++) {
9993
            SP_ASM_ADDC(l, h, b1->dp[i]);
9994
            SP_ASM_ADDC(l, h, b->dp[i]);
9995
            b1->dp[i] = l;
9996
            l = h;
9997
            h = 0;
9998
        }
9999
        cb = l;
10000
10001
        /* z0 = a0 * b0 */
10002
        err = _sp_mul_32(a, b, z0);
10003
    }
10004
    if (err == MP_OKAY) {
10005
        /* z1 = (a0 + a1) * (b0 + b1) */
10006
        err = _sp_mul_32(a1, b1, z1);
10007
    }
10008
    if (err == MP_OKAY) {
10009
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
10010
        /* r = z0 */
10011
        /* r += (z1 - z0 - z2) << 32 */
10012
        z1->dp[64] = ca & cb;
10013
        l = 0;
10014
        if (ca) {
10015
            h = 0;
10016
            for (i = 0; i < 32; i++) {
10017
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
10018
                SP_ASM_ADDC(l, h, b1->dp[i]);
10019
                z1->dp[i + 32] = l;
10020
                l = h;
10021
                h = 0;
10022
            }
10023
        }
10024
        z1->dp[64] += l;
10025
        l = 0;
10026
        if (cb) {
10027
            h = 0;
10028
            for (i = 0; i < 32; i++) {
10029
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
10030
                SP_ASM_ADDC(l, h, a1->dp[i]);
10031
                z1->dp[i + 32] = l;
10032
                l = h;
10033
                h = 0;
10034
            }
10035
        }
10036
        z1->dp[64] += l;
10037
        /* z1 = z1 - z0 - z2 */
10038
        l = 0;
10039
        h = 0;
10040
        for (i = 0; i < 64; i++) {
10041
            l += z1->dp[i];
10042
            SP_ASM_SUBC(l, h, z0->dp[i]);
10043
            SP_ASM_SUBC(l, h, z2->dp[i]);
10044
            z1->dp[i] = l;
10045
            l = h;
10046
            h = 0;
10047
        }
10048
        z1->dp[i] += l;
10049
        /* r += z1 << 32 */
10050
        l = 0;
10051
        h = 0;
10052
        for (i = 0; i < 32; i++) {
10053
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
10054
            SP_ASM_ADDC(l, h, z1->dp[i]);
10055
            r->dp[i + 32] = l;
10056
            l = h;
10057
            h = 0;
10058
        }
10059
        for (; i < 65; i++) {
10060
            SP_ASM_ADDC(l, h, z1->dp[i]);
10061
            r->dp[i + 32] = l;
10062
            l = h;
10063
            h = 0;
10064
        }
10065
        /* r += z2 << 64  */
10066
        l = 0;
10067
        h = 0;
10068
        for (i = 0; i < 33; i++) {
10069
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
10070
            SP_ASM_ADDC(l, h, z2->dp[i]);
10071
            r->dp[i + 64] = l;
10072
            l = h;
10073
            h = 0;
10074
        }
10075
        for (; i < 64; i++) {
10076
            SP_ASM_ADDC(l, h, z2->dp[i]);
10077
            r->dp[i + 64] = l;
10078
            l = h;
10079
            h = 0;
10080
        }
10081
        r->used = 128;
10082
        sp_clamp(r);
10083
    }
10084
10085
    FREE_SP_INT_ARRAY(z, NULL);
10086
    FREE_SP_INT_ARRAY(t, NULL);
10087
    return err;
10088
}
10089
    #endif /* SP_INT_DIGITS >= 128 */
10090
10091
    #if SP_INT_DIGITS >= 192
10092
/* Multiply a by b and store in r: r = a * b
10093
 *
10094
 * Karatsuba implementation.
10095
 *
10096
 * @param  [in]   a  SP integer to multiply.
10097
 * @param  [in]   b  SP integer to multiply.
10098
 * @param  [out]  r  SP integer result.
10099
 *
10100
 * @return  MP_OKAY on success.
10101
 * @return  MP_MEM when dynamic memory allocation fails.
10102
 */
10103
static int _sp_mul_96(sp_int* a, sp_int* b, sp_int* r)
10104
{
10105
    int err = MP_OKAY;
10106
    int i;
10107
    sp_int_digit l;
10108
    sp_int_digit h;
10109
    sp_int* a1;
10110
    sp_int* b1;
10111
    sp_int* z0;
10112
    sp_int* z1;
10113
    sp_int* z2;
10114
    sp_int_digit ca;
10115
    sp_int_digit cb;
10116
    DECL_SP_INT_ARRAY(t, 48, 2);
10117
    DECL_SP_INT_ARRAY(z, 97, 2);
10118
10119
    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
10120
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
10121
    if (err == MP_OKAY) {
10122
        a1 = t[0];
10123
        b1 = t[1];
10124
        z1 = z[0];
10125
        z2 = z[1];
10126
        z0 = r;
10127
10128
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
10129
        a1->used = 48;
10130
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
10131
        b1->used = 48;
10132
10133
        /* z2 = a1 * b1 */
10134
        err = _sp_mul_48(a1, b1, z2);
10135
    }
10136
    if (err == MP_OKAY) {
10137
        l = a1->dp[0];
10138
        h = 0;
10139
        SP_ASM_ADDC(l, h, a->dp[0]);
10140
        a1->dp[0] = l;
10141
        l = h;
10142
        h = 0;
10143
        for (i = 1; i < 48; i++) {
10144
            SP_ASM_ADDC(l, h, a1->dp[i]);
10145
            SP_ASM_ADDC(l, h, a->dp[i]);
10146
            a1->dp[i] = l;
10147
            l = h;
10148
            h = 0;
10149
        }
10150
        ca = l;
10151
        /* b01 = b0 + b1 */
10152
        l = b1->dp[0];
10153
        h = 0;
10154
        SP_ASM_ADDC(l, h, b->dp[0]);
10155
        b1->dp[0] = l;
10156
        l = h;
10157
        h = 0;
10158
        for (i = 1; i < 48; i++) {
10159
            SP_ASM_ADDC(l, h, b1->dp[i]);
10160
            SP_ASM_ADDC(l, h, b->dp[i]);
10161
            b1->dp[i] = l;
10162
            l = h;
10163
            h = 0;
10164
        }
10165
        cb = l;
10166
10167
        /* z0 = a0 * b0 */
10168
        err = _sp_mul_48(a, b, z0);
10169
    }
10170
    if (err == MP_OKAY) {
10171
        /* z1 = (a0 + a1) * (b0 + b1) */
10172
        err = _sp_mul_48(a1, b1, z1);
10173
    }
10174
    if (err == MP_OKAY) {
10175
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
10176
        /* r = z0 */
10177
        /* r += (z1 - z0 - z2) << 48 */
10178
        z1->dp[96] = ca & cb;
10179
        l = 0;
10180
        if (ca) {
10181
            h = 0;
10182
            for (i = 0; i < 48; i++) {
10183
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
10184
                SP_ASM_ADDC(l, h, b1->dp[i]);
10185
                z1->dp[i + 48] = l;
10186
                l = h;
10187
                h = 0;
10188
            }
10189
        }
10190
        z1->dp[96] += l;
10191
        l = 0;
10192
        if (cb) {
10193
            h = 0;
10194
            for (i = 0; i < 48; i++) {
10195
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
10196
                SP_ASM_ADDC(l, h, a1->dp[i]);
10197
                z1->dp[i + 48] = l;
10198
                l = h;
10199
                h = 0;
10200
            }
10201
        }
10202
        z1->dp[96] += l;
10203
        /* z1 = z1 - z0 - z2 */
10204
        l = 0;
10205
        h = 0;
10206
        for (i = 0; i < 96; i++) {
10207
            l += z1->dp[i];
10208
            SP_ASM_SUBC(l, h, z0->dp[i]);
10209
            SP_ASM_SUBC(l, h, z2->dp[i]);
10210
            z1->dp[i] = l;
10211
            l = h;
10212
            h = 0;
10213
        }
10214
        z1->dp[i] += l;
10215
        /* r += z1 << 48 */
10216
        l = 0;
10217
        h = 0;
10218
        for (i = 0; i < 48; i++) {
10219
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
10220
            SP_ASM_ADDC(l, h, z1->dp[i]);
10221
            r->dp[i + 48] = l;
10222
            l = h;
10223
            h = 0;
10224
        }
10225
        for (; i < 97; i++) {
10226
            SP_ASM_ADDC(l, h, z1->dp[i]);
10227
            r->dp[i + 48] = l;
10228
            l = h;
10229
            h = 0;
10230
        }
10231
        /* r += z2 << 96  */
10232
        l = 0;
10233
        h = 0;
10234
        for (i = 0; i < 49; i++) {
10235
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
10236
            SP_ASM_ADDC(l, h, z2->dp[i]);
10237
            r->dp[i + 96] = l;
10238
            l = h;
10239
            h = 0;
10240
        }
10241
        for (; i < 96; i++) {
10242
            SP_ASM_ADDC(l, h, z2->dp[i]);
10243
            r->dp[i + 96] = l;
10244
            l = h;
10245
            h = 0;
10246
        }
10247
        r->used = 192;
10248
        sp_clamp(r);
10249
    }
10250
10251
    FREE_SP_INT_ARRAY(z, NULL);
10252
    FREE_SP_INT_ARRAY(t, NULL);
10253
    return err;
10254
}
10255
    #endif /* SP_INT_DIGITS >= 192 */
10256
10257
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
10258
#endif /* !WOLFSSL_SP_SMALL */
10259
10260
/* Multiply a by b and store in r: r = a * b
10261
 *
10262
 * @param  [in]   a  SP integer to multiply.
10263
 * @param  [in]   b  SP integer to multiply.
10264
 * @param  [out]  r  SP integer result.
10265
 *
10266
 * @return  MP_OKAY on success.
10267
 * @return  MP_VAL when a, b or r is NULL; or the result will be too big for
10268
 *          fixed data length.
10269
 * @return  MP_MEM when dynamic memory allocation fails.
10270
 */
10271
int sp_mul(sp_int* a, sp_int* b, sp_int* r)
10272
73.5M
{
10273
73.5M
    int err = MP_OKAY;
10274
#ifdef WOLFSSL_SP_INT_NEGATIVE
10275
    int sign = MP_ZPOS;
10276
#endif
10277
10278
73.5M
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
10279
0
        err = MP_VAL;
10280
0
    }
10281
10282
    /* Need extra digit during calculation. */
10283
73.5M
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
10284
4
        err = MP_VAL;
10285
4
    }
10286
10287
#if 0
10288
    if (err == MP_OKAY) {
10289
        sp_print(a, "a");
10290
        sp_print(b, "b");
10291
    }
10292
#endif
10293
10294
73.5M
    if (err == MP_OKAY) {
10295
    #ifdef WOLFSSL_SP_INT_NEGATIVE
10296
        sign = a->sign ^ b->sign;
10297
    #endif
10298
10299
73.5M
        if ((a->used == 0) || (b->used == 0)) {
10300
1.11M
            _sp_zero(r);
10301
1.11M
        }
10302
72.4M
        else
10303
72.4M
#ifndef WOLFSSL_SP_SMALL
10304
72.4M
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
10305
72.4M
#if SP_WORD_SIZE == 64
10306
72.4M
        if ((a->used == 4) && (b->used == 4)) {
10307
31.0M
            err = _sp_mul_4(a, b, r);
10308
31.0M
        }
10309
41.3M
        else
10310
41.3M
#endif /* SP_WORD_SIZE == 64 */
10311
41.3M
#if SP_WORD_SIZE == 64
10312
41.3M
#ifdef SQR_MUL_ASM
10313
41.3M
        if ((a->used == 6) && (b->used == 6)) {
10314
14.0M
            err = _sp_mul_6(a, b, r);
10315
14.0M
        }
10316
27.3M
        else
10317
27.3M
#endif /* SQR_MUL_ASM */
10318
27.3M
#endif /* SP_WORD_SIZE == 64 */
10319
#if SP_WORD_SIZE == 32
10320
#ifdef SQR_MUL_ASM
10321
        if ((a->used == 8) && (b->used == 8)) {
10322
            err = _sp_mul_8(a, b, r);
10323
        }
10324
        else
10325
#endif /* SQR_MUL_ASM */
10326
#endif /* SP_WORD_SIZE == 32 */
10327
#if SP_WORD_SIZE == 32
10328
#ifdef SQR_MUL_ASM
10329
        if ((a->used == 12) && (b->used == 12)) {
10330
            err = _sp_mul_12(a, b, r);
10331
        }
10332
        else
10333
#endif /* SQR_MUL_ASM */
10334
#endif /* SP_WORD_SIZE == 32 */
10335
27.3M
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10336
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10337
    #if SP_INT_DIGITS >= 32
10338
        if ((a->used == 16) && (b->used == 16)) {
10339
            err = _sp_mul_16(a, b, r);
10340
        }
10341
        else
10342
    #endif /* SP_INT_DIGITS >= 32 */
10343
    #if SP_INT_DIGITS >= 48
10344
        if ((a->used == 24) && (b->used == 24)) {
10345
            err = _sp_mul_24(a, b, r);
10346
        }
10347
        else
10348
    #endif /* SP_INT_DIGITS >= 48 */
10349
    #if SP_INT_DIGITS >= 64
10350
        if ((a->used == 32) && (b->used == 32)) {
10351
            err = _sp_mul_32(a, b, r);
10352
        }
10353
        else
10354
    #endif /* SP_INT_DIGITS >= 64 */
10355
    #if SP_INT_DIGITS >= 96
10356
        if ((a->used == 48) && (b->used == 48)) {
10357
            err = _sp_mul_48(a, b, r);
10358
        }
10359
        else
10360
    #endif /* SP_INT_DIGITS >= 96 */
10361
    #if SP_INT_DIGITS >= 128
10362
        if ((a->used == 64) && (b->used == 64)) {
10363
            err = _sp_mul_64(a, b, r);
10364
        }
10365
        else
10366
    #endif /* SP_INT_DIGITS >= 128 */
10367
    #if SP_INT_DIGITS >= 192
10368
        if ((a->used == 96) && (b->used == 96)) {
10369
            err = _sp_mul_96(a, b, r);
10370
        }
10371
        else
10372
    #endif /* SP_INT_DIGITS >= 192 */
10373
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
10374
27.3M
#endif /* !WOLFSSL_SP_SMALL */
10375
10376
27.3M
#ifdef SQR_MUL_ASM
10377
27.3M
        if (a->used == b->used) {
10378
25.7M
            err = _sp_mul_nxn(a, b, r);
10379
25.7M
        }
10380
1.61M
        else
10381
1.61M
#endif
10382
1.61M
        {
10383
1.61M
            err = _sp_mul(a, b, r);
10384
1.61M
        }
10385
73.5M
    }
10386
10387
#ifdef WOLFSSL_SP_INT_NEGATIVE
10388
    if (err == MP_OKAY) {
10389
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
10390
    }
10391
#endif
10392
10393
#if 0
10394
    if (err == MP_OKAY) {
10395
        sp_print(r, "rmul");
10396
    }
10397
#endif
10398
10399
73.5M
    return err;
10400
73.5M
}
10401
/* END SP_MUL implementations. */
10402
10403
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
10404
    defined(WOLFCRYPT_HAVE_ECCSI) || \
10405
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
10406
/* Multiply a by b mod m and store in r: r = (a * b) mod m
10407
 *
10408
 * @param  [in]   a  SP integer to multiply.
10409
 * @param  [in]   b  SP integer to multiply.
10410
 * @param  [in]   m  SP integer that is the modulus.
10411
 * @param  [out]  r  SP integer result.
10412
 *
10413
 * @return  MP_OKAY on success.
10414
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
10415
 *          fixed data length.
10416
 * @return  MP_MEM when dynamic memory allocation fails.
10417
 */
10418
int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
10419
2.34M
{
10420
2.34M
    int err = MP_OKAY;
10421
10422
2.34M
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
10423
0
        err = MP_VAL;
10424
0
    }
10425
2.34M
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
10426
97
        err = MP_VAL;
10427
97
    }
10428
10429
2.34M
    if (err == MP_OKAY) {
10430
2.34M
        if ((r == m) || (r->size < a->used + b->used)) {
10431
0
            DECL_SP_INT(t, ((a == NULL) || (b == NULL)) ? 1 :
10432
0
                a->used + b->used);
10433
0
            ALLOC_SP_INT(t, a->used + b->used, err, NULL);
10434
0
            if (err == MP_OKAY) {
10435
0
                err = sp_init_size(t, a->used + b->used);
10436
0
            }
10437
0
            if (err == MP_OKAY) {
10438
0
                err = sp_mul(a, b, t);
10439
0
            }
10440
0
            if (err == MP_OKAY) {
10441
0
                err = sp_mod(t, m, r);
10442
0
            }
10443
10444
0
            FREE_SP_INT(t, NULL);
10445
0
        }
10446
2.34M
        else {
10447
2.34M
            err = sp_mul(a, b, r);
10448
2.34M
            if (err == MP_OKAY) {
10449
2.34M
                err = sp_mod(r, m, r);
10450
2.34M
            }
10451
2.34M
        }
10452
2.34M
    }
10453
2.34M
    return err;
10454
2.34M
}
10455
#endif
10456
10457
#ifdef WOLFSSL_SP_INVMOD
10458
/* Calculates the multiplicative inverse in the field.
10459
 *
10460
 * @param  [in]   a  SP integer to find inverse of.
10461
 * @param  [in]   m  SP integer that is the modulus.
10462
 * @param  [out]  r  SP integer to hold result. r cannot be m.
10463
 *
10464
 * @return  MP_OKAY on success.
10465
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
10466
 *          m is negative.
10467
 * @return  MP_MEM when dynamic memory allocation fails.
10468
 */
10469
int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
10470
13.2k
{
10471
13.2k
    int err = MP_OKAY;
10472
13.2k
    sp_int* u = NULL;
10473
13.2k
    sp_int* v = NULL;
10474
13.2k
    sp_int* b = NULL;
10475
13.2k
    sp_int* mm;
10476
13.2k
    int evenMod = 0;
10477
13.2k
    DECL_SP_INT_ARRAY(t, (m == NULL) ? 1 : (m->used + 1), 3);
10478
13.2k
    DECL_SP_INT(c, (m == NULL) ? 1 : (2 * m->used + 1));
10479
10480
13.2k
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
10481
0
        err = MP_VAL;
10482
0
    }
10483
13.2k
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
10484
40
        err = MP_VAL;
10485
40
    }
10486
10487
#ifdef WOLFSSL_SP_INT_NEGATIVE
10488
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
10489
        err = MP_VAL;
10490
    }
10491
#endif
10492
10493
13.2k
    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
10494
13.2k
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
10495
13.2k
    if (err == MP_OKAY) {
10496
13.1k
        u = t[0];
10497
13.1k
        v = t[1];
10498
13.1k
        b = t[2];
10499
        /* c allocated separately and larger for even mod case. */
10500
10501
13.1k
        if (_sp_cmp_abs(a, m) != MP_LT) {
10502
531
            err = sp_mod(a, m, r);
10503
531
            a = r;
10504
531
        }
10505
13.1k
    }
10506
10507
#ifdef WOLFSSL_SP_INT_NEGATIVE
10508
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
10509
        /* Make 'a' positive */
10510
        err = sp_add(m, a, r);
10511
        a = r;
10512
    }
10513
#endif
10514
10515
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
10516
13.2k
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
10517
134
        err = MP_VAL;
10518
134
    }
10519
    /* r*2*x != n*2*y + 1 for integer x,y */
10520
13.2k
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
10521
36
        err = MP_VAL;
10522
36
    }
10523
10524
    /* 1*1 = 0*m + 1  */
10525
13.2k
    if ((err == MP_OKAY) && sp_isone(a)) {
10526
1.11k
        sp_set(r, 1);
10527
1.11k
    }
10528
12.1k
    else if (err != MP_OKAY) {
10529
369
    }
10530
11.7k
    else {
10531
11.7k
        sp_init_size(u, m->used + 1);
10532
11.7k
        sp_init_size(v, m->used + 1);
10533
11.7k
        sp_init_size(b, m->used + 1);
10534
11.7k
        sp_init_size(c, 2 * m->used + 1);
10535
10536
11.7k
        if (sp_iseven(m)) {
10537
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
10538
1.21k
            mm = a;
10539
1.21k
            sp_copy(a, u);
10540
1.21k
            sp_mod(m, a, v);
10541
            /* v == 0 when a divides m evenly - no inverse.  */
10542
1.21k
            if (sp_iszero(v)) {
10543
                /* Force u to no inverse answer. */
10544
49
                sp_set(u, 0);
10545
49
            }
10546
1.21k
            evenMod = 1;
10547
1.21k
        }
10548
10.5k
        else {
10549
10.5k
            mm = m;
10550
10.5k
            sp_copy(m, u);
10551
10.5k
            sp_copy(a, v);
10552
10.5k
        }
10553
11.7k
        _sp_zero(b);
10554
11.7k
        sp_set(c, 1);
10555
10556
6.74M
        while (!sp_isone(v) && !sp_iszero(u)) {
10557
6.72M
            if (sp_iseven(u)) {
10558
2.28M
                sp_div_2(u, u);
10559
2.28M
                if (sp_isodd(b)) {
10560
1.22M
                    _sp_add_off(b, mm, b, 0);
10561
1.22M
                }
10562
2.28M
                sp_div_2(b, b);
10563
2.28M
            }
10564
4.44M
            else if (sp_iseven(v)) {
10565
2.26M
                sp_div_2(v, v);
10566
2.26M
                if (sp_isodd(c)) {
10567
914k
                    _sp_add_off(c, mm, c, 0);
10568
914k
                }
10569
2.26M
                sp_div_2(c, c);
10570
2.26M
            }
10571
2.18M
            else if (_sp_cmp(u, v) != MP_LT) {
10572
1.12M
                _sp_sub_off(u, v, u, 0);
10573
1.12M
                if (_sp_cmp(b, c) == MP_LT) {
10574
454k
                    _sp_add_off(b, mm, b, 0);
10575
454k
                }
10576
1.12M
                _sp_sub_off(b, c, b, 0);
10577
1.12M
            }
10578
1.05M
            else {
10579
1.05M
                _sp_sub_off(v, u, v, 0);
10580
1.05M
                if (_sp_cmp(c, b) == MP_LT) {
10581
573k
                    _sp_add_off(c, mm, c, 0);
10582
573k
                }
10583
1.05M
                _sp_sub_off(c, b, c, 0);
10584
1.05M
            }
10585
6.72M
        }
10586
11.7k
        if (sp_iszero(u)) {
10587
252
            err = MP_VAL;
10588
252
        }
10589
11.5k
        else if (evenMod) {
10590
            /* Finish operation.
10591
             *    a^-1 mod m = m + ((1 - m*c) / a)
10592
             * => a^-1 mod m = m - ((m*c - 1) / a)
10593
             */
10594
1.07k
            err = sp_mul(c, m, c);
10595
1.07k
            if (err == MP_OKAY) {
10596
1.07k
                _sp_sub_d(c, 1, c);
10597
1.07k
                err = sp_div(c, a, c, NULL);
10598
1.07k
            }
10599
1.07k
            if (err == MP_OKAY) {
10600
1.07k
                sp_sub(m, c, r);
10601
1.07k
            }
10602
1.07k
        }
10603
10.4k
        else {
10604
10.4k
            err = sp_copy(c, r);
10605
10.4k
        }
10606
11.7k
    }
10607
10608
13.2k
    FREE_SP_INT(c, NULL);
10609
13.2k
    FREE_SP_INT_ARRAY(t, NULL);
10610
13.2k
    return err;
10611
13.2k
}
10612
#endif /* WOLFSSL_SP_INVMOD */
10613
10614
#ifdef WOLFSSL_SP_INVMOD_MONT_CT
10615
10616
4.08M
#define CT_INV_MOD_PRE_CNT      8
10617
10618
/* Calculates the multiplicative inverse in the field - constant time.
10619
 *
10620
 * Modulus (m) must be a prime and greater than 2.
10621
 *
10622
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
10623
 * @param  [in]   m   SP integer that is the modulus.
10624
 * @param  [out]  r   SP integer to hold result.
10625
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
10626
 *
10627
 * @return  MP_OKAY on success.
10628
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
10629
 * @return  MP_MEM when dynamic memory allocation fails.
10630
 */
10631
int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp)
10632
12.2k
{
10633
12.2k
    int err = MP_OKAY;
10634
12.2k
    int i;
10635
12.2k
    int j = 0;
10636
12.2k
    sp_int* t = NULL;
10637
12.2k
    sp_int* e = NULL;
10638
12.2k
    DECL_SP_INT_ARRAY(pre, (m == NULL) ? 1 : m->used * 2 + 1,
10639
12.2k
                                                        CT_INV_MOD_PRE_CNT + 2);
10640
10641
12.2k
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
10642
0
        err = MP_VAL;
10643
0
    }
10644
10645
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
10646
12.2k
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
10647
12.2k
                                              (m->used == 1 && m->dp[0] < 3))) {
10648
0
        err = MP_VAL;
10649
0
    }
10650
10651
12.2k
    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
10652
12.2k
    if (err == MP_OKAY) {
10653
12.2k
        t = pre[CT_INV_MOD_PRE_CNT + 0];
10654
12.2k
        e = pre[CT_INV_MOD_PRE_CNT + 1];
10655
12.2k
        sp_init_size(t, m->used * 2 + 1);
10656
12.2k
        sp_init_size(e, m->used * 2 + 1);
10657
10658
12.2k
        sp_init_size(pre[0], m->used * 2 + 1);
10659
12.2k
        err = sp_copy(a, pre[0]);
10660
97.6k
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
10661
85.4k
            sp_init_size(pre[i], m->used * 2 + 1);
10662
85.4k
            err = sp_sqr(pre[i-1], pre[i]);
10663
85.4k
            if (err == MP_OKAY) {
10664
85.4k
                err = _sp_mont_red(pre[i], m, mp);
10665
85.4k
            }
10666
85.4k
            if (err == MP_OKAY) {
10667
85.4k
                err = sp_mul(pre[i], a, pre[i]);
10668
85.4k
            }
10669
85.4k
            if (err == MP_OKAY) {
10670
85.4k
                err = _sp_mont_red(pre[i], m, mp);
10671
85.4k
            }
10672
85.4k
        }
10673
12.2k
    }
10674
10675
12.2k
    if (err == MP_OKAY) {
10676
12.2k
        _sp_sub_d(m, 2, e);
10677
97.4k
        for (i = sp_count_bits(e)-1, j = 0; i >= 0; i--, j++) {
10678
97.4k
              if ((!sp_is_bit_set(e, i)) || (j == CT_INV_MOD_PRE_CNT)) {
10679
12.2k
                  break;
10680
12.2k
              }
10681
97.4k
        }
10682
12.2k
        err = sp_copy(pre[j-1], t);
10683
3.87M
        for (j = 0; (err == MP_OKAY) && (i >= 0); i--) {
10684
3.86M
            int set = sp_is_bit_set(e, i);
10685
10686
3.86M
            if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) {
10687
488k
                err = sp_mul(t, pre[j-1], t);
10688
488k
                if (err == MP_OKAY) {
10689
488k
                    err = _sp_mont_red(t, m, mp);
10690
488k
                }
10691
488k
                j = 0;
10692
488k
            }
10693
3.86M
            if (err == MP_OKAY) {
10694
3.86M
                err = sp_sqr(t, t);
10695
3.86M
                if (err == MP_OKAY) {
10696
3.86M
                    err = _sp_mont_red(t, m, mp);
10697
3.86M
                }
10698
3.86M
            }
10699
3.86M
            j += set;
10700
3.86M
        }
10701
12.2k
    }
10702
12.2k
    if (err == MP_OKAY) {
10703
12.1k
        if (j > 0) {
10704
12.1k
            err = sp_mul(t, pre[j-1], r);
10705
12.1k
            if (err == MP_OKAY) {
10706
12.1k
                err = _sp_mont_red(r, m, mp);
10707
12.1k
            }
10708
12.1k
        }
10709
0
        else {
10710
0
            err = sp_copy(t, r);
10711
0
        }
10712
12.1k
    }
10713
10714
12.2k
    FREE_SP_INT_ARRAY(pre, NULL);
10715
12.2k
    return err;
10716
12.2k
}
10717
10718
#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
10719
10720
10721
/**************************
10722
 * Exponentiation functions
10723
 **************************/
10724
10725
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
10726
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
10727
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10728
 * Process the exponent one bit at a time.
10729
 * Is constant time and can be cache attack resistant.
10730
 *
10731
 * @param  [in]   b     SP integer that is the base.
10732
 * @param  [in]   e     SP integer that is the exponent.
10733
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10734
 *                      count of bits in b.
10735
 * @param  [in]   m     SP integer that is the modulus.
10736
 * @param  [out]  r     SP integer to hold result.
10737
 *
10738
 * @return  MP_OKAY on success.
10739
 * @return  MP_MEM when dynamic memory allocation fails.
10740
 */
10741
static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r)
10742
8.20k
{
10743
8.20k
    int i;
10744
8.20k
    int err = MP_OKAY;
10745
8.20k
    int done = 0;
10746
8.20k
    int j;
10747
8.20k
    int y;
10748
8.20k
    int seenTopBit = 0;
10749
#ifdef WC_NO_CACHE_RESISTANT
10750
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
10751
#else
10752
8.20k
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
10753
8.20k
#endif
10754
10755
#ifdef WC_NO_CACHE_RESISTANT
10756
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
10757
#else
10758
8.20k
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
10759
8.20k
#endif
10760
8.20k
    if (err == MP_OKAY) {
10761
7.81k
        sp_init_size(t[0], 2 * m->used + 1);
10762
7.81k
        sp_init_size(t[1], 2 * m->used + 1);
10763
7.81k
    #ifndef WC_NO_CACHE_RESISTANT
10764
7.81k
        sp_init_size(t[2], 2 * m->used + 1);
10765
7.81k
    #endif
10766
10767
        /* Ensure base is less than exponent. */
10768
7.81k
        if (_sp_cmp_abs(b, m) != MP_LT) {
10769
116
            err = sp_mod(b, m, t[0]);
10770
116
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
10771
39
                sp_set(r, 0);
10772
39
                done = 1;
10773
39
            }
10774
116
        }
10775
7.69k
        else {
10776
7.69k
            err = sp_copy(b, t[0]);
10777
7.69k
        }
10778
7.81k
    }
10779
10780
8.20k
    if ((!done) && (err == MP_OKAY)) {
10781
        /* t[0] is dummy value and t[1] is result */
10782
7.76k
        err = sp_copy(t[0], t[1]);
10783
10784
993k
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
10785
#ifdef WC_NO_CACHE_RESISTANT
10786
            /* Square real result if seen the top bit. */
10787
            err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]);
10788
            if (err == MP_OKAY) {
10789
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10790
                j = y & seenTopBit;
10791
                seenTopBit |= y;
10792
                /* Multiply real result if bit is set and seen the top bit. */
10793
                err = sp_mulmod(t[j], b, m, t[j]);
10794
            }
10795
#else
10796
            /* Square real result if seen the top bit. */
10797
985k
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10798
985k
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
10799
985k
                    t[2]);
10800
985k
            err = sp_sqrmod(t[2], m, t[2]);
10801
985k
            sp_copy(t[2],
10802
985k
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10803
985k
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
10804
985k
            if (err == MP_OKAY) {
10805
985k
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10806
985k
                j = y & seenTopBit;
10807
985k
                seenTopBit |= y;
10808
                /* Multiply real result if bit is set and seen the top bit. */
10809
985k
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10810
985k
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
10811
985k
                        t[2]);
10812
985k
                err = sp_mulmod(t[2], b, m, t[2]);
10813
985k
                sp_copy(t[2],
10814
985k
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10815
985k
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
10816
985k
            }
10817
985k
#endif
10818
985k
        }
10819
7.76k
    }
10820
8.20k
    if ((!done) && (err == MP_OKAY)) {
10821
6.87k
        err = sp_copy(t[1], r);
10822
6.87k
    }
10823
10824
8.20k
    FREE_SP_INT_ARRAY(t, NULL);
10825
8.20k
    return err;
10826
8.20k
}
10827
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY &&
10828
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH */
10829
10830
#if defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
10831
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))
10832
#ifndef WC_NO_HARDEN
10833
#if !defined(WC_NO_CACHE_RESISTANT)
10834
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10835
 * Process the exponent one bit at a time with base in montgomery form.
10836
 * Is constant time and cache attack resistant.
10837
 *
10838
 * @param  [in]   b     SP integer that is the base.
10839
 * @param  [in]   e     SP integer that is the exponent.
10840
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10841
 *                      count of bits in b.
10842
 * @param  [in]   m     SP integer that is the modulus.
10843
 * @param  [out]  r     SP integer to hold result.
10844
 *
10845
 * @return  MP_OKAY on success.
10846
 * @return  MP_MEM when dynamic memory allocation fails.
10847
 */
10848
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
10849
                               sp_int* r)
10850
151k
{
10851
151k
    int i;
10852
151k
    int err = MP_OKAY;
10853
151k
    int done = 0;
10854
151k
    int j;
10855
151k
    int y;
10856
151k
    int seenTopBit = 0;
10857
151k
    sp_int_digit mp;
10858
151k
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
10859
10860
151k
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
10861
151k
    if (err == MP_OKAY) {
10862
151k
        sp_init_size(t[0], m->used * 2 + 1);
10863
151k
        sp_init_size(t[1], m->used * 2 + 1);
10864
151k
        sp_init_size(t[2], m->used * 2 + 1);
10865
151k
        sp_init_size(t[3], m->used * 2 + 1);
10866
10867
        /* Ensure base is less than exponent. */
10868
151k
        if (_sp_cmp_abs(b, m) != MP_LT) {
10869
0
            err = sp_mod(b, m, t[0]);
10870
0
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
10871
0
                sp_set(r, 0);
10872
0
                done = 1;
10873
0
            }
10874
0
        }
10875
151k
        else {
10876
151k
            err = sp_copy(b, t[0]);
10877
151k
        }
10878
151k
    }
10879
10880
10881
151k
    if ((!done) && (err == MP_OKAY)) {
10882
151k
        err = sp_mont_setup(m, &mp);
10883
151k
        if (err == MP_OKAY) {
10884
151k
            err = sp_mont_norm(t[1], m);
10885
151k
        }
10886
151k
        if (err == MP_OKAY) {
10887
            /* Convert to montgomery form. */
10888
151k
            err = sp_mulmod(t[0], t[1], m, t[0]);
10889
151k
        }
10890
151k
        if (err == MP_OKAY) {
10891
            /* t[0] is fake working value and t[1] is real working value. */
10892
150k
            sp_copy(t[0], t[1]);
10893
            /* Montgomert form of base to multiply by. */
10894
150k
            sp_copy(t[0], t[2]);
10895
150k
        }
10896
10897
19.1M
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
10898
            /* Square real working value if seen the top bit. */
10899
18.9M
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10900
18.9M
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
10901
18.9M
                    t[3]);
10902
18.9M
            err = sp_sqr(t[3], t[3]);
10903
18.9M
            if (err == MP_OKAY) {
10904
18.9M
                err = _sp_mont_red(t[3], m, mp);
10905
18.9M
            }
10906
18.9M
            sp_copy(t[3],
10907
18.9M
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10908
18.9M
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
10909
18.9M
            if (err == MP_OKAY) {
10910
18.9M
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10911
18.9M
                j = y & seenTopBit;
10912
18.9M
                seenTopBit |= y;
10913
                /* Multiply real value if bit is set and seen the top bit. */
10914
18.9M
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10915
18.9M
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
10916
18.9M
                        t[3]);
10917
18.9M
                err = sp_mul(t[3], t[2], t[3]);
10918
18.9M
                if (err == MP_OKAY) {
10919
18.9M
                    err = _sp_mont_red(t[3], m, mp);
10920
18.9M
                }
10921
18.9M
                sp_copy(t[3],
10922
18.9M
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10923
18.9M
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
10924
18.9M
            }
10925
18.9M
        }
10926
151k
        if (err == MP_OKAY) {
10927
            /* Convert from montgomery form. */
10928
149k
            err = _sp_mont_red(t[1], m, mp);
10929
            /* Reduction implementation returns number to range < m. */
10930
149k
        }
10931
151k
    }
10932
151k
    if ((!done) && (err == MP_OKAY)) {
10933
149k
        err = sp_copy(t[1], r);
10934
149k
    }
10935
10936
151k
    FREE_SP_INT_ARRAY(t, NULL);
10937
151k
    return err;
10938
151k
}
10939
#else
10940
10941
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
10942
#define SP_ALLOC
10943
10944
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10945
 * Creates a window of precalculated exponents with base in montgomery form.
10946
 * Is constant time but NOT cache attack resistant.
10947
 *
10948
 * @param  [in]   b     SP integer that is the base.
10949
 * @param  [in]   e     SP integer that is the exponent.
10950
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10951
 *                      count of bits in b.
10952
 * @param  [in]   m     SP integer that is the modulus.
10953
 * @param  [out]  r     SP integer to hold result.
10954
 *
10955
 * @return  MP_OKAY on success.
10956
 * @return  MP_MEM when dynamic memory allocation fails.
10957
 */
10958
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
10959
                               sp_int* r)
10960
{
10961
    int i;
10962
    int j;
10963
    int c;
10964
    int y;
10965
    int winBits;
10966
    int preCnt;
10967
    int err = MP_OKAY;
10968
    int done = 0;
10969
    sp_int_digit mp;
10970
    sp_int_digit n;
10971
    sp_int_digit mask;
10972
    sp_int* tr = NULL;
10973
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
10974
10975
    if (bits > 450) {
10976
        winBits = 6;
10977
    }
10978
    else if (bits <= 21) {
10979
        winBits = 1;
10980
    }
10981
    else if (bits <= 36) {
10982
        winBits = 3;
10983
    }
10984
    else if (bits <= 140) {
10985
        winBits = 4;
10986
    }
10987
    else {
10988
        winBits = 5;
10989
    }
10990
    preCnt = 1 << winBits;
10991
    mask = preCnt - 1;
10992
10993
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
10994
    if (err == MP_OKAY) {
10995
        tr = t[preCnt];
10996
10997
        for (i = 0; i < preCnt; i++) {
10998
            sp_init_size(t[i], m->used * 2 + 1);
10999
        }
11000
        sp_init_size(tr, m->used * 2 + 1);
11001
11002
        /* Ensure base is less than exponent. */
11003
        if (_sp_cmp_abs(b, m) != MP_LT) {
11004
            err = sp_mod(b, m, t[1]);
11005
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
11006
                sp_set(r, 0);
11007
                done = 1;
11008
            }
11009
        }
11010
        else {
11011
            err = sp_copy(b, t[1]);
11012
        }
11013
    }
11014
11015
    if ((!done) && (err == MP_OKAY)) {
11016
        err = sp_mont_setup(m, &mp);
11017
        if (err == MP_OKAY) {
11018
            /* Norm value is 1 in montgomery form. */
11019
            err = sp_mont_norm(t[0], m);
11020
        }
11021
        if (err == MP_OKAY) {
11022
            /* Convert base to montgomery form. */
11023
            err = sp_mulmod(t[1], t[0], m, t[1]);
11024
        }
11025
11026
        /* Pre-calculate values */
11027
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
11028
            if ((i & 1) == 0) {
11029
                err = sp_sqr(t[i/2], t[i]);
11030
            }
11031
            else {
11032
                err = sp_mul(t[i-1], t[1], t[i]);
11033
            }
11034
            if (err == MP_OKAY) {
11035
                err = _sp_mont_red(t[i], m, mp);
11036
            }
11037
        }
11038
11039
        if (err == MP_OKAY) {
11040
            /* Bits from the top that - possibly left over. */
11041
            i = (bits - 1) >> SP_WORD_SHIFT;
11042
            n = e->dp[i--];
11043
            c = bits & (SP_WORD_SIZE - 1);
11044
            if (c == 0) {
11045
                c = SP_WORD_SIZE;
11046
            }
11047
            c -= bits % winBits;
11048
            y = (int)(n >> c);
11049
            n <<= SP_WORD_SIZE - c;
11050
            /* Copy window number for top bits. */
11051
            sp_copy(t[y], tr);
11052
            for (; (i >= 0) || (c >= winBits); ) {
11053
                if (c == 0) {
11054
                    /* Bits up to end of digit */
11055
                    n = e->dp[i--];
11056
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
11057
                    n <<= winBits;
11058
                    c = SP_WORD_SIZE - winBits;
11059
                }
11060
                else if (c < winBits) {
11061
                    /* Bits to end of digit and part of next */
11062
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
11063
                    n = e->dp[i--];
11064
                    c = winBits - c;
11065
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11066
                    n <<= c;
11067
                    c = SP_WORD_SIZE - c;
11068
                }
11069
                else {
11070
                    /* Bits from middle of digit */
11071
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11072
                    n <<= winBits;
11073
                    c -= winBits;
11074
                }
11075
11076
                /* Square for number of bits in window. */
11077
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
11078
                    err = sp_sqr(tr, tr);
11079
                    if (err == MP_OKAY) {
11080
                        err = _sp_mont_red(tr, m, mp);
11081
                    }
11082
                }
11083
                /* Multiply by window number for next set of bits. */
11084
                if (err == MP_OKAY) {
11085
                    err = sp_mul(tr, t[y], tr);
11086
                }
11087
                if (err == MP_OKAY) {
11088
                    err = _sp_mont_red(tr, m, mp);
11089
                }
11090
            }
11091
        }
11092
11093
        if (err == MP_OKAY) {
11094
            /* Convert from montgomery form. */
11095
            err = _sp_mont_red(tr, m, mp);
11096
            /* Reduction implementation returns number to range < m. */
11097
        }
11098
    }
11099
    if ((!done) && (err == MP_OKAY)) {
11100
        err = sp_copy(tr, r);
11101
    }
11102
11103
    FREE_SP_INT_ARRAY(t, NULL);
11104
    return err;
11105
}
11106
11107
#undef SP_ALLOC
11108
11109
#endif /* !WC_NO_CACHE_RESISTANT */
11110
#endif /* !WC_NO_HARDEN */
11111
11112
#if SP_WORD_SIZE <= 16
11113
    #define EXP2_WINSIZE    2
11114
#elif SP_WORD_SIZE <= 32
11115
    #define EXP2_WINSIZE    3
11116
#elif SP_WORD_SIZE <= 64
11117
739k
    #define EXP2_WINSIZE    4
11118
#elif SP_WORD_SIZE <= 128
11119
    #define EXP2_WINSIZE    5
11120
#endif
11121
11122
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
11123
 * Is constant time and cache attack resistant.
11124
 *
11125
 * @param  [in]   e       SP integer that is the exponent.
11126
 * @param  [in]   digits  Number of digits in base to use. May be greater than
11127
 *                        count of bits in b.
11128
 * @param  [in]   m       SP integer that is the modulus.
11129
 * @param  [out]  r       SP integer to hold result.
11130
 *
11131
 * @return  MP_OKAY on success.
11132
 * @return  MP_MEM when dynamic memory allocation fails.
11133
 */
11134
static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r)
11135
3.16k
{
11136
3.16k
    int i = 0;
11137
3.16k
    int j;
11138
3.16k
    int c = 0;
11139
3.16k
    int y;
11140
3.16k
    int err = MP_OKAY;
11141
3.16k
    sp_int* t = NULL;
11142
3.16k
    sp_int* tr = NULL;
11143
3.16k
    sp_int_digit mp = 0, n = 0;
11144
3.16k
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
11145
11146
#if 0
11147
    sp_print_int(2, "a");
11148
    sp_print(e, "b");
11149
    sp_print(m, "m");
11150
#endif
11151
11152
3.16k
    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
11153
3.16k
    if (err == MP_OKAY) {
11154
3.11k
        t  = d[0];
11155
3.11k
        tr = d[1];
11156
11157
3.11k
        sp_init_size(t, m->used * 2 + 1);
11158
3.11k
        sp_init_size(tr, m->used * 2 + 1);
11159
11160
3.11k
        if (m->used > 1) {
11161
2.42k
            err = sp_mont_setup(m, &mp);
11162
2.42k
            if (err == MP_OKAY) {
11163
                /* Norm value is 1 in montgomery form. */
11164
2.42k
                err = sp_mont_norm(tr, m);
11165
2.42k
            }
11166
2.42k
            if (err == MP_OKAY) {
11167
2.42k
                err = sp_mul_2d(m, 1 << EXP2_WINSIZE, t);
11168
2.42k
            }
11169
2.42k
        }
11170
694
        else {
11171
694
            err = sp_set(tr, 1);
11172
694
        }
11173
11174
3.11k
        if (err == MP_OKAY) {
11175
            /* Bits from the top. */
11176
3.11k
            i = digits - 1;
11177
3.11k
            n = e->dp[i--];
11178
3.11k
            c = SP_WORD_SIZE;
11179
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
11180
            c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
11181
            if (c != SP_WORD_SIZE) {
11182
                y = (int)(n >> c);
11183
                n <<= SP_WORD_SIZE - c;
11184
            }
11185
            else
11186
#endif
11187
3.11k
            {
11188
3.11k
                y = 0;
11189
3.11k
            }
11190
11191
            /* Multiply montgomery representation of 1 by 2 ^ top */
11192
3.11k
            err = sp_mul_2d(tr, y, tr);
11193
3.11k
        }
11194
3.11k
        if ((err == MP_OKAY) && (m->used > 1)) {
11195
2.42k
            err = sp_add(tr, t, tr);
11196
2.42k
        }
11197
3.11k
        if (err == MP_OKAY) {
11198
3.11k
            err = sp_mod(tr, m, tr);
11199
3.11k
        }
11200
3.11k
        if (err == MP_OKAY) {
11201
83.2k
            for (; (i >= 0) || (c >= EXP2_WINSIZE); ) {
11202
80.3k
                if (c == 0) {
11203
                    /* Bits up to end of digit */
11204
4.57k
                    n = e->dp[i--];
11205
4.57k
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
11206
4.57k
                    n <<= EXP2_WINSIZE;
11207
4.57k
                    c = SP_WORD_SIZE - EXP2_WINSIZE;
11208
4.57k
                }
11209
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
11210
                else if (c < EXP2_WINSIZE) {
11211
                    /* Bits to end of digit and part of next */
11212
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
11213
                    n = e->dp[i--];
11214
                    c = EXP2_WINSIZE - c;
11215
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11216
                    n <<= c;
11217
                    c = SP_WORD_SIZE - c;
11218
                }
11219
#endif
11220
75.7k
                else {
11221
                    /* Bits from middle of digit */
11222
75.7k
                    y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) &
11223
75.7k
                              ((1 << EXP2_WINSIZE) - 1));
11224
75.7k
                    n <<= EXP2_WINSIZE;
11225
75.7k
                    c -= EXP2_WINSIZE;
11226
75.7k
                }
11227
11228
                /* Square for number of bits in window. */
11229
373k
                for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
11230
293k
                    err = sp_sqr(tr, tr);
11231
293k
                    if (err != MP_OKAY) {
11232
72
                        break;
11233
72
                    }
11234
293k
                    if (m->used > 1) {
11235
231k
                        err = _sp_mont_red(tr, m, mp);
11236
231k
                    }
11237
61.7k
                    else {
11238
61.7k
                        err = sp_mod(tr, m, tr);
11239
61.7k
                    }
11240
293k
                }
11241
11242
80.3k
                if (err == MP_OKAY) {
11243
                    /* then multiply by 2^y */
11244
80.2k
                    err = sp_mul_2d(tr, y, tr);
11245
80.2k
                }
11246
80.3k
                if ((err == MP_OKAY) && (m->used > 1)) {
11247
                    /* Add in value to make mod operation take same time */
11248
63.0k
                    err = sp_add(tr, t, tr);
11249
63.0k
                }
11250
80.3k
                if (err == MP_OKAY) {
11251
80.2k
                    err = sp_mod(tr, m, tr);
11252
80.2k
                }
11253
80.3k
                if (err != MP_OKAY) {
11254
110
                    break;
11255
110
                }
11256
80.3k
            }
11257
3.09k
        }
11258
11259
3.11k
        if ((err == MP_OKAY) && (m->used > 1)) {
11260
            /* Convert from montgomery form. */
11261
2.36k
            err = _sp_mont_red(tr, m, mp);
11262
            /* Reduction implementation returns number to range < m. */
11263
2.36k
        }
11264
3.11k
    }
11265
3.16k
    if (err == MP_OKAY) {
11266
2.98k
        err = sp_copy(tr, r);
11267
2.98k
    }
11268
11269
#if 0
11270
    sp_print(r, "rme");
11271
#endif
11272
11273
3.16k
    FREE_SP_INT_ARRAY(d, NULL);
11274
3.16k
    return err;
11275
3.16k
}
11276
#endif /* WOLFSSL_SP_MATH_ALL && ((!WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH) */
11277
11278
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
11279
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
11280
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11281
 *
11282
 * @param  [in]   b     SP integer that is the base.
11283
 * @param  [in]   e     SP integer that is the exponent.
11284
 * @param  [in]   bits  Number of bits in base to use. May be greater than
11285
 *                      count of bits in b.
11286
 * @param  [in]   m     SP integer that is the modulus.
11287
 * @param  [out]  r     SP integer to hold result.
11288
 *
11289
 * @return  MP_OKAY on success.
11290
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11291
 * @return  MP_MEM when dynamic memory allocation fails.
11292
 */
11293
int sp_exptmod_ex(sp_int* b, sp_int* e, int digits, sp_int* m, sp_int* r)
11294
167k
{
11295
167k
    int err = MP_OKAY;
11296
167k
    int done = 0;
11297
167k
    int mBits = sp_count_bits(m);
11298
167k
    int bBits = sp_count_bits(b);
11299
167k
    int eBits = sp_count_bits(e);
11300
11301
167k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11302
0
        err = MP_VAL;
11303
0
    }
11304
11305
#if 0
11306
    if (err == MP_OKAY) {
11307
        sp_print(b, "a");
11308
        sp_print(e, "b");
11309
        sp_print(m, "m");
11310
    }
11311
#endif
11312
11313
    /* Check for invalid modulus. */
11314
167k
    if ((err == MP_OKAY) && sp_iszero(m)) {
11315
128
        err = MP_VAL;
11316
128
    }
11317
#ifdef WOLFSSL_SP_INT_NEGATIVE
11318
    /* Check for unsupported negative values of exponent and modulus. */
11319
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
11320
        err = MP_VAL;
11321
    }
11322
#endif
11323
11324
    /* Check for degenerate cases. */
11325
167k
    if ((err == MP_OKAY) && sp_isone(m)) {
11326
134
        sp_set(r, 0);
11327
134
        done = 1;
11328
134
    }
11329
167k
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
11330
911
        sp_set(r, 1);
11331
911
        done = 1;
11332
911
    }
11333
11334
    /* Check whether base needs to be reduced. */
11335
167k
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
11336
28.0k
        if ((r == e) || (r == m)) {
11337
0
            err = MP_VAL;
11338
0
        }
11339
28.0k
        if (err == MP_OKAY) {
11340
28.0k
            err = sp_mod(b, m, r);
11341
28.0k
        }
11342
28.0k
        if (err == MP_OKAY) {
11343
27.8k
            b = r;
11344
27.8k
        }
11345
28.0k
    }
11346
    /* Check for degenerate case of base. */
11347
167k
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
11348
10.9k
        sp_set(r, 0);
11349
10.9k
        done = 1;
11350
10.9k
    }
11351
11352
    /* Ensure SP integers have space for intermediate values. */
11353
167k
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
11354
102
        err = MP_VAL;
11355
102
    }
11356
11357
167k
    if ((!done) && (err == MP_OKAY)) {
11358
        /* Use code optimized for specific sizes if possible */
11359
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
11360
    (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
11361
    #ifndef WOLFSSL_SP_NO_2048
11362
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
11363
            (eBits <= 1024)) {
11364
            err = sp_ModExp_1024(b, e, m, r);
11365
            done = 1;
11366
        }
11367
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
11368
                 (eBits <= 2048)) {
11369
            err = sp_ModExp_2048(b, e, m, r);
11370
            done = 1;
11371
        }
11372
        else
11373
    #endif
11374
    #ifndef WOLFSSL_SP_NO_3072
11375
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
11376
            (eBits <= 1536)) {
11377
            err = sp_ModExp_1536(b, e, m, r);
11378
            done = 1;
11379
        }
11380
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
11381
                 (eBits <= 3072)) {
11382
            err = sp_ModExp_3072(b, e, m, r);
11383
            done = 1;
11384
        }
11385
        else
11386
    #endif
11387
    #ifdef WOLFSSL_SP_4096
11388
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
11389
            (eBits <= 4096)) {
11390
            err = sp_ModExp_4096(b, e, m, r);
11391
            done = 1;
11392
        }
11393
        else
11394
    #endif
11395
#endif
11396
155k
        {
11397
155k
        }
11398
155k
    }
11399
167k
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH)
11400
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
11401
    defined(NO_DH)
11402
    if ((!done) && (err == MP_OKAY))
11403
        err = sp_exptmod_nct(b, e, m, r);
11404
    }
11405
#else
11406
167k
#if defined(WOLFSSL_SP_MATH_ALL)
11407
167k
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
11408
167k
         mp_isodd(m)) {
11409
        /* Use the generic base 2 implementation. */
11410
6.17k
        err = _sp_exptmod_base_2(e, digits, m, r);
11411
6.17k
    }
11412
161k
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
11413
111k
    #ifndef WC_NO_HARDEN
11414
111k
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
11415
    #else
11416
        err = sp_exptmod_nct(b, e, m, r);
11417
    #endif
11418
111k
    }
11419
50.1k
    else
11420
50.1k
#endif /* WOLFSSL_SP_MATH_ALL */
11421
50.1k
    if ((!done) && (err == MP_OKAY)) {
11422
        /* Otherwise use the generic implementation. */
11423
37.7k
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
11424
37.7k
    }
11425
167k
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
11426
#else
11427
    if ((!done) && (err == MP_OKAY)) {
11428
        err = MP_VAL;
11429
    }
11430
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
11431
11432
167k
    (void)mBits;
11433
167k
    (void)bBits;
11434
167k
    (void)eBits;
11435
167k
    (void)digits;
11436
11437
#if 0
11438
    if (err == MP_OKAY) {
11439
        sp_print(r, "rme");
11440
    }
11441
#endif
11442
167k
    return err;
11443
167k
}
11444
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
        * (!NO_RSA && WOLFSSL_KEY_GEN) */
11445
11446
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
11447
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
11448
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11449
 *
11450
 * @param  [in]   b  SP integer that is the base.
11451
 * @param  [in]   e  SP integer that is the exponent.
11452
 * @param  [in]   m  SP integer that is the modulus.
11453
 * @param  [out]  r  SP integer to hold result.
11454
 *
11455
 * @return  MP_OKAY on success.
11456
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11457
 * @return  MP_MEM when dynamic memory allocation fails.
11458
 */
11459
int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11460
255k
{
11461
255k
    int err = MP_OKAY;
11462
11463
255k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11464
0
        err = MP_VAL;
11465
0
    }
11466
255k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
11467
255k
    if (err == MP_OKAY) {
11468
255k
        err = sp_exptmod_ex(b, e, e->used, m, r);
11469
255k
    }
11470
255k
    RESTORE_VECTOR_REGISTERS();
11471
255k
    return err;
11472
255k
}
11473
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
11474
        * (!NO_RSA && WOLFSSL_KEY_GEN) */
11475
11476
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
11477
#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
11478
11479
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
11480
#define SP_ALLOC
11481
11482
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11483
 * Creates a window of precalculated exponents with base in montgomery form.
11484
 * Sliding window and is NOT constant time.
11485
 *
11486
 * @param  [in]   b     SP integer that is the base.
11487
 * @param  [in]   e     SP integer that is the exponent.
11488
 * @param  [in]   bits  Number of bits in base to use. May be greater than
11489
 *                      count of bits in b.
11490
 * @param  [in]   m     SP integer that is the modulus.
11491
 * @param  [out]  r     SP integer to hold result.
11492
 *
11493
 * @return  MP_OKAY on success.
11494
 * @return  MP_MEM when dynamic memory allocation fails.
11495
 */
11496
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11497
949
{
11498
949
    int i = 0;
11499
949
    int j = 0;
11500
949
    int c = 0;
11501
949
    int y = 0;
11502
949
    int bits;
11503
949
    int winBits;
11504
949
    int preCnt;
11505
949
    int err = MP_OKAY;
11506
949
    int done = 0;
11507
949
    sp_int* tr = NULL;
11508
949
    sp_int* bm = NULL;
11509
949
    sp_int_digit mask;
11510
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
11511
949
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
11512
11513
949
    bits = sp_count_bits(e);
11514
11515
949
    if (bits > 450) {
11516
159
        winBits = 6;
11517
159
    }
11518
790
    else if (bits <= 21) {
11519
221
        winBits = 1;
11520
221
    }
11521
569
    else if (bits <= 36) {
11522
37
        winBits = 3;
11523
37
    }
11524
532
    else if (bits <= 140) {
11525
218
        winBits = 4;
11526
218
    }
11527
314
    else {
11528
314
        winBits = 5;
11529
314
    }
11530
949
    preCnt = 1 << (winBits - 1);
11531
949
    mask = preCnt - 1;
11532
11533
949
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL);
11534
949
    if (err == MP_OKAY) {
11535
        /* Initialize window numbers and temporary result. */
11536
920
        tr = t[preCnt + 0];
11537
920
        bm = t[preCnt + 1];
11538
11539
12.8k
        for (i = 0; i < preCnt; i++) {
11540
11.8k
            sp_init_size(t[i], m->used * 2 + 1);
11541
11.8k
        }
11542
920
        sp_init_size(tr, m->used * 2 + 1);
11543
920
        sp_init_size(bm, m->used * 2 + 1);
11544
11545
        /* Ensure base is less than exponent. */
11546
920
        if (_sp_cmp_abs(b, m) != MP_LT) {
11547
439
            err = sp_mod(b, m, bm);
11548
439
            if ((err == MP_OKAY) && sp_iszero(bm)) {
11549
71
                sp_set(r, 0);
11550
71
                done = 1;
11551
71
            }
11552
439
        }
11553
481
        else {
11554
481
            err = sp_copy(b, bm);
11555
481
        }
11556
920
    }
11557
11558
949
    if ((!done) && (err == MP_OKAY)) {
11559
823
        sp_int_digit mp;
11560
823
        sp_int_digit n;
11561
11562
823
        err = sp_mont_setup(m, &mp);
11563
823
        if (err == MP_OKAY) {
11564
823
            err = sp_mont_norm(t[0], m);
11565
823
        }
11566
823
        if (err == MP_OKAY) {
11567
823
            err = sp_mulmod(bm, t[0], m, bm);
11568
823
        }
11569
823
        if (err == MP_OKAY) {
11570
770
            err = sp_copy(bm, t[0]);
11571
770
        }
11572
3.05k
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
11573
2.23k
            err = sp_sqr(t[0], t[0]);
11574
2.23k
            if (err == MP_OKAY) {
11575
2.20k
                err = _sp_mont_red(t[0], m, mp);
11576
2.20k
            }
11577
2.23k
        }
11578
8.76k
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
11579
7.93k
            err = sp_mul(t[i-1], bm, t[i]);
11580
7.93k
            if (err == MP_OKAY) {
11581
7.91k
                err = _sp_mont_red(t[i], m, mp);
11582
7.91k
            }
11583
7.93k
        }
11584
11585
823
        if (err == MP_OKAY) {
11586
            /* Find the top bit. */
11587
720
            i = (bits - 1) >> SP_WORD_SHIFT;
11588
720
            n = e->dp[i--];
11589
720
            c = bits % SP_WORD_SIZE;
11590
720
            if (c == 0) {
11591
155
                c = SP_WORD_SIZE;
11592
155
            }
11593
            /* Put top bit at highest offset in digit. */
11594
720
            n <<= SP_WORD_SIZE - c;
11595
11596
720
            if (bits >= winBits) {
11597
                /* Top bit set. Copy from window. */
11598
720
                if (c < winBits) {
11599
                    /* Bits to end of digit and part of next */
11600
64
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11601
64
                    n = e->dp[i--];
11602
64
                    c = winBits - c;
11603
64
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11604
64
                    n <<= c;
11605
64
                    c = SP_WORD_SIZE - c;
11606
64
                }
11607
656
                else {
11608
                    /* Bits from middle of digit */
11609
656
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11610
656
                    n <<= winBits;
11611
656
                    c -= winBits;
11612
656
                }
11613
720
                err = sp_copy(t[y], tr);
11614
720
            }
11615
0
            else {
11616
                /* 1 in Montgomery form. */
11617
0
                err = sp_mont_norm(tr, m);
11618
0
            }
11619
33.8k
            while (err == MP_OKAY) {
11620
                /* Sqaure until we find bit that is 1 or there's less than a
11621
                 * window of bits left.
11622
                 */
11623
72.0k
                while (err == MP_OKAY && ((i >= 0) || (c >= winBits))) {
11624
71.3k
                    sp_int_digit n2 = n;
11625
71.3k
                    int c2 = c;
11626
71.3k
                    int i2 = i;
11627
11628
                    /* Make sure n2 has bits from the right digit. */
11629
71.3k
                    if (c2 == 0) {
11630
2.53k
                        n2 = e->dp[i2--];
11631
2.53k
                        c2 = SP_WORD_SIZE;
11632
2.53k
                    }
11633
                    /* Mask off the next bit. */
11634
71.3k
                    y = (int)((n2 >> (SP_WORD_SIZE - 1)) & 1);
11635
71.3k
                    if (y == 1) {
11636
33.0k
                        break;
11637
33.0k
                    }
11638
11639
                    /* Square and update position. */
11640
38.2k
                    err = sp_sqr(tr, tr);
11641
38.2k
                    if (err == MP_OKAY) {
11642
38.2k
                        err = _sp_mont_red(tr, m, mp);
11643
38.2k
                    }
11644
38.2k
                    n = n2 << 1;
11645
38.2k
                    c = c2 - 1;
11646
38.2k
                    i = i2;
11647
38.2k
                }
11648
11649
33.8k
                if (err == MP_OKAY) {
11650
                    /* Check we have enough bits left for a window. */
11651
33.8k
                    if ((i < 0) && (c < winBits)) {
11652
702
                        break;
11653
702
                    }
11654
11655
33.0k
                    if (c == 0) {
11656
                        /* Bits up to end of digit */
11657
1.21k
                        n = e->dp[i--];
11658
1.21k
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11659
1.21k
                        n <<= winBits;
11660
1.21k
                        c = SP_WORD_SIZE - winBits;
11661
1.21k
                    }
11662
31.8k
                    else if (c < winBits) {
11663
                        /* Bits to end of digit and part of next */
11664
5.09k
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11665
5.09k
                        n = e->dp[i--];
11666
5.09k
                        c = winBits - c;
11667
5.09k
                        y |= (int)(n >> (SP_WORD_SIZE - c));
11668
5.09k
                        n <<= c;
11669
5.09k
                        c = SP_WORD_SIZE - c;
11670
5.09k
                    }
11671
26.7k
                    else {
11672
                        /* Bits from middle of digit */
11673
26.7k
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11674
26.7k
                        n <<= winBits;
11675
26.7k
                        c -= winBits;
11676
26.7k
                    }
11677
33.0k
                    y &= mask;
11678
33.0k
                }
11679
11680
                /* Square for number of bits in window. */
11681
209k
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
11682
176k
                    err = sp_sqr(tr, tr);
11683
176k
                    if (err == MP_OKAY) {
11684
175k
                        err = _sp_mont_red(tr, m, mp);
11685
175k
                    }
11686
176k
                }
11687
                /* Multiply by window number for next set of bits. */
11688
33.1k
                if (err == MP_OKAY) {
11689
33.0k
                    err = sp_mul(tr, t[y], tr);
11690
33.0k
                }
11691
33.1k
                if (err == MP_OKAY) {
11692
33.0k
                    err = _sp_mont_red(tr, m, mp);
11693
33.0k
                }
11694
33.1k
            }
11695
720
            if ((err == MP_OKAY) && (c > 0)) {
11696
                /* Handle remaining bits.
11697
                 * Window values have top bit set and can't be used. */
11698
510
                n = e->dp[0];
11699
1.93k
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
11700
1.42k
                    err = sp_sqr(tr, tr);
11701
1.42k
                    if (err == MP_OKAY) {
11702
1.42k
                        err = _sp_mont_red(tr, m, mp);
11703
1.42k
                    }
11704
1.42k
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
11705
852
                        err = sp_mul(tr, bm, tr);
11706
852
                        if (err == MP_OKAY) {
11707
852
                            err = _sp_mont_red(tr, m, mp);
11708
852
                        }
11709
852
                    }
11710
1.42k
                }
11711
510
            }
11712
720
        }
11713
11714
823
        if (err == MP_OKAY) {
11715
            /* Convert from montgomery form. */
11716
702
            err = _sp_mont_red(tr, m, mp);
11717
            /* Reduction implementation returns number to range < m. */
11718
702
        }
11719
823
    }
11720
949
    if ((!done) && (err == MP_OKAY)) {
11721
702
        err = sp_copy(tr, r);
11722
702
    }
11723
11724
949
    FREE_SP_INT_ARRAY(t, NULL);
11725
949
    return err;
11726
949
}
11727
11728
#undef SP_ALLOC
11729
11730
#else
11731
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11732
 * Non-constant time implementation.
11733
 *
11734
 * @param  [in]   b  SP integer that is the base.
11735
 * @param  [in]   e  SP integer that is the exponent.
11736
 * @param  [in]   m  SP integer that is the modulus.
11737
 * @param  [out]  r  SP integer to hold result.
11738
 *
11739
 * @return  MP_OKAY on success.
11740
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11741
 * @return  MP_MEM when dynamic memory allocation fails.
11742
 */
11743
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11744
{
11745
    int i;
11746
    int err = MP_OKAY;
11747
    int done = 0;
11748
    int y = 0;
11749
    int bits = sp_count_bits(e);
11750
    sp_int_digit mp;
11751
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
11752
11753
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
11754
    if (err == MP_OKAY) {
11755
        sp_init_size(t[0], m->used * 2 + 1);
11756
        sp_init_size(t[1], m->used * 2 + 1);
11757
11758
        /* Ensure base is less than exponent. */
11759
        if (_sp_cmp_abs(b, m) != MP_LT) {
11760
            err = sp_mod(b, m, t[0]);
11761
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
11762
                sp_set(r, 0);
11763
                done = 1;
11764
            }
11765
        }
11766
        else {
11767
            err = sp_copy(b, t[0]);
11768
        }
11769
    }
11770
11771
    if ((!done) && (err == MP_OKAY)) {
11772
        err = sp_mont_setup(m, &mp);
11773
        if (err == MP_OKAY) {
11774
            err = sp_mont_norm(t[1], m);
11775
        }
11776
        if (err == MP_OKAY) {
11777
            /* Convert to montgomery form. */
11778
            err = sp_mulmod(t[0], t[1], m, t[0]);
11779
        }
11780
        if (err == MP_OKAY) {
11781
            /* Montgomert form of base to multiply by. */
11782
            sp_copy(t[0], t[1]);
11783
        }
11784
11785
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
11786
            err = sp_sqr(t[0], t[0]);
11787
            if (err == MP_OKAY) {
11788
                err = _sp_mont_red(t[0], m, mp);
11789
            }
11790
            if (err == MP_OKAY) {
11791
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
11792
                if (y != 0) {
11793
                    err = sp_mul(t[0], t[1], t[0]);
11794
                    if (err == MP_OKAY) {
11795
                        err = _sp_mont_red(t[0], m, mp);
11796
                    }
11797
                }
11798
            }
11799
        }
11800
        if (err == MP_OKAY) {
11801
            /* Convert from montgomery form. */
11802
            err = _sp_mont_red(t[0], m, mp);
11803
            /* Reduction implementation returns number to range < m. */
11804
        }
11805
    }
11806
    if ((!done) && (err == MP_OKAY)) {
11807
        err = sp_copy(t[0], r);
11808
    }
11809
11810
    FREE_SP_INT_ARRAY(t, NULL);
11811
    return err;
11812
}
11813
#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
11814
11815
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11816
 * Non-constant time implementation.
11817
 *
11818
 * @param  [in]   b  SP integer that is the base.
11819
 * @param  [in]   e  SP integer that is the exponent.
11820
 * @param  [in]   m  SP integer that is the modulus.
11821
 * @param  [out]  r  SP integer to hold result.
11822
 *
11823
 * @return  MP_OKAY on success.
11824
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11825
 * @return  MP_MEM when dynamic memory allocation fails.
11826
 */
11827
int sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11828
6.82k
{
11829
6.82k
    int err = MP_OKAY;
11830
11831
6.82k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11832
0
        err = MP_VAL;
11833
0
    }
11834
11835
#if 0
11836
    if (err == MP_OKAY) {
11837
        sp_print(b, "a");
11838
        sp_print(e, "b");
11839
        sp_print(m, "m");
11840
    }
11841
#endif
11842
11843
6.82k
    if (err != MP_OKAY) {
11844
0
    }
11845
    /* Handle special cases. */
11846
6.82k
    else if (sp_iszero(m)) {
11847
25
        err = MP_VAL;
11848
25
    }
11849
#ifdef WOLFSSL_SP_INT_NEGATIVE
11850
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
11851
        err = MP_VAL;
11852
    }
11853
#endif
11854
6.80k
    else if (sp_isone(m)) {
11855
22
        sp_set(r, 0);
11856
22
    }
11857
6.78k
    else if (sp_iszero(e)) {
11858
213
        sp_set(r, 1);
11859
213
    }
11860
6.56k
    else if (sp_iszero(b)) {
11861
48
        sp_set(r, 0);
11862
48
    }
11863
    /* Ensure SP integers have space for intermediate values. */
11864
6.52k
    else if (m->used * 2 >= r->size) {
11865
15
        err = MP_VAL;
11866
15
    }
11867
6.50k
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
11868
6.50k
    else if (mp_iseven(m)) {
11869
86
        err = _sp_exptmod_ex(b, e, e->used * SP_WORD_SIZE, m, r);
11870
86
    }
11871
6.41k
#endif
11872
6.41k
    else {
11873
6.41k
        err = _sp_exptmod_nct(b, e, m, r);
11874
6.41k
    }
11875
11876
#if 0
11877
    if (err == MP_OKAY) {
11878
        sp_print(r, "rme");
11879
    }
11880
#endif
11881
11882
6.82k
    return err;
11883
6.82k
}
11884
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
11885
11886
/***************
11887
 * 2^e functions
11888
 ***************/
11889
11890
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
11891
/* Divide by 2^e: r = a >> e and rem = bits shifted out
11892
 *
11893
 * @param  [in]   a    SP integer to divide.
11894
 * @param  [in]   e    Exponent bits (dividing by 2^e).
11895
 * @param  [in]   m    SP integer that is the modulus.
11896
 * @param  [out]  r    SP integer to hold result.
11897
 * @param  [out]  rem  SP integer to hold remainder.
11898
 *
11899
 * @return  MP_OKAY on success.
11900
 * @return  MP_VAL when a is NULL.
11901
 */
11902
int sp_div_2d(sp_int* a, int e, sp_int* r, sp_int* rem)
11903
591
{
11904
591
    int err = MP_OKAY;
11905
11906
591
    if (a == NULL) {
11907
0
        err = MP_VAL;
11908
0
    }
11909
11910
591
    if (err == MP_OKAY) {
11911
591
        int remBits = sp_count_bits(a) - e;
11912
11913
591
        if (remBits <= 0) {
11914
            /* Shifting down by more bits than in number. */
11915
194
            _sp_zero(r);
11916
194
            sp_copy(a, rem);
11917
194
        }
11918
397
        else {
11919
397
            if (rem != NULL) {
11920
                /* Copy a in to remainder. */
11921
290
                err = sp_copy(a, rem);
11922
290
            }
11923
            /* Shift a down by into result. */
11924
397
            sp_rshb(a, e, r);
11925
397
            if (rem != NULL) {
11926
                /* Set used and mask off top digit of remainder. */
11927
290
                rem->used = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
11928
290
                e &= SP_WORD_MASK;
11929
290
                if (e > 0) {
11930
110
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
11931
110
                }
11932
290
                sp_clamp(rem);
11933
            #ifdef WOLFSSL_SP_INT_NEGATIVE
11934
                rem->sign = MP_ZPOS;
11935
            #endif
11936
290
            }
11937
397
        }
11938
591
    }
11939
11940
591
    return err;
11941
591
}
11942
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
11943
11944
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
11945
/* The bottom e bits: r = a & ((1 << e) - 1)
11946
 *
11947
 * @param  [in]   a  SP integer to reduce.
11948
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
11949
 * @param  [out]  r  SP integer to hold result.
11950
 *
11951
 * @return  MP_OKAY on success.
11952
 * @return  MP_VAL when a or r is NULL.
11953
 */
11954
int sp_mod_2d(sp_int* a, int e, sp_int* r)
11955
95
{
11956
95
    int err = MP_OKAY;
11957
11958
95
    if ((a == NULL) || (r == NULL)) {
11959
0
        err = MP_VAL;
11960
0
    }
11961
11962
95
    if (err == MP_OKAY) {
11963
95
        int digits = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
11964
95
        if (a != r) {
11965
95
            XMEMCPY(r->dp, a->dp, digits * sizeof(sp_int_digit));
11966
95
            r->used = a->used;
11967
        #ifdef WOLFSSL_SP_INT_NEGATIVE
11968
            r->sign = a->sign;
11969
        #endif
11970
95
        }
11971
95
    #ifndef WOLFSSL_SP_INT_NEGATIVE
11972
95
        if (digits <= a->used)
11973
    #else
11974
        if ((a->sign != MP_ZPOS) || (digits <= a->used))
11975
    #endif
11976
73
        {
11977
        #ifdef WOLFSSL_SP_INT_NEGATIVE
11978
            if (a->sign == MP_NEG) {
11979
                int i;
11980
                sp_int_digit carry = 0;
11981
11982
                /* Negate value. */
11983
                for (i = 0; i < r->used; i++) {
11984
                    sp_int_digit next = r->dp[i] > 0;
11985
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
11986
                    carry |= next;
11987
                }
11988
                for (; i < digits; i++) {
11989
                    r->dp[i] = (sp_int_digit)0 - carry;
11990
                }
11991
                r->sign = MP_ZPOS;
11992
            }
11993
        #endif
11994
            /* Set used and mask off top digit of result. */
11995
73
            r->used = digits;
11996
73
            e &= SP_WORD_MASK;
11997
73
            if (e > 0) {
11998
28
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
11999
28
            }
12000
73
            sp_clamp(r);
12001
73
        }
12002
95
    }
12003
12004
95
    return err;
12005
95
}
12006
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
12007
12008
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
12009
    !defined(NO_DH))
12010
/* Multiply by 2^e: r = a << e
12011
 *
12012
 * @param  [in]   a  SP integer to multiply.
12013
 * @param  [in]   e  Multiplier bits (multiplier equals 2^e).
12014
 * @param  [out]  r  SP integer to hold result.
12015
 *
12016
 * @return  MP_OKAY on success.
12017
 * @return  MP_VAL when a or r is NULL, or result is too big for fixed data
12018
 *          length.
12019
 */
12020
int sp_mul_2d(sp_int* a, int e, sp_int* r)
12021
583k
{
12022
583k
    int err = MP_OKAY;
12023
12024
583k
    if ((a == NULL) || (r == NULL)) {
12025
0
        err = MP_VAL;
12026
0
    }
12027
12028
583k
    if ((err == MP_OKAY) && (sp_count_bits(a) + e > r->size * SP_WORD_SIZE)) {
12029
0
        err = MP_VAL;
12030
0
    }
12031
12032
583k
    if (err == MP_OKAY) {
12033
        /* Copy a into r as left shift function works on the number. */
12034
583k
        if (a != r) {
12035
10.8k
            err = sp_copy(a, r);
12036
10.8k
        }
12037
583k
    }
12038
12039
583k
    if (err == MP_OKAY) {
12040
#if 0
12041
        sp_print(a, "a");
12042
        sp_print_int(e, "n");
12043
#endif
12044
583k
        err = sp_lshb(r, e);
12045
#if 0
12046
        sp_print(r, "rsl");
12047
#endif
12048
583k
    }
12049
12050
583k
    return err;
12051
583k
}
12052
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
12053
12054
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12055
    defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
12056
12057
/* START SP_SQR implementations */
12058
/* This code is generated.
12059
 * To generate:
12060
 *   cd scripts/sp/sp_int
12061
 *   ./gen.sh
12062
 * File sp_sqr.c contains code.
12063
 */
12064
12065
#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
12066
#ifdef SQR_MUL_ASM
12067
/* Square a and store in r. r = a * a
12068
 *
12069
 * @param  [in]   a  SP integer to square.
12070
 * @param  [out]  r  SP integer result.
12071
 *
12072
 * @return  MP_OKAY on success.
12073
 * @return  MP_MEM when dynamic memory allocation fails.
12074
 */
12075
static int _sp_sqr(sp_int* a, sp_int* r)
12076
27.2M
{
12077
27.2M
    int err = MP_OKAY;
12078
27.2M
    int i;
12079
27.2M
    int j;
12080
27.2M
    int k;
12081
27.2M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12082
27.2M
    sp_int_digit* t = NULL;
12083
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
12084
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
12085
    sp_int_digit t[a->used * 2];
12086
#else
12087
    sp_int_digit t[SP_INT_DIGITS];
12088
#endif
12089
12090
27.2M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12091
27.2M
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
12092
27.2M
        DYNAMIC_TYPE_BIGINT);
12093
27.2M
    if (t == NULL) {
12094
1.18k
        err = MP_MEM;
12095
1.18k
    }
12096
27.2M
#endif
12097
27.2M
    if ((err == MP_OKAY) && (a->used <= 1)) {
12098
993k
        sp_int_digit l, h;
12099
12100
993k
        h = 0;
12101
993k
        l = 0;
12102
993k
        SP_ASM_SQR(h, l, a->dp[0]);
12103
993k
        t[0] = h;
12104
993k
        t[1] = l;
12105
993k
    }
12106
26.2M
    else if (err == MP_OKAY) {
12107
26.2M
        sp_int_digit l, h, o;
12108
12109
26.2M
        h = 0;
12110
26.2M
        l = 0;
12111
26.2M
        SP_ASM_SQR(h, l, a->dp[0]);
12112
26.2M
        t[0] = h;
12113
26.2M
        h = 0;
12114
26.2M
        o = 0;
12115
133M
        for (k = 1; k < (a->used + 1) / 2; k++) {
12116
107M
            i = k;
12117
107M
            j = k - 1;
12118
413M
            for (; (j >= 0); i++, j--) {
12119
306M
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12120
306M
            }
12121
107M
            t[k * 2 - 1] = l;
12122
107M
            l = h;
12123
107M
            h = o;
12124
107M
            o = 0;
12125
12126
107M
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
12127
107M
            i = k + 1;
12128
107M
            j = k - 1;
12129
413M
            for (; (j >= 0); i++, j--) {
12130
306M
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12131
306M
            }
12132
107M
            t[k * 2] = l;
12133
107M
            l = h;
12134
107M
            h = o;
12135
107M
            o = 0;
12136
107M
        }
12137
138M
        for (; k < a->used; k++) {
12138
112M
            i = k;
12139
112M
            j = k - 1;
12140
440M
            for (; (i < a->used); i++, j--) {
12141
328M
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12142
328M
            }
12143
112M
            t[k * 2 - 1] = l;
12144
112M
            l = h;
12145
112M
            h = o;
12146
112M
            o = 0;
12147
12148
112M
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
12149
112M
            i = k + 1;
12150
112M
            j = k - 1;
12151
328M
            for (; (i < a->used); i++, j--) {
12152
216M
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12153
216M
            }
12154
112M
            t[k * 2] = l;
12155
112M
            l = h;
12156
112M
            h = o;
12157
112M
            o = 0;
12158
112M
        }
12159
26.2M
        t[k * 2 - 1] = l;
12160
26.2M
    }
12161
12162
27.2M
    if (err == MP_OKAY) {
12163
27.2M
        r->used = a->used * 2;
12164
27.2M
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
12165
27.2M
        sp_clamp(r);
12166
27.2M
    }
12167
12168
27.2M
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12169
27.2M
    if (t != NULL) {
12170
27.2M
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
12171
27.2M
    }
12172
27.2M
#endif
12173
27.2M
    return err;
12174
27.2M
}
12175
#else /* !SQR_MUL_ASM */
12176
/* Square a and store in r. r = a * a
12177
 *
12178
 * @param  [in]   a  SP integer to square.
12179
 * @param  [out]  r  SP integer result.
12180
 *
12181
 * @return  MP_OKAY on success.
12182
 * @return  MP_MEM when dynamic memory allocation fails.
12183
 */
12184
static int _sp_sqr(sp_int* a, sp_int* r)
12185
{
12186
    int err = MP_OKAY;
12187
    int i;
12188
    int j;
12189
    int k;
12190
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12191
    sp_int_digit* t = NULL;
12192
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
12193
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
12194
    sp_int_digit t[a->used * 2];
12195
#else
12196
    sp_int_digit t[SP_INT_DIGITS];
12197
#endif
12198
12199
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12200
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
12201
        DYNAMIC_TYPE_BIGINT);
12202
    if (t == NULL) {
12203
        err = MP_MEM;
12204
    }
12205
#endif
12206
    if (err == MP_OKAY) {
12207
        sp_int_word w;
12208
        sp_int_word l;
12209
        sp_int_word h;
12210
    #ifdef SP_WORD_OVERFLOW
12211
        sp_int_word o;
12212
    #endif
12213
12214
        w = (sp_int_word)a->dp[0] * a->dp[0];
12215
        t[0] = (sp_int_digit)w;
12216
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
12217
        h = 0;
12218
    #ifdef SP_WORD_OVERFLOW
12219
        o = 0;
12220
    #endif
12221
        for (k = 1; k <= (a->used - 1) * 2; k++) {
12222
            i = k / 2;
12223
            j = k - i;
12224
            if (i == j) {
12225
                w = (sp_int_word)a->dp[i] * a->dp[j];
12226
                l += (sp_int_digit)w;
12227
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12228
            #ifdef SP_WORD_OVERFLOW
12229
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12230
                l &= SP_MASK;
12231
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12232
                h &= SP_MASK;
12233
            #endif
12234
            }
12235
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
12236
                w = (sp_int_word)a->dp[i] * a->dp[j];
12237
                l += (sp_int_digit)w;
12238
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12239
            #ifdef SP_WORD_OVERFLOW
12240
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12241
                l &= SP_MASK;
12242
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12243
                h &= SP_MASK;
12244
            #endif
12245
                l += (sp_int_digit)w;
12246
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12247
            #ifdef SP_WORD_OVERFLOW
12248
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12249
                l &= SP_MASK;
12250
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12251
                h &= SP_MASK;
12252
            #endif
12253
            }
12254
            t[k] = (sp_int_digit)l;
12255
            l >>= SP_WORD_SIZE;
12256
            l += (sp_int_digit)h;
12257
            h >>= SP_WORD_SIZE;
12258
        #ifdef SP_WORD_OVERFLOW
12259
            h += o & SP_MASK;
12260
            o >>= SP_WORD_SIZE;
12261
        #endif
12262
        }
12263
        t[k] = (sp_int_digit)l;
12264
        r->used = k + 1;
12265
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
12266
        sp_clamp(r);
12267
    }
12268
12269
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12270
    if (t != NULL) {
12271
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
12272
    }
12273
#endif
12274
    return err;
12275
}
12276
#endif /* SQR_MUL_ASM */
12277
#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
12278
12279
#ifndef WOLFSSL_SP_SMALL
12280
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12281
#if SP_WORD_SIZE == 64
12282
#ifndef SQR_MUL_ASM
12283
/* Square a and store in r. r = a * a
12284
 *
12285
 * Long-hand implementation.
12286
 *
12287
 * @param  [in]   a  SP integer to square.
12288
 * @param  [out]  r  SP integer result.
12289
 *
12290
 * @return  MP_OKAY on success.
12291
 * @return  MP_MEM when dynamic memory allocation fails.
12292
 */
12293
static int _sp_sqr_4(sp_int* a, sp_int* r)
12294
{
12295
    int err = MP_OKAY;
12296
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12297
    sp_int_word* w = NULL;
12298
#else
12299
    sp_int_word w[10];
12300
#endif
12301
    sp_int_digit* da = a->dp;
12302
12303
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12304
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
12305
        DYNAMIC_TYPE_BIGINT);
12306
    if (w == NULL) {
12307
        err = MP_MEM;
12308
    }
12309
#endif
12310
12311
12312
    if (err == MP_OKAY) {
12313
        w[0] = (sp_int_word)da[0] * da[0];
12314
        w[1] = (sp_int_word)da[0] * da[1];
12315
        w[2] = (sp_int_word)da[0] * da[2];
12316
        w[3] = (sp_int_word)da[1] * da[1];
12317
        w[4] = (sp_int_word)da[0] * da[3];
12318
        w[5] = (sp_int_word)da[1] * da[2];
12319
        w[6] = (sp_int_word)da[1] * da[3];
12320
        w[7] = (sp_int_word)da[2] * da[2];
12321
        w[8] = (sp_int_word)da[2] * da[3];
12322
        w[9] = (sp_int_word)da[3] * da[3];
12323
12324
        r->dp[0] = w[0];
12325
        w[0] >>= SP_WORD_SIZE;
12326
        w[0] += (sp_int_digit)w[1];
12327
        w[0] += (sp_int_digit)w[1];
12328
        r->dp[1] = w[0];
12329
        w[0] >>= SP_WORD_SIZE;
12330
        w[1] >>= SP_WORD_SIZE;
12331
        w[0] += (sp_int_digit)w[1];
12332
        w[0] += (sp_int_digit)w[1];
12333
        w[0] += (sp_int_digit)w[2];
12334
        w[0] += (sp_int_digit)w[2];
12335
        w[0] += (sp_int_digit)w[3];
12336
        r->dp[2] = w[0];
12337
        w[0] >>= SP_WORD_SIZE;
12338
        w[2] >>= SP_WORD_SIZE;
12339
        w[0] += (sp_int_digit)w[2];
12340
        w[0] += (sp_int_digit)w[2];
12341
        w[3] >>= SP_WORD_SIZE;
12342
        w[0] += (sp_int_digit)w[3];
12343
        w[0] += (sp_int_digit)w[4];
12344
        w[0] += (sp_int_digit)w[4];
12345
        w[0] += (sp_int_digit)w[5];
12346
        w[0] += (sp_int_digit)w[5];
12347
        r->dp[3] = w[0];
12348
        w[0] >>= SP_WORD_SIZE;
12349
        w[4] >>= SP_WORD_SIZE;
12350
        w[0] += (sp_int_digit)w[4];
12351
        w[0] += (sp_int_digit)w[4];
12352
        w[5] >>= SP_WORD_SIZE;
12353
        w[0] += (sp_int_digit)w[5];
12354
        w[0] += (sp_int_digit)w[5];
12355
        w[0] += (sp_int_digit)w[6];
12356
        w[0] += (sp_int_digit)w[6];
12357
        w[0] += (sp_int_digit)w[7];
12358
        r->dp[4] = w[0];
12359
        w[0] >>= SP_WORD_SIZE;
12360
        w[6] >>= SP_WORD_SIZE;
12361
        w[0] += (sp_int_digit)w[6];
12362
        w[0] += (sp_int_digit)w[6];
12363
        w[7] >>= SP_WORD_SIZE;
12364
        w[0] += (sp_int_digit)w[7];
12365
        w[0] += (sp_int_digit)w[8];
12366
        w[0] += (sp_int_digit)w[8];
12367
        r->dp[5] = w[0];
12368
        w[0] >>= SP_WORD_SIZE;
12369
        w[8] >>= SP_WORD_SIZE;
12370
        w[0] += (sp_int_digit)w[8];
12371
        w[0] += (sp_int_digit)w[8];
12372
        w[0] += (sp_int_digit)w[9];
12373
        r->dp[6] = w[0];
12374
        w[0] >>= SP_WORD_SIZE;
12375
        w[9] >>= SP_WORD_SIZE;
12376
        w[0] += (sp_int_digit)w[9];
12377
        r->dp[7] = w[0];
12378
12379
        r->used = 8;
12380
        sp_clamp(r);
12381
    }
12382
12383
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12384
    if (w != NULL) {
12385
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
12386
    }
12387
#endif
12388
    return err;
12389
}
12390
#else /* SQR_MUL_ASM */
12391
/* Square a and store in r. r = a * a
12392
 *
12393
 * Comba implementation, fully unrolled for a 4-digit operand: reads
 * a->dp[0..3], writes r->dp[0..7] and sets r->used to 8 before sp_clamp().
12394
 *
12395
 * @param  [in]   a  SP integer to square.
12396
 * @param  [out]  r  SP integer result.
12397
 *
12398
 * @return  MP_OKAY always - this variant has no failure path.
12399
 *          (No dynamic memory is allocated here, so MP_MEM is not returned.)
12400
 */
12401
static int _sp_sqr_4(sp_int* a, sp_int* r)
12402
30.7M
{
12403
30.7M
    /* l, h, o: running accumulator for the current result digit (presumably
     * low, high and overflow words - the SP_ASM_* macros are defined outside
     * this view). */
    sp_int_digit l = 0;
12404
30.7M
    sp_int_digit h = 0;
12405
30.7M
    sp_int_digit o = 0;
12406
30.7M
    /* Staging area for result digits 0..3; copied into r->dp at the end. */
    sp_int_digit t[4];
12407
12408
30.7M
    /* Result digit k is built from the products a->dp[i] * a->dp[j] with
     * i + j == k. Each pair with i < j appears once, through a macro whose
     * name suggests the product is added twice (MUL_ADD2*); each a->dp[i]
     * with 2 * i == k appears once through a SQR macro. */
    /* NOTE(review): h and l are passed here in the opposite order to every
     * later macro call; the word stored to t[0] is whatever lands in h. */
    SP_ASM_SQR(h, l, a->dp[0]);
12409
30.7M
    t[0] = h;
12410
30.7M
    h = 0;
12411
30.7M
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12412
30.7M
    t[1] = l;
12413
30.7M
    /* Move the accumulator down one word for the next result digit. */
    l = h;
12414
30.7M
    h = o;
12415
30.7M
    o = 0;
12416
30.7M
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12417
30.7M
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12418
30.7M
    t[2] = l;
12419
30.7M
    l = h;
12420
30.7M
    h = o;
12421
30.7M
    o = 0;
12422
30.7M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12423
30.7M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12424
30.7M
    t[3] = l;
12425
30.7M
    l = h;
12426
30.7M
    h = o;
12427
30.7M
    o = 0;
12428
30.7M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12429
30.7M
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12430
30.7M
    /* From digit 4 upwards the result is written straight into r->dp. */
    r->dp[4] = l;
12431
30.7M
    l = h;
12432
30.7M
    h = o;
12433
30.7M
    o = 0;
12434
30.7M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
12435
30.7M
    r->dp[5] = l;
12436
30.7M
    l = h;
12437
30.7M
    h = o;
12438
30.7M
    /* Last column: o is not reset and not passed to the macro. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
12439
30.7M
    r->dp[6] = l;
12440
30.7M
    r->dp[7] = h;
12441
30.7M
    /* Every read of a->dp[0..3] is done by now; only here are the low four
     * digits of r written (this ordering would matter if r and a are the
     * same object). */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
12442
30.7M
    r->used = 8;
12443
30.7M
    sp_clamp(r);
12444
12445
30.7M
    return MP_OKAY;
12446
30.7M
}
12447
#endif /* SQR_MUL_ASM */
12448
#endif /* SP_WORD_SIZE == 64 */
12449
#if SP_WORD_SIZE == 64
12450
#ifdef SQR_MUL_ASM
12451
/* Square a and store in r. r = a * a
12452
 *
12453
 * Comba implementation, fully unrolled for a 6-digit operand: reads
 * a->dp[0..5], writes r->dp[0..11] and sets r->used to 12 before sp_clamp().
12454
 *
12455
 * @param  [in]   a  SP integer to square.
12456
 * @param  [out]  r  SP integer result.
12457
 *
12458
 * @return  MP_OKAY always - this variant has no failure path.
12459
 *          (No dynamic memory is allocated here, so MP_MEM is not returned.)
12460
 */
12461
static int _sp_sqr_6(sp_int* a, sp_int* r)
12462
12.5M
{
12463
12.5M
    /* l, h, o: running accumulator for the current result digit (presumably
     * low, high and overflow words - the SP_ASM_* macros are defined outside
     * this view). */
    sp_int_digit l = 0;
12464
12.5M
    sp_int_digit h = 0;
12465
12.5M
    sp_int_digit o = 0;
12466
12.5M
    /* tl, th, to: second accumulator used for columns with three cross
     * products; it is folded into l, h, o by SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
12467
12.5M
    sp_int_digit th = 0;
12468
12.5M
    sp_int_digit to;
12469
12.5M
    /* Staging area for result digits 0..5; copied into r->dp at the end. */
    sp_int_digit t[6];
12470
12471
    /* NOTE(review): this function sits inside an "#if SP_WORD_SIZE == 64"
     * section, so the condition below cannot hold here. Otherwise the first
     * use of 'to' is as an argument to SP_ASM_MUL_SET - presumably written
     * there before being read; confirm against the macro definition. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12472
    to = 0;
12473
#endif
12474
12475
12.5M
    /* Result digit k is built from the products a->dp[i] * a->dp[j] with
     * i + j == k. Each pair with i < j appears once, through macros whose
     * names suggest the product is added twice (MUL_ADD2*, or MUL_SET/MUL_ADD
     * followed by ADD_DBL_3); each a->dp[i] with 2 * i == k appears once
     * through a SQR macro. */
    /* NOTE(review): h and l are passed here in the opposite order to every
     * later macro call; the word stored to t[0] is whatever lands in h. */
    SP_ASM_SQR(h, l, a->dp[0]);
12476
12.5M
    t[0] = h;
12477
12.5M
    h = 0;
12478
12.5M
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12479
12.5M
    t[1] = l;
12480
12.5M
    /* Move the accumulator down one word for the next result digit. */
    l = h;
12481
12.5M
    h = o;
12482
12.5M
    o = 0;
12483
12.5M
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12484
12.5M
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12485
12.5M
    t[2] = l;
12486
12.5M
    l = h;
12487
12.5M
    h = o;
12488
12.5M
    o = 0;
12489
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12490
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12491
12.5M
    t[3] = l;
12492
12.5M
    l = h;
12493
12.5M
    h = o;
12494
12.5M
    o = 0;
12495
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12496
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12497
12.5M
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12498
12.5M
    t[4] = l;
12499
12.5M
    l = h;
12500
12.5M
    h = o;
12501
12.5M
    o = 0;
12502
12.5M
    /* Three cross products: gather them in tl, th, to first, then fold the
     * sum into l, h, o in one step. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12503
12.5M
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12504
12.5M
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12505
12.5M
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12506
12.5M
    t[5] = l;
12507
12.5M
    l = h;
12508
12.5M
    h = o;
12509
12.5M
    o = 0;
12510
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
12511
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
12512
12.5M
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12513
12.5M
    /* From digit 6 upwards the result is written straight into r->dp. */
    r->dp[6] = l;
12514
12.5M
    l = h;
12515
12.5M
    h = o;
12516
12.5M
    o = 0;
12517
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
12518
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
12519
12.5M
    r->dp[7] = l;
12520
12.5M
    l = h;
12521
12.5M
    h = o;
12522
12.5M
    o = 0;
12523
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
12524
12.5M
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12525
12.5M
    r->dp[8] = l;
12526
12.5M
    l = h;
12527
12.5M
    h = o;
12528
12.5M
    o = 0;
12529
12.5M
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
12530
12.5M
    r->dp[9] = l;
12531
12.5M
    l = h;
12532
12.5M
    h = o;
12533
12.5M
    /* Last column: o is not reset and not passed to the macro. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
12534
12.5M
    r->dp[10] = l;
12535
12.5M
    r->dp[11] = h;
12536
12.5M
    /* Every read of a->dp[0..5] is done by now; only here are the low six
     * digits of r written (this ordering would matter if r and a are the
     * same object). */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
12537
12.5M
    r->used = 12;
12538
12.5M
    sp_clamp(r);
12539
12540
12.5M
    return MP_OKAY;
12541
12.5M
}
12542
#endif /* SQR_MUL_ASM */
12543
#endif /* SP_WORD_SIZE == 64 */
12544
#if SP_WORD_SIZE == 32
12545
#ifdef SQR_MUL_ASM
12546
/* Square a and store in r. r = a * a
12547
 *
12548
 * Comba implementation, fully unrolled for an 8-digit operand: reads
 * a->dp[0..7], writes r->dp[0..15] and sets r->used to 16 before sp_clamp().
12549
 *
12550
 * @param  [in]   a  SP integer to square.
12551
 * @param  [out]  r  SP integer result.
12552
 *
12553
 * @return  MP_OKAY always - this variant has no failure path.
12554
 *          (No dynamic memory is allocated here, so MP_MEM is not returned.)
12555
 */
12556
static int _sp_sqr_8(sp_int* a, sp_int* r)
12557
{
12558
    /* l, h, o: running accumulator for the current result digit (presumably
     * low, high and overflow words - the SP_ASM_* macros are defined outside
     * this view). */
    sp_int_digit l = 0;
12559
    sp_int_digit h = 0;
12560
    sp_int_digit o = 0;
12561
    /* tl, th, to: second accumulator used for columns with three or more
     * cross products; it is folded into l, h, o by SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
12562
    sp_int_digit th = 0;
12563
    sp_int_digit to;
12564
    /* Staging area for result digits 0..7; copied into r->dp at the end. */
    sp_int_digit t[8];
12565
12566
    /* 'to' is only pre-initialised for this build configuration; otherwise
     * its first use is as an argument to SP_ASM_MUL_SET - presumably written
     * there before being read; confirm against the macro definition. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12567
    to = 0;
12568
#endif
12569
12570
    /* Result digit k is built from the products a->dp[i] * a->dp[j] with
     * i + j == k. Each pair with i < j appears once, through macros whose
     * names suggest the product is added twice (MUL_ADD2*, or MUL_SET/MUL_ADD
     * followed by ADD_DBL_3); each a->dp[i] with 2 * i == k appears once
     * through a SQR macro. */
    /* NOTE(review): h and l are passed here in the opposite order to every
     * later macro call; the word stored to t[0] is whatever lands in h. */
    SP_ASM_SQR(h, l, a->dp[0]);
12571
    t[0] = h;
12572
    h = 0;
12573
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12574
    t[1] = l;
12575
    /* Move the accumulator down one word for the next result digit. */
    l = h;
12576
    h = o;
12577
    o = 0;
12578
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12579
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12580
    t[2] = l;
12581
    l = h;
12582
    h = o;
12583
    o = 0;
12584
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12585
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12586
    t[3] = l;
12587
    l = h;
12588
    h = o;
12589
    o = 0;
12590
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12591
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12592
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12593
    t[4] = l;
12594
    l = h;
12595
    h = o;
12596
    o = 0;
12597
    /* Three or more cross products: gather them in tl, th, to first, then
     * fold the sum into l, h, o in one step. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12598
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12599
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12600
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12601
    t[5] = l;
12602
    l = h;
12603
    h = o;
12604
    o = 0;
12605
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12606
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12607
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12608
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12609
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12610
    t[6] = l;
12611
    l = h;
12612
    h = o;
12613
    o = 0;
12614
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12615
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12616
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12617
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12618
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12619
    t[7] = l;
12620
    l = h;
12621
    h = o;
12622
    o = 0;
12623
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
12624
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12625
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12626
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12627
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12628
    /* From digit 8 upwards the result is written straight into r->dp. */
    r->dp[8] = l;
12629
    l = h;
12630
    h = o;
12631
    o = 0;
12632
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
12633
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12634
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12635
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12636
    r->dp[9] = l;
12637
    l = h;
12638
    h = o;
12639
    o = 0;
12640
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
12641
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
12642
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
12643
    r->dp[10] = l;
12644
    l = h;
12645
    h = o;
12646
    o = 0;
12647
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
12648
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
12649
    r->dp[11] = l;
12650
    l = h;
12651
    h = o;
12652
    o = 0;
12653
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
12654
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
12655
    r->dp[12] = l;
12656
    l = h;
12657
    h = o;
12658
    o = 0;
12659
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
12660
    r->dp[13] = l;
12661
    l = h;
12662
    h = o;
12663
    /* Last column: o is not reset and not passed to the macro. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
12664
    r->dp[14] = l;
12665
    r->dp[15] = h;
12666
    /* Every read of a->dp[0..7] is done by now; only here are the low eight
     * digits of r written (this ordering would matter if r and a are the
     * same object). */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
12667
    r->used = 16;
12668
    sp_clamp(r);
12669
12670
    return MP_OKAY;
12671
}
12672
#endif /* SQR_MUL_ASM */
12673
#endif /* SP_WORD_SIZE == 32 */
12674
#if SP_WORD_SIZE == 32
12675
#ifdef SQR_MUL_ASM
12676
/* Square a and store in r. r = a * a
12677
 *
12678
 * Comba implementation, fully unrolled for a 12-digit operand: reads
 * a->dp[0..11], writes r->dp[0..23] and sets r->used to 24 before sp_clamp().
12679
 *
12680
 * @param  [in]   a  SP integer to square.
12681
 * @param  [out]  r  SP integer result.
12682
 *
12683
 * @return  MP_OKAY always - this variant has no failure path.
12684
 *          (No dynamic memory is allocated here, so MP_MEM is not returned.)
12685
 */
12686
static int _sp_sqr_12(sp_int* a, sp_int* r)
12687
{
12688
    /* l, h, o: running accumulator for the current result digit (presumably
     * low, high and overflow words - the SP_ASM_* macros are defined outside
     * this view). */
    sp_int_digit l = 0;
12689
    sp_int_digit h = 0;
12690
    sp_int_digit o = 0;
12691
    /* tl, th, to: second accumulator used for columns with three or more
     * cross products; it is folded into l, h, o by SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
12692
    sp_int_digit th = 0;
12693
    sp_int_digit to;
12694
    /* Staging area for result digits 0..11; copied into r->dp at the end. */
    sp_int_digit t[12];
12695
12696
    /* 'to' is only pre-initialised for this build configuration; otherwise
     * its first use is as an argument to SP_ASM_MUL_SET - presumably written
     * there before being read; confirm against the macro definition. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12697
    to = 0;
12698
#endif
12699
12700
    /* Result digit k is built from the products a->dp[i] * a->dp[j] with
     * i + j == k. Each pair with i < j appears once, through macros whose
     * names suggest the product is added twice (MUL_ADD2*, or MUL_SET/MUL_ADD
     * followed by ADD_DBL_3); each a->dp[i] with 2 * i == k appears once
     * through a SQR macro. */
    /* NOTE(review): h and l are passed here in the opposite order to every
     * later macro call; the word stored to t[0] is whatever lands in h. */
    SP_ASM_SQR(h, l, a->dp[0]);
12701
    t[0] = h;
12702
    h = 0;
12703
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12704
    t[1] = l;
12705
    /* Move the accumulator down one word for the next result digit. */
    l = h;
12706
    h = o;
12707
    o = 0;
12708
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12709
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12710
    t[2] = l;
12711
    l = h;
12712
    h = o;
12713
    o = 0;
12714
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12715
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12716
    t[3] = l;
12717
    l = h;
12718
    h = o;
12719
    o = 0;
12720
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12721
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12722
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12723
    t[4] = l;
12724
    l = h;
12725
    h = o;
12726
    o = 0;
12727
    /* Three or more cross products: gather them in tl, th, to first, then
     * fold the sum into l, h, o in one step. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12728
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12729
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12730
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12731
    t[5] = l;
12732
    l = h;
12733
    h = o;
12734
    o = 0;
12735
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12736
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12737
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12738
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12739
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12740
    t[6] = l;
12741
    l = h;
12742
    h = o;
12743
    o = 0;
12744
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12745
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12746
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12747
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12748
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12749
    t[7] = l;
12750
    l = h;
12751
    h = o;
12752
    o = 0;
12753
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
12754
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
12755
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12756
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12757
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12758
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12759
    t[8] = l;
12760
    l = h;
12761
    h = o;
12762
    o = 0;
12763
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
12764
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
12765
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
12766
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12767
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12768
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12769
    t[9] = l;
12770
    l = h;
12771
    h = o;
12772
    o = 0;
12773
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
12774
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
12775
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
12776
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
12777
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
12778
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
12779
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12780
    t[10] = l;
12781
    l = h;
12782
    h = o;
12783
    o = 0;
12784
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
12785
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
12786
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
12787
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
12788
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
12789
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
12790
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12791
    t[11] = l;
12792
    l = h;
12793
    h = o;
12794
    o = 0;
12795
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
12796
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
12797
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
12798
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
12799
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
12800
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
12801
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12802
    /* From digit 12 upwards the result is written straight into r->dp. */
    r->dp[12] = l;
12803
    l = h;
12804
    h = o;
12805
    o = 0;
12806
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
12807
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
12808
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
12809
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
12810
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
12811
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12812
    r->dp[13] = l;
12813
    l = h;
12814
    h = o;
12815
    o = 0;
12816
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
12817
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
12818
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
12819
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
12820
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
12821
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12822
    r->dp[14] = l;
12823
    l = h;
12824
    h = o;
12825
    o = 0;
12826
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
12827
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
12828
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
12829
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
12830
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12831
    r->dp[15] = l;
12832
    l = h;
12833
    h = o;
12834
    o = 0;
12835
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
12836
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
12837
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
12838
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
12839
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12840
    r->dp[16] = l;
12841
    l = h;
12842
    h = o;
12843
    o = 0;
12844
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
12845
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
12846
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
12847
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12848
    r->dp[17] = l;
12849
    l = h;
12850
    h = o;
12851
    o = 0;
12852
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
12853
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
12854
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
12855
    r->dp[18] = l;
12856
    l = h;
12857
    h = o;
12858
    o = 0;
12859
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
12860
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
12861
    r->dp[19] = l;
12862
    l = h;
12863
    h = o;
12864
    o = 0;
12865
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
12866
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
12867
    r->dp[20] = l;
12868
    l = h;
12869
    h = o;
12870
    o = 0;
12871
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
12872
    r->dp[21] = l;
12873
    l = h;
12874
    h = o;
12875
    /* Last column: o is not reset and not passed to the macro. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
12876
    r->dp[22] = l;
12877
    r->dp[23] = h;
12878
    /* Every read of a->dp[0..11] is done by now; only here are the low
     * twelve digits of r written (this ordering would matter if r and a are
     * the same object). */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
12879
    r->used = 24;
12880
    sp_clamp(r);
12881
12882
    return MP_OKAY;
12883
}
12884
#endif /* SQR_MUL_ASM */
12885
#endif /* SP_WORD_SIZE == 32 */
12886
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12887
12888
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12889
    #if SP_INT_DIGITS >= 32
12890
/* Square a and store in r. r = a * a
12891
 *
12892
 * Comba implementation, fully unrolled for a 16-digit operand: reads
 * a->dp[0..15], writes r->dp[0..31] and sets r->used to 32 before sp_clamp().
12893
 *
12894
 * @param  [in]   a  SP integer to square.
12895
 * @param  [out]  r  SP integer result.
12896
 *
12897
 * @return  MP_OKAY on success.
12898
 * @return  MP_MEM when dynamic memory allocation fails (only possible when
 *          built with WOLFSSL_SMALL_STACK and without WOLFSSL_SP_NO_MALLOC);
 *          r is not written in that case.
12899
 */
12900
static int _sp_sqr_16(sp_int* a, sp_int* r)
12901
{
12902
    int err = MP_OKAY;
12903
    /* l, h, o: running accumulator for the current result digit (presumably
     * low, high and overflow words - the SP_ASM_* macros are defined outside
     * this view). */
    sp_int_digit l = 0;
12904
    sp_int_digit h = 0;
12905
    sp_int_digit o = 0;
12906
    /* tl, th, to: second accumulator used for columns with three or more
     * cross products; it is folded into l, h, o by SP_ASM_ADD_DBL_3. */
    sp_int_digit tl = 0;
12907
    sp_int_digit th = 0;
12908
    sp_int_digit to;
12909
    /* t: staging area for result digits 0..15, copied into r->dp at the end.
     * Heap-allocated in small-stack builds, a local array otherwise. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12910
    sp_int_digit* t = NULL;
12911
#else
12912
    sp_int_digit t[16];
12913
#endif
12914
12915
    /* 'to' is only pre-initialised for this build configuration; otherwise
     * its first use is as an argument to SP_ASM_MUL_SET - presumably written
     * there before being read; confirm against the macro definition. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12916
    to = 0;
12917
#endif
12918
12919
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12920
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
12921
         DYNAMIC_TYPE_BIGINT);
12922
     if (t == NULL) {
12923
         err = MP_MEM;
12924
     }
12925
#endif
12926
    /* All writes to r happen inside this block, so r is left as it was when
     * the allocation above fails. */
    if (err == MP_OKAY) {
12927
        /* Result digit k is built from the products a->dp[i] * a->dp[j] with
         * i + j == k. Each pair with i < j appears once, through macros whose
         * names suggest the product is added twice (MUL_ADD2*, or
         * MUL_SET/MUL_ADD followed by ADD_DBL_3); each a->dp[i] with
         * 2 * i == k appears once through a SQR macro. */
        /* NOTE(review): h and l are passed here in the opposite order to
         * every later macro call; the word stored to t[0] is whatever lands
         * in h. */
        SP_ASM_SQR(h, l, a->dp[0]);
12928
        t[0] = h;
12929
        h = 0;
12930
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12931
        t[1] = l;
12932
        /* Move the accumulator down one word for the next result digit. */
        l = h;
12933
        h = o;
12934
        o = 0;
12935
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12936
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12937
        t[2] = l;
12938
        l = h;
12939
        h = o;
12940
        o = 0;
12941
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12942
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12943
        t[3] = l;
12944
        l = h;
12945
        h = o;
12946
        o = 0;
12947
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12948
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12949
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12950
        t[4] = l;
12951
        l = h;
12952
        h = o;
12953
        o = 0;
12954
        /* Three or more cross products: gather them in tl, th, to first,
         * then fold the sum into l, h, o in one step. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12955
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12956
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12957
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12958
        t[5] = l;
12959
        l = h;
12960
        h = o;
12961
        o = 0;
12962
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12963
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12964
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12965
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12966
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12967
        t[6] = l;
12968
        l = h;
12969
        h = o;
12970
        o = 0;
12971
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12972
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12973
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12974
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12975
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12976
        t[7] = l;
12977
        l = h;
12978
        h = o;
12979
        o = 0;
12980
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
12981
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
12982
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12983
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12984
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12985
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12986
        t[8] = l;
12987
        l = h;
12988
        h = o;
12989
        o = 0;
12990
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
12991
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
12992
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
12993
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12994
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12995
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12996
        t[9] = l;
12997
        l = h;
12998
        h = o;
12999
        o = 0;
13000
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
13001
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
13002
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
13003
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
13004
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
13005
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
13006
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13007
        t[10] = l;
13008
        l = h;
13009
        h = o;
13010
        o = 0;
13011
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
13012
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
13013
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
13014
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
13015
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
13016
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
13017
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13018
        t[11] = l;
13019
        l = h;
13020
        h = o;
13021
        o = 0;
13022
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
13023
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
13024
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
13025
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
13026
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
13027
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
13028
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
13029
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13030
        t[12] = l;
13031
        l = h;
13032
        h = o;
13033
        o = 0;
13034
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
13035
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
13036
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
13037
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
13038
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
13039
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
13040
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
13041
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13042
        t[13] = l;
13043
        l = h;
13044
        h = o;
13045
        o = 0;
13046
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
13047
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
13048
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
13049
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
13050
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
13051
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
13052
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
13053
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
13054
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13055
        t[14] = l;
13056
        l = h;
13057
        h = o;
13058
        o = 0;
13059
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
13060
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
13061
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
13062
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
13063
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
13064
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
13065
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
13066
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
13067
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13068
        t[15] = l;
13069
        l = h;
13070
        h = o;
13071
        o = 0;
13072
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
13073
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
13074
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
13075
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
13076
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
13077
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
13078
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
13079
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
13080
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13081
        /* From digit 16 upwards the result is written straight into r->dp. */
        r->dp[16] = l;
13082
        l = h;
13083
        h = o;
13084
        o = 0;
13085
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
13086
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
13087
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
13088
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
13089
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
13090
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
13091
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
13092
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13093
        r->dp[17] = l;
13094
        l = h;
13095
        h = o;
13096
        o = 0;
13097
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
13098
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
13099
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
13100
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
13101
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
13102
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
13103
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
13104
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13105
        r->dp[18] = l;
13106
        l = h;
13107
        h = o;
13108
        o = 0;
13109
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
13110
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
13111
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
13112
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
13113
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
13114
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
13115
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13116
        r->dp[19] = l;
13117
        l = h;
13118
        h = o;
13119
        o = 0;
13120
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
13121
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
13122
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
13123
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
13124
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
13125
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
13126
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13127
        r->dp[20] = l;
13128
        l = h;
13129
        h = o;
13130
        o = 0;
13131
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
13132
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
13133
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
13134
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
13135
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
13136
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13137
        r->dp[21] = l;
13138
        l = h;
13139
        h = o;
13140
        o = 0;
13141
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
13142
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
13143
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
13144
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
13145
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
13146
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13147
        r->dp[22] = l;
13148
        l = h;
13149
        h = o;
13150
        o = 0;
13151
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
13152
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
13153
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
13154
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
13155
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13156
        r->dp[23] = l;
13157
        l = h;
13158
        h = o;
13159
        o = 0;
13160
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
13161
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
13162
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
13163
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
13164
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13165
        r->dp[24] = l;
13166
        l = h;
13167
        h = o;
13168
        o = 0;
13169
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
13170
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
13171
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
13172
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13173
        r->dp[25] = l;
13174
        l = h;
13175
        h = o;
13176
        o = 0;
13177
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
13178
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
13179
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
13180
        r->dp[26] = l;
13181
        l = h;
13182
        h = o;
13183
        o = 0;
13184
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
13185
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
13186
        r->dp[27] = l;
13187
        l = h;
13188
        h = o;
13189
        o = 0;
13190
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
13191
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
13192
        r->dp[28] = l;
13193
        l = h;
13194
        h = o;
13195
        o = 0;
13196
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
13197
        r->dp[29] = l;
13198
        l = h;
13199
        h = o;
13200
        /* Last column: o is not reset and not passed to the macro. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
13201
        r->dp[30] = l;
13202
        r->dp[31] = h;
13203
        /* Every read of a->dp[0..15] is done by now; only here are the low
         * sixteen digits of r written (this ordering would matter if r and a
         * are the same object). */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
13204
        r->used = 32;
13205
        sp_clamp(r);
13206
    }
13207
13208
    /* Release the staging buffer in builds where it was heap-allocated. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13209
    if (t != NULL) {
13210
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
13211
    }
13212
#endif
13213
    return err;
13214
}
13215
    #endif /* SP_INT_DIGITS >= 32 */
13216
13217
    #if SP_INT_DIGITS >= 48
13218
/* Square a and store in r. r = a * a
 *
 * Comba implementation, fully unrolled for a 24-digit operand.
 *
 * The result is produced one digit (column) at a time. For column k the
 * products a->dp[i] * a->dp[j] with i + j == k are accumulated in the
 * three-digit accumulator l/h/o, l is stored as result digit k and the
 * accumulator is moved down one digit (l = h; h = o; o = 0).
 *
 * Only a->dp[0..23] are read. Result digits 0..23 are staged in the temporary
 * buffer t and copied into r->dp after all columns are done; result digits
 * 24..47 are written straight into r->dp.
 * NOTE(review): the staging buffer presumably exists so that r may be the
 * same object as a - confirm against callers.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result. r->used is set to 48 and then clamped.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
13228
static int _sp_sqr_24(sp_int* a, sp_int* r)
13229
{
13230
    int err = MP_OKAY;
13231
    /* l/h/o: low, high and overflow digits of the column accumulator. */
    sp_int_digit l = 0;
13232
    sp_int_digit h = 0;
13233
    sp_int_digit o = 0;
13234
    /* tl/th/to: second accumulator used for the cross products of a column. */
    sp_int_digit tl = 0;
13235
    sp_int_digit th = 0;
13236
    sp_int_digit to;
13237
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13238
    /* Staging buffer for result digits 0..23 - heap allocated below. */
    sp_int_digit* t = NULL;
13239
#else
13240
    /* Staging buffer for result digits 0..23. */
    sp_int_digit t[24];
13241
#endif
13242
13243
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
13244
    /* Only this configuration pre-initializes 'to'; every use below is
     * preceded by SP_ASM_MUL_SET(tl, th, to, ...). */
    to = 0;
13245
#endif
13246
13247
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13248
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
13249
         DYNAMIC_TYPE_BIGINT);
13250
     if (t == NULL) {
13251
         err = MP_MEM;
13252
     }
13253
#endif
13254
    if (err == MP_OKAY) {
13255
        /* Columns 0..4: few products, each added directly into l/h/o.
         * NOTE(review): going by the names, *_MUL_ADD2* adds a product twice
         * and *_SQR_ADD adds a square once - confirm against the macros. */
        SP_ASM_SQR(h, l, a->dp[0]);
13256
        t[0] = h;
13257
        h = 0;
13258
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
13259
        t[1] = l;
13260
        l = h;
13261
        h = o;
13262
        o = 0;
13263
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
13264
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
13265
        t[2] = l;
13266
        l = h;
13267
        h = o;
13268
        o = 0;
13269
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
13270
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
13271
        t[3] = l;
13272
        l = h;
13273
        h = o;
13274
        o = 0;
13275
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
13276
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
13277
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
13278
        t[4] = l;
13279
        l = h;
13280
        h = o;
13281
        o = 0;
13282
        /* Columns 5..41: the cross products of a column are first summed in
         * tl/th/to and then folded into l/h/o with SP_ASM_ADD_DBL_3; the
         * square of the middle digit (even columns) goes into l/h/o directly.
         * NOTE(review): SP_ASM_ADD_DBL_3 presumably adds tl/th/to twice -
         * confirm against the macro definition. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
13283
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
13284
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
13285
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13286
        t[5] = l;
13287
        l = h;
13288
        h = o;
13289
        o = 0;
13290
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
13291
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
13292
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
13293
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
13294
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13295
        t[6] = l;
13296
        l = h;
13297
        h = o;
13298
        o = 0;
13299
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
13300
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
13301
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
13302
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
13303
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13304
        t[7] = l;
13305
        l = h;
13306
        h = o;
13307
        o = 0;
13308
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
13309
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
13310
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
13311
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
13312
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
13313
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13314
        t[8] = l;
13315
        l = h;
13316
        h = o;
13317
        o = 0;
13318
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
13319
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
13320
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
13321
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
13322
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
13323
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13324
        t[9] = l;
13325
        l = h;
13326
        h = o;
13327
        o = 0;
13328
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
13329
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
13330
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
13331
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
13332
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
13333
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
13334
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13335
        t[10] = l;
13336
        l = h;
13337
        h = o;
13338
        o = 0;
13339
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
13340
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
13341
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
13342
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
13343
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
13344
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
13345
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13346
        t[11] = l;
13347
        l = h;
13348
        h = o;
13349
        o = 0;
13350
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
13351
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
13352
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
13353
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
13354
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
13355
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
13356
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
13357
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13358
        t[12] = l;
13359
        l = h;
13360
        h = o;
13361
        o = 0;
13362
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
13363
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
13364
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
13365
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
13366
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
13367
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
13368
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
13369
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13370
        t[13] = l;
13371
        l = h;
13372
        h = o;
13373
        o = 0;
13374
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
13375
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
13376
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
13377
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
13378
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
13379
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
13380
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
13381
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
13382
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13383
        t[14] = l;
13384
        l = h;
13385
        h = o;
13386
        o = 0;
13387
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
13388
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
13389
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
13390
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
13391
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
13392
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
13393
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
13394
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
13395
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13396
        t[15] = l;
13397
        l = h;
13398
        h = o;
13399
        o = 0;
13400
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
13401
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
13402
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
13403
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
13404
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
13405
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
13406
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
13407
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
13408
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
13409
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13410
        t[16] = l;
13411
        l = h;
13412
        h = o;
13413
        o = 0;
13414
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
13415
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
13416
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
13417
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
13418
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
13419
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
13420
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
13421
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
13422
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
13423
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13424
        t[17] = l;
13425
        l = h;
13426
        h = o;
13427
        o = 0;
13428
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
13429
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
13430
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
13431
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
13432
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
13433
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
13434
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
13435
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
13436
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
13437
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
13438
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13439
        t[18] = l;
13440
        l = h;
13441
        h = o;
13442
        o = 0;
13443
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
13444
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
13445
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
13446
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
13447
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
13448
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
13449
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
13450
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
13451
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
13452
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
13453
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13454
        t[19] = l;
13455
        l = h;
13456
        h = o;
13457
        o = 0;
13458
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
13459
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
13460
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
13461
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
13462
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
13463
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
13464
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
13465
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
13466
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
13467
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
13468
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
13469
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13470
        t[20] = l;
13471
        l = h;
13472
        h = o;
13473
        o = 0;
13474
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
13475
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
13476
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
13477
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
13478
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
13479
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
13480
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
13481
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
13482
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
13483
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
13484
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
13485
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13486
        t[21] = l;
13487
        l = h;
13488
        h = o;
13489
        o = 0;
13490
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
13491
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
13492
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
13493
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
13494
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
13495
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
13496
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
13497
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
13498
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
13499
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
13500
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
13501
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
13502
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13503
        t[22] = l;
13504
        l = h;
13505
        h = o;
13506
        o = 0;
13507
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
13508
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
13509
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
13510
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
13511
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
13512
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
13513
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
13514
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
13515
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
13516
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
13517
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
13518
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
13519
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13520
        t[23] = l;
13521
        l = h;
13522
        h = o;
13523
        o = 0;
13524
        /* From column 24 on the result digit is stored directly in r->dp;
         * these indices are above every a->dp index that is read. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
13525
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
13526
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
13527
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
13528
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
13529
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
13530
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
13531
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
13532
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
13533
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
13534
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
13535
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
13536
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13537
        r->dp[24] = l;
13538
        l = h;
13539
        h = o;
13540
        o = 0;
13541
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
13542
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
13543
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
13544
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
13545
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
13546
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
13547
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
13548
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
13549
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
13550
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
13551
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
13552
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13553
        r->dp[25] = l;
13554
        l = h;
13555
        h = o;
13556
        o = 0;
13557
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
13558
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
13559
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
13560
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
13561
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
13562
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
13563
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
13564
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
13565
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
13566
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
13567
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
13568
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13569
        r->dp[26] = l;
13570
        l = h;
13571
        h = o;
13572
        o = 0;
13573
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
13574
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
13575
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
13576
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
13577
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
13578
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
13579
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
13580
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
13581
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
13582
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
13583
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13584
        r->dp[27] = l;
13585
        l = h;
13586
        h = o;
13587
        o = 0;
13588
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
13589
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
13590
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
13591
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
13592
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
13593
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
13594
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
13595
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
13596
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
13597
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
13598
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13599
        r->dp[28] = l;
13600
        l = h;
13601
        h = o;
13602
        o = 0;
13603
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
13604
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
13605
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
13606
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
13607
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
13608
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
13609
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
13610
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
13611
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
13612
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13613
        r->dp[29] = l;
13614
        l = h;
13615
        h = o;
13616
        o = 0;
13617
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
13618
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
13619
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
13620
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
13621
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
13622
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
13623
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
13624
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
13625
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
13626
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13627
        r->dp[30] = l;
13628
        l = h;
13629
        h = o;
13630
        o = 0;
13631
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
13632
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
13633
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
13634
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
13635
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
13636
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
13637
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
13638
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
13639
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13640
        r->dp[31] = l;
13641
        l = h;
13642
        h = o;
13643
        o = 0;
13644
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
13645
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
13646
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
13647
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
13648
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
13649
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
13650
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
13651
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
13652
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13653
        r->dp[32] = l;
13654
        l = h;
13655
        h = o;
13656
        o = 0;
13657
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
13658
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
13659
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
13660
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
13661
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
13662
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
13663
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
13664
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13665
        r->dp[33] = l;
13666
        l = h;
13667
        h = o;
13668
        o = 0;
13669
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
13670
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
13671
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
13672
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
13673
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
13674
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
13675
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
13676
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13677
        r->dp[34] = l;
13678
        l = h;
13679
        h = o;
13680
        o = 0;
13681
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
13682
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
13683
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
13684
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
13685
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
13686
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
13687
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13688
        r->dp[35] = l;
13689
        l = h;
13690
        h = o;
13691
        o = 0;
13692
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
13693
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
13694
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
13695
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
13696
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
13697
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
13698
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13699
        r->dp[36] = l;
13700
        l = h;
13701
        h = o;
13702
        o = 0;
13703
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
13704
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
13705
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
13706
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
13707
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
13708
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13709
        r->dp[37] = l;
13710
        l = h;
13711
        h = o;
13712
        o = 0;
13713
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
13714
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
13715
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
13716
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
13717
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
13718
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13719
        r->dp[38] = l;
13720
        l = h;
13721
        h = o;
13722
        o = 0;
13723
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
13724
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
13725
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
13726
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
13727
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13728
        r->dp[39] = l;
13729
        l = h;
13730
        h = o;
13731
        o = 0;
13732
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
13733
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
13734
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
13735
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
13736
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13737
        r->dp[40] = l;
13738
        l = h;
13739
        h = o;
13740
        o = 0;
13741
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
13742
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
13743
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
13744
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13745
        r->dp[41] = l;
13746
        l = h;
13747
        h = o;
13748
        o = 0;
13749
        /* Columns 42..46: few products again, so they are added directly into
         * l/h/o as in the first columns. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
13750
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
13751
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
13752
        r->dp[42] = l;
13753
        l = h;
13754
        h = o;
13755
        o = 0;
13756
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
13757
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
13758
        r->dp[43] = l;
13759
        l = h;
13760
        h = o;
13761
        o = 0;
13762
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
13763
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
13764
        r->dp[44] = l;
13765
        l = h;
13766
        h = o;
13767
        o = 0;
13768
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
13769
        r->dp[45] = l;
13770
        l = h;
13771
        h = o;
13772
        /* Last column: only l and h are passed, and both are stored as the
         * top two result digits. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
13773
        r->dp[46] = l;
13774
        r->dp[47] = h;
13775
        /* All reads of a->dp[0..23] are done - move the staged low digits
         * into the result. */
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
13776
        r->used = 48;
13777
        sp_clamp(r);
13778
    }
13779
13780
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13781
    if (t != NULL) {
13782
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
13783
    }
13784
#endif
13785
    return err;
13786
}
13787
    #endif /* SP_INT_DIGITS >= 48 */
13788
13789
    #if SP_INT_DIGITS >= 64
13790
/* Square a and store in r. r = a * a
13791
 *
13792
 * Karatsuba implementation.
13793
 *
13794
 * @param  [in]   a  SP integer to square.
13795
 * @param  [out]  r  SP integer result.
13796
 *
13797
 * @return  MP_OKAY on success.
13798
 * @return  MP_MEM when dynamic memory allocation fails.
13799
 */
13800
static int _sp_sqr_32(sp_int* a, sp_int* r)
13801
{
13802
    int err = MP_OKAY;
13803
    int i;
13804
    sp_int_digit l;
13805
    sp_int_digit h;
13806
    sp_int* z0;
13807
    sp_int* z1;
13808
    sp_int* z2;
13809
    sp_int_digit ca;
13810
    DECL_SP_INT(a1, 16);
13811
    DECL_SP_INT_ARRAY(z, 33, 2);
13812
13813
    ALLOC_SP_INT(a1, 16, err, NULL);
13814
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
13815
    if (err == MP_OKAY) {
13816
        z1 = z[0];
13817
        z2 = z[1];
13818
        z0 = r;
13819
13820
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
13821
        a1->used = 16;
13822
13823
        /* z2 = a1 ^ 2 */
13824
        err = _sp_sqr_16(a1, z2);
13825
    }
13826
    if (err == MP_OKAY) {
13827
        l = 0;
13828
        h = 0;
13829
        for (i = 0; i < 16; i++) {
13830
            SP_ASM_ADDC(l, h, a1->dp[i]);
13831
            SP_ASM_ADDC(l, h, a->dp[i]);
13832
            a1->dp[i] = l;
13833
            l = h;
13834
            h = 0;
13835
        }
13836
        ca = l;
13837
13838
        /* z0 = a0 ^ 2 */
13839
        err = _sp_sqr_16(a, z0);
13840
    }
13841
    if (err == MP_OKAY) {
13842
        /* z1 = (a0 + a1) ^ 2 */
13843
        err = _sp_sqr_16(a1, z1);
13844
    }
13845
    if (err == MP_OKAY) {
13846
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
13847
        /* r = z0 */
13848
        /* r += (z1 - z0 - z2) << 16 */
13849
        z1->dp[32] = ca;
13850
        l = 0;
13851
        if (ca) {
13852
            l = z1->dp[0 + 16];
13853
            h = 0;
13854
            SP_ASM_ADDC(l, h, a1->dp[0]);
13855
            SP_ASM_ADDC(l, h, a1->dp[0]);
13856
            z1->dp[0 + 16] = l;
13857
            l = h;
13858
            h = 0;
13859
            for (i = 1; i < 16; i++) {
13860
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
13861
                SP_ASM_ADDC(l, h, a1->dp[i]);
13862
                SP_ASM_ADDC(l, h, a1->dp[i]);
13863
                z1->dp[i + 16] = l;
13864
                l = h;
13865
                h = 0;
13866
            }
13867
        }
13868
        z1->dp[32] += l;
13869
        /* z1 = z1 - z0 - z1 */
13870
        l = z1->dp[0];
13871
        h = 0;
13872
        SP_ASM_SUBC(l, h, z0->dp[0]);
13873
        SP_ASM_SUBC(l, h, z2->dp[0]);
13874
        z1->dp[0] = l;
13875
        l = h;
13876
        h = 0;
13877
        for (i = 1; i < 32; i++) {
13878
            l += z1->dp[i];
13879
            SP_ASM_SUBC(l, h, z0->dp[i]);
13880
            SP_ASM_SUBC(l, h, z2->dp[i]);
13881
            z1->dp[i] = l;
13882
            l = h;
13883
            h = 0;
13884
        }
13885
        z1->dp[i] += l;
13886
        /* r += z1 << 16 */
13887
        l = 0;
13888
        h = 0;
13889
        for (i = 0; i < 16; i++) {
13890
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
13891
            SP_ASM_ADDC(l, h, z1->dp[i]);
13892
            r->dp[i + 16] = l;
13893
            l = h;
13894
            h = 0;
13895
        }
13896
        for (; i < 33; i++) {
13897
            SP_ASM_ADDC(l, h, z1->dp[i]);
13898
            r->dp[i + 16] = l;
13899
            l = h;
13900
            h = 0;
13901
        }
13902
        /* r += z2 << 32  */
13903
        l = 0;
13904
        h = 0;
13905
        for (i = 0; i < 17; i++) {
13906
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
13907
            SP_ASM_ADDC(l, h, z2->dp[i]);
13908
            r->dp[i + 32] = l;
13909
            l = h;
13910
            h = 0;
13911
        }
13912
        for (; i < 32; i++) {
13913
            SP_ASM_ADDC(l, h, z2->dp[i]);
13914
            r->dp[i + 32] = l;
13915
            l = h;
13916
            h = 0;
13917
        }
13918
        r->used = 64;
13919
        sp_clamp(r);
13920
    }
13921
13922
    FREE_SP_INT_ARRAY(z, NULL);
13923
    FREE_SP_INT(a1, NULL);
13924
    return err;
13925
}
13926
    #endif /* SP_INT_DIGITS >= 64 */
13927
13928
    #if SP_INT_DIGITS >= 96
13929
/* Square a and store in r. r = a * a
13930
 *
13931
 * Karatsuba implementation.
13932
 *
13933
 * @param  [in]   a  SP integer to square.
13934
 * @param  [out]  r  SP integer result.
13935
 *
13936
 * @return  MP_OKAY on success.
13937
 * @return  MP_MEM when dynamic memory allocation fails.
13938
 */
13939
static int _sp_sqr_48(sp_int* a, sp_int* r)
13940
{
13941
    int err = MP_OKAY;
13942
    int i;
13943
    sp_int_digit l;
13944
    sp_int_digit h;
13945
    sp_int* z0;
13946
    sp_int* z1;
13947
    sp_int* z2;
13948
    sp_int_digit ca;
13949
    DECL_SP_INT(a1, 24);
13950
    DECL_SP_INT_ARRAY(z, 49, 2);
13951
13952
    ALLOC_SP_INT(a1, 24, err, NULL);
13953
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
13954
    if (err == MP_OKAY) {
13955
        z1 = z[0];
13956
        z2 = z[1];
13957
        z0 = r;
13958
13959
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
13960
        a1->used = 24;
13961
13962
        /* z2 = a1 ^ 2 */
13963
        err = _sp_sqr_24(a1, z2);
13964
    }
13965
    if (err == MP_OKAY) {
13966
        l = 0;
13967
        h = 0;
13968
        for (i = 0; i < 24; i++) {
13969
            SP_ASM_ADDC(l, h, a1->dp[i]);
13970
            SP_ASM_ADDC(l, h, a->dp[i]);
13971
            a1->dp[i] = l;
13972
            l = h;
13973
            h = 0;
13974
        }
13975
        ca = l;
13976
13977
        /* z0 = a0 ^ 2 */
13978
        err = _sp_sqr_24(a, z0);
13979
    }
13980
    if (err == MP_OKAY) {
13981
        /* z1 = (a0 + a1) ^ 2 */
13982
        err = _sp_sqr_24(a1, z1);
13983
    }
13984
    if (err == MP_OKAY) {
13985
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
13986
        /* r = z0 */
13987
        /* r += (z1 - z0 - z2) << 24 */
13988
        z1->dp[48] = ca;
13989
        l = 0;
13990
        if (ca) {
13991
            l = z1->dp[0 + 24];
13992
            h = 0;
13993
            SP_ASM_ADDC(l, h, a1->dp[0]);
13994
            SP_ASM_ADDC(l, h, a1->dp[0]);
13995
            z1->dp[0 + 24] = l;
13996
            l = h;
13997
            h = 0;
13998
            for (i = 1; i < 24; i++) {
13999
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
14000
                SP_ASM_ADDC(l, h, a1->dp[i]);
14001
                SP_ASM_ADDC(l, h, a1->dp[i]);
14002
                z1->dp[i + 24] = l;
14003
                l = h;
14004
                h = 0;
14005
            }
14006
        }
14007
        z1->dp[48] += l;
14008
        /* z1 = z1 - z0 - z1 */
14009
        l = z1->dp[0];
14010
        h = 0;
14011
        SP_ASM_SUBC(l, h, z0->dp[0]);
14012
        SP_ASM_SUBC(l, h, z2->dp[0]);
14013
        z1->dp[0] = l;
14014
        l = h;
14015
        h = 0;
14016
        for (i = 1; i < 48; i++) {
14017
            l += z1->dp[i];
14018
            SP_ASM_SUBC(l, h, z0->dp[i]);
14019
            SP_ASM_SUBC(l, h, z2->dp[i]);
14020
            z1->dp[i] = l;
14021
            l = h;
14022
            h = 0;
14023
        }
14024
        z1->dp[i] += l;
14025
        /* r += z1 << 16 */
14026
        l = 0;
14027
        h = 0;
14028
        for (i = 0; i < 24; i++) {
14029
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
14030
            SP_ASM_ADDC(l, h, z1->dp[i]);
14031
            r->dp[i + 24] = l;
14032
            l = h;
14033
            h = 0;
14034
        }
14035
        for (; i < 49; i++) {
14036
            SP_ASM_ADDC(l, h, z1->dp[i]);
14037
            r->dp[i + 24] = l;
14038
            l = h;
14039
            h = 0;
14040
        }
14041
        /* r += z2 << 48  */
14042
        l = 0;
14043
        h = 0;
14044
        for (i = 0; i < 25; i++) {
14045
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
14046
            SP_ASM_ADDC(l, h, z2->dp[i]);
14047
            r->dp[i + 48] = l;
14048
            l = h;
14049
            h = 0;
14050
        }
14051
        for (; i < 48; i++) {
14052
            SP_ASM_ADDC(l, h, z2->dp[i]);
14053
            r->dp[i + 48] = l;
14054
            l = h;
14055
            h = 0;
14056
        }
14057
        r->used = 96;
14058
        sp_clamp(r);
14059
    }
14060
14061
    FREE_SP_INT_ARRAY(z, NULL);
14062
    FREE_SP_INT(a1, NULL);
14063
    return err;
14064
}
14065
    #endif /* SP_INT_DIGITS >= 96 */
14066
14067
    #if SP_INT_DIGITS >= 128
14068
/* Square a and store in r. r = a * a
14069
 *
14070
 * Karatsuba implementation.
14071
 *
14072
 * @param  [in]   a  SP integer to square.
14073
 * @param  [out]  r  SP integer result.
14074
 *
14075
 * @return  MP_OKAY on success.
14076
 * @return  MP_MEM when dynamic memory allocation fails.
14077
 */
14078
static int _sp_sqr_64(sp_int* a, sp_int* r)
14079
{
14080
    int err = MP_OKAY;
14081
    int i;
14082
    sp_int_digit l;
14083
    sp_int_digit h;
14084
    sp_int* z0;
14085
    sp_int* z1;
14086
    sp_int* z2;
14087
    sp_int_digit ca;
14088
    DECL_SP_INT(a1, 32);
14089
    DECL_SP_INT_ARRAY(z, 65, 2);
14090
14091
    ALLOC_SP_INT(a1, 32, err, NULL);
14092
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
14093
    if (err == MP_OKAY) {
14094
        z1 = z[0];
14095
        z2 = z[1];
14096
        z0 = r;
14097
14098
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
14099
        a1->used = 32;
14100
14101
        /* z2 = a1 ^ 2 */
14102
        err = _sp_sqr_32(a1, z2);
14103
    }
14104
    if (err == MP_OKAY) {
14105
        l = 0;
14106
        h = 0;
14107
        for (i = 0; i < 32; i++) {
14108
            SP_ASM_ADDC(l, h, a1->dp[i]);
14109
            SP_ASM_ADDC(l, h, a->dp[i]);
14110
            a1->dp[i] = l;
14111
            l = h;
14112
            h = 0;
14113
        }
14114
        ca = l;
14115
14116
        /* z0 = a0 ^ 2 */
14117
        err = _sp_sqr_32(a, z0);
14118
    }
14119
    if (err == MP_OKAY) {
14120
        /* z1 = (a0 + a1) ^ 2 */
14121
        err = _sp_sqr_32(a1, z1);
14122
    }
14123
    if (err == MP_OKAY) {
14124
        /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
14125
        /* r = z0 */
14126
        /* r += (z1 - z0 - z2) << 32 */
14127
        z1->dp[64] = ca;
14128
        l = 0;
14129
        if (ca) {
14130
            l = z1->dp[0 + 32];
14131
            h = 0;
14132
            SP_ASM_ADDC(l, h, a1->dp[0]);
14133
            SP_ASM_ADDC(l, h, a1->dp[0]);
14134
            z1->dp[0 + 32] = l;
14135
            l = h;
14136
            h = 0;
14137
            for (i = 1; i < 32; i++) {
14138
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
14139
                SP_ASM_ADDC(l, h, a1->dp[i]);
14140
                SP_ASM_ADDC(l, h, a1->dp[i]);
14141
                z1->dp[i + 32] = l;
14142
                l = h;
14143
                h = 0;
14144
            }
14145
        }
14146
        z1->dp[64] += l;
14147
        /* z1 = z1 - z0 - z1 */
14148
        l = z1->dp[0];
14149
        h = 0;
14150
        SP_ASM_SUBC(l, h, z0->dp[0]);
14151
        SP_ASM_SUBC(l, h, z2->dp[0]);
14152
        z1->dp[0] = l;
14153
        l = h;
14154
        h = 0;
14155
        for (i = 1; i < 64; i++) {
14156
            l += z1->dp[i];
14157
            SP_ASM_SUBC(l, h, z0->dp[i]);
14158
            SP_ASM_SUBC(l, h, z2->dp[i]);
14159
            z1->dp[i] = l;
14160
            l = h;
14161
            h = 0;
14162
        }
14163
        z1->dp[i] += l;
14164
        /* r += z1 << 16 */
14165
        l = 0;
14166
        h = 0;
14167
        for (i = 0; i < 32; i++) {
14168
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
14169
            SP_ASM_ADDC(l, h, z1->dp[i]);
14170
            r->dp[i + 32] = l;
14171
            l = h;
14172
            h = 0;
14173
        }
14174
        for (; i < 65; i++) {
14175
            SP_ASM_ADDC(l, h, z1->dp[i]);
14176
            r->dp[i + 32] = l;
14177
            l = h;
14178
            h = 0;
14179
        }
14180
        /* r += z2 << 64  */
14181
        l = 0;
14182
        h = 0;
14183
        for (i = 0; i < 33; i++) {
14184
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
14185
            SP_ASM_ADDC(l, h, z2->dp[i]);
14186
            r->dp[i + 64] = l;
14187
            l = h;
14188
            h = 0;
14189
        }
14190
        for (; i < 64; i++) {
14191
            SP_ASM_ADDC(l, h, z2->dp[i]);
14192
            r->dp[i + 64] = l;
14193
            l = h;
14194
            h = 0;
14195
        }
14196
        r->used = 128;
14197
        sp_clamp(r);
14198
    }
14199
14200
    FREE_SP_INT_ARRAY(z, NULL);
14201
    FREE_SP_INT(a1, NULL);
14202
    return err;
14203
}
14204
    #endif /* SP_INT_DIGITS >= 128 */
14205
14206
    #if SP_INT_DIGITS >= 192
14207
/* Square a and store in r. r = a * a
14208
 *
14209
 * Karatsuba implementation.
14210
 *
14211
 * @param  [in]   a  SP integer to square.
14212
 * @param  [out]  r  SP integer result.
14213
 *
14214
 * @return  MP_OKAY on success.
14215
 * @return  MP_MEM when dynamic memory allocation fails.
14216
 */
14217
static int _sp_sqr_96(sp_int* a, sp_int* r)
14218
{
14219
    int err = MP_OKAY;
14220
    int i;
14221
    sp_int_digit l;
14222
    sp_int_digit h;
14223
    sp_int* z0;
14224
    sp_int* z1;
14225
    sp_int* z2;
14226
    sp_int_digit ca;
14227
    DECL_SP_INT(a1, 48);
14228
    DECL_SP_INT_ARRAY(z, 97, 2);
14229
14230
    ALLOC_SP_INT(a1, 48, err, NULL);
14231
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
14232
    if (err == MP_OKAY) {
14233
        z1 = z[0];
14234
        z2 = z[1];
14235
        z0 = r;
14236
14237
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
14238
        a1->used = 48;
14239
14240
        /* z2 = a1 ^ 2 */
14241
        err = _sp_sqr_48(a1, z2);
14242
    }
14243
    if (err == MP_OKAY) {
14244
        l = 0;
14245
        h = 0;
14246
        for (i = 0; i < 48; i++) {
14247
            SP_ASM_ADDC(l, h, a1->dp[i]);
14248
            SP_ASM_ADDC(l, h, a->dp[i]);
14249
            a1->dp[i] = l;
14250
            l = h;
14251
            h = 0;
14252
        }
14253
        ca = l;
14254
14255
        /* z0 = a0 ^ 2 */
14256
        err = _sp_sqr_48(a, z0);
14257
    }
14258
    if (err == MP_OKAY) {
14259
        /* z1 = (a0 + a1) ^ 2 */
14260
        err = _sp_sqr_48(a1, z1);
14261
    }
14262
    if (err == MP_OKAY) {
14263
        /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
14264
        /* r = z0 */
14265
        /* r += (z1 - z0 - z2) << 48 */
14266
        z1->dp[96] = ca;
14267
        l = 0;
14268
        if (ca) {
14269
            l = z1->dp[0 + 48];
14270
            h = 0;
14271
            SP_ASM_ADDC(l, h, a1->dp[0]);
14272
            SP_ASM_ADDC(l, h, a1->dp[0]);
14273
            z1->dp[0 + 48] = l;
14274
            l = h;
14275
            h = 0;
14276
            for (i = 1; i < 48; i++) {
14277
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
14278
                SP_ASM_ADDC(l, h, a1->dp[i]);
14279
                SP_ASM_ADDC(l, h, a1->dp[i]);
14280
                z1->dp[i + 48] = l;
14281
                l = h;
14282
                h = 0;
14283
            }
14284
        }
14285
        z1->dp[96] += l;
14286
        /* z1 = z1 - z0 - z1 */
14287
        l = z1->dp[0];
14288
        h = 0;
14289
        SP_ASM_SUBC(l, h, z0->dp[0]);
14290
        SP_ASM_SUBC(l, h, z2->dp[0]);
14291
        z1->dp[0] = l;
14292
        l = h;
14293
        h = 0;
14294
        for (i = 1; i < 96; i++) {
14295
            l += z1->dp[i];
14296
            SP_ASM_SUBC(l, h, z0->dp[i]);
14297
            SP_ASM_SUBC(l, h, z2->dp[i]);
14298
            z1->dp[i] = l;
14299
            l = h;
14300
            h = 0;
14301
        }
14302
        z1->dp[i] += l;
14303
        /* r += z1 << 16 */
14304
        l = 0;
14305
        h = 0;
14306
        for (i = 0; i < 48; i++) {
14307
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
14308
            SP_ASM_ADDC(l, h, z1->dp[i]);
14309
            r->dp[i + 48] = l;
14310
            l = h;
14311
            h = 0;
14312
        }
14313
        for (; i < 97; i++) {
14314
            SP_ASM_ADDC(l, h, z1->dp[i]);
14315
            r->dp[i + 48] = l;
14316
            l = h;
14317
            h = 0;
14318
        }
14319
        /* r += z2 << 96  */
14320
        l = 0;
14321
        h = 0;
14322
        for (i = 0; i < 49; i++) {
14323
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
14324
            SP_ASM_ADDC(l, h, z2->dp[i]);
14325
            r->dp[i + 96] = l;
14326
            l = h;
14327
            h = 0;
14328
        }
14329
        for (; i < 96; i++) {
14330
            SP_ASM_ADDC(l, h, z2->dp[i]);
14331
            r->dp[i + 96] = l;
14332
            l = h;
14333
            h = 0;
14334
        }
14335
        r->used = 192;
14336
        sp_clamp(r);
14337
    }
14338
14339
    FREE_SP_INT_ARRAY(z, NULL);
14340
    FREE_SP_INT(a1, NULL);
14341
    return err;
14342
}
14343
    #endif /* SP_INT_DIGITS >= 192 */
14344
14345
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
14346
#endif /* !WOLFSSL_SP_SMALL */
14347
14348
/* Square a and store in r. r = a * a
14349
 *
14350
 * @param  [in]   a  SP integer to square.
14351
 * @param  [out]  r  SP integer result.
14352
 *
14353
 * @return  MP_OKAY on success.
14354
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
14355
 *          data length.
14356
 * @return  MP_MEM when dynamic memory allocation fails.
14357
 */
14358
int sp_sqr(sp_int* a, sp_int* r)
14359
50.4M
{
14360
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
14361
    return sp_mul(a, a, r);
14362
#else
14363
50.4M
    int err = MP_OKAY;
14364
14365
50.4M
    if ((a == NULL) || (r == NULL)) {
14366
0
        err = MP_VAL;
14367
0
    }
14368
    /* Need extra digit during calculation. */
14369
50.4M
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
14370
33
        err = MP_VAL;
14371
33
    }
14372
14373
#if 0
14374
    if (err == MP_OKAY) {
14375
        sp_print(a, "a");
14376
    }
14377
#endif
14378
14379
50.4M
    if (err == MP_OKAY) {
14380
50.4M
        if (a->used == 0) {
14381
3.56M
            _sp_zero(r);
14382
3.56M
        }
14383
46.8M
    else
14384
46.8M
#ifndef WOLFSSL_SP_SMALL
14385
46.8M
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
14386
46.8M
#if SP_WORD_SIZE == 64
14387
46.8M
        if (a->used == 4) {
14388
20.8M
            err = _sp_sqr_4(a, r);
14389
20.8M
        }
14390
25.9M
        else
14391
25.9M
#endif /* SP_WORD_SIZE == 64 */
14392
25.9M
#if SP_WORD_SIZE == 64
14393
25.9M
#ifdef SQR_MUL_ASM
14394
25.9M
        if (a->used == 6) {
14395
8.17M
            err = _sp_sqr_6(a, r);
14396
8.17M
        }
14397
17.8M
        else
14398
17.8M
#endif /* SQR_MUL_ASM */
14399
17.8M
#endif /* SP_WORD_SIZE == 64 */
14400
#if SP_WORD_SIZE == 32
14401
#ifdef SQR_MUL_ASM
14402
        if (a->used == 8) {
14403
            err = _sp_sqr_8(a, r);
14404
        }
14405
        else
14406
#endif /* SQR_MUL_ASM */
14407
#endif /* SP_WORD_SIZE == 32 */
14408
#if SP_WORD_SIZE == 32
14409
#ifdef SQR_MUL_ASM
14410
        if (a->used == 12) {
14411
            err = _sp_sqr_12(a, r);
14412
        }
14413
        else
14414
#endif /* SQR_MUL_ASM */
14415
#endif /* SP_WORD_SIZE == 32 */
14416
17.8M
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
14417
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
14418
    #if SP_INT_DIGITS >= 32
14419
        if (a->used == 16) {
14420
            err = _sp_sqr_16(a, r);
14421
        }
14422
        else
14423
    #endif /* SP_INT_DIGITS >= 32 */
14424
    #if SP_INT_DIGITS >= 48
14425
        if (a->used == 24) {
14426
            err = _sp_sqr_24(a, r);
14427
        }
14428
        else
14429
    #endif /* SP_INT_DIGITS >= 48 */
14430
    #if SP_INT_DIGITS >= 64
14431
        if (a->used == 32) {
14432
            err = _sp_sqr_32(a, r);
14433
        }
14434
        else
14435
    #endif /* SP_INT_DIGITS >= 64 */
14436
    #if SP_INT_DIGITS >= 96
14437
        if (a->used == 48) {
14438
            err = _sp_sqr_48(a, r);
14439
        }
14440
        else
14441
    #endif /* SP_INT_DIGITS >= 96 */
14442
    #if SP_INT_DIGITS >= 128
14443
        if (a->used == 64) {
14444
            err = _sp_sqr_64(a, r);
14445
        }
14446
        else
14447
    #endif /* SP_INT_DIGITS >= 128 */
14448
    #if SP_INT_DIGITS >= 192
14449
        if (a->used == 96) {
14450
            err = _sp_sqr_96(a, r);
14451
        }
14452
        else
14453
    #endif /* SP_INT_DIGITS >= 192 */
14454
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
14455
17.8M
#endif /* !WOLFSSL_SP_SMALL */
14456
17.8M
        {
14457
17.8M
            err = _sp_sqr(a, r);
14458
17.8M
        }
14459
50.4M
    }
14460
14461
#ifdef WOLFSSL_SP_INT_NEGATIVE
14462
    if (err == MP_OKAY) {
14463
        r->sign = MP_ZPOS;
14464
    }
14465
#endif
14466
14467
#if 0
14468
    if (err == MP_OKAY) {
14469
        sp_print(r, "rsqr");
14470
    }
14471
#endif
14472
14473
50.4M
    return err;
14474
50.4M
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
14475
50.4M
}
14476
/* END SP_SQR implementations */
14477
14478
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
14479
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
14480
14481
#if (!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
14482
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
14483
/* Square a mod m and store in r: r = (a * a) mod m
14484
 *
14485
 * @param  [in]   a  SP integer to square.
14486
 * @param  [in]   m  SP integer that is the modulus.
14487
 * @param  [out]  r  SP integer result.
14488
 *
14489
 * @return  MP_OKAY on success.
14490
 * @return  MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
14491
 *          for fixed data length.
14492
 * @return  MP_MEM when dynamic memory allocation fails.
14493
 */
14494
int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
14495
7.31M
{
14496
7.31M
    int err = MP_OKAY;
14497
14498
7.31M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
14499
0
        err = MP_VAL;
14500
0
    }
14501
7.31M
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
14502
71
        err = MP_VAL;
14503
71
    }
14504
14505
7.31M
    if (err == MP_OKAY) {
14506
7.31M
        err = sp_sqr(a, r);
14507
7.31M
    }
14508
7.31M
    if (err == MP_OKAY) {
14509
7.30M
        err = sp_mod(r, m, r);
14510
7.30M
    }
14511
14512
7.31M
    return err;
14513
7.31M
}
14514
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
14515
14516
/**********************
14517
 * Montgomery functions
14518
 **********************/
14519
14520
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
14521
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
14522
/* Reduce a number in montgomery form.
14523
 *
14524
 * Assumes a and m are not NULL and m is not 0.
14525
 *
14526
 * @param  [in,out]  a   SP integer to Montgomery reduce.
14527
 * @param  [in]      m   SP integer that is the modulus.
14528
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
14529
 *
14530
 * @return  MP_OKAY on success.
14531
 */
14532
static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
14533
180M
{
14534
#if !defined(SQR_MUL_ASM)
14535
    int i;
14536
    int bits;
14537
    sp_int_word w;
14538
    sp_int_digit mu;
14539
14540
#if 0
14541
    sp_print(a, "a");
14542
    sp_print(m, "m");
14543
#endif
14544
14545
    bits = sp_count_bits(m);
14546
14547
    for (i = a->used; i < m->used * 2; i++) {
14548
        a->dp[i] = 0;
14549
    }
14550
14551
    if (m->used == 1) {
14552
        mu = mp * a->dp[0];
14553
        w = a->dp[0];
14554
        w += (sp_int_word)mu * m->dp[0];
14555
        a->dp[0] = (sp_int_digit)w;
14556
        w >>= SP_WORD_SIZE;
14557
        w += a->dp[1];
14558
        a->dp[1] = (sp_int_digit)w;
14559
        w >>= SP_WORD_SIZE;
14560
        a->dp[2] = (sp_int_digit)w;
14561
        a->used = 3;
14562
        /* mp is SP_WORD_SIZE */
14563
        bits = SP_WORD_SIZE;
14564
    }
14565
    else {
14566
        sp_int_digit mask = (sp_int_digit)
14567
                            ((1UL << (bits & (SP_WORD_SIZE - 1))) - 1);
14568
        sp_int_word o = 0;
14569
        for (i = 0; i < m->used; i++) {
14570
            int j;
14571
14572
            mu = mp * a->dp[i];
14573
            if ((i == m->used - 1) && (mask != 0)) {
14574
                mu &= mask;
14575
            }
14576
            w = a->dp[i];
14577
            w += (sp_int_word)mu * m->dp[0];
14578
            a->dp[i] = (sp_int_digit)w;
14579
            w >>= SP_WORD_SIZE;
14580
            for (j = 1; j < m->used - 1; j++) {
14581
                w += a->dp[i + j];
14582
                w += (sp_int_word)mu * m->dp[j];
14583
                a->dp[i + j] = (sp_int_digit)w;
14584
                w >>= SP_WORD_SIZE;
14585
            }
14586
            w += o;
14587
            w += a->dp[i + j];
14588
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
14589
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
14590
            a->dp[i + j] = (sp_int_digit)w;
14591
            w >>= SP_WORD_SIZE;
14592
            o += w;
14593
        }
14594
        o += a->dp[m->used * 2 - 1];
14595
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
14596
        o >>= SP_WORD_SIZE;
14597
        a->dp[m->used * 2] = (sp_int_digit)o;
14598
        a->used = m->used * 2 + 1;
14599
    }
14600
14601
    sp_clamp(a);
14602
    sp_rshb(a, bits, a);
14603
14604
    if (_sp_cmp_abs(a, m) != MP_LT) {
14605
        _sp_sub_off(a, m, a, 0);
14606
    }
14607
14608
#if 0
14609
    sp_print(a, "rr");
14610
#endif
14611
14612
    return MP_OKAY;
14613
#else /* !SQR_MUL_ASM */
14614
180M
    int i;
14615
180M
    int j;
14616
180M
    int bits;
14617
180M
    sp_int_digit mu;
14618
180M
    sp_int_digit o;
14619
180M
    sp_int_digit mask;
14620
14621
180M
    bits = sp_count_bits(m);
14622
180M
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
14623
14624
320M
    for (i = a->used; i < m->used * 2; i++) {
14625
139M
        a->dp[i] = 0;
14626
139M
    }
14627
14628
180M
    if (m->used <= 1) {
14629
    #ifndef SQR_MUL_ASM
14630
        sp_int_word w;
14631
    #else
14632
99.8k
        sp_int_digit l;
14633
99.8k
        sp_int_digit h;
14634
99.8k
        sp_int_digit t;
14635
99.8k
    #endif
14636
14637
99.8k
        mu = mp * a->dp[0];
14638
    #ifndef SQR_MUL_ASM
14639
        w = a->dp[0];
14640
        w += (sp_int_word)mu * m->dp[0];
14641
        a->dp[0] = (sp_int_digit)w;
14642
        w >>= SP_WORD_SIZE;
14643
        w += a->dp[1];
14644
        a->dp[1] = (sp_int_digit)w;
14645
        w >>= SP_WORD_SIZE;
14646
        a->dp[2] = (sp_int_digit)w;
14647
    #else
14648
99.8k
        l = a->dp[0];
14649
99.8k
        h = 0;
14650
99.8k
        t = m->dp[0];
14651
99.8k
        SP_ASM_MUL_ADD_NO(l, h, mu, t);
14652
99.8k
        a->dp[0] = l;
14653
99.8k
        l = h;
14654
99.8k
        h = 0;
14655
99.8k
        t = a->dp[1];
14656
99.8k
        SP_ASM_ADDC(l, h, t);
14657
99.8k
        a->dp[1] = l;
14658
99.8k
        a->dp[2] = h;
14659
99.8k
    #endif
14660
99.8k
        a->used = m->used * 2 + 1;
14661
        /* mp is SP_WORD_SIZE */
14662
99.8k
        bits = SP_WORD_SIZE;
14663
99.8k
    }
14664
180M
#ifndef WOLFSSL_HAVE_SP_ECC
14665
180M
#if SP_WORD_SIZE == 64
14666
180M
    else if ((m->used == 4) && (mask == 0)) {
14667
41.4M
        sp_int_digit l;
14668
41.4M
        sp_int_digit h;
14669
41.4M
        sp_int_digit o2;
14670
14671
41.4M
        l = 0;
14672
41.4M
        h = 0;
14673
41.4M
        o = 0;
14674
41.4M
        o2 = 0;
14675
207M
        for (i = 0; i < 4; i++) {
14676
165M
            mu = mp * a->dp[0];
14677
165M
            l = a->dp[0];
14678
165M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
14679
165M
            l = h;
14680
165M
            h = 0;
14681
165M
            SP_ASM_ADDC(l, h, a->dp[1]);
14682
165M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
14683
165M
            a->dp[0] = l;
14684
165M
            l = h;
14685
165M
            h = 0;
14686
165M
            SP_ASM_ADDC(l, h, a->dp[2]);
14687
165M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
14688
165M
            a->dp[1] = l;
14689
165M
            l = h;
14690
165M
            h = o2;
14691
165M
            o2 = 0;
14692
165M
            SP_ASM_ADDC_REG(l, h, o);
14693
165M
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
14694
165M
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
14695
165M
            a->dp[2] = l;
14696
165M
            o = h;
14697
165M
            l = h;
14698
165M
            h = 0;
14699
165M
        }
14700
41.4M
        h = o2;
14701
41.4M
        SP_ASM_ADDC(l, h, a->dp[7]);
14702
41.4M
        a->dp[3] = l;
14703
41.4M
        a->dp[4] = h;
14704
41.4M
        a->used = 5;
14705
14706
41.4M
        sp_clamp(a);
14707
14708
41.4M
        if (_sp_cmp_abs(a, m) != MP_LT) {
14709
10.9M
            sp_sub(a, m, a);
14710
10.9M
        }
14711
14712
41.4M
        return MP_OKAY;
14713
41.4M
    }
14714
139M
    else if ((m->used == 6) && (mask == 0)) {
14715
34.5M
        sp_int_digit l;
14716
34.5M
        sp_int_digit h;
14717
34.5M
        sp_int_digit o2;
14718
14719
34.5M
        l = 0;
14720
34.5M
        h = 0;
14721
34.5M
        o = 0;
14722
34.5M
        o2 = 0;
14723
241M
        for (i = 0; i < 6; i++) {
14724
207M
            mu = mp * a->dp[0];
14725
207M
            l = a->dp[0];
14726
207M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
14727
207M
            l = h;
14728
207M
            h = 0;
14729
207M
            SP_ASM_ADDC(l, h, a->dp[1]);
14730
207M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
14731
207M
            a->dp[0] = l;
14732
207M
            l = h;
14733
207M
            h = 0;
14734
207M
            SP_ASM_ADDC(l, h, a->dp[2]);
14735
207M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
14736
207M
            a->dp[1] = l;
14737
207M
            l = h;
14738
207M
            h = 0;
14739
207M
            SP_ASM_ADDC(l, h, a->dp[3]);
14740
207M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
14741
207M
            a->dp[2] = l;
14742
207M
            l = h;
14743
207M
            h = 0;
14744
207M
            SP_ASM_ADDC(l, h, a->dp[4]);
14745
207M
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
14746
207M
            a->dp[3] = l;
14747
207M
            l = h;
14748
207M
            h = o2;
14749
207M
            o2 = 0;
14750
207M
            SP_ASM_ADDC_REG(l, h, o);
14751
207M
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
14752
207M
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
14753
207M
            a->dp[4] = l;
14754
207M
            o = h;
14755
207M
            l = h;
14756
207M
            h = 0;
14757
207M
        }
14758
34.5M
        h = o2;
14759
34.5M
        SP_ASM_ADDC(l, h, a->dp[11]);
14760
34.5M
        a->dp[5] = l;
14761
34.5M
        a->dp[6] = h;
14762
34.5M
        a->used = 7;
14763
14764
34.5M
        sp_clamp(a);
14765
14766
34.5M
        if (_sp_cmp_abs(a, m) != MP_LT) {
14767
8.62M
            sp_sub(a, m, a);
14768
8.62M
        }
14769
14770
34.5M
        return MP_OKAY;
14771
34.5M
    }
14772
#elif SP_WORD_SIZE == 32
14773
    else if ((m->used <= 12) && (mask == 0)) {
14774
        sp_int_digit l;
14775
        sp_int_digit h;
14776
        sp_int_digit o2;
14777
        sp_int_digit* ad;
14778
        sp_int_digit* md;
14779
14780
        o = 0;
14781
        o2 = 0;
14782
        ad = a->dp;
14783
        for (i = 0; i < m->used; i++) {
14784
            md = m->dp;
14785
            mu = mp * ad[0];
14786
            l = ad[0];
14787
            h = 0;
14788
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14789
            l = h;
14790
            for (j = 1; j + 1 < m->used - 1; j += 2) {
14791
                h = 0;
14792
                SP_ASM_ADDC(l, h, ad[j]);
14793
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14794
                ad[j - 1] = l;
14795
                l = 0;
14796
                SP_ASM_ADDC(h, l, ad[j + 1]);
14797
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
14798
                ad[j] = h;
14799
            }
14800
            for (; j < m->used - 1; j++) {
14801
                h = 0;
14802
                SP_ASM_ADDC(l, h, ad[j]);
14803
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14804
                ad[j - 1] = l;
14805
                l = h;
14806
            }
14807
            h = o2;
14808
            o2 = 0;
14809
            SP_ASM_ADDC_REG(l, h, o);
14810
            SP_ASM_ADDC(l, h, ad[i + j]);
14811
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
14812
            ad[j - 1] = l;
14813
            o = h;
14814
        }
14815
        l = o;
14816
        h = o2;
14817
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
14818
        a->dp[m->used  - 1] = l;
14819
        a->dp[m->used] = h;
14820
        a->used = m->used + 1;
14821
14822
        sp_clamp(a);
14823
14824
        if (_sp_cmp_abs(a, m) != MP_LT) {
14825
            sp_sub(a, m, a);
14826
        }
14827
14828
        return MP_OKAY;
14829
    }
14830
#endif /* SP_WORD_SIZE == 64 | 32 */
14831
104M
#endif /* WOLFSSL_HAVE_SP_ECC */
14832
104M
    else {
14833
104M
        sp_int_digit l;
14834
104M
        sp_int_digit h;
14835
104M
        sp_int_digit o2;
14836
104M
        sp_int_digit* ad;
14837
104M
        sp_int_digit* md;
14838
14839
104M
        o = 0;
14840
104M
        o2 = 0;
14841
104M
        ad = a->dp;
14842
873M
        for (i = 0; i < m->used; i++, ad++) {
14843
768M
            md = m->dp;
14844
768M
            mu = mp * ad[0];
14845
768M
            if ((i == m->used - 1) && (mask != 0)) {
14846
92.5M
                mu &= mask;
14847
92.5M
            }
14848
768M
            l = ad[0];
14849
768M
            h = 0;
14850
768M
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14851
768M
            ad[0] = l;
14852
768M
            l = h;
14853
3.12G
            for (j = 1; j + 1 < m->used - 1; j += 2) {
14854
2.35G
                h = 0;
14855
2.35G
                SP_ASM_ADDC(l, h, ad[j + 0]);
14856
2.35G
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14857
2.35G
                ad[j + 0] = l;
14858
2.35G
                l = 0;
14859
2.35G
                SP_ASM_ADDC(h, l, ad[j + 1]);
14860
2.35G
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
14861
2.35G
                ad[j + 1] = h;
14862
2.35G
            }
14863
1.28G
            for (; j < m->used - 1; j++) {
14864
519M
                h = 0;
14865
519M
                SP_ASM_ADDC(l, h, ad[j]);
14866
519M
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14867
519M
                ad[j] = l;
14868
519M
                l = h;
14869
519M
            }
14870
768M
            h = o2;
14871
768M
            o2 = 0;
14872
768M
            SP_ASM_ADDC_REG(l, h, o);
14873
768M
            SP_ASM_ADDC(l, h, ad[j]);
14874
768M
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
14875
768M
            ad[j] = l;
14876
768M
            o = h;
14877
768M
        }
14878
104M
        l = o;
14879
104M
        h = o2;
14880
104M
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
14881
104M
        a->dp[m->used * 2 - 1] = l;
14882
104M
        a->dp[m->used * 2] = h;
14883
104M
        a->used = m->used * 2 + 1;
14884
104M
    }
14885
14886
104M
    sp_clamp(a);
14887
104M
    sp_rshb(a, bits, a);
14888
14889
104M
    if (_sp_cmp_abs(a, m) != MP_LT) {
14890
27.2M
        sp_sub(a, m, a);
14891
27.2M
    }
14892
14893
104M
    return MP_OKAY;
14894
180M
#endif /* !SQR_MUL_ASM */
14895
180M
}
14896
14897
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14898
    (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
14899
/* Reduce a number in montgomery form.
14900
 *
14901
 * @param  [in,out]  a   SP integer to Montgomery reduce.
14902
 * @param  [in]      m   SP integer that is the modulus.
14903
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
14904
 *
14905
 * @return  MP_OKAY on success.
14906
 * @return  MP_VAL when a or m is NULL or m is zero.
14907
 */
14908
int sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
14909
382M
{
14910
382M
    int err;
14911
14912
382M
    if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
14913
0
        err = MP_VAL;
14914
0
    }
14915
382M
    else if (a->size < m->used * 2 + 1) {
14916
11
        err = MP_VAL;
14917
11
    }
14918
382M
    else {
14919
382M
        err = _sp_mont_red(a, m, mp);
14920
382M
    }
14921
14922
382M
    return err;
14923
382M
}
14924
#endif
14925
14926
/* Calculate the bottom digit of the inverse of negative m.
14927
 *
14928
 * Used when performing Montgomery Reduction.
14929
 *
14930
 * @param  [in]   m   SP integer that is the modulus.
14931
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
14932
 *
14933
 * @return  MP_OKAY on success.
14934
 * @return  MP_VAL when m or rho is NULL.
14935
 */
14936
int sp_mont_setup(sp_int* m, sp_int_digit* rho)
14937
310k
{
14938
310k
    int err = MP_OKAY;
14939
14940
310k
    if ((m == NULL) || (rho == NULL)) {
14941
0
        err = MP_VAL;
14942
0
    }
14943
310k
    if ((err == MP_OKAY) && !sp_isodd(m)) {
14944
90
        err = MP_VAL;
14945
90
    }
14946
14947
310k
    if (err == MP_OKAY) {
14948
310k
        sp_int_digit x;
14949
310k
        sp_int_digit b;
14950
14951
310k
        b = m->dp[0];
14952
310k
        x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
14953
310k
        x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
14954
310k
    #if SP_WORD_SIZE >= 16
14955
310k
        x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
14956
310k
    #if SP_WORD_SIZE >= 32
14957
310k
        x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
14958
310k
    #if SP_WORD_SIZE >= 64
14959
310k
        x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
14960
310k
    #endif /* SP_WORD_SIZE >= 64 */
14961
310k
    #endif /* SP_WORD_SIZE >= 32 */
14962
310k
    #endif /* SP_WORD_SIZE >= 16 */
14963
14964
        /* rho = -1/m mod b, subtract x (unsigned) from 0, assign negative */
14965
310k
        *rho = (sp_int_digit)((sp_int_digit)0 - (sp_sint_digit)x);
14966
310k
    }
14967
14968
310k
    return err;
14969
310k
}
14970
14971
/* Calculate the normalization value of m.
14972
 *   norm = 2^k - m, where k is the number of bits in m
14973
 *
14974
 * @param  [out]  norm   SP integer that normalises numbers into Montgomery
14975
 *                       form.
14976
 * @param  [in]   m      SP integer that is the modulus.
14977
 *
14978
 * @return  MP_OKAY on success.
14979
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximual.
14980
 */
14981
int sp_mont_norm(sp_int* norm, sp_int* m)
14982
327k
{
14983
327k
    int err = MP_OKAY;
14984
327k
    int bits = 0;
14985
14986
327k
    if ((norm == NULL) || (m == NULL)) {
14987
0
        err = MP_VAL;
14988
0
    }
14989
327k
    if (err == MP_OKAY) {
14990
327k
        bits = sp_count_bits(m);
14991
327k
        if (bits == m->size * SP_WORD_SIZE) {
14992
0
            err = MP_VAL;
14993
0
        }
14994
327k
    }
14995
327k
    if (err == MP_OKAY) {
14996
327k
        if (bits < SP_WORD_SIZE) {
14997
1.70k
            bits = SP_WORD_SIZE;
14998
1.70k
        }
14999
327k
        _sp_zero(norm);
15000
327k
        sp_set_bit(norm, bits);
15001
327k
        err = sp_sub(norm, m, norm);
15002
327k
    }
15003
327k
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
15004
2.14k
        norm->dp[0] %= m->dp[0];
15005
2.14k
    }
15006
327k
    if (err == MP_OKAY) {
15007
327k
        sp_clamp(norm);
15008
327k
    }
15009
15010
327k
    return err;
15011
327k
}
15012
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
15013
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
15014
15015
/*********************************
15016
 * To and from binary and strings.
15017
 *********************************/
15018
15019
/* Calculate the number of 8-bit values required to represent the
15020
 * multi-precision number.
15021
 *
15022
 * When a is NULL, return s 0.
15023
 *
15024
 * @param  [in]  a  SP integer.
15025
 *
15026
 * @return  The count of 8-bit values.
15027
 */
15028
int sp_unsigned_bin_size(const sp_int* a)
15029
660k
{
15030
660k
    int cnt = 0;
15031
15032
660k
    if (a != NULL) {
15033
660k
        cnt = (sp_count_bits(a) + 7) / 8;
15034
660k
    }
15035
15036
660k
    return cnt;
15037
660k
}
15038
15039
/* Convert a number as an array of bytes in big-endian format to a
15040
 * multi-precision number.
15041
 *
15042
 * @param  [out]  a     SP integer.
15043
 * @param  [in]   in    Array of bytes.
15044
 * @param  [in]   inSz  Number of data bytes in array.
15045
 *
15046
 * @return  MP_OKAY on success.
15047
 * @return  MP_VAL when the number is too big to fit in an SP.
15048
 */
15049
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
15050
639k
{
15051
639k
    int err = MP_OKAY;
15052
15053
639k
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
15054
0
        err = MP_VAL;
15055
0
    }
15056
15057
639k
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
15058
373
        err = MP_VAL;
15059
373
    }
15060
15061
#ifndef LITTLE_ENDIAN_ORDER
15062
    if (err == MP_OKAY) {
15063
        int i;
15064
        int j;
15065
        int s;
15066
15067
        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15068
15069
    #ifndef WOLFSSL_SP_INT_DIGIT_ALIGN
15070
        for (i = inSz-1,j = 0; i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF,j++) {
15071
            a->dp[j] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
15072
        }
15073
    #else
15074
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
15075
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0);
15076
        #if SP_WORD_SIZE >= 16
15077
            a->dp[j] |= ((sp_int_digit)in[i - 1] <<  8);
15078
        #endif
15079
        #if SP_WORD_SIZE >= 32
15080
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
15081
                        ((sp_int_digit)in[i - 3] << 24);
15082
        #endif
15083
        #if SP_WORD_SIZE >= 64
15084
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
15085
                        ((sp_int_digit)in[i - 5] << 40) |
15086
                        ((sp_int_digit)in[i - 6] << 48) |
15087
                        ((sp_int_digit)in[i - 7] << 56);
15088
        #endif
15089
            j++;
15090
        }
15091
    #endif
15092
        if (i >= 0) {
15093
            a->dp[a->used - 1] = 0;
15094
            for (s = 0; i >= 0; i--,s += 8) {
15095
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
15096
            }
15097
        }
15098
15099
        sp_clamp(a);
15100
    }
15101
#else
15102
639k
    if (err == MP_OKAY) {
15103
639k
        int i;
15104
639k
        int j;
15105
15106
639k
        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15107
15108
3.94M
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
15109
3.30M
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0);
15110
3.30M
        #if SP_WORD_SIZE >= 16
15111
3.30M
            a->dp[j] |= ((sp_int_digit)in[i - 1] <<  8);
15112
3.30M
        #endif
15113
3.30M
        #if SP_WORD_SIZE >= 32
15114
3.30M
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
15115
3.30M
                        ((sp_int_digit)in[i - 3] << 24);
15116
3.30M
        #endif
15117
3.30M
        #if SP_WORD_SIZE >= 64
15118
3.30M
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
15119
3.30M
                        ((sp_int_digit)in[i - 5] << 40) |
15120
3.30M
                        ((sp_int_digit)in[i - 6] << 48) |
15121
3.30M
                        ((sp_int_digit)in[i - 7] << 56);
15122
3.30M
        #endif
15123
3.30M
            j++;
15124
3.30M
        }
15125
15126
639k
    #if SP_WORD_SIZE >= 16
15127
639k
        if (i >= 0) {
15128
425k
            byte *d = (byte*)a->dp;
15129
15130
425k
            a->dp[a->used - 1] = 0;
15131
425k
            switch (i) {
15132
10.4k
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
15133
46.7k
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
15134
72.4k
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
15135
132k
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
15136
172k
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
15137
257k
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
15138
425k
                case 0: d[inSz - 1 - 0] = in[0];
15139
425k
            }
15140
425k
        }
15141
639k
    #endif
15142
15143
639k
        sp_clamp(a);
15144
639k
    }
15145
639k
#endif /* LITTLE_ENDIAN_ORDER */
15146
15147
639k
    return err;
15148
639k
}
15149
15150
/* Convert the multi-precision number to an array of bytes in big-endian format.
15151
 *
15152
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15153
 * to calculate the number of bytes required.
15154
 *
15155
 * @param  [in]   a    SP integer.
15156
 * @param  [out]  out  Array to put encoding into.
15157
 *
15158
 * @return  MP_OKAY on success.
15159
 * @return  MP_VAL when a or out is NULL.
15160
 */
15161
int sp_to_unsigned_bin(sp_int* a, byte* out)
15162
185k
{
15163
185k
    return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
15164
185k
}
15165
15166
/* Convert the multi-precision number to an array of bytes in big-endian format.
15167
 *
15168
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15169
 * to calculate the number of bytes required.
15170
 * Front-pads the output array with zeros make number the size of the array.
15171
 *
15172
 * @param  [in]   a      SP integer.
15173
 * @param  [out]  out    Array to put encoding into.
15174
 * @param  [in]   outSz  Size of the array in bytes.
15175
 *
15176
 * @return  MP_OKAY on success.
15177
 * @return  MP_VAL when a or out is NULL.
15178
 */
15179
int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
15180
309k
{
15181
309k
    int err = MP_OKAY;
15182
15183
309k
    if ((a == NULL) || (out == NULL)) {
15184
15.5k
        err = MP_VAL;
15185
15.5k
    }
15186
309k
    if (err == MP_OKAY) {
15187
294k
        int j = outSz - 1;
15188
15189
294k
        if (!sp_iszero(a)) {
15190
257k
            int i;
15191
2.43M
            for (i = 0; (j >= 0) && (i < a->used); i++) {
15192
2.17M
                int b;
15193
14.7M
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
15194
12.7M
                    out[j--] = (byte)(a->dp[i] >> b);
15195
12.7M
                    if (j < 0) {
15196
246k
                        break;
15197
246k
                    }
15198
12.7M
                }
15199
2.17M
            }
15200
257k
        }
15201
37.7M
        for (; j >= 0; j--) {
15202
37.4M
            out[j] = 0;
15203
37.4M
        }
15204
294k
    }
15205
15206
309k
    return err;
15207
309k
}
15208
15209
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
15210
/* Store the number in big-endian format in array at an offset.
15211
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15212
 * to calculate the number of bytes required.
15213
 *
15214
 * @param  [in]   o    Offset into array o start encoding.
15215
 * @param  [in]   a    SP integer.
15216
 * @param  [out]  out  Array to put encoding into.
15217
 *
15218
 * @return  Index of next byte after data.
15219
 * @return  MP_VAL when a or out is NULL.
15220
 */
15221
int sp_to_unsigned_bin_at_pos(int o, sp_int*a, unsigned char* out)
15222
0
{
15223
0
    int ret = sp_to_unsigned_bin(a, out + o);
15224
15225
0
    if (ret == MP_OKAY) {
15226
0
        ret = o + sp_unsigned_bin_size(a);
15227
0
    }
15228
15229
0
    return ret;
15230
0
}
15231
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
15232
15233
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15234
    defined(HAVE_ECC) || !defined(NO_DSA)
15235
/* Convert hexadecimal number as string in big-endian format to a
15236
 * multi-precision number.
15237
 *
15238
 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
15239
 *
15240
 * @param  [out]  a   SP integer.
15241
 * @param  [in]   in  NUL terminated string.
15242
 *
15243
 * @return  MP_OKAY on success.
15244
 * @return  MP_VAL when radix not supported, value is negative, or a character
15245
 *          is not valid.
15246
 */
15247
static int _sp_read_radix_16(sp_int* a, const char* in)
15248
424k
{
15249
424k
    int  err = MP_OKAY;
15250
424k
    int  i;
15251
424k
    int  s = 0;
15252
424k
    int  j = 0;
15253
15254
#ifdef WOLFSSL_SP_INT_NEGATIVE
15255
    if (*in == '-') {
15256
        a->sign = MP_NEG;
15257
        in++;
15258
    }
15259
#endif
15260
15261
964k
    while (*in == '0') {
15262
539k
        in++;
15263
539k
    }
15264
15265
424k
    a->dp[0] = 0;
15266
26.5M
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
15267
26.0M
        int ch = (int)HexCharToByte(in[i]);
15268
26.0M
        if (ch < 0) {
15269
790
            err = MP_VAL;
15270
790
            break;
15271
790
        }
15272
15273
26.0M
        if (s == SP_WORD_SIZE) {
15274
1.39M
            j++;
15275
1.39M
            if (j >= a->size) {
15276
157
                err = MP_VAL;
15277
157
                break;
15278
157
            }
15279
1.39M
            s = 0;
15280
1.39M
            a->dp[j] = 0;
15281
1.39M
        }
15282
15283
26.0M
        a->dp[j] |= ((sp_int_digit)ch) << s;
15284
26.0M
        s += 4;
15285
26.0M
    }
15286
15287
424k
    if (err == MP_OKAY) {
15288
424k
        a->used = j + 1;
15289
424k
        sp_clamp(a);
15290
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15291
        if (sp_iszero(a)) {
15292
            a->sign = MP_ZPOS;
15293
        }
15294
    #endif
15295
424k
    }
15296
424k
    return err;
15297
424k
}
15298
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * HAVE_ECC || !NO_DSA */
15299
15300
#ifdef WOLFSSL_SP_READ_RADIX_10
15301
/* Convert decimal number as string in big-endian format to a multi-precision
15302
 * number.
15303
 *
15304
 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
15305
 *
15306
 * @param  [out]  a   SP integer.
15307
 * @param  [in]   in  NUL terminated string.
15308
 *
15309
 * @return  MP_OKAY on success.
15310
 * @return  MP_VAL when radix not supported, value is negative, or a character
15311
 *          is not valid.
15312
 */
15313
static int _sp_read_radix_10(sp_int* a, const char* in)
15314
138k
{
15315
138k
    int  err = MP_OKAY;
15316
138k
    int  i;
15317
138k
    int  len;
15318
138k
    char ch;
15319
15320
138k
    _sp_zero(a);
15321
#ifdef WOLFSSL_SP_INT_NEGATIVE
15322
    if (*in == '-') {
15323
        a->sign = MP_NEG;
15324
        in++;
15325
    }
15326
#endif /* WOLFSSL_SP_INT_NEGATIVE */
15327
15328
457k
    while (*in == '0') {
15329
319k
        in++;
15330
319k
    }
15331
15332
138k
    len = (int)XSTRLEN(in);
15333
3.93M
    for (i = 0; i < len; i++) {
15334
3.79M
        ch = in[i];
15335
3.79M
        if ((ch >= '0') && (ch <= '9')) {
15336
3.79M
            ch -= '0';
15337
3.79M
        }
15338
0
        else {
15339
0
            err = MP_VAL;
15340
0
            break;
15341
0
        }
15342
3.79M
        err = _sp_mul_d(a, 10, a, 0);
15343
3.79M
        if (err != MP_OKAY) {
15344
80
            break;
15345
80
        }
15346
3.79M
        err = _sp_add_d(a, ch, a);
15347
3.79M
        if (err != MP_OKAY) {
15348
0
            break;
15349
0
        }
15350
3.79M
    }
15351
#ifdef WOLFSSL_SP_INT_NEGATIVE
15352
    if ((err == MP_OKAY) && sp_iszero(a)) {
15353
        a->sign = MP_ZPOS;
15354
    }
15355
#endif
15356
15357
138k
    return err;
15358
138k
}
15359
#endif /* WOLFSSL_SP_READ_RADIX_10 */
15360
15361
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
15362
    !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || !defined(NO_DSA)
15363
/* Convert a number as string in big-endian format to a big number.
15364
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
15365
 *
15366
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
15367
 *
15368
 * @param  [out]  a      SP integer.
15369
 * @param  [in]   in     NUL terminated string.
15370
 * @param  [in]   radix  Number of values in a digit.
15371
 *
15372
 * @return  MP_OKAY on success.
15373
 * @return  MP_VAL when a or in is NULL, radix not supported, value is negative,
15374
 *          or a character is not valid.
15375
 */
15376
int sp_read_radix(sp_int* a, const char* in, int radix)
15377
451k
{
15378
451k
    int err = MP_OKAY;
15379
15380
451k
    if ((a == NULL) || (in == NULL)) {
15381
0
        err = MP_VAL;
15382
0
    }
15383
15384
451k
    if (err == MP_OKAY) {
15385
451k
    #ifndef WOLFSSL_SP_INT_NEGATIVE
15386
451k
        if (*in == '-') {
15387
4.81k
            err = MP_VAL;
15388
4.81k
        }
15389
447k
        else
15390
447k
    #endif
15391
447k
        if (radix == 16) {
15392
308k
            err = _sp_read_radix_16(a, in);
15393
308k
        }
15394
138k
    #ifdef WOLFSSL_SP_READ_RADIX_10
15395
138k
        else if (radix == 10) {
15396
138k
            err = _sp_read_radix_10(a, in);
15397
138k
        }
15398
0
    #endif
15399
0
        else {
15400
0
            err = MP_VAL;
15401
0
        }
15402
451k
    }
15403
15404
451k
    return err;
15405
451k
}
15406
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * HAVE_ECC || !NO_DSA */
15407
15408
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15409
    defined(WC_MP_TO_RADIX)
15410
15411
/* Put the big-endian, hex string encoding of a into str.
15412
 *
15413
 * Assumes str is large enough for result.
15414
 * Use sp_radix_size() to calculate required length.
15415
 *
15416
 * @param  [in]   a    SP integer to convert.
15417
 * @param  [out]  str  String to hold hex string result.
15418
 *
15419
 * @return  MP_OKAY on success.
15420
 * @return  MP_VAL when a or str is NULL.
15421
 */
15422
int sp_tohex(sp_int* a, char* str)
15423
18.4k
{
15424
18.4k
    int err = MP_OKAY;
15425
18.4k
    int i;
15426
18.4k
    int j;
15427
15428
18.4k
    if ((a == NULL) || (str == NULL)) {
15429
0
        err = MP_VAL;
15430
0
    }
15431
18.4k
    if (err == MP_OKAY) {
15432
        /* quick out if its zero */
15433
18.4k
        if (sp_iszero(a) == MP_YES) {
15434
1.45k
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
15435
1.45k
            *str++ = '0';
15436
1.45k
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15437
1.45k
            *str++ = '0';
15438
1.45k
            *str = '\0';
15439
1.45k
        }
15440
16.9k
        else {
15441
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15442
            if (a->sign == MP_NEG) {
15443
                *str = '-';
15444
                str++;
15445
            }
15446
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
15447
15448
16.9k
            i = a->used - 1;
15449
16.9k
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
15450
            /* Find highest non-zero byte in most-significant word. */
15451
68.7k
            for (j = SP_WORD_SIZE - 8; j >= 0; j -= 8) {
15452
68.7k
                if (((a->dp[i] >> j) & 0xff) != 0) {
15453
16.9k
                    break;
15454
16.9k
                }
15455
51.8k
                else if (j == 0) {
15456
0
                    j = SP_WORD_SIZE - 8;
15457
0
                    --i;
15458
0
                }
15459
68.7k
            }
15460
            /* Start with high nibble of byte. */
15461
16.9k
            j += 4;
15462
    #else
15463
            /* Find highest non-zero nibble in most-significant word. */
15464
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
15465
                if (((a->dp[i] >> j) & 0xf) != 0) {
15466
                    break;
15467
                }
15468
                else if (j == 0) {
15469
                    j = SP_WORD_SIZE - 4;
15470
                    --i;
15471
                }
15472
            }
15473
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15474
            /* Most-significant word. */
15475
185k
            for (; j >= 0; j -= 4) {
15476
168k
                *(str++) = ByteToHex((byte)(a->dp[i] >> j));
15477
168k
            }
15478
123k
            for (--i; i >= 0; i--) {
15479
1.81M
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
15480
1.71M
                    *(str++) = (byte)ByteToHex((byte)(a->dp[i] >> j));
15481
1.71M
                }
15482
107k
            }
15483
16.9k
            *str = '\0';
15484
16.9k
        }
15485
18.4k
    }
15486
15487
18.4k
    return err;
15488
18.4k
}
15489
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15490
15491
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15492
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
15493
    defined(WC_MP_TO_RADIX)
15494
/* Put the big-endian, decimal string encoding of a into str.
15495
 *
15496
 * Assumes str is large enough for result.
15497
 * Use sp_radix_size() to calculate required length.
15498
 *
15499
 * @param  [in]   a    SP integer to convert.
15500
 * @param  [out]  str  String to hold hex string result.
15501
 *
15502
 * @return  MP_OKAY on success.
15503
 * @return  MP_VAL when a or str is NULL.
15504
 * @return  MP_MEM when dynamic memory allocation fails.
15505
 */
15506
int sp_todecimal(sp_int* a, char* str)
15507
29.1k
{
15508
29.1k
    int err = MP_OKAY;
15509
29.1k
    int i;
15510
29.1k
    int j;
15511
29.1k
    sp_int_digit d;
15512
15513
29.1k
    if ((a == NULL) || (str == NULL)) {
15514
0
        err = MP_VAL;
15515
0
    }
15516
    /* quick out if its zero */
15517
29.1k
    else if (sp_iszero(a) == MP_YES) {
15518
3.48k
        *str++ = '0';
15519
3.48k
        *str = '\0';
15520
3.48k
    }
15521
25.6k
    else {
15522
25.6k
        DECL_SP_INT(t, a->used + 1);
15523
15524
25.6k
        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
15525
25.6k
        if (err == MP_OKAY) {
15526
25.3k
            err = sp_copy(a, t);
15527
25.3k
        }
15528
25.6k
        if (err == MP_OKAY) {
15529
15530
        #ifdef WOLFSSL_SP_INT_NEGATIVE
15531
            if (a->sign == MP_NEG) {
15532
                *str = '-';
15533
                str++;
15534
            }
15535
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
15536
15537
25.3k
            i = 0;
15538
2.35M
            while (!sp_iszero(t)) {
15539
2.32M
                sp_div_d(t, 10, t, &d);
15540
2.32M
                str[i++] = (char)('0' + d);
15541
2.32M
            }
15542
25.3k
            str[i] = '\0';
15543
15544
1.19M
            for (j = 0; j <= (i - 1) / 2; j++) {
15545
1.17M
                int c = (unsigned char)str[j];
15546
1.17M
                str[j] = str[i - 1 - j];
15547
1.17M
                str[i - 1 - j] = (char)c;
15548
1.17M
            }
15549
25.3k
        }
15550
15551
25.6k
        FREE_SP_INT(t, NULL);
15552
25.6k
    }
15553
15554
29.1k
    return err;
15555
29.1k
}
15556
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * WOLFSSL_KEY_GEN || HAVE_COMP_KEY || WC_MP_TO_RADIX */
15557
15558
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15559
    defined(WC_MP_TO_RADIX)
15560
/* Put the string version, big-endian, of a in str using the given radix.
15561
 *
15562
 * @param  [in]   a      SP integer to convert.
15563
 * @param  [out]  str    String to hold hex string result.
15564
 * @param  [in]   radix  Base of character.
15565
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
15566
 *
15567
 * @return  MP_OKAY on success.
15568
 * @return  MP_VAL when a or str is NULL, or radix not supported.
15569
 */
15570
int sp_toradix(sp_int* a, char* str, int radix)
15571
58.5k
{
15572
58.5k
    int err = MP_OKAY;
15573
15574
58.5k
    if ((a == NULL) || (str == NULL)) {
15575
0
        err = MP_VAL;
15576
0
    }
15577
58.5k
    else if (radix == MP_RADIX_HEX) {
15578
374
        err = sp_tohex(a, str);
15579
374
    }
15580
58.2k
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
15581
58.2k
    defined(HAVE_COMP_KEY)
15582
58.2k
    else if (radix == MP_RADIX_DEC) {
15583
58.2k
        err = sp_todecimal(a, str);
15584
58.2k
    }
15585
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
15586
0
    else {
15587
0
        err = MP_VAL;
15588
0
    }
15589
15590
58.5k
    return err;
15591
58.5k
}
15592
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15593
15594
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15595
    defined(WC_MP_TO_RADIX)
15596
/* Calculate the length of the string version, big-endian, of a using the given
15597
 * radix.
15598
 *
15599
 * @param  [in]   a      SP integer to convert.
15600
 * @param  [in]   radix  Base of character.
15601
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
15602
 * @param  [out]  size   The number of characters in encoding.
15603
 *
15604
 * @return  MP_OKAY on success.
15605
 * @return  MP_VAL when a or size is NULL, or radix not supported.
15606
 */
15607
int sp_radix_size(sp_int* a, int radix, int* size)
15608
49.6k
{
15609
49.6k
    int err = MP_OKAY;
15610
15611
49.6k
    if ((a == NULL) || (size == NULL)) {
15612
0
        err = MP_VAL;
15613
0
    }
15614
49.6k
    else if (radix == MP_RADIX_HEX) {
15615
2.58k
        if (a->used == 0) {
15616
266
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
15617
            /* 00 and '\0' */
15618
266
            *size = 2 + 1;
15619
        #else
15620
            /* Zero and '\0' */
15621
            *size = 1 + 1;
15622
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15623
266
        }
15624
2.32k
        else {
15625
2.32k
            int nibbles = (sp_count_bits(a) + 3) / 4;
15626
2.32k
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
15627
2.32k
            if (nibbles & 1) {
15628
821
                nibbles++;
15629
821
            }
15630
2.32k
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15631
        #ifdef WOLFSSL_SP_INT_NEGATIVE
15632
            if (a->sign == MP_NEG) {
15633
                nibbles++;
15634
            }
15635
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
15636
            /* One more for \0 */
15637
2.32k
            *size = nibbles + 1;
15638
2.32k
        }
15639
2.58k
    }
15640
47.0k
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
15641
47.0k
    defined(HAVE_COMP_KEY)
15642
47.0k
    else if (radix == MP_RADIX_DEC) {
15643
29.7k
        int i;
15644
29.7k
        sp_int_digit d;
15645
15646
        /* quick out if its zero */
15647
29.7k
        if (sp_iszero(a) == MP_YES) {
15648
            /* Zero and '\0' */
15649
3.48k
            *size = 1 + 1;
15650
3.48k
        }
15651
26.2k
        else {
15652
26.2k
            DECL_SP_INT(t, a->used + 1);
15653
15654
26.2k
            ALLOC_SP_INT(t, a->used + 1, err, NULL);
15655
26.2k
            if (err == MP_OKAY) {
15656
25.6k
        #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15657
25.6k
                t->size = a->used + 1;
15658
25.6k
        #endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */
15659
25.6k
                err = sp_copy(a, t);
15660
25.6k
            }
15661
15662
26.2k
            if (err == MP_OKAY) {
15663
15664
2.38M
                for (i = 0; !sp_iszero(t); i++) {
15665
2.36M
                    sp_div_d(t, 10, t, &d);
15666
2.36M
                }
15667
            #ifdef WOLFSSL_SP_INT_NEGATIVE
15668
                if (a->sign == MP_NEG) {
15669
                    i++;
15670
                }
15671
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
15672
                /* One more for \0 */
15673
25.6k
                *size = i + 1;
15674
25.6k
            }
15675
15676
26.2k
            FREE_SP_INT(t, NULL);
15677
26.2k
        }
15678
29.7k
    }
15679
17.2k
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
15680
17.2k
    else {
15681
17.2k
        err = MP_VAL;
15682
17.2k
    }
15683
15684
49.6k
    return err;
15685
49.6k
}
15686
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15687
15688
/***************************************
15689
 * Prime number generation and checking.
15690
 ***************************************/
15691
15692
#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
15693
    !defined(NO_DSA)) && !defined(WC_NO_RNG)
15694
/* Generate a random prime for RSA only.
15695
 *
15696
 * @param  [out]  r     SP integer to hold result.
15697
 * @param  [in]   len   Number of bytes in prime.
15698
 * @param  [in]   rng   Random number generator.
15699
 * @param  [in]   heap  Heap hint. Unused.
15700
 *
15701
 * @return  MP_OKAY on success
15702
 * @return  MP_VAL when r or rng is NULL, length is not supported or random
15703
 *          number generator fails.
15704
 */
15705
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
15706
1.72k
{
15707
1.72k
    static const int USE_BBS = 1;
15708
1.72k
    int   err = MP_OKAY;
15709
1.72k
    int   type = 0;
15710
1.72k
    int   isPrime = MP_NO;
15711
1.72k
#ifdef WOLFSSL_SP_MATH_ALL
15712
1.72k
    int   bits = 0;
15713
1.72k
#endif /* WOLFSSL_SP_MATH_ALL */
15714
15715
1.72k
    (void)heap;
15716
15717
    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
15718
1.72k
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
15719
49
        err = MP_VAL;
15720
49
    }
15721
15722
1.72k
    if (err == MP_OKAY) {
15723
        /* get type */
15724
1.67k
        if (len < 0) {
15725
0
            type = USE_BBS;
15726
0
            len = -len;
15727
0
        }
15728
15729
    #ifndef WOLFSSL_SP_MATH_ALL
15730
        /* For minimal maths, support only what's in SP and needed for DH. */
15731
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
15732
        if (len == 32) {
15733
        }
15734
        else
15735
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
15736
        /* Generate RSA primes that are half the modulus length. */
15737
    #ifndef WOLFSSL_SP_NO_3072
15738
        if ((len != 128) && (len != 192))
15739
    #else
15740
        if (len != 128)
15741
    #endif /* WOLFSSL_SP_NO_3072 */
15742
        {
15743
            err = MP_VAL;
15744
        }
15745
    #endif /* !WOLFSSL_SP_MATH_ALL */
15746
15747
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15748
        r->sign = MP_ZPOS;
15749
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
15750
1.67k
        r->used = (len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15751
1.67k
    #ifdef WOLFSSL_SP_MATH_ALL
15752
1.67k
        bits = (len * 8) & SP_WORD_MASK;
15753
1.67k
    #endif /* WOLFSSL_SP_MATH_ALL */
15754
1.67k
    }
15755
15756
    /* Assume the candidate is probably prime and then test until
15757
     * it is proven composite. */
15758
141k
    while (err == MP_OKAY && isPrime == MP_NO) {
15759
#ifdef SHOW_GEN
15760
        printf(".");
15761
        fflush(stdout);
15762
#endif /* SHOW_GEN */
15763
        /* generate value */
15764
140k
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
15765
140k
        if (err != 0) {
15766
469
            err = MP_VAL;
15767
469
            break;
15768
469
        }
15769
15770
        /* munge bits */
15771
#ifndef LITTLE_ENDIAN_ORDER
15772
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
15773
#else
15774
139k
        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
15775
139k
#endif /* LITTLE_ENDIAN_ORDER */
15776
139k
        r->dp[0]              |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
15777
15778
#ifndef LITTLE_ENDIAN_ORDER
15779
        if (((len * 8) & SP_WORD_MASK) != 0) {
15780
            r->dp[r->used-1] >>= SP_WORD_SIZE - ((len * 8) & SP_WORD_MASK);
15781
        }
15782
#endif /* LITTLE_ENDIAN_ORDER */
15783
139k
#ifdef WOLFSSL_SP_MATH_ALL
15784
139k
        if (bits > 0) {
15785
69.5k
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
15786
69.5k
        }
15787
139k
#endif /* WOLFSSL_SP_MATH_ALL */
15788
15789
        /* test */
15790
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
15791
         * of a 1024-bit candidate being a false positive, when it is our
15792
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
15793
         * Using 8 because we've always used 8 */
15794
139k
        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
15795
139k
    }
15796
15797
1.72k
    return err;
15798
1.72k
}
15799
#endif /* WOLFSSL_KEY_GEN && (!NO_RSA || !NO_DH || !NO_DSA) && !WC_NO_RNG */
15800
15801
#ifdef WOLFSSL_SP_PRIME_GEN
15802
/* Miller-Rabin test of "a" to the base of "b" as described in
15803
 * HAC pp. 139 Algorithm 4.24
15804
 *
15805
 * Sets result to 0 if definitely composite or 1 if probably prime.
15806
 * Randomly the chance of error is no more than 1/4 and often
15807
 * very much lower.
15808
 *
15809
 * @param  [in]   a       SP integer to check.
15810
 * @param  [in]   b       SP integer that is a small prime.
15811
 * @param  [out]  result  MP_YES when number is likey prime.
15812
 *                        MP_NO otherwise.
15813
 * @param  [in]   n1      SP integer temporary.
15814
 * @param  [in]   y       SP integer temporary.
15815
 * @param  [in]   r       SP integer temporary.
15816
 *
15817
 * @return  MP_OKAY on success.
15818
 * @return  MP_MEM when dynamic memory allocation fails.
15819
 */
15820
static int sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result,
15821
                                    sp_int* n1, sp_int* y, sp_int* r)
15822
40.6k
{
15823
40.6k
    int s;
15824
40.6k
    int j;
15825
40.6k
    int err = MP_OKAY;
15826
15827
    /* default */
15828
40.6k
    *result = MP_NO;
15829
15830
    /* ensure b > 1 */
15831
40.6k
    if (sp_cmp_d(b, 1) == MP_GT) {
15832
        /* get n1 = a - 1 */
15833
40.6k
        (void)sp_copy(a, n1);
15834
40.6k
        _sp_sub_d(n1, 1, n1);
15835
        /* set 2**s * r = n1 */
15836
40.6k
        (void)sp_copy(n1, r);
15837
15838
        /* count the number of least significant bits
15839
         * which are zero
15840
         */
15841
40.6k
        s = sp_cnt_lsb(r);
15842
15843
        /* now divide n - 1 by 2**s */
15844
40.6k
        sp_rshb(r, s, r);
15845
15846
        /* compute y = b**r mod a */
15847
40.6k
        err = sp_exptmod(b, r, a, y);
15848
15849
40.6k
        if (err == MP_OKAY) {
15850
            /* probably prime until shown otherwise */
15851
37.2k
            *result = MP_YES;
15852
15853
            /* if y != 1 and y != n1 do */
15854
37.2k
            if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
15855
31.5k
                j = 1;
15856
                /* while j <= s-1 and y != n1 */
15857
3.31M
                while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
15858
3.28M
                    err = sp_sqrmod(y, a, y);
15859
3.28M
                    if (err != MP_OKAY) {
15860
1.12k
                        break;
15861
1.12k
                    }
15862
15863
                    /* if y == 1 then composite */
15864
3.28M
                    if (sp_cmp_d(y, 1) == MP_EQ) {
15865
0
                        *result = MP_NO;
15866
0
                        break;
15867
0
                    }
15868
3.28M
                    ++j;
15869
3.28M
                }
15870
15871
                /* if y != n1 then composite */
15872
31.5k
                if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
15873
26.7k
                    *result = MP_NO;
15874
26.7k
                }
15875
31.5k
            }
15876
37.2k
        }
15877
40.6k
    }
15878
15879
40.6k
    return err;
15880
40.6k
}
15881
15882
/* Miller-Rabin test of "a" to the base of "b" as described in
15883
 * HAC pp. 139 Algorithm 4.24
15884
 *
15885
 * Sets result to 0 if definitely composite or 1 if probably prime.
15886
 * Randomly the chance of error is no more than 1/4 and often
15887
 * very much lower.
15888
 *
15889
 * @param  [in]   a       SP integer to check.
15890
 * @param  [in]   b       SP integer that is a small prime.
15891
 * @param  [out]  result  MP_YES when number is likey prime.
15892
 *                        MP_NO otherwise.
15893
 *
15894
 * @return  MP_OKAY on success.
15895
 * @return  MP_MEM when dynamic memory allocation fails.
15896
 */
15897
static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result)
15898
0
{
15899
0
    int err = MP_OKAY;
15900
0
    sp_int *n1;
15901
0
    sp_int *y;
15902
0
    sp_int *r;
15903
0
    DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3);
15904
15905
0
    ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL);
15906
0
    if (err == MP_OKAY) {
15907
0
        n1 = t[0];
15908
0
        y  = t[1];
15909
0
        r  = t[2];
15910
15911
        /* Only 'y' needs to be twice as big. */
15912
0
        sp_init_size(n1, a->used * 2 + 1);
15913
0
        sp_init_size(y, a->used * 2 + 1);
15914
0
        sp_init_size(r, a->used * 2 + 1);
15915
15916
0
        err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
15917
15918
0
        sp_clear(n1);
15919
0
        sp_clear(y);
15920
0
        sp_clear(r);
15921
0
    }
15922
15923
0
    FREE_SP_INT_ARRAY(t, NULL);
15924
0
    return err;
15925
0
}
15926
15927
#if SP_WORD_SIZE == 8
15928
/* Number of pre-computed primes. First n primes - fitting in a digit. */
15929
#define SP_PRIME_SIZE      54
15930
15931
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
15932
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
15933
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
15934
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
15935
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
15936
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
15937
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
15938
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
15939
};
15940
#else
15941
/* Number of pre-computed primes. First n primes. */
15942
8.53M
#define SP_PRIME_SIZE      256
15943
15944
/* The first 256 primes. */
15945
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
15946
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
15947
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
15948
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
15949
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
15950
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
15951
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
15952
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
15953
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
15954
15955
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
15956
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
15957
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
15958
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
15959
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
15960
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
15961
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
15962
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
15963
15964
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
15965
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
15966
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
15967
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
15968
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
15969
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
15970
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
15971
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
15972
15973
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
15974
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
15975
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
15976
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
15977
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
15978
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
15979
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
15980
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
15981
};
15982
#endif
15983
15984
/* Check whether a is prime.
15985
 * Checks against a number of small primes and does t iterations of
15986
 * Miller-Rabin.
15987
 *
15988
 * @param  [in]   a       SP integer to check.
15989
 * @param  [in]   t       Number of iterations of Miller-Rabin test to perform.
15990
 * @param  [out]  result  MP_YES when number is prime.
15991
 *                        MP_NO otherwise.
15992
 *
15993
 * @return  MP_OKAY on success.
15994
 * @return  MP_VAL when a or result is NULL, or t is out of range.
15995
 * @return  MP_MEM when dynamic memory allocation fails.
15996
 */
15997
int sp_prime_is_prime(sp_int* a, int t, int* result)
15998
0
{
15999
0
    int         err = MP_OKAY;
16000
0
    int         i;
16001
0
    int         haveRes = 0;
16002
0
    sp_int_digit d;
16003
0
    DECL_SP_INT(b, 2);
16004
16005
0
    if ((a == NULL) || (result == NULL)) {
16006
0
        if (result != NULL) {
16007
0
            *result = MP_NO;
16008
0
        }
16009
0
        err = MP_VAL;
16010
0
    }
16011
16012
0
    if ((err == MP_OKAY) && ((t <= 0) || (t > SP_PRIME_SIZE))) {
16013
0
        *result = MP_NO;
16014
0
        err = MP_VAL;
16015
0
    }
16016
16017
0
    if ((err == MP_OKAY) && sp_isone(a)) {
16018
0
        *result = MP_NO;
16019
0
        haveRes = 1;
16020
0
    }
16021
16022
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16023
16024
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
16025
        /* check against primes table */
16026
0
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16027
0
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
16028
0
                *result = MP_YES;
16029
0
                haveRes = 1;
16030
0
                break;
16031
0
            }
16032
0
        }
16033
0
    }
16034
16035
0
    if ((err == MP_OKAY) && (!haveRes)) {
16036
        /* do trial division */
16037
0
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16038
0
            err = sp_mod_d(a, sp_primes[i], &d);
16039
0
            if ((err != MP_OKAY) || (d == 0)) {
16040
0
                *result = MP_NO;
16041
0
                haveRes = 1;
16042
0
                break;
16043
0
            }
16044
0
        }
16045
0
    }
16046
16047
0
    if ((err == MP_OKAY) && (!haveRes)) {
16048
0
        ALLOC_SP_INT(b, 1, err, NULL);
16049
0
        if (err == MP_OKAY) {
16050
            /* now do 't' miller rabins */
16051
0
            sp_init_size(b, 1);
16052
0
            for (i = 0; i < t; i++) {
16053
0
                sp_set(b, sp_primes[i]);
16054
0
                err = sp_prime_miller_rabin(a, b, result);
16055
0
                if ((err != MP_OKAY) || (*result == MP_NO)) {
16056
0
                    break;
16057
0
                }
16058
0
            }
16059
0
        }
16060
0
     }
16061
16062
0
     RESTORE_VECTOR_REGISTERS();
16063
16064
0
     FREE_SP_INT(b, NULL);
16065
0
     return err;
16066
0
}
16067
16068
/* Check whether a is prime.
16069
 * Checks against a number of small primes and does t iterations of
16070
 * Miller-Rabin.
16071
 *
16072
 * @param  [in]   a       SP integer to check.
16073
 * @param  [in]   t       Number of iterations of Miller-Rabin test to perform.
16074
 * @param  [out]  result  MP_YES when number is prime.
16075
 *                        MP_NO otherwise.
16076
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
16077
 *
16078
 * @return  MP_OKAY on success.
16079
 * @return  MP_VAL when a, result or rng is NULL.
16080
 * @return  MP_MEM when dynamic memory allocation fails.
16081
 */
16082
int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
16083
96.7k
{
16084
96.7k
    int err = MP_OKAY;
16085
96.7k
    int ret = MP_YES;
16086
96.7k
    int haveRes = 0;
16087
96.7k
    int i;
16088
96.7k
#ifndef WC_NO_RNG
16089
96.7k
    sp_int *b = NULL;
16090
96.7k
    sp_int *c = NULL;
16091
96.7k
    sp_int *n1 = NULL;
16092
96.7k
    sp_int *y = NULL;
16093
96.7k
    sp_int *r = NULL;
16094
96.7k
#endif /* WC_NO_RNG */
16095
16096
96.7k
    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
16097
0
        err = MP_VAL;
16098
0
    }
16099
16100
#ifdef WOLFSSL_SP_INT_NEGATIVE
16101
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
16102
        err = MP_VAL;
16103
    }
16104
#endif
16105
16106
96.7k
    if ((err == MP_OKAY) && sp_isone(a)) {
16107
0
        ret = MP_NO;
16108
0
        haveRes = 1;
16109
0
    }
16110
16111
96.7k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16112
16113
96.7k
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
16114
        /* check against primes table */
16115
2.14M
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16116
2.14M
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
16117
36
                ret = MP_YES;
16118
36
                haveRes = 1;
16119
36
                break;
16120
36
            }
16121
2.14M
        }
16122
8.38k
    }
16123
16124
96.7k
    if ((err == MP_OKAY) && (!haveRes)) {
16125
96.7k
        sp_int_digit d;
16126
16127
        /* do trial division */
16128
6.39M
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16129
6.36M
            err = sp_mod_d(a, sp_primes[i], &d);
16130
6.36M
            if ((err != MP_OKAY) || (d == 0)) {
16131
75.3k
                ret = MP_NO;
16132
75.3k
                haveRes = 1;
16133
75.3k
                break;
16134
75.3k
            }
16135
6.36M
        }
16136
96.7k
    }
16137
16138
96.7k
#ifndef WC_NO_RNG
16139
    /* now do a miller rabin with up to t random numbers, this should
16140
     * give a (1/4)^t chance of a false prime. */
16141
96.7k
    if ((err == MP_OKAY) && (!haveRes)) {
16142
21.3k
        int bits = sp_count_bits(a);
16143
21.3k
        word32 baseSz = (bits + 7) / 8;
16144
21.3k
        DECL_SP_INT_ARRAY(ds, a->used + 1, 3);
16145
21.3k
        DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
16146
16147
21.3k
        ALLOC_SP_INT_ARRAY(ds, a->used + 1, 3, err, NULL);
16148
21.3k
        ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
16149
21.3k
        if (err == MP_OKAY) {
16150
21.1k
            b  = ds[0];
16151
21.1k
            c  = ds[1];
16152
21.1k
            n1 = ds[2];
16153
21.1k
            y  = d[0];
16154
21.1k
            r  = d[1];
16155
16156
            /* Only 'y' needs to be twice as big. */
16157
21.1k
            sp_init_size(b , a->used + 1);
16158
21.1k
            sp_init_size(c , a->used + 1);
16159
21.1k
            sp_init_size(n1, a->used + 1);
16160
21.1k
            sp_init_size(y , a->used * 2 + 1);
16161
21.1k
            sp_init_size(r , a->used * 2 + 1);
16162
16163
21.1k
            _sp_sub_d(a, 2, c);
16164
16165
21.1k
            bits &= SP_WORD_MASK;
16166
16167
34.9k
            while (t > 0) {
16168
34.5k
                err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
16169
34.5k
                if (err != MP_OKAY) {
16170
146
                    break;
16171
146
                }
16172
34.3k
                b->used = a->used;
16173
16174
            #ifdef BIG_ENDIAN_ORDER
16175
                if (((baseSz * 8) & SP_WORD_MASK) != 0) {
16176
                    b->dp[b->used-1] >>=
16177
                        SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
16178
                }
16179
            #endif /* LITTLE_ENDIAN_ORDER */
16180
16181
                /* Ensure the top word has no more bits than necessary. */
16182
34.3k
                if (bits > 0) {
16183
12.7k
                    b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
16184
12.7k
                    sp_clamp(b);
16185
12.7k
                }
16186
16187
34.3k
                if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
16188
10.1k
                    continue;
16189
10.1k
                }
16190
16191
24.1k
                err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
16192
24.1k
                if ((err != MP_OKAY) || (ret == MP_NO)) {
16193
20.5k
                    break;
16194
20.5k
                }
16195
16196
3.60k
                t--;
16197
3.60k
            }
16198
16199
21.1k
            sp_clear(n1);
16200
21.1k
            sp_clear(y);
16201
21.1k
            sp_clear(r);
16202
21.1k
            sp_clear(b);
16203
21.1k
            sp_clear(c);
16204
21.1k
        }
16205
16206
21.3k
        FREE_SP_INT_ARRAY(d, NULL);
16207
21.3k
        FREE_SP_INT_ARRAY(ds, NULL);
16208
21.3k
    }
16209
#else
16210
    (void)t;
16211
#endif /* !WC_NO_RNG */
16212
16213
96.7k
    if (result != NULL) {
16214
96.7k
        *result = ret;
16215
96.7k
    }
16216
16217
96.7k
    RESTORE_VECTOR_REGISTERS();
16218
16219
96.7k
    return err;
16220
96.7k
}
16221
#endif /* WOLFSSL_SP_PRIME_GEN */
16222
16223
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)

/* Calculates the Greatest Common Divisor (GCD) of a and b into r.
 *
 * a and b are positive integers.
 *
 * Uses the Euclidean algorithm: the pair (u, v) is repeatedly replaced by
 * (v, u mod v) until v is zero, at which point u holds the GCD.
 *
 * @param  [in]   a  SP integer of first operand.
 * @param  [in]   b  SP integer of second operand.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b or r is NULL or too large, or both a and b are
 *          zero.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
        err = MP_VAL;
    }
    else if (sp_iszero(a)) {
        /* GCD of 0 and 0 is undefined as all integers divide 0.
         * Otherwise the GCD of 0 and b is b. */
        err = sp_iszero(b) ? MP_VAL : sp_copy(b, r);
    }
    else if (sp_iszero(b)) {
        /* GCD of a and 0 is a. */
        err = sp_copy(a, r);
    }
    else {
        sp_int* u = NULL;
        sp_int* v = NULL;
        sp_int* t = NULL;
        sp_int* hi = NULL;
        sp_int* lo = NULL;
        int used = (b->used > a->used) ? b->used + 1 : a->used + 1;
        DECL_SP_INT_ARRAY(d, used, 3);

        SAVE_VECTOR_REGISTERS(err = _svr_ret;);

        ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);

        if (err == MP_OKAY) {
            u = d[0];
            v = d[1];
            t = d[2];
            sp_init_size(u, used);
            sp_init_size(v, used);
            sp_init_size(t, used);

            /* Order the operands: hi is the one that is not smaller. */
            if (_sp_cmp(a, b) != MP_LT) {
                hi = a;
                lo = b;
            }
            else {
                hi = b;
                lo = a;
            }

            /* First iteration - u = lo, v = hi mod lo */
            sp_copy(lo, u);
            if (lo->used == 1) {
                /* Single digit divisor - use the cheaper digit modulo. */
                err = sp_mod_d(hi, lo->dp[0], &v->dp[0]);
                if (err == MP_OKAY) {
                    v->used = (v->dp[0] != 0);
                }
            }
            else {
                err = sp_mod(hi, lo, v);
            }
        }

        if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
            u->sign = MP_ZPOS;
            v->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Keep reducing until the remainder is zero or a step fails. */
            while ((err == MP_OKAY) && (!sp_iszero(v))) {
                /* t = u mod v */
                if (v->used == 1) {
                    err = sp_mod_d(u, v->dp[0], &t->dp[0]);
                    if (err == MP_OKAY) {
                        t->used = (t->dp[0] != 0);
                    }
                }
                else {
                    err = sp_mod(u, v, t);
                }
                /* (u, v) = (v, t) */
                if (err == MP_OKAY) {
                    sp_copy(v, u);
                    sp_copy(t, v);
                }
            }
            if (err == MP_OKAY) {
                err = sp_copy(u, r);
            }
        }

        FREE_SP_INT_ARRAY(d, NULL);

        RESTORE_VECTOR_REGISTERS();
    }

    return err;
}

#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
16341
16342
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
    (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))

/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
 *
 * a and b are positive integers.
 *
 * Computed as (larger / gcd(a, b)) * smaller so that the division is done
 * before the multiplication.
 *
 * @param  [in]   a  SP integer of first operand.
 * @param  [in]   b  SP integer of second operand.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b or r is NULL; a or b is zero; or a or b is too
 *          large.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    /* Temporaries need one digit more than the larger operand. */
    int used = ((a == NULL) || (b == NULL)) ? 1 :
                   (a->used >= b->used ? a->used + 1: b->used + 1);
    DECL_SP_INT_ARRAY(t, used, 2);

    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

    /* Same operand size limit as sp_gcd() - checked here so that oversized
     * operands are rejected before any temporaries are allocated.
     */
    if ((err == MP_OKAY) && ((a->used >= SP_INT_DIGITS) ||
                             (b->used >= SP_INT_DIGITS))) {
        err = MP_VAL;
    }

    /* LCM of 0 and any number is undefined as 0 is not in the set of values
     * being used.
     */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(b))) {
        err = MP_VAL;
    }

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);

    if (err == MP_OKAY) {
        sp_init_size(t[0], used);
        sp_init_size(t[1], used);

        SAVE_VECTOR_REGISTERS(err = _svr_ret;);

        /* t[0] = gcd(a, b) */
        if (err == MP_OKAY)
            err = sp_gcd(a, b, t[0]);

        if (err == MP_OKAY) {
            /* Divide the larger operand by the GCD, then multiply the
             * quotient by the other operand. */
            if (_sp_cmp_abs(a, b) == MP_GT) {
                err = sp_div(a, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    err = sp_mul(b, t[1], r);
                }
            }
            else {
                err = sp_div(b, t[0], t[1], NULL);
                if (err == MP_OKAY) {
                    err = sp_mul(a, t[1], r);
                }
            }
        }

        RESTORE_VECTOR_REGISTERS();
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}

#endif /* !NO_RSA && WOLFSSL_KEY_GEN &&
        * (!WC_RSA_BLINDING || HAVE_FIPS || HAVE_SELFTEST) */
16409
16410
/* Returns the run time settings.
16411
 *
16412
 * @return  Settings value.
16413
 */
16414
word32 CheckRunTimeSettings(void)
16415
0
{
16416
0
    return CTC_SETTINGS;
16417
0
}
16418
16419
/* Returns the fast math settings.
16420
 *
16421
 * @return  Setting - number of bits in a digit.
16422
 */
16423
word32 CheckRunTimeFastMath(void)
16424
0
{
16425
0
    return SP_WORD_SIZE;
16426
0
}
16427
16428
#ifdef WOLFSSL_CHECK_MEM_ZERO
16429
/* Add an MP to check.
16430
 *
16431
 * @param [in] name  Name of address to check.
16432
 * @param [in] mp    mp_int that needs to be checked.
16433
 */
16434
void sp_memzero_add(const char* name, mp_int* mp)
16435
{
16436
    wc_MemZero_Add(name, mp->dp, mp->size * sizeof(sp_digit));
16437
}
16438
16439
/* Check the memory in the data pointer for memory that must be zero.
16440
 *
16441
 * @param [in] mp    mp_int that needs to be checked.
16442
 */
16443
void sp_memzero_check(mp_int* mp)
16444
{
16445
    wc_MemZero_Check(mp->dp, mp->size * sizeof(sp_digit));
16446
}
16447
#endif /* WOLFSSL_CHECK_MEM_ZERO */
16448
16449
16450
#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */