Coverage Report

Created: 2022-08-24 06:26

/src/wolfssl-sp-math/wolfcrypt/src/sp_int.c
Line
Count
Source (jump to first uncovered line)
1
/* sp_int.c
2
 *
3
 * Copyright (C) 2006-2022 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Implementation by Sean Parkinson. */
23
24
/*
25
DESCRIPTION
26
This library provides single precision (SP) integer math functions.
27
28
*/
29
#ifdef HAVE_CONFIG_H
30
    #include <config.h>
31
#endif
32
33
#include <wolfssl/wolfcrypt/settings.h>
34
35
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
36
37
#include <wolfssl/wolfcrypt/error-crypt.h>
38
#ifdef NO_INLINE
39
    #include <wolfssl/wolfcrypt/misc.h>
40
#else
41
    #define WOLFSSL_MISC_INCLUDED
42
    #include <wolfcrypt/src/misc.c>
43
#endif
44
45
/* SP Build Options:
46
 * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
47
 * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
48
 * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
49
 * WOLFSSL_SP_MATH:             Use only single precision math and algorithms
50
 *      it supports (no fastmath tfm.c or normal integer.c)
51
 * WOLFSSL_SP_MATH_ALL          Implementation of all MP functions
52
 *      (replacement for tfm.c and integer.c)
53
 * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large
54
 *      stack variables
55
 * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
56
 * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
57
 * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
58
 * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
59
 * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
60
 * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
61
 * WOLFSSL_SP_521               Enable ECC 521-bit SECP521R1 support
62
 * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
63
 * WOLFSSL_SP_X86_64_ASM        Enable Intel x64 assembly implementation
64
 * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly implementation
65
 * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly implementation
66
 * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly implementation
67
 * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly implementation
68
 *      (used with -mthumb)
69
 * WOLFSSL_SP_X86_64            Enable Intel x86 64-bit assembly speedups
70
 * WOLFSSL_SP_X86               Enable Intel x86 assembly speedups
71
 * WOLFSSL_SP_ARM64             Enable Aarch64 assembly speedups
72
 * WOLFSSL_SP_ARM32             Enable ARM32 assembly speedups
73
 * WOLFSSL_SP_ARM32_UDIV        Enable word divide asm that uses UDIV instr
74
 * WOLFSSL_SP_ARM_THUMB         Enable ARM Thumb assembly speedups
75
 *                              (explicitly uses register 'r7')
76
 * WOLFSSL_SP_PPC64             Enable PPC64 assembly speedups
77
 * WOLFSSL_SP_PPC               Enable PPC assembly speedups
78
 * WOLFSSL_SP_MIPS64            Enable MIPS64 assembly speedups
79
 * WOLFSSL_SP_MIPS              Enable MIPS assembly speedups
80
 * WOLFSSL_SP_RISCV64           Enable RISCV64 assembly speedups
81
 * WOLFSSL_SP_RISCV32           Enable RISCV32 assembly speedups
82
 * WOLFSSL_SP_S390X             Enable S390X assembly speedups
83
 * SP_WORD_SIZE                 Force 32 or 64 bit mode
84
 * WOLFSSL_SP_NONBLOCK          Enables "non blocking" mode for SP math, which
85
 *      will return FP_WOULDBLOCK for long operations and function must be
86
 *      called again until complete.
87
 * WOLFSSL_SP_FAST_NCT_EXPTMOD  Enables the faster non-constant time modular
88
 *      exponentiation implementation.
89
 * WOLFSSL_SP_INT_NEGATIVE      Enables negative values to be used.
90
 * WOLFSSL_SP_INT_DIGIT_ALIGN   Enable when unaligned access of sp_int_digit
91
 *                              pointer is not allowed.
92
 * WOLFSSL_SP_NO_DYN_STACK      Disable use of dynamic stack items.
93
 *                              Used with small code size and not small stack.
94
 * WOLFSSL_SP_FAST_MODEXP       Allow fast mod_exp with small C code
95
 */
96
97
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
98
#if defined(__clang__) && defined(__clang_major__) && \
99
    (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
100
    #undef WOLFSSL_SP_SMALL
101
#endif
102
103
#include <wolfssl/wolfcrypt/sp_int.h>
104
105
/* DECL_SP_INT: Declare one variable of type 'sp_int'. */
106
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
107
    !defined(WOLFSSL_SP_NO_MALLOC)
108
    /* Declare a variable that will be assigned a value on XMALLOC. */
109
    #define DECL_SP_INT(n, s)   \
110
1.13k
        sp_int* n = NULL
111
#else
112
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
113
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
114
        /* Declare a variable on the stack with the required data size. */
115
        #define DECL_SP_INT(n, s)               \
116
            byte    n##d[MP_INT_SIZEOF(s)];     \
117
            sp_int* n = (sp_int*)n##d
118
    #else
119
        /* Declare a variable on the stack. */
120
        #define DECL_SP_INT(n, s)               \
121
            sp_int n[1]
122
    #endif
123
#endif
124
125
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size. */
126
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
127
    !defined(WOLFSSL_SP_NO_MALLOC)
128
    /* Dynamically allocate just enough data to support size. */
129
    #define ALLOC_SP_INT(n, s, err, h)                                         \
130
1.13k
    do {                                                                       \
131
1.13k
        if ((err) == MP_OKAY) {                                                \
132
1.10k
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), DYNAMIC_TYPE_BIGINT); \
133
1.10k
            if ((n) == NULL) {                                                 \
134
19
                (err) = MP_MEM;                                                \
135
19
            }                                                                  \
136
1.10k
        }                                                                      \
137
1.13k
    }                                                                          \
138
1.13k
    while (0)
139
140
    /* Dynamically allocate just enough data to support size - and set size. */
141
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
142
0
    do {                                                                       \
143
0
        ALLOC_SP_INT(n, s, err, h);                                            \
144
0
        if ((err) == MP_OKAY) {                                                \
145
0
            (n)->size = (s);                                                   \
146
0
        }                                                                      \
147
0
    }                                                                          \
148
0
    while (0)
149
#else
150
    /* Array declared on stack - nothing to do. */
151
    #define ALLOC_SP_INT(n, s, err, h)
152
    /* Array declared on stack - set the size field.
     *
     * Written as do/while(0) with parenthesised arguments so that the macro
     * is a single statement that needs a trailing ';'. That is the same shape
     * as the dynamically allocating ALLOC_SP_INT_SIZE, and it is safe to use
     * as the body of an unbraced if/else.
     *
     * n    sp_int pointer (or array) declared on the stack.
     * s    Value to store in the size field.
     * err  Unused in this variant.
     * h    Unused in this variant.
     */
    #define ALLOC_SP_INT_SIZE(n, s, err, h)     \
        do {                                    \
            (n)->size = (s);                    \
        }                                       \
        while (0)
155
#endif
156
157
/* FREE_SP_INT: Free an 'sp_int' variable. */
158
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
159
    !defined(WOLFSSL_SP_NO_MALLOC)
160
    /* Free dynamically allocated data. */
161
    #define FREE_SP_INT(n, h)                   \
162
1.13k
    do {                                        \
163
1.13k
        if ((n) != NULL) {                      \
164
1.08k
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);   \
165
1.08k
        }                                       \
166
1.13k
    }                                           \
167
1.13k
    while (0)
168
#else
169
    /* Nothing to do as declared on stack. */
170
    #define FREE_SP_INT(n, h)
171
#endif
172
173
174
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */
175
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
176
    !defined(WOLFSSL_SP_NO_MALLOC)
177
    /* Declare a variable that will be assigned a value on XMALLOC. */
178
    #define DECL_SP_INT_ARRAY(n, s, c)  \
179
1.19M
        sp_int* n##d = NULL;            \
180
1.19M
        sp_int* (n)[c] = { NULL, }
181
#else
182
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
183
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
184
        /* Declare a variable on the stack with the required data size. */
185
        #define DECL_SP_INT_ARRAY(n, s, c)          \
186
            byte    n##d[MP_INT_SIZEOF(s) * (c)];   \
187
            sp_int* (n)[c]
188
    #else
189
        /* Declare a variable on the stack. */
190
        #define DECL_SP_INT_ARRAY(n, s, c)      \
191
            sp_int n##d[c];                     \
192
            sp_int* (n)[c]
193
    #endif
194
#endif
195
196
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
197
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
198
    !defined(WOLFSSL_SP_NO_MALLOC)
199
    /* Dynamically allocate just enough data to support multiple sp_ints of the
200
     * required size. Use pointers into data to make up array and set sizes.
201
     */
202
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
203
1.01M
    do {                                                                       \
204
1.01M
        if ((err) == MP_OKAY) {                                                \
205
1.01M
            n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),               \
206
1.01M
                                                         DYNAMIC_TYPE_BIGINT); \
207
1.01M
            if (n##d == NULL) {                                                \
208
272
                (err) = MP_MEM;                                                \
209
272
            }                                                                  \
210
1.01M
            else {                                                             \
211
1.01M
                int n##ii;                                                     \
212
1.01M
                (n)[0] = n##d;                                                 \
213
1.01M
                (n)[0]->size = (s);                                            \
214
3.03M
                for (n##ii = 1; n##ii < (c); n##ii++) {                        \
215
2.02M
                    (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                 \
216
2.02M
                    (n)[n##ii]->size = (s);                                    \
217
2.02M
                }                                                              \
218
1.01M
            }                                                                  \
219
1.01M
        }                                                                      \
220
1.01M
    }                                                                          \
221
1.01M
    while (0)
222
#else
223
    #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
224
        defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
225
        /* Data declared on stack that supports multiple sp_ints of the
226
         * required size. Use pointers into data to make up array and set sizes.
227
         */
228
        #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                            \
229
        do {                                                                   \
230
            if ((err) == MP_OKAY) {                                            \
231
                int n##ii;                                                     \
232
                (n)[0] = (sp_int*)n##d;                                        \
233
                (n)[0]->size = (s);                                            \
234
                for (n##ii = 1; n##ii < (c); n##ii++) {                        \
235
                    (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                 \
236
                    (n)[n##ii]->size = (s);                                    \
237
                }                                                              \
238
            }                                                                  \
239
        }                                                                      \
240
        while (0)
241
    #else
242
        /* Data declared on stack that supports multiple sp_ints of the
243
         * required size. Set into array and set sizes.
244
         */
245
        #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                            \
246
        do {                                                                   \
247
            if ((err) == MP_OKAY) {                                            \
248
                int n##ii;                                                     \
249
                for (n##ii = 0; n##ii < (c); n##ii++) {                        \
250
                    (n)[n##ii] = &n##d[n##ii];                                 \
251
                    (n)[n##ii]->size = (s);                                    \
252
                }                                                              \
253
            }                                                                  \
254
        }                                                                      \
255
        while (0)
256
    #endif
257
#endif
258
259
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */
260
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
261
    !defined(WOLFSSL_SP_NO_MALLOC)
262
    /* Free data variable that was dynamically allocated. */
263
    #define FREE_SP_INT_ARRAY(n, h)                 \
264
1.19M
    do {                                            \
265
1.19M
        if (n##d != NULL) {                         \
266
1.01M
            XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);    \
267
1.01M
        }                                           \
268
1.19M
    }                                               \
269
1.19M
    while (0)
270
#else
271
    /* Nothing to do as data declared on stack. */
272
    #define FREE_SP_INT_ARRAY(n, h)
273
#endif
274
275
276
#ifndef WOLFSSL_NO_ASM
277
    #ifdef __IAR_SYSTEMS_ICC__
278
        #define __asm__        asm
279
        #define __volatile__   volatile
280
    #endif /* __IAR_SYSTEMS_ICC__ */
281
    #ifdef __KEIL__
282
        #define __asm__        __asm
283
        #define __volatile__   volatile
284
    #endif
285
286
    #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
287
/*
288
 * CPU: x86_64
289
 */
290
291
/* Multiply va by vb and store double size result in: vh | vl */
292
#define SP_ASM_MUL(vl, vh, va, vb)                       \
293
    __asm__ __volatile__ (                               \
294
        "movq %[b], %%rax \n\t"                    \
295
        "mulq %[a]    \n\t"                    \
296
        "movq %%rax, %[l] \n\t"                    \
297
        "movq %%rdx, %[h] \n\t"                    \
298
        : [h] "+r" (vh), [l] "+r" (vl)                   \
299
        : [a] "m" (va), [b] "m" (vb)                     \
300
        : "memory", "%rax", "%rdx", "cc"                 \
301
    )
302
/* Multiply va by vb and store double size result in: vo | vh | vl */
303
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
304
    __asm__ __volatile__ (                               \
305
        "movq %[b], %%rax \n\t"                    \
306
        "mulq %[a]    \n\t"                    \
307
        "movq $0   , %[o] \n\t"                    \
308
        "movq %%rax, %[l] \n\t"                    \
309
        "movq %%rdx, %[h] \n\t"                    \
310
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
311
        : [a] "m" (va), [b] "m" (vb)                     \
312
        : "%rax", "%rdx", "cc"                           \
313
    )
314
/* Multiply va by vb and add double size result into: vo | vh | vl */
315
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
316
    __asm__ __volatile__ (                               \
317
        "movq %[b], %%rax \n\t"                    \
318
        "mulq %[a]    \n\t"                    \
319
        "addq %%rax, %[l] \n\t"                    \
320
        "adcq %%rdx, %[h] \n\t"                    \
321
        "adcq $0   , %[o] \n\t"                    \
322
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
323
        : [a] "m" (va), [b] "m" (vb)                     \
324
        : "%rax", "%rdx", "cc"                           \
325
    )
326
/* Multiply va by vb and add double size result into: vh | vl */
327
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
328
    __asm__ __volatile__ (                               \
329
        "movq %[b], %%rax \n\t"                    \
330
        "mulq %[a]    \n\t"                    \
331
        "addq %%rax, %[l] \n\t"                    \
332
        "adcq %%rdx, %[h] \n\t"                    \
333
        : [l] "+r" (vl), [h] "+r" (vh)                   \
334
        : [a] "m" (va), [b] "m" (vb)                     \
335
        : "%rax", "%rdx", "cc"                           \
336
    )
337
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
338
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
339
    __asm__ __volatile__ (                               \
340
        "movq %[b], %%rax \n\t"                    \
341
        "mulq %[a]    \n\t"                    \
342
        "addq %%rax, %[l] \n\t"                    \
343
        "adcq %%rdx, %[h] \n\t"                    \
344
        "adcq $0   , %[o] \n\t"                    \
345
        "addq %%rax, %[l] \n\t"                    \
346
        "adcq %%rdx, %[h] \n\t"                    \
347
        "adcq $0   , %[o] \n\t"                    \
348
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
349
        : [a] "m" (va), [b] "m" (vb)                     \
350
        : "%rax", "%rdx", "cc"                           \
351
    )
352
/* Multiply va by vb and add double size result twice into: vo | vh | vl
353
 * Assumes first add will not overflow vh | vl
354
 */
355
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
356
    __asm__ __volatile__ (                               \
357
        "movq %[b], %%rax \n\t"                    \
358
        "mulq %[a]    \n\t"                    \
359
        "addq %%rax, %[l] \n\t"                    \
360
        "adcq %%rdx, %[h] \n\t"                    \
361
        "addq %%rax, %[l] \n\t"                    \
362
        "adcq %%rdx, %[h] \n\t"                    \
363
        "adcq $0   , %[o] \n\t"                    \
364
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
365
        : [a] "m" (va), [b] "m" (vb)                     \
366
        : "%rax", "%rdx", "cc"                           \
367
    )
368
/* Square va and store double size result in: vh | vl */
369
#define SP_ASM_SQR(vl, vh, va)                           \
370
    __asm__ __volatile__ (                               \
371
        "movq %[a], %%rax \n\t"                    \
372
        "mulq %%rax   \n\t"                    \
373
        "movq %%rax, %[l] \n\t"                    \
374
        "movq %%rdx, %[h] \n\t"                    \
375
        : [h] "+r" (vh), [l] "+r" (vl)                   \
376
        : [a] "m" (va)                                   \
377
        : "memory", "%rax", "%rdx", "cc"                 \
378
    )
379
/* Square va and add double size result into: vo | vh | vl */
380
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
381
    __asm__ __volatile__ (                               \
382
        "movq %[a], %%rax \n\t"                    \
383
        "mulq %%rax   \n\t"                    \
384
        "addq %%rax, %[l] \n\t"                    \
385
        "adcq %%rdx, %[h] \n\t"                    \
386
        "adcq $0   , %[o] \n\t"                    \
387
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
388
        : [a] "m" (va)                                   \
389
        : "%rax", "%rdx", "cc"                           \
390
    )
391
/* Square va and add double size result into: vh | vl */
392
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
393
    __asm__ __volatile__ (                               \
394
        "movq %[a], %%rax \n\t"                    \
395
        "mulq %%rax   \n\t"                    \
396
        "addq %%rax, %[l] \n\t"                    \
397
        "adcq %%rdx, %[h] \n\t"                    \
398
        : [l] "+r" (vl), [h] "+r" (vh)                   \
399
        : [a] "m" (va)                                   \
400
        : "%rax", "%rdx", "cc"                           \
401
    )
402
/* Add va into: vh | vl */
403
#define SP_ASM_ADDC(vl, vh, va)                          \
404
    __asm__ __volatile__ (                               \
405
        "addq %[a], %[l]  \n\t"                    \
406
        "adcq $0  , %[h]  \n\t"                    \
407
        : [l] "+r" (vl), [h] "+r" (vh)                   \
408
        : [a] "m" (va)                                   \
409
        : "cc"                                           \
410
    )
411
/* Add va, variable in a register, into: vh | vl */
412
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
413
    __asm__ __volatile__ (                               \
414
        "addq %[a], %[l]  \n\t"                    \
415
        "adcq $0  , %[h]  \n\t"                    \
416
        : [l] "+r" (vl), [h] "+r" (vh)                   \
417
        : [a] "r" (va)                                   \
418
        : "cc"                                           \
419
    )
420
/* Sub va from: vh | vl */
421
#define SP_ASM_SUBC(vl, vh, va)                          \
422
    __asm__ __volatile__ (                               \
423
        "subq %[a], %[l]  \n\t"                    \
424
        "sbbq $0  , %[h]  \n\t"                    \
425
        : [l] "+r" (vl), [h] "+r" (vh)                   \
426
        : [a] "m" (va)                                   \
427
        : "cc"                                           \
428
    )
429
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is added in twice; each pass carries from vl into vh
 * and from vh into vo. Only the named operands and the flags are modified,
 * so "cc" is the only clobber - no fixed registers are reserved.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addq %[a], %[l]  \n\t"                    \
        "adcq %[b], %[h]  \n\t"                    \
        "adcq %[c], %[o]  \n\t"                    \
        "addq %[a], %[l]  \n\t"                    \
        "adcq %[b], %[h]  \n\t"                    \
        "adcq %[c], %[o]  \n\t"                    \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
442
443
#ifndef WOLFSSL_SP_DIV_WORD_HALF
444
/* Divide a two digit number by a digit number and return. (hi | lo) / d
445
 *
446
 * Using divq instruction on Intel x64.
447
 *
448
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
449
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
450
 * @param  [in]  d   SP integer digit. Number to divide by.
451
 * @return  The division result.
452
 */
453
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
454
                                          sp_int_digit d)
455
{
456
    __asm__ __volatile__ (
457
        "divq %2"
458
        : "+a" (lo)
459
        : "d" (hi), "r" (d)
460
        : "cc"
461
    );
462
    return lo;
463
}
464
#define SP_ASM_DIV_WORD
465
#endif
466
467
#define SP_INT_ASM_AVAILABLE
468
469
    #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
470
471
    #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
472
/*
473
 * CPU: x86
474
 */
475
476
/* Multiply va by vb and store double size result in: vh | vl */
477
#define SP_ASM_MUL(vl, vh, va, vb)                       \
478
    __asm__ __volatile__ (                               \
479
        "movl %[b], %%eax \n\t"                    \
480
        "mull %[a]    \n\t"                    \
481
        "movl %%eax, %[l] \n\t"                    \
482
        "movl %%edx, %[h] \n\t"                    \
483
        : [h] "+r" (vh), [l] "+r" (vl)                   \
484
        : [a] "m" (va), [b] "m" (vb)                     \
485
        : "memory", "eax", "edx", "cc"                   \
486
    )
487
/* Multiply va by vb and store double size result in: vo | vh | vl */
488
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
489
    __asm__ __volatile__ (                               \
490
        "movl %[b], %%eax \n\t"                    \
491
        "mull %[a]    \n\t"                    \
492
        "movl $0   , %[o] \n\t"                    \
493
        "movl %%eax, %[l] \n\t"                    \
494
        "movl %%edx, %[h] \n\t"                    \
495
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
496
        : [a] "m" (va), [b] "m" (vb)                     \
497
        : "eax", "edx", "cc"                             \
498
    )
499
/* Multiply va by vb and add double size result into: vo | vh | vl */
500
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
501
    __asm__ __volatile__ (                               \
502
        "movl %[b], %%eax \n\t"                    \
503
        "mull %[a]    \n\t"                    \
504
        "addl %%eax, %[l] \n\t"                    \
505
        "adcl %%edx, %[h] \n\t"                    \
506
        "adcl $0   , %[o] \n\t"                    \
507
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
508
        : [a] "r" (va), [b] "r" (vb)                     \
509
        : "eax", "edx", "cc"                             \
510
    )
511
/* Multiply va by vb and add double size result into: vh | vl */
512
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
513
    __asm__ __volatile__ (                               \
514
        "movl %[b], %%eax \n\t"                    \
515
        "mull %[a]    \n\t"                    \
516
        "addl %%eax, %[l] \n\t"                    \
517
        "adcl %%edx, %[h] \n\t"                    \
518
        : [l] "+r" (vl), [h] "+r" (vh)                   \
519
        : [a] "m" (va), [b] "m" (vb)                     \
520
        : "eax", "edx", "cc"                             \
521
    )
522
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
523
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
524
    __asm__ __volatile__ (                               \
525
        "movl %[b], %%eax \n\t"                    \
526
        "mull %[a]    \n\t"                    \
527
        "addl %%eax, %[l] \n\t"                    \
528
        "adcl %%edx, %[h] \n\t"                    \
529
        "adcl $0   , %[o] \n\t"                    \
530
        "addl %%eax, %[l] \n\t"                    \
531
        "adcl %%edx, %[h] \n\t"                    \
532
        "adcl $0   , %[o] \n\t"                    \
533
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
534
        : [a] "r" (va), [b] "r" (vb)                     \
535
        : "eax", "edx", "cc"                             \
536
    )
537
/* Multiply va by vb and add double size result twice into: vo | vh | vl
538
 * Assumes first add will not overflow vh | vl
539
 */
540
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
541
    __asm__ __volatile__ (                               \
542
        "movl %[b], %%eax \n\t"                    \
543
        "mull %[a]    \n\t"                    \
544
        "addl %%eax, %[l] \n\t"                    \
545
        "adcl %%edx, %[h] \n\t"                    \
546
        "addl %%eax, %[l] \n\t"                    \
547
        "adcl %%edx, %[h] \n\t"                    \
548
        "adcl $0   , %[o] \n\t"                    \
549
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
550
        : [a] "m" (va), [b] "m" (vb)                     \
551
        : "eax", "edx", "cc"                             \
552
    )
553
/* Square va and store double size result in: vh | vl */
554
#define SP_ASM_SQR(vl, vh, va)                           \
555
    __asm__ __volatile__ (                               \
556
        "movl %[a], %%eax \n\t"                    \
557
        "mull %%eax   \n\t"                    \
558
        "movl %%eax, %[l] \n\t"                    \
559
        "movl %%edx, %[h] \n\t"                    \
560
        : [h] "+r" (vh), [l] "+r" (vl)                   \
561
        : [a] "m" (va)                                   \
562
        : "memory", "eax", "edx", "cc"                   \
563
    )
564
/* Square va and add double size result into: vo | vh | vl */
565
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
566
    __asm__ __volatile__ (                               \
567
        "movl %[a], %%eax \n\t"                    \
568
        "mull %%eax   \n\t"                    \
569
        "addl %%eax, %[l] \n\t"                    \
570
        "adcl %%edx, %[h] \n\t"                    \
571
        "adcl $0   , %[o] \n\t"                    \
572
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
573
        : [a] "m" (va)                                   \
574
        : "eax", "edx", "cc"                             \
575
    )
576
/* Square va and add double size result into: vh | vl */
577
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
578
    __asm__ __volatile__ (                               \
579
        "movl %[a], %%eax \n\t"                    \
580
        "mull %%eax   \n\t"                    \
581
        "addl %%eax, %[l] \n\t"                    \
582
        "adcl %%edx, %[h] \n\t"                    \
583
        : [l] "+r" (vl), [h] "+r" (vh)                   \
584
        : [a] "m" (va)                                   \
585
        : "eax", "edx", "cc"                             \
586
    )
587
/* Add va into: vh | vl */
588
#define SP_ASM_ADDC(vl, vh, va)                          \
589
    __asm__ __volatile__ (                               \
590
        "addl %[a], %[l]  \n\t"                    \
591
        "adcl $0  , %[h]  \n\t"                    \
592
        : [l] "+r" (vl), [h] "+r" (vh)                   \
593
        : [a] "m" (va)                                   \
594
        : "cc"                                           \
595
    )
596
/* Add va, variable in a register, into: vh | vl */
597
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
598
    __asm__ __volatile__ (                               \
599
        "addl %[a], %[l]  \n\t"                    \
600
        "adcl $0  , %[h]  \n\t"                    \
601
        : [l] "+r" (vl), [h] "+r" (vh)                   \
602
        : [a] "r" (va)                                   \
603
        : "cc"                                           \
604
    )
605
/* Sub va from: vh | vl
 *
 * va, a memory operand, is subtracted from vl and the borrow is propagated
 * into vh.
 */
607
#define SP_ASM_SUBC(vl, vh, va)                          \
608
    __asm__ __volatile__ (                               \
609
        "subl %[a], %[l]  \n\t"                    \
610
        "sbbl $0  , %[h]  \n\t"                    \
611
        : [l] "+r" (vl), [h] "+r" (vh)                   \
612
        : [a] "m" (va)                                   \
613
        : "cc"                                           \
614
    )
614
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is added in twice, chaining the carry across the
 * three words on each pass, rather than being doubled first.
 * All operands use register constraints.
 */
616
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
617
    __asm__ __volatile__ (                               \
618
        "addl %[a], %[l]  \n\t"                    \
619
        "adcl %[b], %[h]  \n\t"                    \
620
        "adcl %[c], %[o]  \n\t"                    \
621
        "addl %[a], %[l]  \n\t"                    \
622
        "adcl %[b], %[h]  \n\t"                    \
623
        "adcl %[c], %[o]  \n\t"                    \
624
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
625
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
626
        : "cc"                                           \
627
    )
627
628
#ifndef WOLFSSL_SP_DIV_WORD_HALF
629
/* Divide a two digit number by a digit number and return. (hi | lo) / d
630
 *
631
 * Using divl instruction on Intel x64.
632
 *
633
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
634
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
635
 * @param  [in]  d   SP integer digit. Number to divide by.
636
 * @return  The division result.
637
 */
638
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
639
                                          sp_int_digit d)
640
{
641
    __asm__ __volatile__ (
642
        "divl %2"
643
        : "+a" (lo)
644
        : "d" (hi), "r" (d)
645
        : "cc"
646
    );
647
    return lo;
648
}
649
#define SP_ASM_DIV_WORD
650
#endif
651
652
#define SP_INT_ASM_AVAILABLE
653
654
    #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
655
656
    #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
657
/*
658
 * CPU: Aarch64
659
 */
660
661
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low half of the product to vl and umulh writes the high
 * half to vh. No scratch registers are used.
 */
662
#define SP_ASM_MUL(vl, vh, va, vb)                       \
663
    __asm__ __volatile__ (                               \
664
        "mul  %[l], %[a], %[b]  \n\t"            \
665
        "umulh  %[h], %[a], %[b]  \n\t"            \
666
        : [h] "+r" (vh), [l] "+r" (vl)                   \
667
        : [a] "r" (va), [b] "r" (vb)                     \
668
        : "memory", "cc"                                 \
669
    )
670
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The low half of the product is computed into scratch register x8 and is
 * only moved to vl after umulh has read va and vb. vo is set to zero.
 */
671
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
672
    __asm__ __volatile__ (                               \
673
        "mul  x8, %[a], %[b]    \n\t"            \
674
        "umulh  %[h], %[a], %[b]  \n\t"            \
675
        "mov  %[l], x8    \n\t"            \
676
        "mov  %[o], xzr   \n\t"            \
677
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
678
        : [a] "r" (va), [b] "r" (vb)                     \
679
        : "x8"                                           \
680
    )
681
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in scratch registers x9 | x8 and then added with the
 * carry chained through vl, vh and vo.
 */
682
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
683
    __asm__ __volatile__ (                               \
684
        "mul  x8, %[a], %[b]    \n\t"            \
685
        "umulh  x9, %[a], %[b]    \n\t"            \
686
        "adds %[l], %[l], x8    \n\t"            \
687
        "adcs %[h], %[h], x9    \n\t"            \
688
        "adc  %[o], %[o], xzr   \n\t"            \
689
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
690
        : [a] "r" (va), [b] "r" (vb)                     \
691
        : "x8", "x9", "cc"                               \
692
    )
693
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in scratch registers x9 | x8. Only two accumulator
 * words are updated, so a carry out of vh is not captured.
 */
694
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
695
    __asm__ __volatile__ (                               \
696
        "mul  x8, %[a], %[b]    \n\t"            \
697
        "umulh  x9, %[a], %[b]    \n\t"            \
698
        "adds %[l], %[l], x8    \n\t"            \
699
        "adc  %[h], %[h], x9    \n\t"            \
700
        : [l] "+r" (vl), [h] "+r" (vh)                   \
701
        : [a] "r" (va), [b] "r" (vb)                     \
702
        : "x8", "x9", "cc"                               \
703
    )
704
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers x9 | x8 and then added
 * in two separate passes, each propagating its own carry into vo.
 */
705
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
706
    __asm__ __volatile__ (                               \
707
        "mul  x8, %[a], %[b]    \n\t"            \
708
        "umulh  x9, %[a], %[b]    \n\t"            \
709
        "adds %[l], %[l], x8    \n\t"            \
710
        "adcs %[h], %[h], x9    \n\t"            \
711
        "adc  %[o], %[o], xzr   \n\t"            \
712
        "adds %[l], %[l], x8    \n\t"            \
713
        "adcs %[h], %[h], x9    \n\t"            \
714
        "adc  %[o], %[o], xzr   \n\t"            \
715
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
716
        : [a] "r" (va), [b] "r" (vb)                     \
717
        : "x8", "x9", "cc"                               \
718
    )
719
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers x9 | x8. The first add
 * does not propagate a carry into vo; only the second add does.
 */
722
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
723
    __asm__ __volatile__ (                               \
724
        "mul  x8, %[a], %[b]    \n\t"            \
725
        "umulh  x9, %[a], %[b]    \n\t"            \
726
        "adds %[l], %[l], x8    \n\t"            \
727
        "adc  %[h], %[h], x9    \n\t"            \
728
        "adds %[l], %[l], x8    \n\t"            \
729
        "adcs %[h], %[h], x9    \n\t"            \
730
        "adc  %[o], %[o], xzr   \n\t"            \
731
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
732
        : [a] "r" (va), [b] "r" (vb)                     \
733
        : "x8", "x9", "cc"                               \
734
    )
735
/* Square va and store double size result in: vh | vl
 *
 * mul writes the low half of va * va to vl and umulh writes the high half
 * to vh. No scratch registers are used.
 */
737
#define SP_ASM_SQR(vl, vh, va)                           \
738
    __asm__ __volatile__ (                               \
739
        "mul  %[l], %[a], %[a]  \n\t"            \
740
        "umulh  %[h], %[a], %[a]  \n\t"            \
741
        : [h] "+r" (vh), [l] "+r" (vl)                   \
742
        : [a] "r" (va)                                   \
743
        : "memory"                                       \
744
    )
744
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in scratch registers x9 | x8 and then added with the
 * carry chained through vl, vh and vo.
 */
746
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
747
    __asm__ __volatile__ (                               \
748
        "mul  x8, %[a], %[a]    \n\t"            \
749
        "umulh  x9, %[a], %[a]    \n\t"            \
750
        "adds %[l], %[l], x8    \n\t"            \
751
        "adcs %[h], %[h], x9    \n\t"            \
752
        "adc  %[o], %[o], xzr   \n\t"            \
753
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
754
        : [a] "r" (va)                                   \
755
        : "x8", "x9", "cc"                               \
756
    )
756
/* Square va and add double size result into: vh | vl
 *
 * The square is formed in scratch registers x9 | x8. Only two accumulator
 * words are updated, so a carry out of vh is not captured.
 */
758
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
759
    __asm__ __volatile__ (                               \
760
        "mul  x8, %[a], %[a]    \n\t"            \
761
        "umulh  x9, %[a], %[a]    \n\t"            \
762
        "adds %[l], %[l], x8    \n\t"            \
763
        "adc  %[h], %[h], x9    \n\t"            \
764
        : [l] "+r" (vl), [h] "+r" (vh)                   \
765
        : [a] "r" (va)                                   \
766
        : "x8", "x9", "cc"                               \
767
    )
767
/* Add va into: vh | vl
 *
 * va is added to vl and the carry is propagated into vh by adding the zero
 * register with carry.
 */
769
#define SP_ASM_ADDC(vl, vh, va)                          \
770
    __asm__ __volatile__ (                               \
771
        "adds %[l], %[l], %[a]  \n\t"            \
772
        "adc  %[h], %[h], xzr   \n\t"            \
773
        : [l] "+r" (vl), [h] "+r" (vh)                   \
774
        : [a] "r" (va)                                   \
775
        : "cc"                                           \
776
    )
776
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the borrow is propagated into vh by
 * subtracting the zero register with carry.
 */
778
#define SP_ASM_SUBC(vl, vh, va)                          \
779
    __asm__ __volatile__ (                               \
780
        "subs %[l], %[l], %[a]  \n\t"            \
781
        "sbc  %[h], %[h], xzr   \n\t"            \
782
        : [l] "+r" (vl), [h] "+r" (vh)                   \
783
        : [a] "r" (va)                                   \
784
        : "cc"                                           \
785
    )
785
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is added in twice, chaining the carry across the
 * three words on each pass, rather than being doubled first.
 */
787
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
788
    __asm__ __volatile__ (                               \
789
        "adds %[l], %[l], %[a]  \n\t"            \
790
        "adcs %[h], %[h], %[b]  \n\t"            \
791
        "adc  %[o], %[o], %[c]  \n\t"            \
792
        "adds %[l], %[l], %[a]  \n\t"            \
793
        "adcs %[h], %[h], %[b]  \n\t"            \
794
        "adc  %[o], %[o], %[c]  \n\t"            \
795
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
796
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
797
        : "cc"                                           \
798
    )
798
799
#ifndef WOLFSSL_SP_DIV_WORD_HALF
800
/* Divide a two digit number by a digit number and return. (hi | lo) / d
801
 *
802
 * Using udiv instruction on Aarch64.
803
 * Constant time.
804
 *
805
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
806
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
807
 * @param  [in]  d   SP integer digit. Number to divide by.
808
 * @return  The division result.
809
 */
810
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
811
                                          sp_int_digit d)
812
{
813
    __asm__ __volatile__ (
814
        "lsr  x3, %[d], 48\n\t"
815
        "mov  x5, 16\n\t"
816
        "cmp  x3, 0\n\t"
817
        "mov  x4, 63\n\t"
818
        "csel x3, x5, xzr, eq\n\t"
819
        "sub  x4, x4, x3\n\t"
820
        "lsl  %[d], %[d], x3\n\t"
821
        "lsl  %[hi], %[hi], x3\n\t"
822
        "lsr  x5, %[lo], x4\n\t"
823
        "lsl  %[lo], %[lo], x3\n\t"
824
        "orr  %[hi], %[hi], x5, lsr 1\n\t"
825
826
        "lsr  x5, %[d], 32\n\t"
827
        "add  x5, x5, 1\n\t"
828
829
        "udiv x3, %[hi], x5\n\t"
830
        "lsl  x6, x3, 32\n\t"
831
        "mul  x4, %[d], x6\n\t"
832
        "umulh  x3, %[d], x6\n\t"
833
        "subs %[lo], %[lo], x4\n\t"
834
        "sbc  %[hi], %[hi], x3\n\t"
835
836
        "udiv x3, %[hi], x5\n\t"
837
        "lsl  x3, x3, 32\n\t"
838
        "add  x6, x6, x3\n\t"
839
        "mul  x4, %[d], x3\n\t"
840
        "umulh  x3, %[d], x3\n\t"
841
        "subs %[lo], %[lo], x4\n\t"
842
        "sbc  %[hi], %[hi], x3\n\t"
843
844
        "lsr  x3, %[lo], 32\n\t"
845
        "orr  x3, x3, %[hi], lsl 32\n\t"
846
847
        "udiv x3, x3, x5\n\t"
848
        "add  x6, x6, x3\n\t"
849
        "mul  x4, %[d], x3\n\t"
850
        "umulh  x3, %[d], x3\n\t"
851
        "subs %[lo], %[lo], x4\n\t"
852
        "sbc  %[hi], %[hi], x3\n\t"
853
854
        "lsr  x3, %[lo], 32\n\t"
855
        "orr  x3, x3, %[hi], lsl 32\n\t"
856
857
        "udiv x3, x3, x5\n\t"
858
        "add  x6, x6, x3\n\t"
859
        "mul  x4, %[d], x3\n\t"
860
        "sub  %[lo], %[lo], x4\n\t"
861
862
        "udiv x3, %[lo], %[d]\n\t"
863
        "add  %[hi], x6, x3\n\t"
864
865
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
866
        :
867
        : "x3", "x4", "x5", "x6"
868
    );
869
870
    return hi;
871
}
872
#define SP_ASM_DIV_WORD
873
#endif
874
875
#define SP_INT_ASM_AVAILABLE
876
877
    #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
878
879
    #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
880
        SP_WORD_SIZE == 32
881
/*
882
 * CPU: ARM32 or Cortex-M4 and similar
883
 */
884
885
/* Multiply va by vb and store double size result in: vh | vl
 *
 * A single umull writes the low word of the product to vl and the high word
 * to vh.
 */
886
#define SP_ASM_MUL(vl, vh, va, vb)                       \
887
    __asm__ __volatile__ (                               \
888
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
889
        : [h] "+r" (vh), [l] "+r" (vl)                   \
890
        : [a] "r" (va), [b] "r" (vb)                     \
891
        : "memory"                                       \
892
    )
893
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * umull writes the product to vh | vl and vo is set to zero.
 */
894
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
895
    __asm__ __volatile__ (                               \
896
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
897
        "mov  %[o], #0    \n\t"            \
898
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
899
        : [a] "r" (va), [b] "r" (vb)                     \
900
        :                                                \
901
    )
902
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in scratch registers r9 | r8 and then added with the
 * carry chained through vl, vh and vo.
 */
904
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
905
    __asm__ __volatile__ (                               \
906
        "umull  r8, r9, %[a], %[b]  \n\t"            \
907
        "adds %[l], %[l], r8    \n\t"            \
908
        "adcs %[h], %[h], r9    \n\t"            \
909
        "adc  %[o], %[o], #0    \n\t"            \
910
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
911
        : [a] "r" (va), [b] "r" (vb)                     \
912
        : "r8", "r9", "cc"                               \
913
    )
913
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Done with a single umlal, which accumulates the product directly into
 * vh | vl. No scratch registers are used.
 */
915
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
916
    __asm__ __volatile__ (                               \
917
        "umlal  %[l], %[h], %[a], %[b]  \n\t"            \
918
        : [l] "+r" (vl), [h] "+r" (vh)                   \
919
        : [a] "r" (va), [b] "r" (vb)                     \
920
        :                                                \
921
    )
921
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers r9 | r8 and then added
 * in two separate passes, each propagating its own carry into vo.
 */
923
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
924
    __asm__ __volatile__ (                               \
925
        "umull  r8, r9, %[a], %[b]  \n\t"            \
926
        "adds %[l], %[l], r8    \n\t"            \
927
        "adcs %[h], %[h], r9    \n\t"            \
928
        "adc  %[o], %[o], #0    \n\t"            \
929
        "adds %[l], %[l], r8    \n\t"            \
930
        "adcs %[h], %[h], r9    \n\t"            \
931
        "adc  %[o], %[o], #0    \n\t"            \
932
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
933
        : [a] "r" (va), [b] "r" (vb)                     \
934
        : "r8", "r9", "cc"                               \
935
    )
935
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers r9 | r8. The first add
 * does not propagate a carry into vo; only the second add does.
 */
939
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
940
    __asm__ __volatile__ (                               \
941
        "umull  r8, r9, %[a], %[b]  \n\t"            \
942
        "adds %[l], %[l], r8    \n\t"            \
943
        "adc  %[h], %[h], r9    \n\t"            \
944
        "adds %[l], %[l], r8    \n\t"            \
945
        "adcs %[h], %[h], r9    \n\t"            \
946
        "adc  %[o], %[o], #0    \n\t"            \
947
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
948
        : [a] "r" (va), [b] "r" (vb)                     \
949
        : "r8", "r9", "cc"                               \
950
    )
950
/* Square va and store double size result in: vh | vl
 *
 * A single umull of va with itself writes the low word to vl and the high
 * word to vh.
 */
952
#define SP_ASM_SQR(vl, vh, va)                           \
953
    __asm__ __volatile__ (                               \
954
        "umull  %[l], %[h], %[a], %[a]  \n\t"            \
955
        : [h] "+r" (vh), [l] "+r" (vl)                   \
956
        : [a] "r" (va)                                   \
957
        : "memory"                                       \
958
    )
958
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in scratch registers r9 | r8 and then added with the
 * carry chained through vl, vh and vo.
 */
960
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
961
    __asm__ __volatile__ (                               \
962
        "umull  r8, r9, %[a], %[a]  \n\t"            \
963
        "adds %[l], %[l], r8    \n\t"            \
964
        "adcs %[h], %[h], r9    \n\t"            \
965
        "adc  %[o], %[o], #0    \n\t"            \
966
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
967
        : [a] "r" (va)                                   \
968
        : "r8", "r9", "cc"                               \
969
    )
969
/* Square va and add double size result into: vh | vl
 *
 * Done with a single umlal of va with itself, which accumulates the square
 * directly into vh | vl. No scratch registers are used.
 */
971
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
972
    __asm__ __volatile__ (                               \
973
        "umlal  %[l], %[h], %[a], %[a]  \n\t"            \
974
        : [l] "+r" (vl), [h] "+r" (vh)                   \
975
        : [a] "r" (va)                                   \
976
        : "cc"                                           \
977
    )
977
/* Add va into: vh | vl
 *
 * va is added to vl and the carry is propagated into vh by adding zero with
 * carry.
 */
979
#define SP_ASM_ADDC(vl, vh, va)                          \
980
    __asm__ __volatile__ (                               \
981
        "adds %[l], %[l], %[a]  \n\t"            \
982
        "adc  %[h], %[h], #0    \n\t"            \
983
        : [l] "+r" (vl), [h] "+r" (vh)                   \
984
        : [a] "r" (va)                                   \
985
        : "cc"                                           \
986
    )
986
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the borrow is propagated into vh by
 * subtracting zero with carry.
 */
988
#define SP_ASM_SUBC(vl, vh, va)                          \
989
    __asm__ __volatile__ (                               \
990
        "subs %[l], %[l], %[a]  \n\t"            \
991
        "sbc  %[h], %[h], #0    \n\t"            \
992
        : [l] "+r" (vl), [h] "+r" (vh)                   \
993
        : [a] "r" (va)                                   \
994
        : "cc"                                           \
995
    )
995
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value is added in twice, chaining the carry across the
 * three words on each pass, rather than being doubled first.
 */
997
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
998
    __asm__ __volatile__ (                               \
999
        "adds %[l], %[l], %[a]  \n\t"            \
1000
        "adcs %[h], %[h], %[b]  \n\t"            \
1001
        "adc  %[o], %[o], %[c]  \n\t"            \
1002
        "adds %[l], %[l], %[a]  \n\t"            \
1003
        "adcs %[h], %[h], %[b]  \n\t"            \
1004
        "adc  %[o], %[o], %[c]  \n\t"            \
1005
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1006
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1007
        : "cc"                                           \
1008
    )
1008
1009
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1010
#ifndef WOLFSSL_SP_ARM32_UDIV
1011
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1012
 *
1013
 * No division instruction used - does operation bit by bit.
1014
 * Constant time.
1015
 *
1016
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
1017
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
1018
 * @param  [in]  d   SP integer digit. Number to divide by.
1019
 * @return  The division result.
1020
 */
1021
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1022
                                          sp_int_digit d)
1023
{
1024
    sp_int_digit r = 0;
1025
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1026
    static const char debruijn32[32] = {
1027
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1028
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1029
    };
1030
    static const sp_uint32 debruijn32_mul = 0x076be629;
1031
#endif
1032
1033
    __asm__ __volatile__ (
1034
        /* Shift d so that top bit is set. */
1035
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1036
        "ldr  r4, %[m]\n\t"
1037
        "mov  r5, %[d]\n\t"
1038
        "orr  r5, r5, r5, lsr #1\n\t"
1039
        "orr  r5, r5, r5, lsr #2\n\t"
1040
        "orr  r5, r5, r5, lsr #4\n\t"
1041
        "orr  r5, r5, r5, lsr #8\n\t"
1042
        "orr  r5, r5, r5, lsr #16\n\t"
1043
        "add  r5, r5, #1\n\t"
1044
        "mul  r5, r5, r4\n\t"
1045
        "lsr  r5, r5, #27\n\t"
1046
        "ldrb r5, [%[t], r5]\n\t"
1047
#else
1048
        "clz  r5, %[d]\n\t"
1049
#endif
1050
        "rsb  r6, r5, #31\n\t"
1051
        "lsl  %[d], %[d], r5\n\t"
1052
        "lsl  %[hi], %[hi], r5\n\t"
1053
        "lsr  r9, %[lo], r6\n\t"
1054
        "lsl  %[lo], %[lo], r5\n\t"
1055
        "orr  %[hi], %[hi], r9, lsr #1\n\t"
1056
1057
        "lsr  r5, %[d], #1\n\t"
1058
        "add  r5, r5, #1\n\t"
1059
        "mov  r6, %[lo]\n\t"
1060
        "mov  r9, %[hi]\n\t"
1061
        /* Do top 32 */
1062
        "subs r8, r5, r9\n\t"
1063
        "sbc  r8, r8, r8\n\t"
1064
        "add  %[r], %[r], %[r]\n\t"
1065
        "sub  %[r], %[r], r8\n\t"
1066
        "and  r8, r8, r5\n\t"
1067
        "subs r9, r9, r8\n\t"
1068
        /* Next 30 bits */
1069
        "mov  r4, #29\n\t"
1070
        "\n1:\n\t"
1071
        "movs r6, r6, lsl #1\n\t"
1072
        "adc  r9, r9, r9\n\t"
1073
        "subs r8, r5, r9\n\t"
1074
        "sbc  r8, r8, r8\n\t"
1075
        "add  %[r], %[r], %[r]\n\t"
1076
        "sub  %[r], %[r], r8\n\t"
1077
        "and  r8, r8, r5\n\t"
1078
        "subs r9, r9, r8\n\t"
1079
        "subs r4, r4, #1\n\t"
1080
        "bpl  1b\n\t"
1081
1082
        "add  %[r], %[r], %[r]\n\t"
1083
        "add  %[r], %[r], #1\n\t"
1084
1085
        /* Handle difference has hi word > 0. */
1086
        "umull  r4, r5, %[r], %[d]\n\t"
1087
        "subs r4, %[lo], r4\n\t"
1088
        "sbc  r5, %[hi], r5\n\t"
1089
        "add  %[r], %[r], r5\n\t"
1090
        "umull  r4, r5, %[r], %[d]\n\t"
1091
        "subs r4, %[lo], r4\n\t"
1092
        "sbc  r5, %[hi], r5\n\t"
1093
        "add  %[r], %[r], r5\n\t"
1094
1095
        /* Add 1 to result if bottom half of difference is >= d. */
1096
        "mul  r4, %[r], %[d]\n\t"
1097
        "subs r4, %[lo], r4\n\t"
1098
        "subs r9, %[d], r4\n\t"
1099
        "sbc  r8, r8, r8\n\t"
1100
        "sub  %[r], %[r], r8\n\t"
1101
        "subs r9, r9, #1\n\t"
1102
        "sbc  r8, r8, r8\n\t"
1103
        "sub  %[r], %[r], r8\n\t"
1104
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1105
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
1106
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1107
#else
1108
        :
1109
#endif
1110
        : "r4", "r5", "r6", "r8", "r9"
1111
    );
1112
1113
    return r;
1114
}
1115
#else
1116
/* Divide a two digit number by a digit number and return. (hi | lo) / d
1117
 *
1118
 * Using udiv instruction on arm32
1119
 * Constant time.
1120
 *
1121
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
1122
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
1123
 * @param  [in]  d   SP integer digit. Number to divide by.
1124
 * @return  The division result.
1125
 */
1126
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1127
                                          sp_int_digit d)
1128
{
1129
    __asm__ __volatile__ (
1130
        "lsrs r3, %[d], #24\n\t"
1131
  "it eq\n\t"
1132
        "moveq  r3, #8\n\t"
1133
  "it ne\n\t"
1134
        "movne  r3, #0\n\t"
1135
        "rsb  r4, r3, #31\n\t"
1136
        "lsl  %[d], %[d], r3\n\t"
1137
        "lsl  %[hi], %[hi], r3\n\t"
1138
        "lsr  r5, %[lo], r4\n\t"
1139
        "lsl  %[lo], %[lo], r3\n\t"
1140
        "orr  %[hi], %[hi], r5, lsr #1\n\t"
1141
1142
        "lsr  r5, %[d], 16\n\t"
1143
        "add  r5, r5, 1\n\t"
1144
1145
        "udiv r3, %[hi], r5\n\t"
1146
        "lsl  r6, r3, 16\n\t"
1147
        "umull  r4, r3, %[d], r6\n\t"
1148
        "subs %[lo], %[lo], r4\n\t"
1149
        "sbc  %[hi], %[hi], r3\n\t"
1150
1151
        "udiv r3, %[hi], r5\n\t"
1152
        "lsl  r3, r3, 16\n\t"
1153
        "add  r6, r6, r3\n\t"
1154
        "umull  r4, r3, %[d], r3\n\t"
1155
        "subs %[lo], %[lo], r4\n\t"
1156
        "sbc  %[hi], %[hi], r3\n\t"
1157
1158
        "lsr  r3, %[lo], 16\n\t"
1159
        "orr  r3, r3, %[hi], lsl 16\n\t"
1160
1161
        "udiv r3, r3, r5\n\t"
1162
        "add  r6, r6, r3\n\t"
1163
        "umull  r4, r3, %[d], r3\n\t"
1164
        "subs %[lo], %[lo], r4\n\t"
1165
        "sbc  %[hi], %[hi], r3\n\t"
1166
1167
        "lsr  r3, %[lo], 16\n\t"
1168
        "orr  r3, r3, %[hi], lsl 16\n\t"
1169
1170
        "udiv r3, r3, r5\n\t"
1171
        "add  r6, r6, r3\n\t"
1172
        "mul  r4, %[d], r3\n\t"
1173
        "sub  %[lo], %[lo], r4\n\t"
1174
1175
        "udiv r3, %[lo], %[d]\n\t"
1176
        "add  %[hi], r6, r3\n\t"
1177
1178
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1179
        :
1180
        : "r3", "r4", "r5", "r6"
1181
    );
1182
1183
    return hi;
1184
}
1185
#endif
1186
1187
#define SP_ASM_DIV_WORD
1188
#endif
1189
1190
#define SP_INT_ASM_AVAILABLE
1191
1192
    #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1193
1194
    #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1195
/*
1196
 * CPU: ARM Thumb (like Cortex-M0)
1197
 */
1198
1199
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1200
 * usage of register 'r7'.
1201
 */
1202
1203
#if defined(__clang__)
1204
1205
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The double size product is built from the four partial products of the
 * 16-bit halves of va and vb (al, ah, bl, bh). The two cross products are
 * split at bit 16 and added into vl and vh with carry.
 * r4, r5 and r6 are scratch registers; r5 only holds the zero used to add
 * the carry into vh.
 */
1206
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1207
    __asm__ __volatile__ (                               \
1208
        /* al * bl */                                    \
1209
        "uxth r6, %[a]    \n\t"            \
1210
        "uxth %[l], %[b]    \n\t"            \
1211
        "muls %[l], r6    \n\t"            \
1212
        /* al * bh */                                    \
1213
        "lsrs r4, %[b], #16   \n\t"            \
1214
        "muls r6, r4      \n\t"            \
1215
        "lsrs %[h], r6, #16   \n\t"            \
1216
        "lsls r6, r6, #16   \n\t"            \
1217
        "adds %[l], %[l], r6    \n\t"            \
1218
        "movs r5, #0      \n\t"            \
1219
        "adcs %[h], r5    \n\t"            \
1220
        /* ah * bh */                                    \
1221
        "lsrs r6, %[a], #16   \n\t"            \
1222
        "muls r4, r6      \n\t"            \
1223
        "adds %[h], %[h], r4    \n\t"            \
1224
        /* ah * bl */                                    \
1225
        "uxth r4, %[b]    \n\t"            \
1226
        "muls r6, r4      \n\t"            \
1227
        "lsrs r4, r6, #16   \n\t"            \
1228
        "lsls r6, r6, #16   \n\t"            \
1229
        "adds %[l], %[l], r6    \n\t"            \
1230
        "adcs %[h], r4    \n\t"            \
1231
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1232
        : [a] "l" (va), [b] "l" (vb)                     \
1233
        : "r4", "r5", "r6", "cc"                         \
1234
    )
1235
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The double size product is built from the four partial products of the
 * 16-bit halves of va and vb (al, ah, bl, bh).
 * vo is set to zero and is also the zero operand used to add the carry into
 * vh. r6 and r7 are scratch registers.
 */
1236
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1237
    __asm__ __volatile__ (                               \
1238
        /* al * bl */                                    \
1239
        "uxth r6, %[a]    \n\t"            \
1240
        "uxth %[l], %[b]    \n\t"            \
1241
        "muls %[l], r6    \n\t"            \
1242
        /* al * bh */                                    \
1243
        "lsrs r7, %[b], #16   \n\t"            \
1244
        "muls r6, r7      \n\t"            \
1245
        "lsrs %[h], r6, #16   \n\t"            \
1246
        "lsls r6, r6, #16   \n\t"            \
1247
        "adds %[l], %[l], r6    \n\t"            \
1248
        "movs %[o], #0    \n\t"            \
1249
        "adcs %[h], %[o]    \n\t"            \
1250
        /* ah * bh */                                    \
1251
        "lsrs r6, %[a], #16   \n\t"            \
1252
        "muls r7, r6      \n\t"            \
1253
        "adds %[h], %[h], r7    \n\t"            \
1254
        /* ah * bl */                                    \
1255
        "uxth r7, %[b]    \n\t"            \
1256
        "muls r6, r7      \n\t"            \
1257
        "lsrs r7, r6, #16   \n\t"            \
1258
        "lsls r6, r6, #16   \n\t"            \
1259
        "adds %[l], %[l], r6    \n\t"            \
1260
        "adcs %[h], r7    \n\t"            \
1261
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1262
        : [a] "l" (va), [b] "l" (vb)                     \
1263
        : "r6", "r7", "cc"                               \
1264
    )
1265
#ifndef WOLFSSL_SP_SMALL
1266
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Each of the four partial products of the 16-bit halves of va and vb
 * (al, ah, bl, bh) is added into the accumulator as it is computed, with the
 * carry propagated up to vo every time.
 * r5 holds zero for the carry-only additions; r6 and r7 are scratch.
 */
1267
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1268
    __asm__ __volatile__ (                               \
1269
        /* al * bl */                                    \
1270
        "uxth r6, %[a]    \n\t"            \
1271
        "uxth r7, %[b]    \n\t"            \
1272
        "muls r7, r6      \n\t"            \
1273
        "adds %[l], %[l], r7    \n\t"            \
1274
        "movs r5, #0      \n\t"            \
1275
        "adcs %[h], r5    \n\t"            \
1276
        "adcs %[o], r5    \n\t"            \
1277
        /* al * bh */                                    \
1278
        "lsrs r7, %[b], #16   \n\t"            \
1279
        "muls r6, r7      \n\t"            \
1280
        "lsrs r7, r6, #16   \n\t"            \
1281
        "lsls r6, r6, #16   \n\t"            \
1282
        "adds %[l], %[l], r6    \n\t"            \
1283
        "adcs %[h], r7    \n\t"            \
1284
        "adcs %[o], r5    \n\t"            \
1285
        /* ah * bh */                                    \
1286
        "lsrs r6, %[a], #16   \n\t"            \
1287
        "lsrs r7, %[b], #16   \n\t"            \
1288
        "muls r7, r6      \n\t"            \
1289
        "adds %[h], %[h], r7    \n\t"            \
1290
        "adcs %[o], r5    \n\t"            \
1291
        /* ah * bl */                                    \
1292
        "uxth r7, %[b]    \n\t"            \
1293
        "muls r6, r7      \n\t"            \
1294
        "lsrs r7, r6, #16   \n\t"            \
1295
        "lsls r6, r6, #16   \n\t"            \
1296
        "adds %[l], %[l], r6    \n\t"            \
1297
        "adcs %[h], r7    \n\t"            \
1298
        "adcs %[o], r5    \n\t"            \
1299
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1300
        : [a] "l" (va), [b] "l" (vb)                     \
1301
        : "r5", "r6", "r7", "cc"                         \
1302
    )
1303
#else
1304
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant that needs only r5 and r6 as scratch registers. r5 serves both as
 * a temporary for the partial products and as the zero addend, so it is
 * re-zeroed before each carry-only addition into vo.
 * Condition flags are modified.
 */
1305
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1306
    __asm__ __volatile__ (                               \
1307
        /* al * bl: added to vl, carry rippled into vh and vo */ \
1308
        "uxth r6, %[a]    \n\t"            \
1309
        "uxth r5, %[b]    \n\t"            \
1310
        "muls r5, r6      \n\t"            \
1311
        "adds %[l], %[l], r5    \n\t"            \
1312
        "movs r5, #0      \n\t"            \
1313
        "adcs %[h], r5    \n\t"            \
1314
        "adcs %[o], r5    \n\t"            \
1315
        /* al * bh: low 16 bits go to top of vl, high 16 bits to vh */ \
1316
        "lsrs r5, %[b], #16   \n\t"            \
1317
        "muls r6, r5      \n\t"            \
1318
        "lsrs r5, r6, #16   \n\t"            \
1319
        "lsls r6, r6, #16   \n\t"            \
1320
        "adds %[l], %[l], r6    \n\t"            \
1321
        "adcs %[h], r5    \n\t"            \
1322
        "movs r5, #0      \n\t"            \
1323
        "adcs %[o], r5    \n\t"            \
1324
        /* ah * bh: added to vh, carry into vo */        \
1325
        "lsrs r6, %[a], #16   \n\t"            \
1326
        "lsrs r5, %[b], #16   \n\t"            \
1327
        "muls r5, r6      \n\t"            \
1328
        "adds %[h], %[h], r5    \n\t"            \
1329
        "movs r5, #0      \n\t"            \
1330
        "adcs %[o], r5    \n\t"            \
1331
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1332
        "uxth r5, %[b]    \n\t"            \
1333
        "muls r6, r5      \n\t"            \
1334
        "lsrs r5, r6, #16   \n\t"            \
1335
        "lsls r6, r6, #16   \n\t"            \
1336
        "adds %[l], %[l], r6    \n\t"            \
1337
        "adcs %[h], r5    \n\t"            \
1338
        "movs r5, #0      \n\t"            \
1339
        "adcs %[o], r5    \n\t"            \
1340
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1341
        : [a] "l" (va), [b] "l" (vb)                     \
1342
        : "r5", "r6", "cc"                               \
1343
    )
1344
#endif
1345
/* Multiply va by vb and add double size result into: vh | vl
 *
 * There is no third (overflow) word: a carry out of vh is not recorded.
 * The product is built from four partial products of the 16-bit halves of
 * va and vb (al, ah, bl, bh).
 * Scratch registers: r4, r5 (zero addend), r6. Condition flags are modified.
 */
1346
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1347
    __asm__ __volatile__ (                               \
1348
        /* al * bl: added to vl, carry into vh */        \
1349
        "uxth r6, %[a]    \n\t"            \
1350
        "uxth r4, %[b]    \n\t"            \
1351
        "muls r4, r6      \n\t"            \
1352
        "adds %[l], %[l], r4    \n\t"            \
1353
        "movs r5, #0      \n\t"            \
1354
        "adcs %[h], r5    \n\t"            \
1355
        /* al * bh: low 16 bits go to top of vl, high 16 bits to vh */ \
1356
        "lsrs r4, %[b], #16   \n\t"            \
1357
        "muls r6, r4      \n\t"            \
1358
        "lsrs r4, r6, #16   \n\t"            \
1359
        "lsls r6, r6, #16   \n\t"            \
1360
        "adds %[l], %[l], r6    \n\t"            \
1361
        "adcs %[h], r4    \n\t"            \
1362
        /* ah * bh: added to vh */                       \
1363
        "lsrs r6, %[a], #16   \n\t"            \
1364
        "lsrs r4, %[b], #16   \n\t"            \
1365
        "muls r4, r6      \n\t"            \
1366
        "adds %[h], %[h], r4    \n\t"            \
1367
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1368
        "uxth r4, %[b]    \n\t"            \
1369
        "muls r6, r4      \n\t"            \
1370
        "lsrs r4, r6, #16   \n\t"            \
1371
        "lsls r6, r6, #16   \n\t"            \
1372
        "adds %[l], %[l], r6    \n\t"            \
1373
        "adcs %[h], r4    \n\t"            \
1374
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1375
        : [a] "l" (va), [b] "l" (vb)                     \
1376
        : "r4", "r5", "r6", "cc"                         \
1377
    )
1378
#ifndef WOLFSSL_SP_SMALL
1379
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Each of the four 16-bit partial products (al*bl, al*bh, ah*bh, ah*bl) is
 * computed once and then added two times, with the carry propagated into
 * vo after every addition.
 * Scratch registers: r5 (zero addend), r6, r7. Condition flags are modified.
 */
1380
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1381
    __asm__ __volatile__ (                               \
1382
        /* al * bl: added twice to vl, carries into vh and vo */ \
1383
        "uxth r6, %[a]    \n\t"            \
1384
        "uxth r7, %[b]    \n\t"            \
1385
        "muls r7, r6      \n\t"            \
1386
        "adds %[l], %[l], r7    \n\t"            \
1387
        "movs r5, #0      \n\t"            \
1388
        "adcs %[h], r5    \n\t"            \
1389
        "adcs %[o], r5    \n\t"            \
1390
        "adds %[l], %[l], r7    \n\t"            \
1391
        "adcs %[h], r5    \n\t"            \
1392
        "adcs %[o], r5    \n\t"            \
1393
        /* al * bh: split at bit 16 and added twice to vh | vl */ \
1394
        "lsrs r7, %[b], #16   \n\t"            \
1395
        "muls r6, r7      \n\t"            \
1396
        "lsrs r7, r6, #16   \n\t"            \
1397
        "lsls r6, r6, #16   \n\t"            \
1398
        "adds %[l], %[l], r6    \n\t"            \
1399
        "adcs %[h], r7    \n\t"            \
1400
        "adcs %[o], r5    \n\t"            \
1401
        "adds %[l], %[l], r6    \n\t"            \
1402
        "adcs %[h], r7    \n\t"            \
1403
        "adcs %[o], r5    \n\t"            \
1404
        /* ah * bh: added twice to vh, carries into vo */ \
1405
        "lsrs r6, %[a], #16   \n\t"            \
1406
        "lsrs r7, %[b], #16   \n\t"            \
1407
        "muls r7, r6      \n\t"            \
1408
        "adds %[h], %[h], r7    \n\t"            \
1409
        "adcs %[o], r5    \n\t"            \
1410
        "adds %[h], %[h], r7    \n\t"            \
1411
        "adcs %[o], r5    \n\t"            \
1412
        /* ah * bl: split at bit 16 and added twice to vh | vl */ \
1413
        "uxth r7, %[b]    \n\t"            \
1414
        "muls r6, r7      \n\t"            \
1415
        "lsrs r7, r6, #16   \n\t"            \
1416
        "lsls r6, r6, #16   \n\t"            \
1417
        "adds %[l], %[l], r6    \n\t"            \
1418
        "adcs %[h], r7    \n\t"            \
1419
        "adcs %[o], r5    \n\t"            \
1420
        "adds %[l], %[l], r6    \n\t"            \
1421
        "adcs %[h], r7    \n\t"            \
1422
        "adcs %[o], r5    \n\t"            \
1423
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1424
        : [a] "l" (va), [b] "l" (vb)                     \
1425
        : "r5", "r6", "r7", "cc"                         \
1426
    )
1427
#else
1428
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant that needs only r5 and r6 as low scratch registers. The register
 * holding va is saved in r8, reused as the zero addend for the carry-only
 * additions, reloaded from r8 for the ah products and restored from r8
 * before the asm block ends.
 *
 * Transfers to and from r8 use plain "mov": the flag-setting "movs"
 * register form only accepts low registers (r0-r7) in the 16-bit Thumb
 * instruction set, so "movs r8, ..." cannot be encoded there. None of the
 * moves is followed by an instruction that consumes their flags.
 *
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * and then restored inside the block - confirm this is acceptable for the
 * supported compilers.
 * Condition flags are modified.
 */
1429
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1430
    __asm__ __volatile__ (                               \
1431
        "mov  r8, %[a]    \n\t"            \
1432
        /* al * bl: added twice to vl, carries into vh and vo */ \
1433
        "uxth r6, %[a]    \n\t"            \
1434
        "uxth r5, %[b]    \n\t"            \
1435
        "muls r5, r6      \n\t"            \
1436
        "adds %[l], %[l], r5    \n\t"            \
1437
        "movs %[a], #0    \n\t"            \
1438
        "adcs %[h], %[a]    \n\t"            \
1439
        "adcs %[o], %[a]    \n\t"            \
1440
        "adds %[l], %[l], r5    \n\t"            \
1441
        "adcs %[h], %[a]    \n\t"            \
1442
        "adcs %[o], %[a]    \n\t"            \
1443
        /* al * bh: split at bit 16 and added twice to vh | vl */ \
1444
        "lsrs r5, %[b], #16   \n\t"            \
1445
        "muls r6, r5      \n\t"            \
1446
        "lsrs r5, r6, #16   \n\t"            \
1447
        "lsls r6, r6, #16   \n\t"            \
1448
        "adds %[l], %[l], r6    \n\t"            \
1449
        "adcs %[h], r5    \n\t"            \
1450
        "adcs %[o], %[a]    \n\t"            \
1451
        "adds %[l], %[l], r6    \n\t"            \
1452
        "adcs %[h], r5    \n\t"            \
1453
        "adcs %[o], %[a]    \n\t"            \
1454
        /* ah * bh: va is reloaded from r8 to extract ah */ \
1455
        "mov  %[a], r8    \n\t"            \
1456
        "lsrs r6, %[a], #16   \n\t"            \
1457
        "lsrs r5, %[b], #16   \n\t"            \
1458
        "muls r5, r6      \n\t"            \
1459
        "adds %[h], %[h], r5    \n\t"            \
1460
        "movs %[a], #0    \n\t"            \
1461
        "adcs %[o], %[a]    \n\t"            \
1462
        "adds %[h], %[h], r5    \n\t"            \
1463
        "adcs %[o], %[a]    \n\t"            \
1464
        /* ah * bl: split at bit 16 and added twice to vh | vl */ \
1465
        "uxth r5, %[b]    \n\t"            \
1466
        "muls r6, r5      \n\t"            \
1467
        "lsrs r5, r6, #16   \n\t"            \
1468
        "lsls r6, r6, #16   \n\t"            \
1469
        "adds %[l], %[l], r6    \n\t"            \
1470
        "adcs %[h], r5    \n\t"            \
1471
        "adcs %[o], %[a]    \n\t"            \
1472
        "adds %[l], %[l], r6    \n\t"            \
1473
        "adcs %[h], r5    \n\t"            \
1474
        "adcs %[o], %[a]    \n\t"            \
1475
        "mov  %[a], r8    \n\t"            \
1476
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1477
        : [a] "l" (va), [b] "l" (vb)                     \
1478
        : "r5", "r6", "r8", "cc"                         \
1479
    )
1480
#endif
1481
/* Multiply va by vb and add double size result twice into: vo | vh | vl
1482
 * Assumes first add will not overflow vh | vl
1483
 *
 * Accordingly no carry is taken into vo after the two al * bl additions
 * nor after the first al * bh addition; every later addition propagates
 * its carry into vo.
 * Scratch registers: r5 (zero addend), r6, r7. Condition flags are modified.
 */
1484
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1485
    __asm__ __volatile__ (                               \
1486
        /* al * bl: added twice to vl, carry into vh only */ \
1487
        "uxth r6, %[a]    \n\t"            \
1488
        "uxth r7, %[b]    \n\t"            \
1489
        "muls r7, r6      \n\t"            \
1490
        "adds %[l], %[l], r7    \n\t"            \
1491
        "movs r5, #0      \n\t"            \
1492
        "adcs %[h], r5    \n\t"            \
1493
        "adds %[l], %[l], r7    \n\t"            \
1494
        "adcs %[h], r5    \n\t"            \
1495
        /* al * bh: split at bit 16; only second add carries into vo */ \
1496
        "lsrs r7, %[b], #16   \n\t"            \
1497
        "muls r6, r7      \n\t"            \
1498
        "lsrs r7, r6, #16   \n\t"            \
1499
        "lsls r6, r6, #16   \n\t"            \
1500
        "adds %[l], %[l], r6    \n\t"            \
1501
        "adcs %[h], r7    \n\t"            \
1502
        "adds %[l], %[l], r6    \n\t"            \
1503
        "adcs %[h], r7    \n\t"            \
1504
        "adcs %[o], r5    \n\t"            \
1505
        /* ah * bh: added twice to vh, carries into vo */ \
1506
        "lsrs r6, %[a], #16   \n\t"            \
1507
        "lsrs r7, %[b], #16   \n\t"            \
1508
        "muls r7, r6      \n\t"            \
1509
        "adds %[h], %[h], r7    \n\t"            \
1510
        "adcs %[o], r5    \n\t"            \
1511
        "adds %[h], %[h], r7    \n\t"            \
1512
        "adcs %[o], r5    \n\t"            \
1513
        /* ah * bl: split at bit 16 and added twice to vh | vl */ \
1514
        "uxth r7, %[b]    \n\t"            \
1515
        "muls r6, r7      \n\t"            \
1516
        "lsrs r7, r6, #16   \n\t"            \
1517
        "lsls r6, r6, #16   \n\t"            \
1518
        "adds %[l], %[l], r6    \n\t"            \
1519
        "adcs %[h], r7    \n\t"            \
1520
        "adcs %[o], r5    \n\t"            \
1521
        "adds %[l], %[l], r6    \n\t"            \
1522
        "adcs %[h], r7    \n\t"            \
1523
        "adcs %[o], r5    \n\t"            \
1524
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1525
        : [a] "l" (va), [b] "l" (vb)                     \
1526
        : "r5", "r6", "r7", "cc"                         \
1527
    )
1528
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves al and ah. vl is set to al * al, vh to
 * ah * ah, and the cross product al * ah is added in doubled and shifted up
 * by 16 bits (hence the combined shifts of 17 left / 15 right).
 * Scratch registers: r5, r6. Condition flags are modified.
 */
1529
#define SP_ASM_SQR(vl, vh, va)                           \
1530
    __asm__ __volatile__ (                               \
1531
        "lsrs r5, %[a], #16   \n\t"            \
1532
        "uxth r6, %[a]    \n\t"            \
1533
        "mov  %[l], r6    \n\t"            \
1534
        "mov  %[h], r5    \n\t"            \
1535
        /* al * al: becomes the low word */              \
1536
        "muls %[l], %[l]    \n\t"            \
1537
        /* ah * ah: becomes the high word */             \
1538
        "muls %[h], %[h]    \n\t"            \
1539
        /* 2 * al * ah: low part into vl, high part plus carry into vh */ \
1540
        "muls r6, r5      \n\t"            \
1541
        "lsrs r5, r6, #15   \n\t"            \
1542
        "lsls r6, r6, #17   \n\t"            \
1543
        "adds %[l], %[l], r6    \n\t"            \
1544
        "adcs %[h], r5    \n\t"            \
1545
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1546
        : [a] "l" (va)                                   \
1547
        : "r5", "r6", "cc"                               \
1548
    )
1549
/* Square va and add double size result into: vo | vh | vl
 *
 * al * al is added to vl and ah * ah to vh, then the doubled cross product
 * al * ah is added across vh | vl. The carry is propagated into vo after
 * each of the two addition chains.
 * Scratch registers: r4, r5 (zero addend), r6. Condition flags are modified.
 */
1550
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1551
    __asm__ __volatile__ (                               \
1552
        "lsrs r4, %[a], #16   \n\t"            \
1553
        "uxth r6, %[a]    \n\t"            \
1554
        /* al * al: added to vl below */                 \
1555
        "muls r6, r6      \n\t"            \
1556
        /* ah * ah: added to vh below, carry into vo */  \
1557
        "muls r4, r4      \n\t"            \
1558
        "adds %[l], %[l], r6    \n\t"            \
1559
        "adcs %[h], r4    \n\t"            \
1560
        "movs r5, #0      \n\t"            \
1561
        "adcs %[o], r5    \n\t"            \
1562
        "lsrs r4, %[a], #16   \n\t"            \
1563
        "uxth r6, %[a]    \n\t"            \
1564
        /* 2 * al * ah: shifted up 17 in total, split across vh | vl */ \
1565
        "muls r6, r4      \n\t"            \
1566
        "lsrs r4, r6, #15   \n\t"            \
1567
        "lsls r6, r6, #17   \n\t"            \
1568
        "adds %[l], %[l], r6    \n\t"            \
1569
        "adcs %[h], r4    \n\t"            \
1570
        "adcs %[o], r5    \n\t"            \
1571
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1572
        : [a] "l" (va)                                   \
1573
        : "r4", "r5", "r6", "cc"                         \
1574
    )
1575
/* Square va and add double size result into: vh | vl
 *
 * There is no third (overflow) word: a carry out of vh is not recorded.
 * al * al is added to vl and ah * ah to vh, then the doubled cross product
 * al * ah is added across vh | vl.
 * Scratch registers: r6, r7. Condition flags are modified.
 */
1576
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1577
    __asm__ __volatile__ (                               \
1578
        "lsrs r7, %[a], #16   \n\t"            \
1579
        "uxth r6, %[a]    \n\t"            \
1580
        /* al * al: added to vl below */                 \
1581
        "muls r6, r6      \n\t"            \
1582
        /* ah * ah: added to vh below together with the carry */ \
1583
        "muls r7, r7      \n\t"            \
1584
        "adds %[l], %[l], r6    \n\t"            \
1585
        "adcs %[h], r7    \n\t"            \
1586
        "lsrs r7, %[a], #16   \n\t"            \
1587
        "uxth r6, %[a]    \n\t"            \
1588
        /* 2 * al * ah: shifted up 17 in total, split across vh | vl */ \
1589
        "muls r6, r7      \n\t"            \
1590
        "lsrs r7, r6, #15   \n\t"            \
1591
        "lsls r6, r6, #17   \n\t"            \
1592
        "adds %[l], %[l], r6    \n\t"            \
1593
        "adcs %[h], r7    \n\t"            \
1594
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1595
        : [a] "l" (va)                                   \
1596
        : "r6", "r7", "cc"                               \
1597
    )
1598
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh (r5 holds the
 * zero addend). Scratch register: r5. Condition flags are modified.
 */
1599
#define SP_ASM_ADDC(vl, vh, va)                          \
1600
    __asm__ __volatile__ (                               \
1601
        "adds %[l], %[l], %[a]  \n\t"            \
1602
        "movs r5, #0      \n\t"            \
1603
        "adcs %[h], r5    \n\t"            \
1604
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1605
        : [a] "l" (va)                                   \
1606
        : "r5", "cc"                                     \
1607
    )
1608
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is subtracted from vh
 * (r5 holds the zero operand). Scratch register: r5. Condition flags are
 * modified.
 */
1609
#define SP_ASM_SUBC(vl, vh, va)                          \
1610
    __asm__ __volatile__ (                               \
1611
        "subs %[l], %[l], %[a]  \n\t"            \
1612
        "movs r5, #0      \n\t"            \
1613
        "sbcs %[h], r5    \n\t"            \
1614
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1615
        : [a] "l" (va)                                   \
1616
        : "r5", "cc"                                     \
1617
    )
1618
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word addition with carry propagation is simply performed twice.
 * No scratch registers are used. Condition flags are modified.
 */
1619
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1620
    __asm__ __volatile__ (                               \
1621
        "adds %[l], %[l], %[a]  \n\t"            \
1622
        "adcs %[h], %[b]    \n\t"            \
1623
        "adcs %[o], %[c]    \n\t"            \
1624
        "adds %[l], %[l], %[a]  \n\t"            \
1625
        "adcs %[h], %[b]    \n\t"            \
1626
        "adcs %[o], %[c]    \n\t"            \
1627
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1628
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
1629
        : "cc"                                           \
1630
    )
1631
1632
#elif defined(WOLFSSL_KEIL)
1633
1634
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The previous contents of vl and vh are overwritten. The product is built
 * from four partial products of the 16-bit halves of va and vb (al, ah, bl,
 * bh), written with three-operand instruction syntax.
 * Scratch registers: r4, r5 (zero addend), r6. Condition flags are modified.
 */
1635
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1636
    __asm__ __volatile__ (                               \
1637
        /* al * bl: becomes the initial value of vl */   \
1638
        "uxth r6, %[a]    \n\t"            \
1639
        "uxth %[l], %[b]    \n\t"            \
1640
        "muls %[l], r6, %[l]    \n\t"            \
1641
        /* al * bh: high 16 bits initialise vh, low 16 bits added to vl */ \
1642
        "lsrs r4, %[b], #16   \n\t"            \
1643
        "muls r6, r4, r6    \n\t"            \
1644
        "lsrs %[h], r6, #16   \n\t"            \
1645
        "lsls r6, r6, #16   \n\t"            \
1646
        "adds %[l], %[l], r6    \n\t"            \
1647
        "movs r5, #0      \n\t"            \
1648
        "adcs %[h], %[h], r5    \n\t"            \
1649
        /* ah * bh: added to vh (r4 still holds bh) */   \
1650
        "lsrs r6, %[a], #16   \n\t"            \
1651
        "muls r4, r6, r4    \n\t"            \
1652
        "adds %[h], %[h], r4    \n\t"            \
1653
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1654
        "uxth r4, %[b]    \n\t"            \
1655
        "muls r6, r4, r6    \n\t"            \
1656
        "lsrs r4, r6, #16   \n\t"            \
1657
        "lsls r6, r6, #16   \n\t"            \
1658
        "adds %[l], %[l], r6    \n\t"            \
1659
        "adcs %[h], %[h], r4    \n\t"            \
1660
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1661
        : [a] "l" (va), [b] "l" (vb)                     \
1662
        : "r4", "r5", "r6", "cc"                         \
1663
    )
1664
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The previous contents of vl, vh and vo are overwritten. vo is set to zero
 * and doubles as the zero addend for the first carry addition; no later
 * instruction writes to it.
 * Scratch registers: r6, r7. Condition flags are modified.
 */
1665
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1666
    __asm__ __volatile__ (                               \
1667
        /* al * bl: becomes the initial value of vl */   \
1668
        "uxth r6, %[a]    \n\t"            \
1669
        "uxth %[l], %[b]    \n\t"            \
1670
        "muls %[l], r6, %[l]    \n\t"            \
1671
        /* al * bh: high 16 bits initialise vh, low 16 bits added to vl */ \
1672
        "lsrs r7, %[b], #16   \n\t"            \
1673
        "muls r6, r7, r6    \n\t"            \
1674
        "lsrs %[h], r6, #16   \n\t"            \
1675
        "lsls r6, r6, #16   \n\t"            \
1676
        "adds %[l], %[l], r6    \n\t"            \
1677
        "movs %[o], #0    \n\t"            \
1678
        "adcs %[h], %[h], %[o]  \n\t"            \
1679
        /* ah * bh: added to vh (r7 still holds bh) */   \
1680
        "lsrs r6, %[a], #16   \n\t"            \
1681
        "muls r7, r6, r7    \n\t"            \
1682
        "adds %[h], %[h], r7    \n\t"            \
1683
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1684
        "uxth r7, %[b]    \n\t"            \
1685
        "muls r6, r7, r6    \n\t"            \
1686
        "lsrs r7, r6, #16   \n\t"            \
1687
        "lsls r6, r6, #16   \n\t"            \
1688
        "adds %[l], %[l], r6    \n\t"            \
1689
        "adcs %[h], %[h], r7    \n\t"            \
1690
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1691
        : [a] "l" (va), [b] "l" (vb)                     \
1692
        : "r6", "r7", "cc"                               \
1693
    )
1694
#ifndef WOLFSSL_SP_SMALL
1695
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is built from four partial products of the 16-bit halves of
 * va and vb (al, ah, bl, bh), written with three-operand instruction
 * syntax. r5 is set to zero and used as the addend when only the carry has
 * to be propagated.
 * Scratch registers: r5, r6, r7. Condition flags are modified.
 */
1696
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1697
    __asm__ __volatile__ (                               \
1698
        /* al * bl: added to vl, carry rippled into vh and vo */ \
1699
        "uxth r6, %[a]    \n\t"            \
1700
        "uxth r7, %[b]    \n\t"            \
1701
        "muls r7, r6, r7    \n\t"            \
1702
        "adds %[l], %[l], r7    \n\t"            \
1703
        "movs r5, #0      \n\t"            \
1704
        "adcs %[h], %[h], r5    \n\t"            \
1705
        "adcs %[o], %[o], r5    \n\t"            \
1706
        /* al * bh: low 16 bits go to top of vl, high 16 bits to vh */ \
1707
        "lsrs r7, %[b], #16   \n\t"            \
1708
        "muls r6, r7, r6    \n\t"            \
1709
        "lsrs r7, r6, #16   \n\t"            \
1710
        "lsls r6, r6, #16   \n\t"            \
1711
        "adds %[l], %[l], r6    \n\t"            \
1712
        "adcs %[h], %[h], r7    \n\t"            \
1713
        "adcs %[o], %[o], r5    \n\t"            \
1714
        /* ah * bh: added to vh, carry into vo */        \
1715
        "lsrs r6, %[a], #16   \n\t"            \
1716
        "lsrs r7, %[b], #16   \n\t"            \
1717
        "muls r7, r6, r7    \n\t"            \
1718
        "adds %[h], %[h], r7    \n\t"            \
1719
        "adcs %[o], %[o], r5    \n\t"            \
1720
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1721
        "uxth r7, %[b]    \n\t"            \
1722
        "muls r6, r7, r6    \n\t"            \
1723
        "lsrs r7, r6, #16   \n\t"            \
1724
        "lsls r6, r6, #16   \n\t"            \
1725
        "adds %[l], %[l], r6    \n\t"            \
1726
        "adcs %[h], %[h], r7    \n\t"            \
1727
        "adcs %[o], %[o], r5    \n\t"            \
1728
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1729
        : [a] "l" (va), [b] "l" (vb)                     \
1730
        : "r5", "r6", "r7", "cc"                         \
1731
    )
1732
#else
1733
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant that needs only r5 and r6 as scratch registers. r5 serves both as
 * a temporary for the partial products and as the zero addend, so it is
 * re-zeroed before each carry-only addition into vo.
 * Condition flags are modified.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1734
    __asm__ __volatile__ (                               \
1735
        /* al * bl: added to vl, carry rippled into vh and vo */ \
1736
        "uxth   r6, %[a]                \n\t"            \
1737
        "uxth   r5, %[b]                \n\t"            \
1738
        "muls   r5, r6, r5              \n\t"            \
1739
        "adds   %[l], %[l], r5          \n\t"            \
1740
        "movs   r5, #0                  \n\t"            \
1741
        "adcs   %[h], %[h], r5          \n\t"            \
1742
        "adcs   %[o], %[o], r5          \n\t"            \
1743
        /* al * bh: low 16 bits go to top of vl, high 16 bits to vh */ \
1744
        "lsrs   r5, %[b], #16           \n\t"            \
1745
        "muls   r6, r5, r6              \n\t"            \
1746
        "lsrs   r5, r6, #16             \n\t"            \
1747
        "lsls   r6, r6, #16             \n\t"            \
1748
        "adds   %[l], %[l], r6          \n\t"            \
1749
        "adcs   %[h], %[h], r5          \n\t"            \
1750
        "movs   r5, #0                  \n\t"            \
1751
        "adcs   %[o], %[o], r5          \n\t"            \
1752
        /* ah * bh: added to vh, carry into vo */        \
1753
        "lsrs   r6, %[a], #16           \n\t"            \
1754
        "lsrs   r5, %[b], #16           \n\t"            \
1755
        "muls   r5, r6, r5              \n\t"            \
1756
        "adds   %[h], %[h], r5          \n\t"            \
1757
        "movs   r5, #0                  \n\t"            \
1758
        "adcs   %[o], %[o], r5          \n\t"            \
1759
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1760
        "uxth   r5, %[b]                \n\t"            \
1761
        "muls   r6, r5, r6              \n\t"            \
1762
        "lsrs   r5, r6, #16             \n\t"            \
1763
        "lsls   r6, r6, #16             \n\t"            \
1764
        "adds   %[l], %[l], r6          \n\t"            \
1765
        "adcs   %[h], %[h], r5          \n\t"            \
1766
        "movs   r5, #0                  \n\t"            \
1767
        "adcs   %[o], %[o], r5          \n\t"            \
1768
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1769
        : [a] "l" (va), [b] "l" (vb)                     \
1770
        : "r5", "r6", "cc"                               \
1771
    )
1772
#endif
1773
/* Multiply va by vb and add double size result into: vh | vl
 *
 * There is no third (overflow) word: a carry out of vh is not recorded.
 * The product is built from four partial products of the 16-bit halves of
 * va and vb (al, ah, bl, bh), written with three-operand instruction
 * syntax.
 * Scratch registers: r4, r5 (zero addend), r6. Condition flags are modified.
 */
1774
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1775
    __asm__ __volatile__ (                               \
1776
        /* al * bl: added to vl, carry into vh */        \
1777
        "uxth r6, %[a]    \n\t"            \
1778
        "uxth r4, %[b]    \n\t"            \
1779
        "muls r4, r6, r4    \n\t"            \
1780
        "adds %[l], %[l], r4    \n\t"            \
1781
        "movs r5, #0      \n\t"            \
1782
        "adcs %[h], %[h], r5    \n\t"            \
1783
        /* al * bh: low 16 bits go to top of vl, high 16 bits to vh */ \
1784
        "lsrs r4, %[b], #16   \n\t"            \
1785
        "muls r6, r4, r6    \n\t"            \
1786
        "lsrs r4, r6, #16   \n\t"            \
1787
        "lsls r6, r6, #16   \n\t"            \
1788
        "adds %[l], %[l], r6    \n\t"            \
1789
        "adcs %[h], %[h], r4    \n\t"            \
1790
        /* ah * bh: added to vh */                       \
1791
        "lsrs r6, %[a], #16   \n\t"            \
1792
        "lsrs r4, %[b], #16   \n\t"            \
1793
        "muls r4, r6, r4    \n\t"            \
1794
        "adds %[h], %[h], r4    \n\t"            \
1795
        /* ah * bl: low 16 bits go to top of vl, high 16 bits to vh */ \
1796
        "uxth r4, %[b]    \n\t"            \
1797
        "muls r6, r4, r6    \n\t"            \
1798
        "lsrs r4, r6, #16   \n\t"            \
1799
        "lsls r6, r6, #16   \n\t"            \
1800
        "adds %[l], %[l], r6    \n\t"            \
1801
        "adcs %[h], %[h], r4    \n\t"            \
1802
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1803
        : [a] "l" (va), [b] "l" (vb)                     \
1804
        : "r4", "r5", "r6", "cc"                         \
1805
    )
1806
#ifndef WOLFSSL_SP_SMALL
1807
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Each of the four 16-bit partial products (al*bl, al*bh, ah*bh, ah*bl) is
 * computed once and then added two times, with the carry propagated into
 * vo after every addition. Written with three-operand instruction syntax.
 * Scratch registers: r5 (zero addend), r6, r7. Condition flags are modified.
 */
1808
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1809
    __asm__ __volatile__ (                               \
1810
        /* al * bl: added twice to vl, carries into vh and vo */ \
1811
        "uxth r6, %[a]    \n\t"            \
1812
        "uxth r7, %[b]    \n\t"            \
1813
        "muls r7, r6, r7    \n\t"            \
1814
        "adds %[l], %[l], r7    \n\t"            \
1815
        "movs r5, #0      \n\t"            \
1816
        "adcs %[h], %[h], r5    \n\t"            \
1817
        "adcs %[o], %[o], r5    \n\t"            \
1818
        "adds %[l], %[l], r7    \n\t"            \
1819
        "adcs %[h], %[h], r5    \n\t"            \
1820
        "adcs %[o], %[o], r5    \n\t"            \
1821
        /* al * bh: split at bit 16 and added twice to vh | vl */ \
1822
        "lsrs r7, %[b], #16   \n\t"            \
1823
        "muls r6, r7, r6    \n\t"            \
1824
        "lsrs r7, r6, #16   \n\t"            \
1825
        "lsls r6, r6, #16   \n\t"            \
1826
        "adds %[l], %[l], r6    \n\t"            \
1827
        "adcs %[h], %[h], r7    \n\t"            \
1828
        "adcs %[o], %[o], r5    \n\t"            \
1829
        "adds %[l], %[l], r6    \n\t"            \
1830
        "adcs %[h], %[h], r7    \n\t"            \
1831
        "adcs %[o], %[o], r5    \n\t"            \
1832
        /* ah * bh: added twice to vh, carries into vo */ \
1833
        "lsrs r6, %[a], #16   \n\t"            \
1834
        "lsrs r7, %[b], #16   \n\t"            \
1835
        "muls r7, r6, r7    \n\t"            \
1836
        "adds %[h], %[h], r7    \n\t"            \
1837
        "adcs %[o], %[o], r5    \n\t"            \
1838
        "adds %[h], %[h], r7    \n\t"            \
1839
        "adcs %[o], %[o], r5    \n\t"            \
1840
        /* ah * bl: split at bit 16 and added twice to vh | vl */ \
1841
        "uxth r7, %[b]    \n\t"            \
1842
        "muls r6, r7, r6    \n\t"            \
1843
        "lsrs r7, r6, #16   \n\t"            \
1844
        "lsls r6, r6, #16   \n\t"            \
1845
        "adds %[l], %[l], r6    \n\t"            \
1846
        "adcs %[h], %[h], r7    \n\t"            \
1847
        "adcs %[o], %[o], r5    \n\t"            \
1848
        "adds %[l], %[l], r6    \n\t"            \
1849
        "adcs %[h], %[h], r7    \n\t"            \
1850
        "adcs %[o], %[o], r5    \n\t"            \
1851
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1852
        : [a] "l" (va), [b] "l" (vb)                     \
1853
        : "r5", "r6", "r7", "cc"                         \
1854
    )
1855
#else
1856
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant that needs only r5 and r6 as low scratch registers. The register
 * holding va is saved in r8, reused as the zero addend for the carry-only
 * additions, reloaded from r8 for the ah products and restored from r8
 * before the asm block ends.
 *
 * Transfers to and from r8 use plain "mov": the flag-setting "movs"
 * register form only accepts low registers (r0-r7) in the 16-bit Thumb
 * instruction set, so "movs r8, ..." cannot be encoded there. None of the
 * moves is followed by an instruction that consumes their flags.
 *
 * NOTE(review): %[a] is declared as an input-only operand but is written
 * and then restored inside the block - confirm this is acceptable for the
 * supported compilers.
 * Condition flags are modified.
 */
1857
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1858
    __asm__ __volatile__ (                               \
1859
        "mov  r8, %[a]    \n\t"            \
1860
        /* al * bl: added twice to vl, carries into vh and vo */ \
1861
        "uxth r6, %[a]    \n\t"            \
1862
        "uxth r5, %[b]    \n\t"            \
1863
        "muls r5, r6, r5    \n\t"            \
1864
        "adds %[l], %[l], r5    \n\t"            \
1865
        "movs %[a], #0    \n\t"            \
1866
        "adcs %[h], %[h], %[a]  \n\t"            \
1867
        "adcs %[o], %[o], %[a]  \n\t"            \
1868
        "adds %[l], %[l], r5    \n\t"            \
1869
        "adcs %[h], %[h], %[a]  \n\t"            \
1870
        "adcs %[o], %[o], %[a]  \n\t"            \
1871
        /* al * bh: split at bit 16 and added twice to vh | vl */ \
1872
        "lsrs r5, %[b], #16   \n\t"            \
1873
        "muls r6, r5, r6    \n\t"            \
1874
        "lsrs r5, r6, #16   \n\t"            \
1875
        "lsls r6, r6, #16   \n\t"            \
1876
        "adds %[l], %[l], r6    \n\t"            \
1877
        "adcs %[h], %[h], r5    \n\t"            \
1878
        "adcs %[o], %[o], %[a]  \n\t"            \
1879
        "adds %[l], %[l], r6    \n\t"            \
1880
        "adcs %[h], %[h], r5    \n\t"            \
1881
        "adcs %[o], %[o], %[a]  \n\t"            \
1882
        /* ah * bh: va is reloaded from r8 to extract ah */ \
1883
        "mov  %[a], r8    \n\t"            \
1884
        "lsrs r6, %[a], #16   \n\t"            \
1885
        "lsrs r5, %[b], #16   \n\t"            \
1886
        "muls r5, r6, r5    \n\t"            \
1887
        "adds %[h], %[h], r5    \n\t"            \
1888
        "movs %[a], #0    \n\t"            \
1889
        "adcs %[o], %[o], %[a]  \n\t"            \
1890
        "adds %[h], %[h], r5    \n\t"            \
1891
        "adcs %[o], %[o], %[a]  \n\t"            \
1892
        /* ah * bl: split at bit 16 and added twice to vh | vl */ \
1893
        "uxth r5, %[b]    \n\t"            \
1894
        "muls r6, r5, r6    \n\t"            \
1895
        "lsrs r5, r6, #16   \n\t"            \
1896
        "lsls r6, r6, #16   \n\t"            \
1897
        "adds %[l], %[l], r6    \n\t"            \
1898
        "adcs %[h], %[h], r5    \n\t"            \
1899
        "adcs %[o], %[o], %[a]  \n\t"            \
1900
        "adds %[l], %[l], r6    \n\t"            \
1901
        "adcs %[h], %[h], r5    \n\t"            \
1902
        "adcs %[o], %[o], %[a]  \n\t"            \
1903
        "mov  %[a], r8    \n\t"            \
1904
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1905
        : [a] "l" (va), [b] "l" (vb)                     \
1906
        : "r5", "r6", "r8", "cc"                         \
1907
    )
1908
#endif
1909
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl); every partial product is added
 * in twice.
 * Assumes first add will not overflow vh | vl: no carry is taken into vo
 * for the al * bl term or for the first add of the al * bh term.
 *
 * vl, vh and vo are read-write operands; va and vb are inputs.
 * Clobbers r5 (kept at zero to add in the carry), r6, r7 and the flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth r7, %[b]    \n\t"                          \
        "muls r7, r6, r7    \n\t"                        \
        "adds %[l], %[l], r7    \n\t"                    \
        "movs r5, #0      \n\t"                          \
        "adcs %[h], %[h], r5    \n\t"                    \
        "adds %[l], %[l], r7    \n\t"                    \
        "adcs %[h], %[h], r5    \n\t"                    \
        /* al * bh (weight 2^16) */                      \
        "lsrs r7, %[b], #16   \n\t"                      \
        "muls r6, r7, r6    \n\t"                        \
        "lsrs r7, r6, #16   \n\t"                        \
        "lsls r6, r6, #16   \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        /* ah * bh (weight 2^32) */                      \
        "lsrs r6, %[a], #16   \n\t"                      \
        "lsrs r7, %[b], #16   \n\t"                      \
        "muls r7, r6, r7    \n\t"                        \
        "adds %[h], %[h], r7    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        "adds %[h], %[h], r7    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        /* ah * bl (weight 2^16) */                      \
        "uxth r7, %[b]    \n\t"                          \
        "muls r6, r7, r6    \n\t"                        \
        "lsrs r7, r6, #16   \n\t"                        \
        "lsls r6, r6, #16   \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
1956
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are formed
 * directly in vl and vh; the cross term al * ah is then doubled and placed
 * at bit 16 by shifting it left by 17 (low word) and right by 15 (high
 * word) before being added in.
 *
 * vl and vh are overwritten before they are read; va is an input.
 * Clobbers r5, r6 and the flags.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "lsrs r5, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        "mov  %[l], r6    \n\t"                          \
        "mov  %[h], r5    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "muls %[l], %[l], %[l]  \n\t"                    \
        /* ah * ah (weight 2^32) */                      \
        "muls %[h], %[h], %[h]  \n\t"                    \
        /* 2 * al * ah (weight 2^16) */                  \
        "muls r6, r5, r6    \n\t"                        \
        "lsrs r5, r6, #15   \n\t"                        \
        "lsls r6, r6, #17   \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r5    \n\t"                    \
        : [h] "+l" (vh), [l] "+l" (vl)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
1977
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are added
 * to vl and vh first, then the halves are reloaded and the doubled cross
 * term 2 * al * ah is added in at bit 16. Each carry out of vh is added
 * into vo.
 *
 * vl, vh and vo are read-write operands; va is an input.
 * Clobbers r4, r5 (kept at zero to add in the carry), r6 and the flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "lsrs r4, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "muls r6, r6, r6    \n\t"                        \
        /* ah * ah (weight 2^32) */                      \
        "muls r4, r4, r4    \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r4    \n\t"                    \
        "movs r5, #0      \n\t"                          \
        "adcs %[o], %[o], r5    \n\t"                    \
        "lsrs r4, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* 2 * al * ah (weight 2^16) */                  \
        "muls r6, r4, r6    \n\t"                        \
        "lsrs r4, r6, #15   \n\t"                        \
        "lsls r6, r6, #17   \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r4    \n\t"                    \
        "adcs %[o], %[o], r5    \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va)                                   \
        : "r4", "r5", "r6", "cc"                         \
    )
2003
/* Square va and add double size result into: vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are added
 * to vl and vh first, then the halves are reloaded and the doubled cross
 * term 2 * al * ah is added in at bit 16. There is no third word: a carry
 * out of vh is not recorded.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r6, r7 and the flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "lsrs r7, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "muls r6, r6, r6    \n\t"                        \
        /* ah * ah (weight 2^32) */                      \
        "muls r7, r7, r7    \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        "lsrs r7, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* 2 * al * ah (weight 2^16) */                  \
        "muls r6, r7, r6    \n\t"                        \
        "lsrs r7, r6, #15   \n\t"                        \
        "lsls r6, r6, #17   \n\t"                        \
        "adds %[l], %[l], r6    \n\t"                    \
        "adcs %[h], %[h], r7    \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r6", "r7", "cc"                               \
    )
2026
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh. The carry is
 * consumed after r5 has been loaded with zero, so the sequence depends on
 * that load leaving the carry flag untouched.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r5 and the flags.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a]  \n\t"                    \
        "movs r5, #0      \n\t"                          \
        "adcs %[h], %[h], r5    \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
2036
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by a
 * subtract-with-carry of zero. The borrow is consumed after r5 has been
 * loaded with zero, so the sequence depends on that load leaving the carry
 * flag untouched.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r5 and the flags.
 */
#define SP_ASM_SUBC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subs %[l], %[l], %[a]  \n\t"                    \
        "movs r5, #0      \n\t"                          \
        "sbcs %[h], %[h], r5    \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
2046
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added to vo | vh | vl with carry
 * propagation from vl through vh into vo, and the same addition is then
 * performed a second time.
 *
 * vl, vh and vo are read-write operands; va, vb and vc are inputs.
 * No scratch registers are used; only the flags are clobbered.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "adds %[l], %[l], %[a]  \n\t"                    \
        "adcs %[h], %[h], %[b]  \n\t"                    \
        "adcs %[o], %[o], %[c]  \n\t"                    \
        "adds %[l], %[l], %[a]  \n\t"                    \
        "adcs %[h], %[h], %[b]  \n\t"                    \
        "adcs %[o], %[o], %[c]  \n\t"                    \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
        : "cc"                                           \
    )
2059
2060
#elif defined(__GNUC__)
2061
2062
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl). al * bl is formed directly in
 * vl, the high part of al * bh initialises vh, and the remaining terms are
 * added in with carry.
 *
 * vl and vh are overwritten before they are read; va and vb are inputs.
 * Clobbers r4, r5 (zero for the carry add), r6 and the flags.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth %[l], %[b]    \n\t"                        \
        "mul  %[l], r6    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r4, %[b], #16   \n\t"                      \
        "mul  r6, r4      \n\t"                          \
        "lsr  %[h], r6, #16   \n\t"                      \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "mul  r4, r6      \n\t"                          \
        "add  %[h], %[h], r4    \n\t"                    \
        /* ah * bl (weight 2^16) */                      \
        "uxth r4, %[b]    \n\t"                          \
        "mul  r6, r4      \n\t"                          \
        "lsr  r4, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r4    \n\t"                          \
        : [h] "+l" (vh), [l] "+l" (vl)                   \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
2092
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl) and written to vh | vl.
 * vo is set to zero; it also serves as the zero register for the one
 * carry-only add, so no separate zero scratch register is needed.
 *
 * vl, vh and vo are overwritten before they are read; va and vb are inputs.
 * Clobbers r6, r7 and the flags.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth %[l], %[b]    \n\t"                        \
        "mul  %[l], r6    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r6, r7      \n\t"                          \
        "lsr  %[h], r6, #16   \n\t"                      \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "mov  %[o], #0    \n\t"                          \
        "adc  %[h], %[o]    \n\t"                        \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "mul  r7, r6      \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        /* ah * bl (weight 2^16) */                      \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r6", "r7", "cc"                               \
    )
2122
#ifndef WOLFSSL_SP_SMALL
2123
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL is not defined.
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl); each one is added into
 * vh | vl at its weight and the carry is propagated into vo.
 *
 * vl, vh and vo are read-write operands; va and vb are inputs.
 * Clobbers r5 (kept at zero to add in the carry), r6, r7 and the flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r7, r6      \n\t"                          \
        "add  %[l], %[l], r7    \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r7, r6      \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bl (weight 2^16) */                      \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
2160
#else
2161
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL is defined; it uses only r5 and r6
 * as scratch registers.
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl). r5 doubles as the partial
 * product register and the zero register, so it is cleared again before
 * each carry-only add into vo; those adds depend on the clearing move
 * leaving the carry flag untouched.
 *
 * vl, vh and vo are read-write operands; va and vb are inputs.
 * Clobbers r5, r6 and the flags.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth   r6, %[a]                \n\t"            \
        "uxth   r5, %[b]                \n\t"            \
        "mul    r5, r6                  \n\t"            \
        "add    %[l], %[l], r5          \n\t"            \
        "mov    r5, #0                  \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "adc    %[o], r5                \n\t"            \
        /* al * bh (weight 2^16) */                      \
        "lsr    r5, %[b], #16           \n\t"            \
        "mul    r6, r5                  \n\t"            \
        "lsr    r5, r6, #16             \n\t"            \
        "lsl    r6, r6, #16             \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        /* r5 was scratch: zero it again */              \
        "mov    r5, #0                  \n\t"            \
        "adc    %[o], r5                \n\t"            \
        /* ah * bh (weight 2^32) */                      \
        "lsr    r6, %[a], #16           \n\t"            \
        "lsr    r5, %[b], #16           \n\t"            \
        "mul    r5, r6                  \n\t"            \
        "add    %[h], %[h], r5          \n\t"            \
        "mov    r5, #0                  \n\t"            \
        "adc    %[o], r5                \n\t"            \
        /* ah * bl (weight 2^16) */                      \
        "uxth   r5, %[b]                \n\t"            \
        "mul    r6, r5                  \n\t"            \
        "lsr    r5, r6, #16             \n\t"            \
        "lsl    r6, r6, #16             \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "mov    r5, #0                  \n\t"            \
        "adc    %[o], r5                \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "cc"                               \
    )
2201
#endif
2202
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl); each one is added into
 * vh | vl at its weight. There is no third word: a carry out of vh is not
 * recorded.
 *
 * vl and vh are read-write operands; va and vb are inputs.
 * Clobbers r4, r5 (zero for the carry add), r6 and the flags.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth r4, %[b]    \n\t"                          \
        "mul  r4, r6      \n\t"                          \
        "add  %[l], %[l], r4    \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r4, %[b], #16   \n\t"                      \
        "mul  r6, r4      \n\t"                          \
        "lsr  r4, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r4    \n\t"                          \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "lsr  r4, %[b], #16   \n\t"                      \
        "mul  r4, r6      \n\t"                          \
        "add  %[h], %[h], r4    \n\t"                    \
        /* ah * bl (weight 2^16) */                      \
        "uxth r4, %[b]    \n\t"                          \
        "mul  r6, r4      \n\t"                          \
        "lsr  r4, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r4    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r4", "r5", "r6", "cc"                         \
    )
2235
#ifndef WOLFSSL_SP_SMALL
2236
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL is not defined.
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl). Every partial product is added
 * into vh | vl twice, with the carry propagated into vo after each add.
 *
 * vl, vh and vo are read-write operands; va and vb are inputs.
 * Clobbers r5 (kept at zero to add in the carry), r6, r7 and the flags.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r7, r6      \n\t"                          \
        "add  %[l], %[l], r7    \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        "add  %[l], %[l], r7    \n\t"                    \
        "adc  %[h], r5    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r7, r6      \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        "adc  %[o], r5    \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bl (weight 2^16) */                      \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
2284
#else
2285
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL is defined; it uses r5, r6 and r8 as
 * scratch registers.
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl). Every partial product is added
 * into vh | vl twice, with the carry propagated into vo after each add.
 * The register holding va is saved in r8 and then reused as the zero
 * register for the carry-only adds; it is reloaded from r8 when the high
 * half of va is needed and again at the end.
 *
 * vl, vh and vo are read-write operands; va and vb are declared as inputs.
 * Clobbers r5, r6, r8 and the flags.
 *
 * NOTE(review): %[a] is an input-only operand yet is written inside the
 * asm. It is restored before the asm ends, but if the compiler were to
 * place va and vb in the same register the reads of %[b] would see zero -
 * confirm against the GCC extended asm rules for input operands.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        /* save va: %[a] is reused as zero */            \
        "mov    r8, %[a]                \n\t"            \
        /* al * bl (weight 2^0) */                       \
        "uxth   r6, %[a]                \n\t"            \
        "uxth   r5, %[b]                \n\t"            \
        "mul    r5, r6                  \n\t"            \
        "add    %[l], %[l], r5          \n\t"            \
        "mov    %[a], #0                \n\t"            \
        "adc    %[h], %[a]              \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        "add    %[l], %[l], r5          \n\t"            \
        "adc    %[h], %[a]              \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        /* al * bh (weight 2^16) */                      \
        "lsr    r5, %[b], #16           \n\t"            \
        "mul    r6, r5                  \n\t"            \
        "lsr    r5, r6, #16             \n\t"            \
        "lsl    r6, r6, #16             \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        /* ah * bh (weight 2^32) */                      \
        /* reload va to take its high half */            \
        "mov    %[a], r8                \n\t"            \
        "lsr    r6, %[a], #16           \n\t"            \
        "lsr    r5, %[b], #16           \n\t"            \
        "mul    r5, r6                  \n\t"            \
        "add    %[h], %[h], r5          \n\t"            \
        "mov    %[a], #0                \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        "add    %[h], %[h], r5          \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        /* ah * bl (weight 2^16) */                      \
        "uxth   r5, %[b]                \n\t"            \
        "mul    r6, r5                  \n\t"            \
        "lsr    r5, r6, #16             \n\t"            \
        "lsl    r6, r6, #16             \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        "add    %[l], %[l], r6          \n\t"            \
        "adc    %[h], r5                \n\t"            \
        "adc    %[o], %[a]              \n\t"            \
        /* restore the input operand */                  \
        "mov    %[a], r8                \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r8", "cc"                         \
    )
2337
#endif
2338
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is assembled from four 16x16-bit partial products of the
 * halves of va (ah | al) and vb (bh | bl); every partial product is added
 * in twice.
 * Assumes first add will not overflow vh | vl: no carry is taken into vo
 * for the al * bl term or for the first add of the al * bh term.
 *
 * vl, vh and vo are read-write operands; va and vb are inputs.
 * Clobbers r5 (kept at zero to add in the carry), r6, r7 and the flags.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        /* al * bl (weight 2^0) */                       \
        "uxth r6, %[a]    \n\t"                          \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r7, r6      \n\t"                          \
        "add  %[l], %[l], r7    \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        "add  %[l], %[l], r7    \n\t"                    \
        "adc  %[h], r5    \n\t"                          \
        /* al * bh (weight 2^16) */                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bh (weight 2^32) */                      \
        "lsr  r6, %[a], #16   \n\t"                      \
        "lsr  r7, %[b], #16   \n\t"                      \
        "mul  r7, r6      \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        "adc  %[o], r5    \n\t"                          \
        "add  %[h], %[h], r7    \n\t"                    \
        "adc  %[o], r5    \n\t"                          \
        /* ah * bl (weight 2^16) */                      \
        "uxth r7, %[b]    \n\t"                          \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #16   \n\t"                        \
        "lsl  r6, r6, #16   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb)                     \
        : "r5", "r6", "r7", "cc"                         \
    )
2385
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are formed
 * directly in vl and vh; the cross term al * ah is then doubled and placed
 * at bit 16 by shifting it left by 17 (low word) and right by 15 (high
 * word) before being added in.
 *
 * vl and vh are overwritten before they are read; va is an input.
 * Clobbers r5, r6 and the flags.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "lsr  r5, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        "mov  %[l], r6    \n\t"                          \
        "mov  %[h], r5    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "mul  %[l], %[l]    \n\t"                        \
        /* ah * ah (weight 2^32) */                      \
        "mul  %[h], %[h]    \n\t"                        \
        /* 2 * al * ah (weight 2^16) */                  \
        "mul  r6, r5      \n\t"                          \
        "lsr  r5, r6, #15   \n\t"                        \
        "lsl  r6, r6, #17   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r5    \n\t"                          \
        : [h] "+l" (vh), [l] "+l" (vl)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
2406
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are added
 * to vl and vh first, then the halves are reloaded and the doubled cross
 * term 2 * al * ah is added in at bit 16. Each carry out of vh is added
 * into vo.
 *
 * vl, vh and vo are read-write operands; va is an input.
 * Clobbers r4, r5 (kept at zero to add in the carry), r6 and the flags.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "lsr  r4, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "mul  r6, r6      \n\t"                          \
        /* ah * ah (weight 2^32) */                      \
        "mul  r4, r4      \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r4    \n\t"                          \
        "mov  r5, #0      \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        "lsr  r4, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* 2 * al * ah (weight 2^16) */                  \
        "mul  r6, r4      \n\t"                          \
        "lsr  r4, r6, #15   \n\t"                        \
        "lsl  r6, r6, #17   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r4    \n\t"                          \
        "adc  %[o], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va)                                   \
        : "r4", "r5", "r6", "cc"                         \
    )
2432
/* Square va and add double size result into: vh | vl
 *
 * va is split into 16-bit halves (ah | al). al * al and ah * ah are added
 * to vl and vh first, then the halves are reloaded and the doubled cross
 * term 2 * al * ah is added in at bit 16. There is no third word: a carry
 * out of vh is not recorded.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r6, r7 and the flags.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "lsr  r7, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* al * al (weight 2^0) */                       \
        "mul  r6, r6      \n\t"                          \
        /* ah * ah (weight 2^32) */                      \
        "mul  r7, r7      \n\t"                          \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        "lsr  r7, %[a], #16   \n\t"                      \
        "uxth r6, %[a]    \n\t"                          \
        /* 2 * al * ah (weight 2^16) */                  \
        "mul  r6, r7      \n\t"                          \
        "lsr  r7, r6, #15   \n\t"                        \
        "lsl  r6, r6, #17   \n\t"                        \
        "add  %[l], %[l], r6    \n\t"                    \
        "adc  %[h], r7    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r6", "r7", "cc"                               \
    )
2455
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh. The carry is
 * consumed after r5 has been loaded with zero, so the sequence depends on
 * that load leaving the carry flag untouched.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r5 and the flags.
 */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "add  %[l], %[l], %[a]  \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "adc  %[h], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
2465
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by a
 * subtract-with-carry of zero. The borrow is consumed after r5 has been
 * loaded with zero, so the sequence depends on that load leaving the carry
 * flag untouched.
 *
 * vl and vh are read-write operands; va is an input.
 * Clobbers r5 and the flags.
 */
#define SP_ASM_SUBC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "sub  %[l], %[l], %[a]  \n\t"                    \
        "mov  r5, #0      \n\t"                          \
        "sbc  %[h], r5    \n\t"                          \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "cc"                                     \
    )
2475
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added to vo | vh | vl with carry
 * propagation from vl through vh into vo, and the same addition is then
 * performed a second time.
 *
 * vl, vh and vo are read-write operands; va, vb and vc are inputs.
 * No scratch registers are used; only the flags are clobbered.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "add  %[l], %[l], %[a]  \n\t"                    \
        "adc  %[h], %[b]    \n\t"                        \
        "adc  %[o], %[c]    \n\t"                        \
        "add  %[l], %[l], %[a]  \n\t"                    \
        "adc  %[h], %[b]    \n\t"                        \
        "adc  %[o], %[c]    \n\t"                        \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
        : "cc"                                           \
    )
2488
2489
#endif
2490
2491
#ifdef WOLFSSL_SP_DIV_WORD_HALF
2492
/* Divide a two digit number by a digit number and return. (hi | lo) / d
2493
 *
2494
 * No division instruction used - does operation bit by bit.
2495
 * Constant time.
2496
 *
2497
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
2498
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
2499
 * @param  [in]  d   SP integer digit. Number to divide by.
2500
 * @return  The division result.
2501
 */
2502
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2503
                                          sp_int_digit d)
2504
{
2505
    __asm__ __volatile__ (
2506
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2507
        "lsrs r3, %[d], #24\n\t"
2508
#else
2509
        "lsr  r3, %[d], #24\n\t"
2510
#endif
2511
        "beq  2%=f\n\t"
2512
  "\n1%=:\n\t"
2513
        "movs r3, #0\n\t"
2514
        "b  3%=f\n\t"
2515
  "\n2%=:\n\t"
2516
        "mov  r3, #8\n\t"
2517
  "\n3%=:\n\t"
2518
        "movs r4, #31\n\t"
2519
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2520
        "subs r4, r4, r3\n\t"
2521
#else
2522
        "sub  r4, r4, r3\n\t"
2523
#endif
2524
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2525
        "lsls %[d], %[d], r3\n\t"
2526
#else
2527
        "lsl  %[d], %[d], r3\n\t"
2528
#endif
2529
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2530
        "lsls %[hi], %[hi], r3\n\t"
2531
#else
2532
        "lsl  %[hi], %[hi], r3\n\t"
2533
#endif
2534
        "mov  r5, %[lo]\n\t"
2535
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2536
        "lsrs r5, r5, r4\n\t"
2537
#else
2538
        "lsr  r5, r5, r4\n\t"
2539
#endif
2540
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2541
        "lsls %[lo], %[lo], r3\n\t"
2542
#else
2543
        "lsl  %[lo], %[lo], r3\n\t"
2544
#endif
2545
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2546
        "lsrs r5, r5, #1\n\t"
2547
#else
2548
        "lsr  r5, r5, #1\n\t"
2549
#endif
2550
#if defined(WOLFSSL_KEIL)
2551
        "orrs %[hi], %[hi], r5\n\t"
2552
#elif defined(__clang__)
2553
        "orrs %[hi], r5\n\t"
2554
#else
2555
        "orr  %[hi], r5\n\t"
2556
#endif
2557
2558
        "movs   r3, #0\n\t"
2559
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2560
        "lsrs   r5, %[d], #1\n\t"
2561
#else
2562
        "lsr    r5, %[d], #1\n\t"
2563
#endif
2564
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2565
        "adds   r5, r5, #1\n\t"
2566
#else
2567
        "add    r5, r5, #1\n\t"
2568
#endif
2569
        "mov    r8, %[lo]\n\t"
2570
        "mov    r9, %[hi]\n\t"
2571
        /* Do top 32 */
2572
        "movs   r6, r5\n\t"
2573
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2574
        "subs   r6, r6, %[hi]\n\t"
2575
#else
2576
        "sub    r6, r6, %[hi]\n\t"
2577
#endif
2578
#ifdef WOLFSSL_KEIL
2579
        "sbcs   r6, r6, r6\n\t"
2580
#elif defined(__clang__)
2581
        "sbcs   r6, r6\n\t"
2582
#else
2583
        "sbc    r6, r6\n\t"
2584
#endif
2585
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2586
        "adds   r3, r3, r3\n\t"
2587
#else
2588
        "add    r3, r3, r3\n\t"
2589
#endif
2590
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2591
        "subs   r3, r3, r6\n\t"
2592
#else
2593
        "sub    r3, r3, r6\n\t"
2594
#endif
2595
#ifdef WOLFSSL_KEIL
2596
        "ands   r6, r6, r5\n\t"
2597
#elif defined(__clang__)
2598
        "ands   r6, r5\n\t"
2599
#else
2600
        "and    r6, r5\n\t"
2601
#endif
2602
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2603
        "subs   %[hi], %[hi], r6\n\t"
2604
#else
2605
        "sub    %[hi], %[hi], r6\n\t"
2606
#endif
2607
        "movs   r4, #29\n\t"
2608
        "\n"
2609
    "L_sp_div_word_loop%=:\n\t"
2610
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2611
        "lsls   %[lo], %[lo], #1\n\t"
2612
#else
2613
        "lsl    %[lo], %[lo], #1\n\t"
2614
#endif
2615
#ifdef WOLFSSL_KEIL
2616
        "adcs   %[hi], %[hi], %[hi]\n\t"
2617
#elif defined(__clang__)
2618
        "adcs   %[hi], %[hi]\n\t"
2619
#else
2620
        "adc    %[hi], %[hi]\n\t"
2621
#endif
2622
        "movs   r6, r5\n\t"
2623
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2624
        "subs   r6, r6, %[hi]\n\t"
2625
#else
2626
        "sub    r6, r6, %[hi]\n\t"
2627
#endif
2628
#ifdef WOLFSSL_KEIL
2629
        "sbcs   r6, r6, r6\n\t"
2630
#elif defined(__clang__)
2631
        "sbcs   r6, r6\n\t"
2632
#else
2633
        "sbc    r6, r6\n\t"
2634
#endif
2635
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2636
        "adds   r3, r3, r3\n\t"
2637
#else
2638
        "add    r3, r3, r3\n\t"
2639
#endif
2640
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2641
        "subs   r3, r3, r6\n\t"
2642
#else
2643
        "sub    r3, r3, r6\n\t"
2644
#endif
2645
#ifdef WOLFSSL_KEIL
2646
        "ands   r6, r6, r5\n\t"
2647
#elif defined(__clang__)
2648
        "ands   r6, r5\n\t"
2649
#else
2650
        "and    r6, r5\n\t"
2651
#endif
2652
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2653
        "subs   %[hi], %[hi], r6\n\t"
2654
#else
2655
        "sub    %[hi], %[hi], r6\n\t"
2656
#endif
2657
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2658
        "subs   r4, r4, #1\n\t"
2659
#else
2660
        "sub    r4, r4, #1\n\t"
2661
#endif
2662
        "bpl    L_sp_div_word_loop%=\n\t"
2663
        "movs   r7, #0\n\t"
2664
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2665
        "adds   r3, r3, r3\n\t"
2666
#else
2667
        "add    r3, r3, r3\n\t"
2668
#endif
2669
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2670
        "adds   r3, r3, #1\n\t"
2671
#else
2672
        "add    r3, r3, #1\n\t"
2673
#endif
2674
        /* r * d - Start */
2675
        "uxth   %[hi], r3\n\t"
2676
        "uxth   r4, %[d]\n\t"
2677
#ifdef WOLFSSL_KEIL
2678
        "muls   r4, %[hi], r4\n\t"
2679
#elif defined(__clang__)
2680
        "muls   r4, %[hi]\n\t"
2681
#else
2682
        "mul    r4, %[hi]\n\t"
2683
#endif
2684
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2685
        "lsrs   r6, %[d], #16\n\t"
2686
#else
2687
        "lsr    r6, %[d], #16\n\t"
2688
#endif
2689
#ifdef WOLFSSL_KEIL
2690
        "muls   %[hi], r6, %[hi]\n\t"
2691
#elif defined(__clang__)
2692
        "muls   %[hi], r6\n\t"
2693
#else
2694
        "mul    %[hi], r6\n\t"
2695
#endif
2696
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2697
        "lsrs   r5, %[hi], #16\n\t"
2698
#else
2699
        "lsr    r5, %[hi], #16\n\t"
2700
#endif
2701
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2702
        "lsls   %[hi], %[hi], #16\n\t"
2703
#else
2704
        "lsl    %[hi], %[hi], #16\n\t"
2705
#endif
2706
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2707
        "adds   r4, r4, %[hi]\n\t"
2708
#else
2709
        "add    r4, r4, %[hi]\n\t"
2710
#endif
2711
#ifdef WOLFSSL_KEIL
2712
        "adcs   r5, r5, r7\n\t"
2713
#elif defined(__clang__)
2714
        "adcs   r5, r7\n\t"
2715
#else
2716
        "adc    r5, r7\n\t"
2717
#endif
2718
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2719
        "lsrs   %[hi], r3, #16\n\t"
2720
#else
2721
        "lsr    %[hi], r3, #16\n\t"
2722
#endif
2723
#ifdef WOLFSSL_KEIL
2724
        "muls   r6, %[hi], r6\n\t"
2725
#elif defined(__clang__)
2726
        "muls   r6, %[hi]\n\t"
2727
#else
2728
        "mul    r6, %[hi]\n\t"
2729
#endif
2730
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2731
        "adds   r5, r5, r6\n\t"
2732
#else
2733
        "add    r5, r5, r6\n\t"
2734
#endif
2735
        "uxth   r6, %[d]\n\t"
2736
#ifdef WOLFSSL_KEIL
2737
        "muls   %[hi], r6, %[hi]\n\t"
2738
#elif defined(__clang__)
2739
        "muls   %[hi], r6\n\t"
2740
#else
2741
        "mul    %[hi], r6\n\t"
2742
#endif
2743
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2744
        "lsrs   r6, %[hi], #16\n\t"
2745
#else
2746
        "lsr    r6, %[hi], #16\n\t"
2747
#endif
2748
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2749
        "lsls   %[hi], %[hi], #16\n\t"
2750
#else
2751
        "lsl    %[hi], %[hi], #16\n\t"
2752
#endif
2753
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2754
        "adds   r4, r4, %[hi]\n\t"
2755
#else
2756
        "add    r4, r4, %[hi]\n\t"
2757
#endif
2758
#ifdef WOLFSSL_KEIL
2759
        "adcs   r5, r5, r6\n\t"
2760
#elif defined(__clang__)
2761
        "adcs   r5, r6\n\t"
2762
#else
2763
        "adc    r5, r6\n\t"
2764
#endif
2765
        /* r * d - Done */
2766
        "mov    %[hi], r8\n\t"
2767
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2768
        "subs   %[hi], %[hi], r4\n\t"
2769
#else
2770
        "sub    %[hi], %[hi], r4\n\t"
2771
#endif
2772
        "movs   r4, %[hi]\n\t"
2773
        "mov    %[hi], r9\n\t"
2774
#ifdef WOLFSSL_KEIL
2775
        "sbcs   %[hi], %[hi], r5\n\t"
2776
#elif defined(__clang__)
2777
        "sbcs   %[hi], r5\n\t"
2778
#else
2779
        "sbc    %[hi], r5\n\t"
2780
#endif
2781
        "movs   r5, %[hi]\n\t"
2782
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2783
        "adds   r3, r3, r5\n\t"
2784
#else
2785
        "add    r3, r3, r5\n\t"
2786
#endif
2787
        /* r * d - Start */
2788
        "uxth   %[hi], r3\n\t"
2789
        "uxth   r4, %[d]\n\t"
2790
#ifdef WOLFSSL_KEIL
2791
        "muls   r4, %[hi], r4\n\t"
2792
#elif defined(__clang__)
2793
        "muls   r4, %[hi]\n\t"
2794
#else
2795
        "mul    r4, %[hi]\n\t"
2796
#endif
2797
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2798
        "lsrs   r6, %[d], #16\n\t"
2799
#else
2800
        "lsr    r6, %[d], #16\n\t"
2801
#endif
2802
#ifdef WOLFSSL_KEIL
2803
        "muls   %[hi], r6, %[hi]\n\t"
2804
#elif defined(__clang__)
2805
        "muls   %[hi], r6\n\t"
2806
#else
2807
        "mul    %[hi], r6\n\t"
2808
#endif
2809
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2810
        "lsrs   r5, %[hi], #16\n\t"
2811
#else
2812
        "lsr    r5, %[hi], #16\n\t"
2813
#endif
2814
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2815
        "lsls   %[hi], %[hi], #16\n\t"
2816
#else
2817
        "lsl    %[hi], %[hi], #16\n\t"
2818
#endif
2819
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2820
        "adds   r4, r4, %[hi]\n\t"
2821
#else
2822
        "add    r4, r4, %[hi]\n\t"
2823
#endif
2824
#ifdef WOLFSSL_KEIL
2825
        "adcs   r5, r5, r7\n\t"
2826
#elif defined(__clang__)
2827
        "adcs   r5, r7\n\t"
2828
#else
2829
        "adc    r5, r7\n\t"
2830
#endif
2831
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2832
        "lsrs   %[hi], r3, #16\n\t"
2833
#else
2834
        "lsr    %[hi], r3, #16\n\t"
2835
#endif
2836
#ifdef WOLFSSL_KEIL
2837
        "muls   r6, %[hi], r6\n\t"
2838
#elif defined(__clang__)
2839
        "muls   r6, %[hi]\n\t"
2840
#else
2841
        "mul    r6, %[hi]\n\t"
2842
#endif
2843
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2844
        "adds   r5, r5, r6\n\t"
2845
#else
2846
        "add    r5, r5, r6\n\t"
2847
#endif
2848
        "uxth   r6, %[d]\n\t"
2849
#ifdef WOLFSSL_KEIL
2850
        "muls   %[hi], r6, %[hi]\n\t"
2851
#elif defined(__clang__)
2852
        "muls   %[hi], r6\n\t"
2853
#else
2854
        "mul    %[hi], r6\n\t"
2855
#endif
2856
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2857
        "lsrs   r6, %[hi], #16\n\t"
2858
#else
2859
        "lsr    r6, %[hi], #16\n\t"
2860
#endif
2861
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2862
        "lsls   %[hi], %[hi], #16\n\t"
2863
#else
2864
        "lsl    %[hi], %[hi], #16\n\t"
2865
#endif
2866
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2867
        "adds   r4, r4, %[hi]\n\t"
2868
#else
2869
        "add    r4, r4, %[hi]\n\t"
2870
#endif
2871
#ifdef WOLFSSL_KEIL
2872
        "adcs   r5, r5, r6\n\t"
2873
#elif defined(__clang__)
2874
        "adcs   r5, r6\n\t"
2875
#else
2876
        "adc    r5, r6\n\t"
2877
#endif
2878
        /* r * d - Done */
2879
        "mov    %[hi], r8\n\t"
2880
        "mov    r6, r9\n\t"
2881
#ifdef WOLFSSL_KEIL
2882
        "subs   r4, %[hi], r4\n\t"
2883
#else
2884
#ifdef __clang__
2885
        "subs   r4, %[hi], r4\n\t"
2886
#else
2887
        "sub    r4, %[hi], r4\n\t"
2888
#endif
2889
#endif
2890
#ifdef WOLFSSL_KEIL
2891
        "sbcs   r6, r6, r5\n\t"
2892
#elif defined(__clang__)
2893
        "sbcs   r6, r5\n\t"
2894
#else
2895
        "sbc    r6, r5\n\t"
2896
#endif
2897
        "movs   r5, r6\n\t"
2898
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2899
        "adds   r3, r3, r5\n\t"
2900
#else
2901
        "add    r3, r3, r5\n\t"
2902
#endif
2903
        /* r * d - Start */
2904
        "uxth   %[hi], r3\n\t"
2905
        "uxth   r4, %[d]\n\t"
2906
#ifdef WOLFSSL_KEIL
2907
        "muls   r4, %[hi], r4\n\t"
2908
#elif defined(__clang__)
2909
        "muls   r4, %[hi]\n\t"
2910
#else
2911
        "mul    r4, %[hi]\n\t"
2912
#endif
2913
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2914
        "lsrs   r6, %[d], #16\n\t"
2915
#else
2916
        "lsr    r6, %[d], #16\n\t"
2917
#endif
2918
#ifdef WOLFSSL_KEIL
2919
        "muls   %[hi], r6, %[hi]\n\t"
2920
#elif defined(__clang__)
2921
        "muls   %[hi], r6\n\t"
2922
#else
2923
        "mul    %[hi], r6\n\t"
2924
#endif
2925
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2926
        "lsrs   r5, %[hi], #16\n\t"
2927
#else
2928
        "lsr    r5, %[hi], #16\n\t"
2929
#endif
2930
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2931
        "lsls   %[hi], %[hi], #16\n\t"
2932
#else
2933
        "lsl    %[hi], %[hi], #16\n\t"
2934
#endif
2935
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2936
        "adds   r4, r4, %[hi]\n\t"
2937
#else
2938
        "add    r4, r4, %[hi]\n\t"
2939
#endif
2940
#ifdef WOLFSSL_KEIL
2941
        "adcs   r5, r5, r7\n\t"
2942
#elif defined(__clang__)
2943
        "adcs   r5, r7\n\t"
2944
#else
2945
        "adc    r5, r7\n\t"
2946
#endif
2947
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2948
        "lsrs   %[hi], r3, #16\n\t"
2949
#else
2950
        "lsr    %[hi], r3, #16\n\t"
2951
#endif
2952
#ifdef WOLFSSL_KEIL
2953
        "muls   r6, %[hi], r6\n\t"
2954
#elif defined(__clang__)
2955
        "muls   r6, %[hi]\n\t"
2956
#else
2957
        "mul    r6, %[hi]\n\t"
2958
#endif
2959
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2960
        "adds   r5, r5, r6\n\t"
2961
#else
2962
        "add    r5, r5, r6\n\t"
2963
#endif
2964
        "uxth   r6, %[d]\n\t"
2965
#ifdef WOLFSSL_KEIL
2966
        "muls   %[hi], r6, %[hi]\n\t"
2967
#elif defined(__clang__)
2968
        "muls   %[hi], r6\n\t"
2969
#else
2970
        "mul    %[hi], r6\n\t"
2971
#endif
2972
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2973
        "lsrs   r6, %[hi], #16\n\t"
2974
#else
2975
        "lsr    r6, %[hi], #16\n\t"
2976
#endif
2977
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2978
        "lsls   %[hi], %[hi], #16\n\t"
2979
#else
2980
        "lsl    %[hi], %[hi], #16\n\t"
2981
#endif
2982
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2983
        "adds   r4, r4, %[hi]\n\t"
2984
#else
2985
        "add    r4, r4, %[hi]\n\t"
2986
#endif
2987
#ifdef WOLFSSL_KEIL
2988
        "adcs   r5, r5, r6\n\t"
2989
#elif defined(__clang__)
2990
        "adcs   r5, r6\n\t"
2991
#else
2992
        "adc    r5, r6\n\t"
2993
#endif
2994
        /* r * d - Done */
2995
        "mov    %[hi], r8\n\t"
2996
        "mov    r6, r9\n\t"
2997
#ifdef WOLFSSL_KEIL
2998
        "subs   r4, %[hi], r4\n\t"
2999
#else
3000
#ifdef __clang__
3001
        "subs   r4, %[hi], r4\n\t"
3002
#else
3003
        "sub    r4, %[hi], r4\n\t"
3004
#endif
3005
#endif
3006
#ifdef WOLFSSL_KEIL
3007
        "sbcs   r6, r6, r5\n\t"
3008
#elif defined(__clang__)
3009
        "sbcs   r6, r5\n\t"
3010
#else
3011
        "sbc    r6, r5\n\t"
3012
#endif
3013
        "movs   r5, r6\n\t"
3014
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3015
        "adds   r3, r3, r5\n\t"
3016
#else
3017
        "add    r3, r3, r5\n\t"
3018
#endif
3019
        "movs   r6, %[d]\n\t"
3020
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3021
        "subs   r6, r6, r4\n\t"
3022
#else
3023
        "sub    r6, r6, r4\n\t"
3024
#endif
3025
#ifdef WOLFSSL_KEIL
3026
        "sbcs   r6, r6, r6\n\t"
3027
#elif defined(__clang__)
3028
        "sbcs   r6, r6\n\t"
3029
#else
3030
        "sbc    r6, r6\n\t"
3031
#endif
3032
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3033
        "subs   r3, r3, r6\n\t"
3034
#else
3035
        "sub    r3, r3, r6\n\t"
3036
#endif
3037
        "movs   %[hi], r3\n\t"
3038
        : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3039
        :
3040
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9"
3041
    );
3042
    return (uint32_t)(size_t)hi;
3043
}
3044
3045
#define SP_ASM_DIV_WORD
3046
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
3047
3048
#define SP_INT_ASM_AVAILABLE
3049
3050
    #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3051
3052
    #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3053
/*
3054
 * CPU: PPC64
3055
 */
3056
3057
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld writes the low word of the product to vl and mulhdu writes the high
 * word to vh.
 * vl and vh are declared read-write ("+r") although the instructions only
 * write them; va and vb are register inputs.
 * Clobbers: "memory".
 */
3058
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3059
    __asm__ __volatile__ (                               \
3060
        "mulld  %[l], %[a], %[b]  \n\t"            \
3061
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3062
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3063
        : [a] "r" (va), [b] "r" (vb)                     \
3064
        : "memory"                                       \
3065
    )
3066
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word goes to vh, the low word to vl, and vo is set to zero.
 * vl and vh are declared read-write ("+r"); vo is output-only ("=r") and is
 * written last, after both multiplies have read va and vb.
 * No clobbers are declared.
 */
3067
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3068
    __asm__ __volatile__ (                               \
3069
        "mulhdu %[h], %[a], %[b]  \n\t"            \
3070
        "mulld  %[l], %[a], %[b]  \n\t"            \
3071
        "li %[o], 0     \n\t"            \
3072
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3073
        : [a] "r" (va), [b] "r" (vb)                     \
3074
        :                                                \
3075
    )
3076
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry; the final carry is
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3077
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3078
    __asm__ __volatile__ (                               \
3079
        "mulld  16, %[a], %[b]    \n\t"            \
3080
        "mulhdu 17, %[a], %[b]    \n\t"            \
3081
        "addc %[l], %[l], 16    \n\t"            \
3082
        "adde %[h], %[h], 17    \n\t"            \
3083
        "addze  %[o], %[o]    \n\t"            \
3084
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3085
        : [a] "r" (va), [b] "r" (vb)                     \
3086
        : "16", "17", "cc"                               \
3087
    )
3088
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry. There is no third
 * accumulator word: any carry out of vh is not recorded.
 * vl and vh are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3089
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3090
    __asm__ __volatile__ (                               \
3091
        "mulld  16, %[a], %[b]    \n\t"            \
3092
        "mulhdu 17, %[a], %[b]    \n\t"            \
3093
        "addc %[l], %[l], 16    \n\t"            \
3094
        "adde %[h], %[h], 17    \n\t"            \
3095
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3096
        : [a] "r" (va), [b] "r" (vb)                     \
3097
        : "16", "17", "cc"                               \
3098
    )
3099
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into the fixed scratch registers 16 (low
 * word) and 17 (high word); the addc/adde/addze sequence is then issued
 * twice so the product is accumulated two times, with each final carry
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3100
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3101
    __asm__ __volatile__ (                               \
3102
        "mulld  16, %[a], %[b]    \n\t"            \
3103
        "mulhdu 17, %[a], %[b]    \n\t"            \
3104
        "addc %[l], %[l], 16    \n\t"            \
3105
        "adde %[h], %[h], 17    \n\t"            \
3106
        "addze  %[o], %[o]    \n\t"            \
3107
        "addc %[l], %[l], 16    \n\t"            \
3108
        "adde %[h], %[h], 17    \n\t"            \
3109
        "addze  %[o], %[o]    \n\t"            \
3110
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3111
        : [a] "r" (va), [b] "r" (vb)                     \
3112
        : "16", "17", "cc"                               \
3113
    )
3114
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into the fixed scratch registers 16 (low
 * word) and 17 (high word) and added into vh | vl twice. Only the second
 * add is followed by addze, so a carry out of the first add is not
 * propagated into vo - hence the assumption above.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3117
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3118
    __asm__ __volatile__ (                               \
3119
        "mulld  16, %[a], %[b]    \n\t"            \
3120
        "mulhdu 17, %[a], %[b]    \n\t"            \
3121
        "addc %[l], %[l], 16    \n\t"            \
3122
        "adde %[h], %[h], 17    \n\t"            \
3123
        "addc %[l], %[l], 16    \n\t"            \
3124
        "adde %[h], %[h], 17    \n\t"            \
3125
        "addze  %[o], %[o]    \n\t"            \
3126
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3127
        : [a] "r" (va), [b] "r" (vb)                     \
3128
        : "16", "17", "cc"                               \
3129
    )
3130
/* Square va and store double size result in: vh | vl
 *
 * mulld writes the low word of va * va to vl and mulhdu writes the high
 * word to vh.
 * vl and vh are declared read-write ("+r") although the instructions only
 * write them; va is a register input.
 * Clobbers: "memory".
 */
3131
#define SP_ASM_SQR(vl, vh, va)                           \
3132
    __asm__ __volatile__ (                               \
3133
        "mulld  %[l], %[a], %[a]  \n\t"            \
3134
        "mulhdu %[h], %[a], %[a]  \n\t"            \
3135
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3136
        : [a] "r" (va)                                   \
3137
        : "memory"                                       \
3138
    )
3139
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry; the final carry is
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va is a register input.
 * Clobbers: registers 16 and 17, and "cc".
 */
3140
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3141
    __asm__ __volatile__ (                               \
3142
        "mulld  16, %[a], %[a]    \n\t"            \
3143
        "mulhdu 17, %[a], %[a]    \n\t"            \
3144
        "addc %[l], %[l], 16    \n\t"            \
3145
        "adde %[h], %[h], 17    \n\t"            \
3146
        "addze  %[o], %[o]    \n\t"            \
3147
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3148
        : [a] "r" (va)                                   \
3149
        : "16", "17", "cc"                               \
3150
    )
3151
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry. There is no third
 * accumulator word: any carry out of vh is not recorded.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: registers 16 and 17, and "cc".
 */
3152
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3153
    __asm__ __volatile__ (                               \
3154
        "mulld  16, %[a], %[a]    \n\t"            \
3155
        "mulhdu 17, %[a], %[a]    \n\t"            \
3156
        "addc %[l], %[l], 16    \n\t"            \
3157
        "adde %[h], %[h], 17    \n\t"            \
3158
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3159
        : [a] "r" (va)                                   \
3160
        : "16", "17", "cc"                               \
3161
    )
3162
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry
 * into vh.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: "cc".
 */
3163
#define SP_ASM_ADDC(vl, vh, va)                          \
3164
    __asm__ __volatile__ (                               \
3165
        "addc %[l], %[l], %[a]  \n\t"            \
3166
        "addze  %[h], %[h]    \n\t"            \
3167
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3168
        : [a] "r" (va)                                   \
3169
        : "cc"                                           \
3170
    )
3171
/* Sub va from: vh | vl
 *
 * subfc computes vl - va (subtract-from form, so the operands appear as
 * va, vl). Register 16 is then loaded with zero and subfe subtracts it from
 * vh with the carry from the first step, propagating the borrow.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: register 16 and "cc".
 */
3172
#define SP_ASM_SUBC(vl, vh, va)                          \
3173
    __asm__ __volatile__ (                               \
3174
        "subfc  %[l], %[a], %[l]  \n\t"            \
3175
        "li    16, 0      \n\t"            \
3176
        "subfe %[h], 16, %[h]   \n\t"            \
3177
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3178
        : [a] "r" (va)                                   \
3179
        : "16", "cc"                                     \
3180
    )
3181
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added into vo | vh | vl with carry
 * propagation (addc/adde/adde), and the same sequence is then issued a
 * second time so the value is accumulated twice.
 * vl, vh and vo are read-write ("+r"); va, vb and vc are register inputs.
 * Clobbers: "cc".
 */
3182
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3183
    __asm__ __volatile__ (                               \
3184
        "addc %[l], %[l], %[a]  \n\t"            \
3185
        "adde %[h], %[h], %[b]  \n\t"            \
3186
        "adde %[o], %[o], %[c]  \n\t"            \
3187
        "addc %[l], %[l], %[a]  \n\t"            \
3188
        "adde %[h], %[h], %[b]  \n\t"            \
3189
        "adde %[o], %[o], %[c]  \n\t"            \
3190
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3191
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3192
        : "cc"                                           \
3193
    )
3194
3195
#define SP_INT_ASM_AVAILABLE
3196
3197
    #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3198
3199
    #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3200
/*
3201
 * CPU: PPC 32-bit
3202
 */
3203
3204
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw writes the low word of the product to vl and mulhwu writes the high
 * word to vh.
 * vl and vh are declared read-write ("+r") although the instructions only
 * write them; va and vb are register inputs.
 * Clobbers: "memory".
 */
3205
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3206
    __asm__ __volatile__ (                               \
3207
        "mullw  %[l], %[a], %[b]  \n\t"            \
3208
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3209
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3210
        : [a] "r" (va), [b] "r" (vb)                     \
3211
        : "memory"                                       \
3212
    )
3213
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The high word goes to vh, the low word to vl, and vo is set to zero.
 * vl and vh are declared read-write ("+r"); vo is output-only ("=r") and is
 * written last, after both multiplies have read va and vb.
 * No clobbers are declared.
 */
3214
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3215
    __asm__ __volatile__ (                               \
3216
        "mulhwu %[h], %[a], %[b]  \n\t"            \
3217
        "mullw  %[l], %[a], %[b]  \n\t"            \
3218
        "li %[o], 0     \n\t"            \
3219
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3220
        : [a] "r" (va), [b] "r" (vb)                     \
3221
        :                                                \
3222
    )
3223
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry; the final carry is
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3224
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3225
    __asm__ __volatile__ (                               \
3226
        "mullw  16, %[a], %[b]    \n\t"            \
3227
        "mulhwu 17, %[a], %[b]    \n\t"            \
3228
        "addc %[l], %[l], 16    \n\t"            \
3229
        "adde %[h], %[h], 17    \n\t"            \
3230
        "addze  %[o], %[o]    \n\t"            \
3231
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3232
        : [a] "r" (va), [b] "r" (vb)                     \
3233
        : "16", "17", "cc"                               \
3234
    )
3235
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry. There is no third
 * accumulator word: any carry out of vh is not recorded.
 * vl and vh are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3236
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3237
    __asm__ __volatile__ (                               \
3238
        "mullw  16, %[a], %[b]    \n\t"            \
3239
        "mulhwu 17, %[a], %[b]    \n\t"            \
3240
        "addc %[l], %[l], 16    \n\t"            \
3241
        "adde %[h], %[h], 17    \n\t"            \
3242
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3243
        : [a] "r" (va), [b] "r" (vb)                     \
3244
        : "16", "17", "cc"                               \
3245
    )
3246
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into the fixed scratch registers 16 (low
 * word) and 17 (high word); the addc/adde/addze sequence is then issued
 * twice so the product is accumulated two times, with each final carry
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3247
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3248
    __asm__ __volatile__ (                               \
3249
        "mullw  16, %[a], %[b]    \n\t"            \
3250
        "mulhwu 17, %[a], %[b]    \n\t"            \
3251
        "addc %[l], %[l], 16    \n\t"            \
3252
        "adde %[h], %[h], 17    \n\t"            \
3253
        "addze  %[o], %[o]    \n\t"            \
3254
        "addc %[l], %[l], 16    \n\t"            \
3255
        "adde %[h], %[h], 17    \n\t"            \
3256
        "addze  %[o], %[o]    \n\t"            \
3257
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3258
        : [a] "r" (va), [b] "r" (vb)                     \
3259
        : "16", "17", "cc"                               \
3260
    )
3261
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into the fixed scratch registers 16 (low
 * word) and 17 (high word) and added into vh | vl twice. Only the second
 * add is followed by addze, so a carry out of the first add is not
 * propagated into vo - hence the assumption above.
 * vl, vh and vo are read-write ("+r"); va and vb are register inputs.
 * Clobbers: registers 16 and 17, and "cc".
 */
3264
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3265
    __asm__ __volatile__ (                               \
3266
        "mullw  16, %[a], %[b]    \n\t"            \
3267
        "mulhwu 17, %[a], %[b]    \n\t"            \
3268
        "addc %[l], %[l], 16    \n\t"            \
3269
        "adde %[h], %[h], 17    \n\t"            \
3270
        "addc %[l], %[l], 16    \n\t"            \
3271
        "adde %[h], %[h], 17    \n\t"            \
3272
        "addze  %[o], %[o]    \n\t"            \
3273
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3274
        : [a] "r" (va), [b] "r" (vb)                     \
3275
        : "16", "17", "cc"                               \
3276
    )
3277
/* Square va and store double size result in: vh | vl
 *
 * mullw writes the low word of va * va to vl and mulhwu writes the high
 * word to vh.
 * vl and vh are declared read-write ("+r") although the instructions only
 * write them; va is a register input.
 * Clobbers: "memory".
 */
3278
#define SP_ASM_SQR(vl, vh, va)                           \
3279
    __asm__ __volatile__ (                               \
3280
        "mullw  %[l], %[a], %[a]  \n\t"            \
3281
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3282
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3283
        : [a] "r" (va)                                   \
3284
        : "memory"                                       \
3285
    )
3286
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry; the final carry is
 * added into vo.
 * vl, vh and vo are read-write ("+r"); va is a register input.
 * Clobbers: registers 16 and 17, and "cc".
 */
3287
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3288
    __asm__ __volatile__ (                               \
3289
        "mullw  16, %[a], %[a]    \n\t"            \
3290
        "mulhwu 17, %[a], %[a]    \n\t"            \
3291
        "addc %[l], %[l], 16    \n\t"            \
3292
        "adde %[h], %[h], 17    \n\t"            \
3293
        "addze  %[o], %[o]    \n\t"            \
3294
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3295
        : [a] "r" (va)                                   \
3296
        : "16", "17", "cc"                               \
3297
    )
3298
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in the fixed scratch registers 16 (low word) and
 * 17 (high word), then added into vl and vh with carry. There is no third
 * accumulator word: any carry out of vh is not recorded.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: registers 16 and 17, and "cc".
 */
3299
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3300
    __asm__ __volatile__ (                               \
3301
        "mullw  16, %[a], %[a]    \n\t"            \
3302
        "mulhwu 17, %[a], %[a]    \n\t"            \
3303
        "addc %[l], %[l], 16    \n\t"            \
3304
        "adde %[h], %[h], 17    \n\t"            \
3305
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3306
        : [a] "r" (va)                                   \
3307
        : "16", "17", "cc"                               \
3308
    )
3309
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry
 * into vh.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: "cc".
 */
3310
#define SP_ASM_ADDC(vl, vh, va)                          \
3311
    __asm__ __volatile__ (                               \
3312
        "addc %[l], %[l], %[a]  \n\t"            \
3313
        "addze  %[h], %[h]    \n\t"            \
3314
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3315
        : [a] "r" (va)                                   \
3316
        : "cc"                                           \
3317
    )
3318
/* Sub va from: vh | vl
 *
 * subfc computes vl - va (subtract-from form, so the operands appear as
 * va, vl). Register 16 is then loaded with zero and subfe subtracts it from
 * vh with the carry from the first step, propagating the borrow.
 * vl and vh are read-write ("+r"); va is a register input.
 * Clobbers: register 16 and "cc".
 */
3319
#define SP_ASM_SUBC(vl, vh, va)                          \
3320
    __asm__ __volatile__ (                               \
3321
        "subfc  %[l], %[a], %[l]  \n\t"            \
3322
        "li 16, 0     \n\t"            \
3323
        "subfe  %[h], 16, %[h]    \n\t"            \
3324
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3325
        : [a] "r" (va)                                   \
3326
        : "16", "cc"                                     \
3327
    )
3328
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added into vo | vh | vl with carry
 * propagation (addc/adde/adde), and the same sequence is then issued a
 * second time so the value is accumulated twice.
 * vl, vh and vo are read-write ("+r"); va, vb and vc are register inputs.
 * Clobbers: "cc".
 */
3329
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3330
    __asm__ __volatile__ (                               \
3331
        "addc %[l], %[l], %[a]  \n\t"            \
3332
        "adde %[h], %[h], %[b]  \n\t"            \
3333
        "adde %[o], %[o], %[c]  \n\t"            \
3334
        "addc %[l], %[l], %[a]  \n\t"            \
3335
        "adde %[h], %[h], %[b]  \n\t"            \
3336
        "adde %[o], %[o], %[c]  \n\t"            \
3337
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3338
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3339
        : "cc"                                           \
3340
    )
3341
3342
/* Marks that an architecture-specific set of SP_ASM_* macros was defined in
 * the conditional section that ends just below. */
#define SP_INT_ASM_AVAILABLE
3343
3344
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 64 */
3345
3346
    #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
3347
/*
3348
 * CPU: MIPS 64-bit
3349
 */
3350
3351
/* Multiply va by vb and store double size result in: vh | vl
 *
 * MIPS 64-bit version. dmultu performs the multiply; mflo and mfhi then copy
 * the low and high result words into vl and vh. vl and vh are declared as
 * read-write ("+r") operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3352
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3353
    __asm__ __volatile__ (                               \
3354
        "dmultu %[a], %[b]    \n\t"            \
3355
        "mflo %[l]      \n\t"            \
3356
        "mfhi %[h]      \n\t"            \
3357
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3358
        : [a] "r" (va), [b] "r" (vb)                     \
3359
        : "memory", "$lo", "$hi"                         \
3360
    )
3361
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * MIPS 64-bit version. The product is copied out with mflo/mfhi into vl and
 * vh, and vo is set to zero by moving register $0 into it. vo is a
 * write-only ("=r") operand.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [out]  vo  Top word; always set to zero.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3362
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3363
    __asm__ __volatile__ (                               \
3364
        "dmultu %[a], %[b]    \n\t"            \
3365
        "mflo %[l]      \n\t"            \
3366
        "mfhi %[h]      \n\t"            \
3367
        "move %[o], $0    \n\t"            \
3368
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3369
        : [a] "r" (va), [b] "r" (vb)                     \
3370
        : "$lo", "$hi"                                   \
3371
    )
3372
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * MIPS 64-bit version. The product is held in scratch registers $10 (low
 * word) and $11 (high word). After each addition, sltu sets $12 to 1 when the
 * sum is less than the value just added (the addition wrapped) and $12 is
 * added into the next word up. $10, $11 and $12 are listed as clobbered.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3373
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3374
    __asm__ __volatile__ (                               \
3375
        "dmultu %[a], %[b]    \n\t"            \
3376
        "mflo $10     \n\t"            \
3377
        "mfhi $11     \n\t"            \
3378
        "daddu  %[l], %[l], $10   \n\t"            \
3379
        "sltu $12, %[l], $10    \n\t"            \
3380
        "daddu  %[h], %[h], $12   \n\t"            \
3381
        "sltu $12, %[h], $12    \n\t"            \
3382
        "daddu  %[o], %[o], $12   \n\t"            \
3383
        "daddu  %[h], %[h], $11   \n\t"            \
3384
        "sltu $12, %[h], $11    \n\t"            \
3385
        "daddu  %[o], %[o], $12   \n\t"            \
3386
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3387
        : [a] "r" (va), [b] "r" (vb)                     \
3388
        : "$10", "$11", "$12", "$lo", "$hi"              \
3389
    )
3390
/* Multiply va by vb and add double size result into: vh | vl
 *
 * MIPS 64-bit version. The product is held in scratch registers $10 (low
 * word) and $11 (high word). The carry out of vl is computed with sltu into
 * $12 and added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3391
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3392
    __asm__ __volatile__ (                               \
3393
        "dmultu %[a], %[b]    \n\t"            \
3394
        "mflo $10     \n\t"            \
3395
        "mfhi $11     \n\t"            \
3396
        "daddu  %[l], %[l], $10   \n\t"            \
3397
        "sltu $12, %[l], $10    \n\t"            \
3398
        "daddu  %[h], %[h], $11   \n\t"            \
3399
        "daddu  %[h], %[h], $12   \n\t"            \
3400
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3401
        : [a] "r" (va), [b] "r" (vb)                     \
3402
        : "$10", "$11", "$12", "$lo", "$hi"              \
3403
    )
3404
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * MIPS 64-bit version. The product is computed once into scratch registers
 * $10 (low word) and $11 (high word); the three-word add-with-carry sequence
 * is then executed twice using that same product. Carries are detected with
 * sltu into $12 (1 when a sum is less than the value just added).
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3405
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3406
    __asm__ __volatile__ (                               \
3407
        "dmultu %[a], %[b]    \n\t"            \
3408
        "mflo $10     \n\t"            \
3409
        "mfhi $11     \n\t"            \
3410
        "daddu  %[l], %[l], $10   \n\t"            \
3411
        "sltu $12, %[l], $10    \n\t"            \
3412
        "daddu  %[h], %[h], $12   \n\t"            \
3413
        "sltu $12, %[h], $12    \n\t"            \
3414
        "daddu  %[o], %[o], $12   \n\t"            \
3415
        "daddu  %[h], %[h], $11   \n\t"            \
3416
        "sltu $12, %[h], $11    \n\t"            \
3417
        "daddu  %[o], %[o], $12   \n\t"            \
3418
        "daddu  %[l], %[l], $10   \n\t"            \
3419
        "sltu $12, %[l], $10    \n\t"            \
3420
        "daddu  %[h], %[h], $12   \n\t"            \
3421
        "sltu $12, %[h], $12    \n\t"            \
3422
        "daddu  %[o], %[o], $12   \n\t"            \
3423
        "daddu  %[h], %[h], $11   \n\t"            \
3424
        "sltu $12, %[h], $11    \n\t"            \
3425
        "daddu  %[o], %[o], $12   \n\t"            \
3426
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3427
        : [a] "r" (va), [b] "r" (vb)                     \
3428
        : "$10", "$11", "$12", "$lo", "$hi"              \
3429
    )
3430
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * MIPS 64-bit version. The product is computed once into scratch registers
 * $10 (low word) and $11 (high word). The first addition only carries from vl
 * into vh; the second addition also carries out of vh into vo. Carries are
 * detected with sltu into $12.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3433
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3434
    __asm__ __volatile__ (                               \
3435
        "dmultu %[a], %[b]    \n\t"            \
3436
        "mflo $10     \n\t"            \
3437
        "mfhi $11     \n\t"            \
3438
        "daddu  %[l], %[l], $10   \n\t"            \
3439
        "sltu $12, %[l], $10    \n\t"            \
3440
        "daddu  %[h], %[h], $11   \n\t"            \
3441
        "daddu  %[h], %[h], $12   \n\t"            \
3442
        "daddu  %[l], %[l], $10   \n\t"            \
3443
        "sltu $12, %[l], $10    \n\t"            \
3444
        "daddu  %[h], %[h], $12   \n\t"            \
3445
        "sltu $12, %[h], $12    \n\t"            \
3446
        "daddu  %[o], %[o], $12   \n\t"            \
3447
        "daddu  %[h], %[h], $11   \n\t"            \
3448
        "sltu $12, %[h], $11    \n\t"            \
3449
        "daddu  %[o], %[o], $12   \n\t"            \
3450
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3451
        : [a] "r" (va), [b] "r" (vb)                     \
3452
        : "$10", "$11", "$12", "$lo", "$hi"              \
3453
    )
3454
/* Square va and store double size result in: vh | vl
 *
 * MIPS 64-bit version. dmultu multiplies va by itself; mflo and mfhi copy the
 * low and high result words into vl and vh. vl and vh are declared as
 * read-write ("+r") operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the square.
 * @param  [out]  vh  High word of the square.
 * @param  [in]   va  Word to square.
 */
3455
#define SP_ASM_SQR(vl, vh, va)                           \
3456
    __asm__ __volatile__ (                               \
3457
        "dmultu %[a], %[a]    \n\t"            \
3458
        "mflo %[l]      \n\t"            \
3459
        "mfhi %[h]      \n\t"            \
3460
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3461
        : [a] "r" (va)                                   \
3462
        : "memory", "$lo", "$hi"                         \
3463
    )
3464
/* Square va and add double size result into: vo | vh | vl
 *
 * MIPS 64-bit version. The square is held in scratch registers $10 (low word)
 * and $11 (high word). After each addition, sltu sets $12 to 1 when the sum is
 * less than the value just added, and $12 is added into the next word up.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3465
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3466
    __asm__ __volatile__ (                               \
3467
        "dmultu %[a], %[a]    \n\t"            \
3468
        "mflo $10     \n\t"            \
3469
        "mfhi $11     \n\t"            \
3470
        "daddu  %[l], %[l], $10   \n\t"            \
3471
        "sltu $12, %[l], $10    \n\t"            \
3472
        "daddu  %[h], %[h], $12   \n\t"            \
3473
        "sltu $12, %[h], $12    \n\t"            \
3474
        "daddu  %[o], %[o], $12   \n\t"            \
3475
        "daddu  %[h], %[h], $11   \n\t"            \
3476
        "sltu $12, %[h], $11    \n\t"            \
3477
        "daddu  %[o], %[o], $12   \n\t"            \
3478
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3479
        : [a] "r" (va)                                   \
3480
        : "$10", "$11", "$12", "$lo", "$hi"              \
3481
    )
3482
/* Square va and add double size result into: vh | vl
 *
 * MIPS 64-bit version. The square is held in scratch registers $10 (low word)
 * and $11 (high word). The carry out of vl is computed with sltu into $12 and
 * added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3483
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3484
    __asm__ __volatile__ (                               \
3485
        "dmultu %[a], %[a]    \n\t"            \
3486
        "mflo $10     \n\t"            \
3487
        "mfhi $11     \n\t"            \
3488
        "daddu  %[l], %[l], $10   \n\t"            \
3489
        "sltu $12, %[l], $10    \n\t"            \
3490
        "daddu  %[h], %[h], $11   \n\t"            \
3491
        "daddu  %[h], %[h], $12   \n\t"            \
3492
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3493
        : [a] "r" (va)                                   \
3494
        : "$10", "$11", "$12", "$lo", "$hi"              \
3495
    )
3496
/* Add va into: vh | vl
 *
 * MIPS 64-bit version. va is added to vl; sltu sets scratch register $12 to 1
 * when the new vl is less than va (the addition wrapped) and $12 is added to
 * vh. No carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to add.
 */
3497
#define SP_ASM_ADDC(vl, vh, va)                          \
3498
    __asm__ __volatile__ (                               \
3499
        "daddu  %[l], %[l], %[a]  \n\t"            \
3500
        "sltu $12, %[l], %[a]   \n\t"            \
3501
        "daddu  %[h], %[h], $12   \n\t"            \
3502
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3503
        : [a] "r" (va)                                   \
3504
        : "$12"                                          \
3505
    )
3506
/* Sub va from: vh | vl
 *
 * MIPS 64-bit version. The original vl is saved in scratch register $12, va
 * is subtracted to give the new vl, and sltu sets $12 to 1 when the original
 * vl is less than the new vl (the subtraction wrapped). That borrow is then
 * subtracted from vh.
 *
 * @param  [in,out]  vl  Low word of the value being reduced.
 * @param  [in,out]  vh  High word of the value being reduced.
 * @param  [in]      va  Word to subtract.
 */
3507
#define SP_ASM_SUBC(vl, vh, va)                          \
3508
    __asm__ __volatile__ (                               \
3509
        "move $12, %[l]   \n\t"            \
3510
        "dsubu  %[l], $12, %[a]   \n\t"            \
3511
        "sltu $12, $12, %[l]    \n\t"            \
3512
        "dsubu  %[h], %[h], $12   \n\t"            \
3513
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3514
        : [a] "r" (va)                                   \
3515
        : "$12"                                          \
3516
    )
3517
/* Add two times vc | vb | va into vo | vh | vl
 *
 * MIPS 64-bit version. The following sequence is executed twice with the
 * same inputs: va is added to vl and the carry is added to vh; the carry out
 * of that is added to vo; vb is added to vh; vc and the carry from adding vb
 * are added to vo. Carries are detected with sltu into scratch register $12.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Low word of the value to add twice.
 * @param  [in]      vb  Middle word of the value to add twice.
 * @param  [in]      vc  Top word of the value to add twice.
 */
3518
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3519
    __asm__ __volatile__ (                               \
3520
        "daddu  %[l], %[l], %[a]  \n\t"            \
3521
        "sltu $12, %[l], %[a]   \n\t"            \
3522
        "daddu  %[h], %[h], $12   \n\t"            \
3523
        "sltu $12, %[h], $12    \n\t"            \
3524
        "daddu  %[o], %[o], $12   \n\t"            \
3525
        "daddu  %[h], %[h], %[b]  \n\t"            \
3526
        "sltu $12, %[h], %[b]   \n\t"            \
3527
        "daddu  %[o], %[o], %[c]  \n\t"            \
3528
        "daddu  %[o], %[o], $12   \n\t"            \
3529
        "daddu  %[l], %[l], %[a]  \n\t"            \
3530
        "sltu $12, %[l], %[a]   \n\t"            \
3531
        "daddu  %[h], %[h], $12   \n\t"            \
3532
        "sltu $12, %[h], $12    \n\t"            \
3533
        "daddu  %[o], %[o], $12   \n\t"            \
3534
        "daddu  %[h], %[h], %[b]  \n\t"            \
3535
        "sltu $12, %[h], %[b]   \n\t"            \
3536
        "daddu  %[o], %[o], %[c]  \n\t"            \
3537
        "daddu  %[o], %[o], $12   \n\t"            \
3538
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3539
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3540
        : "$12"                                          \
3541
    )
3542
3543
/* Marks that the MIPS 64-bit set of SP_ASM_* macros above was defined. */
#define SP_INT_ASM_AVAILABLE
3544
3545
    #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
3546
3547
    #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
3548
/*
3549
 * CPU: MIPS 32-bit
3550
 */
3551
3552
/* Multiply va by vb and store double size result in: vh | vl
 *
 * MIPS 32-bit version. multu performs the multiply; mflo and mfhi then copy
 * the low and high result words into vl and vh. vl and vh are declared as
 * read-write ("+r") operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3553
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3554
    __asm__ __volatile__ (                               \
3555
        "multu  %[a], %[b]    \n\t"            \
3556
        "mflo %[l]      \n\t"            \
3557
        "mfhi %[h]      \n\t"            \
3558
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3559
        : [a] "r" (va), [b] "r" (vb)                     \
3560
        : "memory", "%lo", "%hi"                         \
3561
    )
3562
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * MIPS 32-bit version. The product is copied out with mflo/mfhi into vl and
 * vh, and vo is set to zero by moving register $0 into it. vo is a
 * write-only ("=r") operand.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [out]  vo  Top word; always set to zero.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3563
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3564
    __asm__ __volatile__ (                               \
3565
        "multu  %[a], %[b]    \n\t"            \
3566
        "mflo %[l]      \n\t"            \
3567
        "mfhi %[h]      \n\t"            \
3568
        "move %[o], $0    \n\t"            \
3569
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3570
        : [a] "r" (va), [b] "r" (vb)                     \
3571
        : "%lo", "%hi"                                   \
3572
    )
3573
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * MIPS 32-bit version. The product is held in scratch registers $10 (low
 * word) and $11 (high word). After each addition, sltu sets $12 to 1 when the
 * sum is less than the value just added (the addition wrapped) and $12 is
 * added into the next word up. $10, $11 and $12 are listed as clobbered.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3574
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3575
    __asm__ __volatile__ (                               \
3576
        "multu  %[a], %[b]    \n\t"            \
3577
        "mflo $10     \n\t"            \
3578
        "mfhi $11     \n\t"            \
3579
        "addu %[l], %[l], $10   \n\t"            \
3580
        "sltu $12, %[l], $10    \n\t"            \
3581
        "addu %[h], %[h], $12   \n\t"            \
3582
        "sltu $12, %[h], $12    \n\t"            \
3583
        "addu %[o], %[o], $12   \n\t"            \
3584
        "addu %[h], %[h], $11   \n\t"            \
3585
        "sltu $12, %[h], $11    \n\t"            \
3586
        "addu %[o], %[o], $12   \n\t"            \
3587
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3588
        : [a] "r" (va), [b] "r" (vb)                     \
3589
        : "$10", "$11", "$12", "%lo", "%hi"              \
3590
    )
3591
/* Multiply va by vb and add double size result into: vh | vl
 *
 * MIPS 32-bit version. The product is held in scratch registers $10 (low
 * word) and $11 (high word). The carry out of vl is computed with sltu into
 * $12 and added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3592
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3593
    __asm__ __volatile__ (                               \
3594
        "multu  %[a], %[b]    \n\t"            \
3595
        "mflo $10     \n\t"            \
3596
        "mfhi $11     \n\t"            \
3597
        "addu %[l], %[l], $10   \n\t"            \
3598
        "sltu $12, %[l], $10    \n\t"            \
3599
        "addu %[h], %[h], $11   \n\t"            \
3600
        "addu %[h], %[h], $12   \n\t"            \
3601
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3602
        : [a] "r" (va), [b] "r" (vb)                     \
3603
        : "$10", "$11", "$12", "%lo", "%hi"              \
3604
    )
3605
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * MIPS 32-bit version. The product is computed once into scratch registers
 * $10 (low word) and $11 (high word); the three-word add-with-carry sequence
 * is then executed twice using that same product. Carries are detected with
 * sltu into $12 (1 when a sum is less than the value just added).
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3606
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3607
    __asm__ __volatile__ (                               \
3608
        "multu  %[a], %[b]    \n\t"            \
3609
        "mflo $10     \n\t"            \
3610
        "mfhi $11     \n\t"            \
3611
        "addu %[l], %[l], $10   \n\t"            \
3612
        "sltu $12, %[l], $10    \n\t"            \
3613
        "addu %[h], %[h], $12   \n\t"            \
3614
        "sltu $12, %[h], $12    \n\t"            \
3615
        "addu %[o], %[o], $12   \n\t"            \
3616
        "addu %[h], %[h], $11   \n\t"            \
3617
        "sltu $12, %[h], $11    \n\t"            \
3618
        "addu %[o], %[o], $12   \n\t"            \
3619
        "addu %[l], %[l], $10   \n\t"            \
3620
        "sltu $12, %[l], $10    \n\t"            \
3621
        "addu %[h], %[h], $12   \n\t"            \
3622
        "sltu $12, %[h], $12    \n\t"            \
3623
        "addu %[o], %[o], $12   \n\t"            \
3624
        "addu %[h], %[h], $11   \n\t"            \
3625
        "sltu $12, %[h], $11    \n\t"            \
3626
        "addu %[o], %[o], $12   \n\t"            \
3627
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3628
        : [a] "r" (va), [b] "r" (vb)                     \
3629
        : "$10", "$11", "$12", "%lo", "%hi"              \
3630
    )
3631
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * MIPS 32-bit version. The product is computed once into scratch registers
 * $10 (low word) and $11 (high word). The first addition only carries from vl
 * into vh; the second addition also carries out of vh into vo. Carries are
 * detected with sltu into $12.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3634
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3635
    __asm__ __volatile__ (                               \
3636
        "multu  %[a], %[b]    \n\t"            \
3637
        "mflo $10     \n\t"            \
3638
        "mfhi $11     \n\t"            \
3639
        "addu %[l], %[l], $10   \n\t"            \
3640
        "sltu $12, %[l], $10    \n\t"            \
3641
        "addu %[h], %[h], $11   \n\t"            \
3642
        "addu %[h], %[h], $12   \n\t"            \
3643
        "addu %[l], %[l], $10   \n\t"            \
3644
        "sltu $12, %[l], $10    \n\t"            \
3645
        "addu %[h], %[h], $12   \n\t"            \
3646
        "sltu $12, %[h], $12    \n\t"            \
3647
        "addu %[o], %[o], $12   \n\t"            \
3648
        "addu %[h], %[h], $11   \n\t"            \
3649
        "sltu $12, %[h], $11    \n\t"            \
3650
        "addu %[o], %[o], $12   \n\t"            \
3651
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3652
        : [a] "r" (va), [b] "r" (vb)                     \
3653
        : "$10", "$11", "$12", "%lo", "%hi"              \
3654
    )
3655
/* Square va and store double size result in: vh | vl
 *
 * MIPS 32-bit version. multu multiplies va by itself; mflo and mfhi copy the
 * low and high result words into vl and vh. vl and vh are declared as
 * read-write ("+r") operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the square.
 * @param  [out]  vh  High word of the square.
 * @param  [in]   va  Word to square.
 */
3656
#define SP_ASM_SQR(vl, vh, va)                           \
3657
    __asm__ __volatile__ (                               \
3658
        "multu  %[a], %[a]    \n\t"            \
3659
        "mflo %[l]      \n\t"            \
3660
        "mfhi %[h]      \n\t"            \
3661
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3662
        : [a] "r" (va)                                   \
3663
        : "memory", "%lo", "%hi"                         \
3664
    )
3665
/* Square va and add double size result into: vo | vh | vl
 *
 * MIPS 32-bit version. The square is held in scratch registers $10 (low word)
 * and $11 (high word). After each addition, sltu sets $12 to 1 when the sum is
 * less than the value just added, and $12 is added into the next word up.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3666
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3667
    __asm__ __volatile__ (                               \
3668
        "multu  %[a], %[a]    \n\t"            \
3669
        "mflo $10     \n\t"            \
3670
        "mfhi $11     \n\t"            \
3671
        "addu %[l], %[l], $10   \n\t"            \
3672
        "sltu $12, %[l], $10    \n\t"            \
3673
        "addu %[h], %[h], $12   \n\t"            \
3674
        "sltu $12, %[h], $12    \n\t"            \
3675
        "addu %[o], %[o], $12   \n\t"            \
3676
        "addu %[h], %[h], $11   \n\t"            \
3677
        "sltu $12, %[h], $11    \n\t"            \
3678
        "addu %[o], %[o], $12   \n\t"            \
3679
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3680
        : [a] "r" (va)                                   \
3681
        : "$10", "$11", "$12", "%lo", "%hi"              \
3682
    )
3683
/* Square va and add double size result into: vh | vl
 *
 * MIPS 32-bit version. The square is held in scratch registers $10 (low word)
 * and $11 (high word). The carry out of vl is computed with sltu into $12 and
 * added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3684
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3685
    __asm__ __volatile__ (                               \
3686
        "multu  %[a], %[a]    \n\t"            \
3687
        "mflo $10     \n\t"            \
3688
        "mfhi $11     \n\t"            \
3689
        "addu %[l], %[l], $10   \n\t"            \
3690
        "sltu $12, %[l], $10    \n\t"            \
3691
        "addu %[h], %[h], $11   \n\t"            \
3692
        "addu %[h], %[h], $12   \n\t"            \
3693
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3694
        : [a] "r" (va)                                   \
3695
        : "$10", "$11", "$12", "%lo", "%hi"              \
3696
    )
3697
/* Add va into: vh | vl
 *
 * MIPS 32-bit version. va is added to vl; sltu sets scratch register $12 to 1
 * when the new vl is less than va (the addition wrapped) and $12 is added to
 * vh. No carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to add.
 */
3698
#define SP_ASM_ADDC(vl, vh, va)                          \
3699
    __asm__ __volatile__ (                               \
3700
        "addu %[l], %[l], %[a]  \n\t"            \
3701
        "sltu $12, %[l], %[a]   \n\t"            \
3702
        "addu %[h], %[h], $12   \n\t"            \
3703
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3704
        : [a] "r" (va)                                   \
3705
        : "$12"                                          \
3706
    )
3707
/* Sub va from: vh | vl
 *
 * MIPS 32-bit version. The original vl is saved in scratch register $12, va
 * is subtracted to give the new vl, and sltu sets $12 to 1 when the original
 * vl is less than the new vl (the subtraction wrapped). That borrow is then
 * subtracted from vh.
 *
 * @param  [in,out]  vl  Low word of the value being reduced.
 * @param  [in,out]  vh  High word of the value being reduced.
 * @param  [in]      va  Word to subtract.
 */
3708
#define SP_ASM_SUBC(vl, vh, va)                          \
3709
    __asm__ __volatile__ (                               \
3710
        "move $12, %[l]   \n\t"            \
3711
        "subu %[l], $12, %[a]   \n\t"            \
3712
        "sltu $12, $12, %[l]    \n\t"            \
3713
        "subu %[h], %[h], $12   \n\t"            \
3714
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3715
        : [a] "r" (va)                                   \
3716
        : "$12"                                          \
3717
    )
3718
/* Add two times vc | vb | va into vo | vh | vl
 *
 * MIPS 32-bit version. The following sequence is executed twice with the
 * same inputs: va is added to vl and the carry is added to vh; the carry out
 * of that is added to vo; vb is added to vh; vc and the carry from adding vb
 * are added to vo. Carries are detected with sltu into scratch register $12.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Low word of the value to add twice.
 * @param  [in]      vb  Middle word of the value to add twice.
 * @param  [in]      vc  Top word of the value to add twice.
 */
3719
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3720
    __asm__ __volatile__ (                               \
3721
        "addu %[l], %[l], %[a]  \n\t"            \
3722
        "sltu $12, %[l], %[a]   \n\t"            \
3723
        "addu %[h], %[h], $12   \n\t"            \
3724
        "sltu $12, %[h], $12    \n\t"            \
3725
        "addu %[o], %[o], $12   \n\t"            \
3726
        "addu %[h], %[h], %[b]  \n\t"            \
3727
        "sltu $12, %[h], %[b]   \n\t"            \
3728
        "addu %[o], %[o], %[c]  \n\t"            \
3729
        "addu %[o], %[o], $12   \n\t"            \
3730
        "addu %[l], %[l], %[a]  \n\t"            \
3731
        "sltu $12, %[l], %[a]   \n\t"            \
3732
        "addu %[h], %[h], $12   \n\t"            \
3733
        "sltu $12, %[h], $12    \n\t"            \
3734
        "addu %[o], %[o], $12   \n\t"            \
3735
        "addu %[h], %[h], %[b]  \n\t"            \
3736
        "sltu $12, %[h], %[b]   \n\t"            \
3737
        "addu %[o], %[o], %[c]  \n\t"            \
3738
        "addu %[o], %[o], $12   \n\t"            \
3739
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3740
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3741
        : "$12"                                          \
3742
    )
3743
3744
/* Marks that the MIPS 32-bit set of SP_ASM_* macros above was defined. */
#define SP_INT_ASM_AVAILABLE
3745
3746
    #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
3747
3748
    #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
3749
/*
3750
 * CPU: RISCV 64-bit
3751
 */
3752
3753
/* Multiply va by vb and store double size result in: vh | vl
 *
 * RISC-V 64-bit version. mul writes the low word of the product to vl and
 * mulhu writes the high word to vh. vl and vh are declared as read-write
 * ("+r") operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3754
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3755
    __asm__ __volatile__ (                               \
3756
        "mul  %[l], %[a], %[b]  \n\t"            \
3757
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3758
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3759
        : [a] "r" (va), [b] "r" (vb)                     \
3760
        : "memory"                                       \
3761
    )
3762
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * RISC-V 64-bit version. mulhu writes the high word to vh, mul writes the low
 * word to vl, and vo is set to zero by adding the zero register to itself.
 * vo is a write-only ("=r") operand.
 *
 * @param  [out]  vl  Low word of the product.
 * @param  [out]  vh  High word of the product.
 * @param  [out]  vo  Top word; always set to zero.
 * @param  [in]   va  First multiplicand.
 * @param  [in]   vb  Second multiplicand.
 */
3763
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3764
    __asm__ __volatile__ (                               \
3765
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3766
        "mul  %[l], %[a], %[b]  \n\t"            \
3767
        "add  %[o], zero, zero  \n\t"            \
3768
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3769
        : [a] "r" (va), [b] "r" (vb)                     \
3770
        :                                                \
3771
    )
3772
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * RISC-V 64-bit version. The product is held in scratch registers a5 (low
 * word) and a6 (high word). After each addition, sltu sets a7 to 1 when the
 * sum is less than the value just added (the addition wrapped) and a7 is
 * added into the next word up. a5, a6 and a7 are listed as clobbered.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3773
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3774
    __asm__ __volatile__ (                               \
3775
        "mul  a5, %[a], %[b]    \n\t"            \
3776
        "mulhu  a6, %[a], %[b]    \n\t"            \
3777
        "add  %[l], %[l], a5    \n\t"            \
3778
        "sltu a7, %[l], a5    \n\t"            \
3779
        "add  %[h], %[h], a7    \n\t"            \
3780
        "sltu a7, %[h], a7    \n\t"            \
3781
        "add  %[o], %[o], a7    \n\t"            \
3782
        "add  %[h], %[h], a6    \n\t"            \
3783
        "sltu a7, %[h], a6    \n\t"            \
3784
        "add  %[o], %[o], a7    \n\t"            \
3785
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3786
        : [a] "r" (va), [b] "r" (vb)                     \
3787
        : "a5", "a6", "a7"                               \
3788
    )
3789
/* Multiply va by vb and add double size result into: vh | vl
 *
 * RISC-V 64-bit version. The product is held in scratch registers a5 (low
 * word) and a6 (high word). The carry out of vl is computed with sltu into a7
 * and added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3790
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3791
    __asm__ __volatile__ (                               \
3792
        "mul  a5, %[a], %[b]    \n\t"            \
3793
        "mulhu  a6, %[a], %[b]    \n\t"            \
3794
        "add  %[l], %[l], a5    \n\t"            \
3795
        "sltu a7, %[l], a5    \n\t"            \
3796
        "add  %[h], %[h], a6    \n\t"            \
3797
        "add  %[h], %[h], a7    \n\t"            \
3798
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3799
        : [a] "r" (va), [b] "r" (vb)                     \
3800
        : "a5", "a6", "a7"                               \
3801
    )
3802
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * RISC-V 64-bit version. The product is computed once into scratch registers
 * a5 (low word) and a6 (high word); the three-word add-with-carry sequence is
 * then executed twice using that same product. Carries are detected with sltu
 * into a7 (1 when a sum is less than the value just added).
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3803
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3804
    __asm__ __volatile__ (                               \
3805
        "mul  a5, %[a], %[b]    \n\t"            \
3806
        "mulhu  a6, %[a], %[b]    \n\t"            \
3807
        "add  %[l], %[l], a5    \n\t"            \
3808
        "sltu a7, %[l], a5    \n\t"            \
3809
        "add  %[h], %[h], a7    \n\t"            \
3810
        "sltu a7, %[h], a7    \n\t"            \
3811
        "add  %[o], %[o], a7    \n\t"            \
3812
        "add  %[h], %[h], a6    \n\t"            \
3813
        "sltu a7, %[h], a6    \n\t"            \
3814
        "add  %[o], %[o], a7    \n\t"            \
3815
        "add  %[l], %[l], a5    \n\t"            \
3816
        "sltu a7, %[l], a5    \n\t"            \
3817
        "add  %[h], %[h], a7    \n\t"            \
3818
        "sltu a7, %[h], a7    \n\t"            \
3819
        "add  %[o], %[o], a7    \n\t"            \
3820
        "add  %[h], %[h], a6    \n\t"            \
3821
        "sltu a7, %[h], a6    \n\t"            \
3822
        "add  %[o], %[o], a7    \n\t"            \
3823
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3824
        : [a] "r" (va), [b] "r" (vb)                     \
3825
        : "a5", "a6", "a7"                               \
3826
    )
3827
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * RISC-V 64-bit version. The product is computed once into scratch registers
 * a5 (low word) and a6 (high word). The first addition only carries from vl
 * into vh; the second addition also carries out of vh into vo. Carries are
 * detected with sltu into a7.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  First multiplicand.
 * @param  [in]      vb  Second multiplicand.
 */
3830
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3831
    __asm__ __volatile__ (                               \
3832
        "mul  a5, %[a], %[b]    \n\t"            \
3833
        "mulhu  a6, %[a], %[b]    \n\t"            \
3834
        "add  %[l], %[l], a5    \n\t"            \
3835
        "sltu a7, %[l], a5    \n\t"            \
3836
        "add  %[h], %[h], a6    \n\t"            \
3837
        "add  %[h], %[h], a7    \n\t"            \
3838
        "add  %[l], %[l], a5    \n\t"            \
3839
        "sltu a7, %[l], a5    \n\t"            \
3840
        "add  %[h], %[h], a7    \n\t"            \
3841
        "sltu a7, %[h], a7    \n\t"            \
3842
        "add  %[o], %[o], a7    \n\t"            \
3843
        "add  %[h], %[h], a6    \n\t"            \
3844
        "sltu a7, %[h], a6    \n\t"            \
3845
        "add  %[o], %[o], a7    \n\t"            \
3846
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3847
        : [a] "r" (va), [b] "r" (vb)                     \
3848
        : "a5", "a6", "a7"                               \
3849
    )
3850
/* Square va and store double size result in: vh | vl
 *
 * RISC-V 64-bit version. mul writes the low word of va * va to vl and mulhu
 * writes the high word to vh. vl and vh are declared as read-write ("+r")
 * operands although the instructions only write them.
 *
 * @param  [out]  vl  Low word of the square.
 * @param  [out]  vh  High word of the square.
 * @param  [in]   va  Word to square.
 */
3851
#define SP_ASM_SQR(vl, vh, va)                           \
3852
    __asm__ __volatile__ (                               \
3853
        "mul  %[l], %[a], %[a]  \n\t"            \
3854
        "mulhu  %[h], %[a], %[a]  \n\t"            \
3855
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3856
        : [a] "r" (va)                                   \
3857
        : "memory"                                       \
3858
    )
3859
/* Square va and add double size result into: vo | vh | vl
 *
 * RISC-V 64-bit version. The square is held in scratch registers a5 (low
 * word) and a6 (high word). After each addition, sltu sets a7 to 1 when the
 * sum is less than the value just added, and a7 is added into the next word
 * up.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3860
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3861
    __asm__ __volatile__ (                               \
3862
        "mul  a5, %[a], %[a]    \n\t"            \
3863
        "mulhu  a6, %[a], %[a]    \n\t"            \
3864
        "add  %[l], %[l], a5    \n\t"            \
3865
        "sltu a7, %[l], a5    \n\t"            \
3866
        "add  %[h], %[h], a7    \n\t"            \
3867
        "sltu a7, %[h], a7    \n\t"            \
3868
        "add  %[o], %[o], a7    \n\t"            \
3869
        "add  %[h], %[h], a6    \n\t"            \
3870
        "sltu a7, %[h], a6    \n\t"            \
3871
        "add  %[o], %[o], a7    \n\t"            \
3872
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3873
        : [a] "r" (va)                                   \
3874
        : "a5", "a6", "a7"                               \
3875
    )
3876
/* Square va and add double size result into: vh | vl
 *
 * RISC-V 64-bit version. The square is held in scratch registers a5 (low
 * word) and a6 (high word). The carry out of vl is computed with sltu into a7
 * and added to vh; no carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to square.
 */
3877
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
3878
    __asm__ __volatile__ (                               \
3879
        "mul  a5, %[a], %[a]    \n\t"            \
3880
        "mulhu  a6, %[a], %[a]    \n\t"            \
3881
        "add  %[l], %[l], a5    \n\t"            \
3882
        "sltu a7, %[l], a5    \n\t"            \
3883
        "add  %[h], %[h], a6    \n\t"            \
3884
        "add  %[h], %[h], a7    \n\t"            \
3885
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3886
        : [a] "r" (va)                                   \
3887
        : "a5", "a6", "a7"                               \
3888
    )
3889
/* Add va into: vh | vl
 *
 * RISC-V 64-bit version. va is added to vl; sltu sets scratch register a7 to
 * 1 when the new vl is less than va (the addition wrapped) and a7 is added to
 * vh. No carry out of vh is computed.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  High word of the accumulator.
 * @param  [in]      va  Word to add.
 */
3890
#define SP_ASM_ADDC(vl, vh, va)                          \
3891
    __asm__ __volatile__ (                               \
3892
        "add  %[l], %[l], %[a]  \n\t"            \
3893
        "sltu a7, %[l], %[a]    \n\t"            \
3894
        "add  %[h], %[h], a7    \n\t"            \
3895
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3896
        : [a] "r" (va)                                   \
3897
        : "a7"                                           \
3898
    )
3899
/* Sub va from: vh | vl
 *
 * RISC-V 64-bit version. The original vl is copied to scratch register a7, va
 * is subtracted to give the new vl, and sltu sets a7 to 1 when the original
 * vl is less than the new vl (the subtraction wrapped). That borrow is then
 * subtracted from vh.
 *
 * @param  [in,out]  vl  Low word of the value being reduced.
 * @param  [in,out]  vh  High word of the value being reduced.
 * @param  [in]      va  Word to subtract.
 */
3900
#define SP_ASM_SUBC(vl, vh, va)                          \
3901
    __asm__ __volatile__ (                               \
3902
        "add  a7, %[l], zero    \n\t"            \
3903
        "sub  %[l], a7, %[a]    \n\t"            \
3904
        "sltu a7, a7, %[l]    \n\t"            \
3905
        "sub  %[h], %[h], a7    \n\t"            \
3906
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3907
        : [a] "r" (va)                                   \
3908
        : "a7"                                           \
3909
    )
3910
/* Add two times vc | vb | va into vo | vh | vl
 *
 * RISC-V 64-bit version. The following sequence is executed twice with the
 * same inputs: va is added to vl and the carry is added to vh; the carry out
 * of that is added to vo; vb is added to vh; vc and the carry from adding vb
 * are added to vo. Carries are detected with sltu into scratch register a7.
 *
 * @param  [in,out]  vl  Low word of the accumulator.
 * @param  [in,out]  vh  Middle word of the accumulator.
 * @param  [in,out]  vo  Top word of the accumulator.
 * @param  [in]      va  Low word of the value to add twice.
 * @param  [in]      vb  Middle word of the value to add twice.
 * @param  [in]      vc  Top word of the value to add twice.
 */
3911
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
3912
    __asm__ __volatile__ (                               \
3913
        "add  %[l], %[l], %[a]  \n\t"            \
3914
        "sltu a7, %[l], %[a]    \n\t"            \
3915
        "add  %[h], %[h], a7    \n\t"            \
3916
        "sltu a7, %[h], a7    \n\t"            \
3917
        "add  %[o], %[o], a7    \n\t"            \
3918
        "add  %[h], %[h], %[b]  \n\t"            \
3919
        "sltu a7, %[h], %[b]    \n\t"            \
3920
        "add  %[o], %[o], %[c]  \n\t"            \
3921
        "add  %[o], %[o], a7    \n\t"            \
3922
        "add  %[l], %[l], %[a]  \n\t"            \
3923
        "sltu a7, %[l], %[a]    \n\t"            \
3924
        "add  %[h], %[h], a7    \n\t"            \
3925
        "sltu a7, %[h], a7    \n\t"            \
3926
        "add  %[o], %[o], a7    \n\t"            \
3927
        "add  %[h], %[h], %[b]  \n\t"            \
3928
        "sltu a7, %[h], %[b]    \n\t"            \
3929
        "add  %[o], %[o], %[c]  \n\t"            \
3930
        "add  %[o], %[o], a7    \n\t"            \
3931
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3932
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
3933
        : "a7"                                           \
3934
    )
3935
3936
#define SP_INT_ASM_AVAILABLE
3937
3938
    #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
3939
3940
    #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
3941
/*
3942
 * CPU: RISCV 32-bit
3943
 */
3944
3945
/* Multiply va by vb and store double size result in: vh | vl */
3946
#define SP_ASM_MUL(vl, vh, va, vb)                       \
3947
    __asm__ __volatile__ (                               \
3948
        "mul  %[l], %[a], %[b]  \n\t"            \
3949
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3950
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3951
        : [a] "r" (va), [b] "r" (vb)                     \
3952
        : "memory"                                       \
3953
    )
3954
/* Multiply va by vb and store double size result in: vo | vh | vl */
3955
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
3956
    __asm__ __volatile__ (                               \
3957
        "mulhu  %[h], %[a], %[b]  \n\t"            \
3958
        "mul  %[l], %[a], %[b]  \n\t"            \
3959
        "add  %[o], zero, zero  \n\t"            \
3960
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
3961
        : [a] "r" (va), [b] "r" (vb)                     \
3962
        :                                                \
3963
    )
3964
/* Multiply va by vb and add double size result into: vo | vh | vl */
3965
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
3966
    __asm__ __volatile__ (                               \
3967
        "mul  a5, %[a], %[b]    \n\t"            \
3968
        "mulhu  a6, %[a], %[b]    \n\t"            \
3969
        "add  %[l], %[l], a5    \n\t"            \
3970
        "sltu a7, %[l], a5    \n\t"            \
3971
        "add  %[h], %[h], a7    \n\t"            \
3972
        "sltu a7, %[h], a7    \n\t"            \
3973
        "add  %[o], %[o], a7    \n\t"            \
3974
        "add  %[h], %[h], a6    \n\t"            \
3975
        "sltu a7, %[h], a6    \n\t"            \
3976
        "add  %[o], %[o], a7    \n\t"            \
3977
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3978
        : [a] "r" (va), [b] "r" (vb)                     \
3979
        : "a5", "a6", "a7"                               \
3980
    )
3981
/* Multiply va by vb and add double size result into: vh | vl */
3982
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
3983
    __asm__ __volatile__ (                               \
3984
        "mul  a5, %[a], %[b]    \n\t"            \
3985
        "mulhu  a6, %[a], %[b]    \n\t"            \
3986
        "add  %[l], %[l], a5    \n\t"            \
3987
        "sltu a7, %[l], a5    \n\t"            \
3988
        "add  %[h], %[h], a6    \n\t"            \
3989
        "add  %[h], %[h], a7    \n\t"            \
3990
        : [l] "+r" (vl), [h] "+r" (vh)                   \
3991
        : [a] "r" (va), [b] "r" (vb)                     \
3992
        : "a5", "a6", "a7"                               \
3993
    )
3994
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
3995
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3996
    __asm__ __volatile__ (                               \
3997
        "mul  a5, %[a], %[b]    \n\t"            \
3998
        "mulhu  a6, %[a], %[b]    \n\t"            \
3999
        "add  %[l], %[l], a5    \n\t"            \
4000
        "sltu a7, %[l], a5    \n\t"            \
4001
        "add  %[h], %[h], a7    \n\t"            \
4002
        "sltu a7, %[h], a7    \n\t"            \
4003
        "add  %[o], %[o], a7    \n\t"            \
4004
        "add  %[h], %[h], a6    \n\t"            \
4005
        "sltu a7, %[h], a6    \n\t"            \
4006
        "add  %[o], %[o], a7    \n\t"            \
4007
        "add  %[l], %[l], a5    \n\t"            \
4008
        "sltu a7, %[l], a5    \n\t"            \
4009
        "add  %[h], %[h], a7    \n\t"            \
4010
        "sltu a7, %[h], a7    \n\t"            \
4011
        "add  %[o], %[o], a7    \n\t"            \
4012
        "add  %[h], %[h], a6    \n\t"            \
4013
        "sltu a7, %[h], a6    \n\t"            \
4014
        "add  %[o], %[o], a7    \n\t"            \
4015
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4016
        : [a] "r" (va), [b] "r" (vb)                     \
4017
        : "a5", "a6", "a7"                               \
4018
    )
4019
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4020
 * Assumes first add will not overflow vh | vl
4021
 */
4022
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4023
    __asm__ __volatile__ (                               \
4024
        "mul  a5, %[a], %[b]    \n\t"            \
4025
        "mulhu  a6, %[a], %[b]    \n\t"            \
4026
        "add  %[l], %[l], a5    \n\t"            \
4027
        "sltu a7, %[l], a5    \n\t"            \
4028
        "add  %[h], %[h], a6    \n\t"            \
4029
        "add  %[h], %[h], a7    \n\t"            \
4030
        "add  %[l], %[l], a5    \n\t"            \
4031
        "sltu a7, %[l], a5    \n\t"            \
4032
        "add  %[h], %[h], a7    \n\t"            \
4033
        "sltu a7, %[h], a7    \n\t"            \
4034
        "add  %[o], %[o], a7    \n\t"            \
4035
        "add  %[h], %[h], a6    \n\t"            \
4036
        "sltu a7, %[h], a6    \n\t"            \
4037
        "add  %[o], %[o], a7    \n\t"            \
4038
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4039
        : [a] "r" (va), [b] "r" (vb)                     \
4040
        : "a5", "a6", "a7"                               \
4041
    )
4042
/* Square va and store double size result in: vh | vl */
4043
#define SP_ASM_SQR(vl, vh, va)                           \
4044
    __asm__ __volatile__ (                               \
4045
        "mul  %[l], %[a], %[a]  \n\t"            \
4046
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4047
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4048
        : [a] "r" (va)                                   \
4049
        : "memory"                                       \
4050
    )
4051
/* Square va and add double size result into: vo | vh | vl */
4052
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4053
    __asm__ __volatile__ (                               \
4054
        "mul  a5, %[a], %[a]    \n\t"            \
4055
        "mulhu  a6, %[a], %[a]    \n\t"            \
4056
        "add  %[l], %[l], a5    \n\t"            \
4057
        "sltu a7, %[l], a5    \n\t"            \
4058
        "add  %[h], %[h], a7    \n\t"            \
4059
        "sltu a7, %[h], a7    \n\t"            \
4060
        "add  %[o], %[o], a7    \n\t"            \
4061
        "add  %[h], %[h], a6    \n\t"            \
4062
        "sltu a7, %[h], a6    \n\t"            \
4063
        "add  %[o], %[o], a7    \n\t"            \
4064
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4065
        : [a] "r" (va)                                   \
4066
        : "a5", "a6", "a7"                               \
4067
    )
4068
/* Square va and add double size result into: vh | vl */
4069
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4070
    __asm__ __volatile__ (                               \
4071
        "mul  a5, %[a], %[a]    \n\t"            \
4072
        "mulhu  a6, %[a], %[a]    \n\t"            \
4073
        "add  %[l], %[l], a5    \n\t"            \
4074
        "sltu a7, %[l], a5    \n\t"            \
4075
        "add  %[h], %[h], a6    \n\t"            \
4076
        "add  %[h], %[h], a7    \n\t"            \
4077
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4078
        : [a] "r" (va)                                   \
4079
        : "a5", "a6", "a7"                               \
4080
    )
4081
/* Add va into: vh | vl */
4082
#define SP_ASM_ADDC(vl, vh, va)                          \
4083
    __asm__ __volatile__ (                               \
4084
        "add  %[l], %[l], %[a]  \n\t"            \
4085
        "sltu a7, %[l], %[a]    \n\t"            \
4086
        "add  %[h], %[h], a7    \n\t"            \
4087
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4088
        : [a] "r" (va)                                   \
4089
        : "a7"                                           \
4090
    )
4091
/* Subtract va from the double-word value vh | vl.
 *
 * a7 is scratch: it first holds a copy of the original vl, then the borrow.
 * The borrow is computed as (original vl < new vl), which is 1 exactly when
 * the unsigned subtraction wrapped, and is then subtracted from vh.
 */
4092
#define SP_ASM_SUBC(vl, vh, va)                          \
4093
    __asm__ __volatile__ (                               \
4094
        "add  a7, %[l], zero    \n\t"            \
4095
        "sub  %[l], a7, %[a]    \n\t"            \
4096
        "sltu a7, a7, %[l]    \n\t"            \
4097
        "sub  %[h], %[h], a7    \n\t"            \
4098
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4099
        : [a] "r" (va)                                   \
4100
        : "a7"                                           \
4101
    )
4102
/* Add two times vc | vb | va into vo | vh | vl */
4103
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4104
    __asm__ __volatile__ (                               \
4105
        "add  %[l], %[l], %[a]  \n\t"            \
4106
        "sltu a7, %[l], %[a]    \n\t"            \
4107
        "add  %[h], %[h], a7    \n\t"            \
4108
        "sltu a7, %[h], a7    \n\t"            \
4109
        "add  %[o], %[o], a7    \n\t"            \
4110
        "add  %[h], %[h], %[b]  \n\t"            \
4111
        "sltu a7, %[h], %[b]    \n\t"            \
4112
        "add  %[o], %[o], %[c]  \n\t"            \
4113
        "add  %[o], %[o], a7    \n\t"            \
4114
        "add  %[l], %[l], %[a]  \n\t"            \
4115
        "sltu a7, %[l], %[a]    \n\t"            \
4116
        "add  %[h], %[h], a7    \n\t"            \
4117
        "sltu a7, %[h], a7    \n\t"            \
4118
        "add  %[o], %[o], a7    \n\t"            \
4119
        "add  %[h], %[h], %[b]  \n\t"            \
4120
        "sltu a7, %[h], %[b]    \n\t"            \
4121
        "add  %[o], %[o], %[c]  \n\t"            \
4122
        "add  %[o], %[o], a7    \n\t"            \
4123
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4124
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4125
        : "a7"                                           \
4126
    )
4127
4128
#define SP_INT_ASM_AVAILABLE
4129
4130
    #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4131
4132
    #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4133
/*
4134
 * CPU: IBM s390x
4135
 */
4136
4137
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is loaded into r1 and multiplied by vb with mlgr on the r0/r1 pair;
 * afterwards the low word is taken from r1 and the high word from r0.
 * r0 and r1 are listed as clobbers so operands are not placed in them.
 */
4138
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4139
    __asm__ __volatile__ (                               \
4140
        "lgr  %%r1, %[a]    \n\t"            \
4141
        "mlgr %%r0, %[b]    \n\t"            \
4142
        "lgr  %[l], %%r1    \n\t"            \
4143
        "lgr  %[h], %%r0    \n\t"            \
4144
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4145
        : [a] "r" (va), [b] "r" (vb)                     \
4146
        : "memory", "r0", "r1"                           \
4147
    )
4148
/* Multiply va by vb and store double size result in: vo | vh | vl */
4149
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4150
    __asm__ __volatile__ (                               \
4151
        "lgr  %%r1, %[a]    \n\t"            \
4152
        "mlgr %%r0, %[b]    \n\t"            \
4153
        "lghi %[o], 0     \n\t"            \
4154
        "lgr  %[l], %%r1    \n\t"            \
4155
        "lgr  %[h], %%r0    \n\t"            \
4156
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4157
        : [a] "r" (va), [b] "r" (vb)                     \
4158
        : "r0", "r1"                                     \
4159
    )
4160
/* Multiply va by vb and add double size result into: vo | vh | vl */
4161
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4162
    __asm__ __volatile__ (                               \
4163
        "lghi %%r10, 0  \n\t"                    \
4164
        "lgr  %%r1, %[a]    \n\t"            \
4165
        "mlgr %%r0, %[b]    \n\t"            \
4166
        "algr %[l], %%r1  \n\t"                    \
4167
        "alcgr  %[h], %%r0  \n\t"                    \
4168
        "alcgr  %[o], %%r10 \n\t"                    \
4169
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4170
        : [a] "r" (va), [b] "r" (vb)                     \
4171
        : "r0", "r1", "r10", "cc"                        \
4172
    )
4173
/* Multiply va by vb and add double size result into: vh | vl */
4174
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4175
    __asm__ __volatile__ (                               \
4176
        "lgr  %%r1, %[a]    \n\t"            \
4177
        "mlgr %%r0, %[b]    \n\t"            \
4178
        "algr %[l], %%r1  \n\t"                    \
4179
        "alcgr  %[h], %%r0  \n\t"                    \
4180
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4181
        : [a] "r" (va), [b] "r" (vb)                     \
4182
        : "r0", "r1", "cc"                               \
4183
    )
4184
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
4185
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4186
    __asm__ __volatile__ (                               \
4187
        "lghi %%r10, 0  \n\t"                    \
4188
        "lgr  %%r1, %[a]    \n\t"            \
4189
        "mlgr %%r0, %[b]    \n\t"            \
4190
        "algr %[l], %%r1  \n\t"                    \
4191
        "alcgr  %[h], %%r0  \n\t"                    \
4192
        "alcgr  %[o], %%r10 \n\t"                    \
4193
        "algr %[l], %%r1  \n\t"                    \
4194
        "alcgr  %[h], %%r0  \n\t"                    \
4195
        "alcgr  %[o], %%r10 \n\t"                    \
4196
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4197
        : [a] "r" (va), [b] "r" (vb)                     \
4198
        : "r0", "r1", "r10", "cc"                        \
4199
    )
4200
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4201
 * Assumes first add will not overflow vh | vl
4202
 */
4203
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4204
    __asm__ __volatile__ (                               \
4205
        "lghi %%r10, 0  \n\t"                    \
4206
        "lgr  %%r1, %[a]    \n\t"            \
4207
        "mlgr %%r0, %[b]    \n\t"            \
4208
        "algr %[l], %%r1  \n\t"                    \
4209
        "alcgr  %[h], %%r0  \n\t"                    \
4210
        "algr %[l], %%r1  \n\t"                    \
4211
        "alcgr  %[h], %%r0  \n\t"                    \
4212
        "alcgr  %[o], %%r10 \n\t"                    \
4213
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4214
        : [a] "r" (va), [b] "r" (vb)                     \
4215
        : "r0", "r1", "r10", "cc"                        \
4216
    )
4217
/* Square va and store double size result in: vh | vl */
4218
#define SP_ASM_SQR(vl, vh, va)                           \
4219
    __asm__ __volatile__ (                               \
4220
        "lgr  %%r1, %[a]    \n\t"            \
4221
        "mlgr %%r0, %%r1    \n\t"            \
4222
        "lgr  %[l], %%r1    \n\t"            \
4223
        "lgr  %[h], %%r0    \n\t"            \
4224
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4225
        : [a] "r" (va)                                   \
4226
        : "memory", "r0", "r1"                           \
4227
    )
4228
/* Square va and add double size result into: vo | vh | vl */
4229
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4230
    __asm__ __volatile__ (                               \
4231
        "lghi %%r10, 0  \n\t"                    \
4232
        "lgr  %%r1, %[a]    \n\t"            \
4233
        "mlgr %%r0, %%r1    \n\t"            \
4234
        "algr %[l], %%r1  \n\t"                    \
4235
        "alcgr  %[h], %%r0  \n\t"                    \
4236
        "alcgr  %[o], %%r10 \n\t"                    \
4237
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4238
        : [a] "r" (va)                                   \
4239
        : "r0", "r1", "r10", "cc"                        \
4240
    )
4241
/* Square va and add double size result into: vh | vl */
4242
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4243
    __asm__ __volatile__ (                               \
4244
        "lgr  %%r1, %[a]    \n\t"            \
4245
        "mlgr %%r0, %%r1    \n\t"            \
4246
        "algr %[l], %%r1  \n\t"                    \
4247
        "alcgr  %[h], %%r0  \n\t"                    \
4248
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4249
        : [a] "r" (va)                                   \
4250
        : "r0", "r1", "cc"                               \
4251
    )
4252
/* Add va into: vh | vl */
4253
#define SP_ASM_ADDC(vl, vh, va)                          \
4254
    __asm__ __volatile__ (                               \
4255
        "lghi %%r10, 0  \n\t"                    \
4256
        "algr %[l], %[a]  \n\t"                    \
4257
        "alcgr  %[h], %%r10 \n\t"                    \
4258
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4259
        : [a] "r" (va)                                   \
4260
        : "r10", "cc"                                    \
4261
    )
4262
/* Sub va from: vh | vl */
4263
#define SP_ASM_SUBC(vl, vh, va)                          \
4264
    __asm__ __volatile__ (                               \
4265
        "lghi %%r10, 0  \n\t"                    \
4266
        "slgr %[l], %[a]  \n\t"                    \
4267
        "slbgr  %[h], %%r10 \n\t"                    \
4268
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4269
        : [a] "r" (va)                                   \
4270
        : "r10", "cc"                                    \
4271
    )
4272
/* Add two times vc | vb | va into vo | vh | vl */
4273
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4274
    __asm__ __volatile__ (                               \
4275
        "algr %[l], %[a]  \n\t"                    \
4276
        "alcgr  %[h], %[b]  \n\t"                    \
4277
        "alcgr  %[o], %[c]  \n\t"                    \
4278
        "algr %[l], %[a]  \n\t"                    \
4279
        "alcgr  %[h], %[b]  \n\t"                    \
4280
        "alcgr  %[o], %[c]  \n\t"                    \
4281
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4282
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4283
        : "cc"                                           \
4284
    )
4285
4286
#define SP_INT_ASM_AVAILABLE
4287
4288
    #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
4289
4290
#ifdef SP_INT_ASM_AVAILABLE
4291
    #ifndef SP_INT_NO_ASM
4292
        #define SQR_MUL_ASM
4293
    #endif
4294
    #ifndef SP_ASM_ADDC_REG
4295
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
4296
    #endif /* SP_ASM_ADDC_REG */
4297
#endif /* SP_INT_ASM_AVAILABLE */
4298
4299
#endif /* !WOLFSSL_NO_ASM */
4300
4301
4302
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
4303
    !defined(NO_DSA) || !defined(NO_DH) || \
4304
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
4305
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
4306
#ifndef WC_NO_CACHE_RESISTANT
4307
    /* Mask of address for constant time operations. */
4308
    const size_t sp_off_on_addr[2] =
4309
    {
4310
        (size_t) 0,
4311
        (size_t)-1
4312
    };
4313
#endif
4314
#endif
4315
4316
4317
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
4318
4319
#ifdef __cplusplus
4320
extern "C" {
4321
#endif
4322
4323
/* Modular exponentiation implementations using Single Precision. */
4324
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
4325
    sp_int* res);
4326
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
4327
    sp_int* res);
4328
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
4329
    sp_int* res);
4330
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
4331
    sp_int* res);
4332
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
4333
    sp_int* res);
4334
4335
#ifdef __cplusplus
4336
} /* extern "C" */
4337
#endif
4338
4339
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
4340
4341
4342
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
4343
static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp);
4344
#endif
4345
4346
/* Set the multi-precision number to zero.
4347
 *
4348
 * Assumes a is not NULL.
4349
 *
4350
 * @param  [out]  a  SP integer to set to zero.
4351
 */
4352
static void _sp_zero(sp_int* a)
4353
5.16M
{
4354
5.16M
    a->used = 0;
4355
5.16M
    a->dp[0] = 0;
4356
#ifdef WOLFSSL_SP_INT_NEGATIVE
4357
    a->sign = MP_ZPOS;
4358
#endif
4359
5.16M
}
4360
4361
/* Initialize the multi-precision number to be zero.
4362
 *
4363
 * @param  [out]  a  SP integer.
4364
 *
4365
 * @return  MP_OKAY on success.
4366
 * @return  MP_VAL when a is NULL.
4367
 */
4368
int sp_init(sp_int* a)
4369
3.45M
{
4370
3.45M
    int err = MP_OKAY;
4371
4372
3.45M
    if (a == NULL) {
4373
0
        err = MP_VAL;
4374
0
    }
4375
3.45M
    if (err == MP_OKAY) {
4376
    #ifdef HAVE_WOLF_BIGINT
4377
        wc_bigint_init(&a->raw);
4378
    #endif
4379
3.45M
        _sp_zero(a);
4380
3.45M
        a->size = SP_INT_DIGITS;
4381
3.45M
    }
4382
4383
3.45M
    return err;
4384
3.45M
}
4385
4386
/* Initialize the multi-precision number to be zero and have a maximum size.
4387
 *
4388
 * @param  [out]  a     SP integer.
4389
 * @param  [in]   size  Number of words to say are available.
4390
 *
4391
 * @return  MP_OKAY on success.
4392
 * @return  MP_VAL when a is NULL.
4393
 */
4394
int sp_init_size(sp_int* a, int size)
4395
3.03M
{
4396
3.03M
    int err = sp_init(a);
4397
4398
3.03M
    if (err == MP_OKAY) {
4399
3.03M
        a->size = size;
4400
3.03M
    }
4401
4402
3.03M
    return err;
4403
3.03M
}
4404
4405
#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
4406
/* Initialize up to six multi-precision numbers to be zero.
4407
 *
4408
 * @param  [out]  n1  SP integer.
4409
 * @param  [out]  n2  SP integer.
4410
 * @param  [out]  n3  SP integer.
4411
 * @param  [out]  n4  SP integer.
4412
 * @param  [out]  n5  SP integer.
4413
 * @param  [out]  n6  SP integer.
4414
 *
4415
 * @return  MP_OKAY on success.
4416
 */
4417
int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
4418
                  sp_int* n6)
4419
26.9k
{
4420
26.9k
    if (n1 != NULL) {
4421
    #ifdef HAVE_WOLF_BIGINT
4422
        wc_bigint_init(&n1->raw);
4423
    #endif
4424
26.9k
        _sp_zero(n1);
4425
26.9k
        n1->dp[0] = 0;
4426
26.9k
        n1->size = SP_INT_DIGITS;
4427
    #ifdef HAVE_WOLF_BIGINT
4428
        wc_bigint_init(&n1->raw);
4429
    #endif
4430
26.9k
    }
4431
26.9k
    if (n2 != NULL) {
4432
    #ifdef HAVE_WOLF_BIGINT
4433
        wc_bigint_init(&n2->raw);
4434
    #endif
4435
26.9k
        _sp_zero(n2);
4436
26.9k
        n2->dp[0] = 0;
4437
26.9k
        n2->size = SP_INT_DIGITS;
4438
    #ifdef HAVE_WOLF_BIGINT
4439
        wc_bigint_init(&n2->raw);
4440
    #endif
4441
26.9k
    }
4442
26.9k
    if (n3 != NULL) {
4443
    #ifdef HAVE_WOLF_BIGINT
4444
        wc_bigint_init(&n3->raw);
4445
    #endif
4446
24.2k
        _sp_zero(n3);
4447
24.2k
        n3->dp[0] = 0;
4448
24.2k
        n3->size = SP_INT_DIGITS;
4449
    #ifdef HAVE_WOLF_BIGINT
4450
        wc_bigint_init(&n3->raw);
4451
    #endif
4452
24.2k
    }
4453
26.9k
    if (n4 != NULL) {
4454
    #ifdef HAVE_WOLF_BIGINT
4455
        wc_bigint_init(&n4->raw);
4456
    #endif
4457
11.6k
        _sp_zero(n4);
4458
11.6k
        n4->dp[0] = 0;
4459
11.6k
        n4->size = SP_INT_DIGITS;
4460
    #ifdef HAVE_WOLF_BIGINT
4461
        wc_bigint_init(&n4->raw);
4462
    #endif
4463
11.6k
    }
4464
26.9k
    if (n5 != NULL) {
4465
    #ifdef HAVE_WOLF_BIGINT
4466
        wc_bigint_init(&n5->raw);
4467
    #endif
4468
0
        _sp_zero(n5);
4469
0
        n5->dp[0] = 0;
4470
0
        n5->size = SP_INT_DIGITS;
4471
    #ifdef HAVE_WOLF_BIGINT
4472
        wc_bigint_init(&n5->raw);
4473
    #endif
4474
0
    }
4475
26.9k
    if (n6 != NULL) {
4476
    #ifdef HAVE_WOLF_BIGINT
4477
        wc_bigint_init(&n6->raw);
4478
    #endif
4479
0
        _sp_zero(n6);
4480
0
        n6->dp[0] = 0;
4481
0
        n6->size = SP_INT_DIGITS;
4482
    #ifdef HAVE_WOLF_BIGINT
4483
        wc_bigint_init(&n6->raw);
4484
    #endif
4485
0
    }
4486
4487
26.9k
    return MP_OKAY;
4488
26.9k
}
4489
#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
4490
4491
/* Free the memory allocated in the multi-precision number.
4492
 *
4493
 * @param  [in]  a  SP integer.
4494
 */
4495
void sp_free(sp_int* a)
4496
1.49M
{
4497
1.49M
    if (a != NULL) {
4498
    #ifdef HAVE_WOLF_BIGINT
4499
        wc_bigint_free(&a->raw);
4500
    #endif
4501
1.49M
    }
4502
1.49M
}
4503
4504
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
4505
/* Grow multi-precision number to be able to hold l digits.
4506
 * This function does nothing as the number of digits is fixed.
4507
 *
4508
 * @param  [in,out]  a  SP integer.
4509
 * @param  [in]      l  Number of digits to grow to.
4510
 *
4511
 * @return  MP_OKAY on success
4512
 * @return  MP_MEM if the number of digits requested is more than available.
4513
 */
4514
int sp_grow(sp_int* a, int l)
4515
20.0k
{
4516
20.0k
    int err = MP_OKAY;
4517
4518
20.0k
    if (a == NULL) {
4519
0
        err = MP_VAL;
4520
0
    }
4521
20.0k
    if ((err == MP_OKAY) && (l > a->size)) {
4522
1
        err = MP_MEM;
4523
1
    }
4524
20.0k
    if (err == MP_OKAY) {
4525
20.0k
        int i;
4526
4527
142k
        for (i = a->used; i < l; i++) {
4528
122k
            a->dp[i] = 0;
4529
122k
        }
4530
20.0k
    }
4531
4532
20.0k
    return err;
4533
20.0k
}
4534
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
4535
4536
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(HAVE_ECC)
4537
/* Set the multi-precision number to zero.
4538
 *
4539
 * @param  [out]  a  SP integer to set to zero.
4540
 */
4541
void sp_zero(sp_int* a)
4542
547
{
4543
547
    if (a != NULL) {
4544
547
        _sp_zero(a);
4545
547
    }
4546
547
}
4547
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || HAVE_ECC */
4548
4549
/* Clear the data from the multi-precision number and set to zero.
4550
 *
4551
 * @param  [out]  a  SP integer.
4552
 */
4553
void sp_clear(sp_int* a)
4554
1.48M
{
4555
1.48M
    if (a != NULL) {
4556
1.48M
        int i;
4557
4558
13.7M
        for (i = 0; i < a->used; i++) {
4559
12.3M
            a->dp[i] = 0;
4560
12.3M
        }
4561
1.48M
        _sp_zero(a);
4562
1.48M
        sp_free(a);
4563
1.48M
    }
4564
1.48M
}
4565
4566
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
4567
/* Ensure the data in the multi-precision number is zeroed.
4568
 *
4569
 * Use when security sensitive data needs to be wiped.
4570
 *
4571
 * @param  [in]  a  SP integer.
4572
 */
4573
void sp_forcezero(sp_int* a)
4574
9.16k
{
4575
9.16k
    if (a != NULL) {
4576
        /* Ensure all data zeroized - data not zeroed when used decreases. */
4577
9.16k
        ForceZero(a->dp, a->used * sizeof(sp_int_digit));
4578
9.16k
        _sp_zero(a);
4579
    #ifdef HAVE_WOLF_BIGINT
4580
        wc_bigint_zero(&a->raw);
4581
    #endif
4582
9.16k
        sp_free(a);
4583
9.16k
    }
4584
9.16k
}
4585
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA */
4586
4587
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4588
    !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
4589
/* Copy value of multi-precision number a into r.
4590
 *
4591
 * @param  [in]   a  SP integer - source.
4592
 * @param  [out]  r  SP integer - destination.
4593
 *
4594
 * @return  MP_OKAY on success.
4595
 */
4596
int sp_copy(const sp_int* a, sp_int* r)
4597
4.95M
{
4598
4.95M
    int err = MP_OKAY;
4599
4600
4.95M
    if ((a == NULL) || (r == NULL)) {
4601
0
        err = MP_VAL;
4602
0
    }
4603
4.95M
    else if (a != r) {
4604
2.77M
        XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
4605
2.77M
        if (a->used == 0)
4606
413k
            r->dp[0] = 0;
4607
2.77M
        r->used = a->used;
4608
#ifdef WOLFSSL_SP_INT_NEGATIVE
4609
        r->sign = a->sign;
4610
#endif
4611
2.77M
    }
4612
4613
4.95M
    return err;
4614
4.95M
}
4615
#endif
4616
4617
#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
4618
/* Initializes r and copies in value from a.
4619
 *
4620
 * @param  [out]  r  SP integer - destination.
4621
 * @param  [in]   a  SP integer - source.
4622
 *
4623
 * @return  MP_OKAY on success.
4624
 * @return  MP_VAL when a or r is NULL.
4625
 */
4626
int sp_init_copy(sp_int* r, sp_int* a)
4627
{
4628
    int err;
4629
4630
    err = sp_init(r);
4631
    if (err == MP_OKAY) {
4632
        err = sp_copy(a, r);
4633
    }
4634
    return err;
4635
}
4636
#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
4637
4638
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4639
    !defined(NO_DH) || !defined(NO_DSA)
4640
/* Exchange the values in a and b.
 *
 * The sp_int header and the digits in use are copied through a temporary.
 * Each operand's original size field is then restored, so the capacities
 * recorded in a and b are not exchanged.
 *
 * @param  [in,out]  a  SP integer to swap.
 * @param  [in,out]  b  SP integer to swap.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or b is NULL, or when either operand's size is less
 *          than the other operand's used count.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
4649
int sp_exch(sp_int* a, sp_int* b)
4650
25
{
4651
25
    int err = MP_OKAY;
4652
25
    /* Guard the size expression: a has not been checked for NULL yet. */
    DECL_SP_INT(t, (a != NULL) ? a->used : 1);
4653
4654
25
    if ((a == NULL) || (b == NULL)) {
4655
0
        err = MP_VAL;
4656
0
    }
4657
25
    /* Each operand must have room for the other's digits in use. */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
4658
0
        err = MP_VAL;
4659
0
    }
4660
4661
25
    /* NOTE(review): presumably ALLOC_SP_INT only evaluates a->used when err
     * is MP_OKAY - the macro is defined elsewhere; confirm. */
    ALLOC_SP_INT(t, a->used, err, NULL);
4662
25
    if (err == MP_OKAY) {
4663
18
        /* Remember the capacities; the struct copies below overwrite them. */
        int asize = a->size;
4664
18
        int bsize = b->size;
4665
18
        /* t = a, a = b, b = t - header plus digits in use each time. */
        XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
4666
18
        XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
4667
18
        XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
4668
18
        a->size = asize;
4669
18
        b->size = bsize;
4670
18
    }
4671
4672
25
    FREE_SP_INT(t, NULL);
4673
25
    return err;
4674
25
}
4675
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
4676
        * !NO_DSA */
4677
4678
#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
4679
    !defined(WC_NO_CACHE_RESISTANT)
4680
/* Conditionally swap the values of a and b without branching on m.
 *
 * The masked XOR difference of the two numbers is computed into a temporary
 * and applied to both. The first c digits of a and b are always read and
 * written; the mask derived from m decides whether the values change.
 *
 * @param  [in,out]  a  SP integer to swap.
 * @param  [in,out]  b  SP integer to swap.
 * @param  [in]      c  Number of digits of a and b to process.
 * @param  [in]      m  Swap when 1, leave both unchanged when 0.
 *
 * @return  MP_OKAY on success.
 * @return  Error value set by ALLOC_SP_INT when the temporary is unavailable.
 */
int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m)
{
    int i;
    int err = MP_OKAY;
    /* All bits set when m is 1, all bits clear when m is 0. */
    sp_int_digit mask = (sp_int_digit)0 - m;
    DECL_SP_INT(t, c);

    ALLOC_SP_INT(t, c, err, NULL);
    if (err == MP_OKAY) {
        /* t = (a ^ b) & mask: the difference, or zero when not swapping. */
        t->used = (int)((a->used ^ b->used) & mask);
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        t->sign = (int)((a->sign ^ b->sign) & mask);
    #endif
        for (i = 0; i < c; i++) {
            t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
        }

        /* a ^= t */
        a->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        a->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            a->dp[i] ^= t->dp[i];
        }

        /* b ^= t */
        b->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* XOR with the masked difference, not with b's own sign: the latter
         * always clears b->sign, whether or not a swap is requested. */
        b->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            b->dp[i] ^= t->dp[i];
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
4715
#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
4716
4717
#ifdef WOLFSSL_SP_INT_NEGATIVE
4718
/* Calculate the absolute value of the multi-precision number.
4719
 *
4720
 * @param  [in]   a  SP integer to calculate absolute value of.
4721
 * @param  [out]  r  SP integer to hold result.
4722
 *
4723
 * @return  MP_OKAY on success.
4724
 * @return  MP_VAL when a or r is NULL.
4725
 */
4726
int sp_abs(sp_int* a, sp_int* r)
4727
{
4728
    int err;
4729
4730
    err = sp_copy(a, r);
4731
    if (r != NULL) {
4732
        r->sign = MP_ZPOS;
4733
    }
4734
4735
    return err;
4736
}
4737
#endif /* WOLFSSL_SP_INT_NEGATIVE */
4738
4739
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4740
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4741
/* Compare absolute value of two multi-precision numbers.
4742
 *
4743
 * @param  [in]  a  SP integer.
4744
 * @param  [in]  b  SP integer.
4745
 *
4746
 * @return  MP_GT when a is greater than b.
4747
 * @return  MP_LT when a is less than b.
4748
 * @return  MP_EQ when a is equals b.
4749
 */
4750
static int _sp_cmp_abs(sp_int* a, sp_int* b)
4751
1.46M
{
4752
1.46M
    int ret = MP_EQ;
4753
4754
1.46M
    if (a->used > b->used) {
4755
993k
        ret = MP_GT;
4756
993k
    }
4757
466k
    else if (a->used < b->used) {
4758
185k
        ret = MP_LT;
4759
185k
    }
4760
281k
    else {
4761
281k
        int i;
4762
4763
345k
        for (i = a->used - 1; i >= 0; i--) {
4764
326k
            if (a->dp[i] > b->dp[i]) {
4765
136k
                ret = MP_GT;
4766
136k
                break;
4767
136k
            }
4768
189k
            else if (a->dp[i] < b->dp[i]) {
4769
125k
                ret = MP_LT;
4770
125k
                break;
4771
125k
            }
4772
326k
        }
4773
281k
    }
4774
4775
1.46M
    return ret;
4776
1.46M
}
4777
#endif
4778
4779
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
4780
/* Compare absolute value of two multi-precision numbers.
4781
 *
4782
 * @param  [in]  a  SP integer.
4783
 * @param  [in]  b  SP integer.
4784
 *
4785
 * @return  MP_GT when a is greater than b.
4786
 * @return  MP_LT when a is less than b.
4787
 * @return  MP_EQ when a is equals b.
4788
 */
4789
int sp_cmp_mag(sp_int* a, sp_int* b)
4790
{
4791
    int ret;
4792
4793
    if (a == b) {
4794
        ret = MP_EQ;
4795
    }
4796
    else if (a == NULL) {
4797
        ret = MP_LT;
4798
    }
4799
    else if (b == NULL) {
4800
        ret = MP_GT;
4801
    }
4802
    else
4803
    {
4804
        ret = _sp_cmp_abs(a, b);
4805
    }
4806
4807
    return ret;
4808
}
4809
#endif
4810
4811
#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
4812
    defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
4813
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4814
/* Compare two multi-precision numbers.
4815
 *
4816
 * Assumes a and b are not NULL.
4817
 *
4818
 * @param  [in]  a  SP integer.
4819
 * @param  [in]  a  SP integer.
4820
 *
4821
 * @return  MP_GT when a is greater than b.
4822
 * @return  MP_LT when a is less than b.
4823
 * @return  MP_EQ when a is equals b.
4824
 */
4825
static int _sp_cmp(sp_int* a, sp_int* b)
4826
270k
{
4827
270k
    int ret;
4828
4829
#ifdef WOLFSSL_SP_INT_NEGATIVE
4830
    if (a->sign == b->sign) {
4831
#endif
4832
270k
        ret = _sp_cmp_abs(a, b);
4833
#ifdef WOLFSSL_SP_INT_NEGATIVE
4834
        if (a->sign == MP_NEG) {
4835
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
4836
             * Swapping MP_GT and MP_LT results.
4837
             */
4838
            ret = -ret;
4839
        }
4840
    }
4841
    else if (a->sign > b->sign) {
4842
        ret = MP_LT;
4843
    }
4844
    else /* (a->sign < b->sign) */ {
4845
        ret = MP_GT;
4846
    }
4847
#endif
4848
4849
270k
    return ret;
4850
270k
}
4851
#endif
4852
4853
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4854
    !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
4855
    defined(WOLFSSL_SP_MATH_ALL)
4856
/* Compare two multi-precision numbers.
4857
 *
4858
 * Pointers are compared such that NULL is less than not NULL.
4859
 *
4860
 * @param  [in]  a  SP integer.
4861
 * @param  [in]  a  SP integer.
4862
 *
4863
 * @return  MP_GT when a is greater than b.
4864
 * @return  MP_LT when a is less than b.
4865
 * @return  MP_EQ when a is equals b.
4866
 */
4867
int sp_cmp(sp_int* a, sp_int* b)
4868
38.0k
{
4869
38.0k
    int ret;
4870
4871
38.0k
    if (a == b) {
4872
351
        ret = MP_EQ;
4873
351
    }
4874
37.7k
    else if (a == NULL) {
4875
0
        ret = MP_LT;
4876
0
    }
4877
37.7k
    else if (b == NULL) {
4878
0
        ret = MP_GT;
4879
0
    }
4880
37.7k
    else
4881
37.7k
    {
4882
37.7k
        ret = _sp_cmp(a, b);
4883
37.7k
    }
4884
4885
38.0k
    return ret;
4886
38.0k
}
4887
#endif
4888
4889
/*************************
4890
 * Bit check/set functions
4891
 *************************/
4892
4893
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || (defined(WOLFSSL_SP_MATH_ALL) && \
4894
    defined(HAVE_ECC))
4895
/* Check if a bit is set
4896
 *
4897
 * When a is NULL, result is 0.
4898
 *
4899
 * @param  [in]  a  SP integer.
4900
 * @param  [in]  b  Bit position to check.
4901
 *
4902
 * @return  0 when bit is not set.
4903
 * @return  1 when bit is set.
4904
 */
4905
int sp_is_bit_set(sp_int* a, unsigned int b)
4906
0
{
4907
0
    int ret = 0;
4908
0
    int i = (int)(b >> SP_WORD_SHIFT);
4909
0
    int s = (int)(b & SP_WORD_MASK);
4910
4911
0
    if ((a != NULL) && (i < a->used)) {
4912
0
        ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
4913
0
    }
4914
4915
0
    return ret;
4916
0
}
4917
#endif /* WOLFSSL_RSA_VERIFY_ONLY */
4918
4919
/* Count the number of bits in the multi-precision number.
4920
 *
4921
 * When a is not NULL, result is 0.
4922
 *
4923
 * @param  [in]  a  SP integer.
4924
 *
4925
 * @return  The number of bits in the number.
4926
 */
4927
int sp_count_bits(const sp_int* a)
4928
3.11M
{
4929
3.11M
    int r = 0;
4930
4931
3.11M
    if (a != NULL) {
4932
3.11M
        r = a->used - 1;
4933
3.11M
        while ((r >= 0) && (a->dp[r] == 0)) {
4934
1.08k
            r--;
4935
1.08k
        }
4936
3.11M
        if (r < 0) {
4937
9.04k
            r = 0;
4938
9.04k
        }
4939
3.10M
        else {
4940
3.10M
            sp_int_digit d;
4941
4942
3.10M
            d = a->dp[r];
4943
3.10M
            r *= SP_WORD_SIZE;
4944
3.10M
            if (d > SP_HALF_MAX) {
4945
2.80M
                r += SP_WORD_SIZE;
4946
7.54M
                while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
4947
4.73M
                    r--;
4948
4.73M
                    d <<= 1;
4949
4.73M
                }
4950
2.80M
            }
4951
300k
            else {
4952
3.39M
                while (d != 0) {
4953
3.09M
                    r++;
4954
3.09M
                    d >>= 1;
4955
3.09M
                }
4956
300k
            }
4957
3.10M
        }
4958
3.11M
    }
4959
4960
3.11M
    return r;
4961
3.11M
}
4962
4963
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
4964
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
4965
    (defined(HAVE_ECC) && defined(FP_ECC)) || \
4966
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
4967
4968
/* Number of entries in array of number of least significant zero bits. */
4969
#define SP_LNZ_CNT      16
4970
/* Number of bits the array checks. */
4971
2.86k
#define SP_LNZ_BITS     4
4972
/* Mask to apply to check with array. */
4973
4.95k
#define SP_LNZ_MASK     0xf
4974
/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
4975
static const int sp_lnz[SP_LNZ_CNT] = {
4976
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4977
};
4978
4979
/* Count the number of least significant zero bits.
4980
 *
4981
 * When a is not NULL, result is 0.
4982
 *
4983
 * @param  [in]   a  SP integer to use.
4984
 *
4985
 * @return  Number of leas significant zero bits.
4986
 */
4987
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
4988
static
4989
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
4990
int sp_cnt_lsb(sp_int* a)
4991
2.11k
{
4992
2.11k
    int bc = 0;
4993
4994
2.11k
    if ((a != NULL) && (!sp_iszero(a))) {
4995
2.08k
        int i;
4996
2.08k
        int j;
4997
2.08k
        int cnt = 0;
4998
4999
2.89k
        for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
5000
809
        }
5001
5002
4.95k
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
5003
4.95k
            bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
5004
4.95k
            if (bc != 4) {
5005
2.08k
                bc += cnt + j;
5006
2.08k
                break;
5007
2.08k
            }
5008
4.95k
        }
5009
2.08k
    }
5010
5011
2.11k
    return bc;
5012
2.11k
}
5013
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
5014
5015
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5016
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
5017
/* Determine if the most significant byte of the encoded multi-precision number
5018
 * has the top bit set.
5019
 *
5020
 * When A is NULL, result is 0.
5021
 *
5022
 * @param  [in]  a  SP integer.
5023
 *
5024
 * @return  1 when the top bit of top byte is set.
5025
 * @return  0 when the top bit of top byte is not set.
5026
 */
5027
int sp_leading_bit(sp_int* a)
5028
10.3k
{
5029
10.3k
    int bit = 0;
5030
5031
10.3k
    if ((a != NULL) && (a->used > 0)) {
5032
10.2k
        sp_int_digit d = a->dp[a->used - 1];
5033
10.2k
    #if SP_WORD_SIZE > 8
5034
56.5k
        while (d > (sp_int_digit)0xff) {
5035
46.2k
            d >>= 8;
5036
46.2k
        }
5037
10.2k
    #endif
5038
10.2k
        bit = (int)(d >> 7);
5039
10.2k
    }
5040
5041
10.3k
    return bit;
5042
10.3k
}
5043
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
5044
5045
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5046
    defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
5047
    !defined(NO_RSA)
5048
/* Set a bit of a: a |= 1 << i
5049
 * The field 'used' is updated in a.
5050
 *
5051
 * @param  [in,out]  a  SP integer to set bit into.
5052
 * @param  [in]      i  Index of bit to set.
5053
 *
5054
 * @return  MP_OKAY on success.
5055
 * @return  MP_VAL when a is NULL or index is too large.
5056
 */
5057
int sp_set_bit(sp_int* a, int i)
5058
1.88k
{
5059
1.88k
    int err = MP_OKAY;
5060
1.88k
    int w = (int)(i >> SP_WORD_SHIFT);
5061
5062
1.88k
    if ((a == NULL) || (w >= a->size)) {
5063
34
        err = MP_VAL;
5064
34
    }
5065
1.85k
    else {
5066
1.85k
        int s = (int)(i & (SP_WORD_SIZE - 1));
5067
1.85k
        int j;
5068
5069
18.3k
        for (j = a->used; j <= w; j++) {
5070
16.5k
            a->dp[j] = 0;
5071
16.5k
        }
5072
1.85k
        a->dp[w] |= (sp_int_digit)1 << s;
5073
1.85k
        if (a->used <= w) {
5074
1.83k
            a->used = w + 1;
5075
1.83k
        }
5076
1.85k
    }
5077
1.88k
    return err;
5078
1.88k
}
5079
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
5080
        * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
5081
5082
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5083
    defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
5084
/* Exponentiate 2 to the power of e: a = 2^e
5085
 * This is done by setting the 'e'th bit.
5086
 *
5087
 * @param  [out]  a  SP integer to hold result.
5088
 * @param  [in]   e  Exponent.
5089
 *
5090
 * @return  MP_OKAY on success.
5091
 * @return  MP_VAL when a is NULL or 2^exponent is too large.
5092
 */
5093
int sp_2expt(sp_int* a, int e)
5094
96
{
5095
96
    int err = MP_OKAY;
5096
5097
96
    if (a == NULL) {
5098
0
        err = MP_VAL;
5099
0
    }
5100
96
    if (err == MP_OKAY) {
5101
96
        _sp_zero(a);
5102
96
        err = sp_set_bit(a, e);
5103
96
    }
5104
5105
96
    return err;
5106
96
}
5107
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5108
        * WOLFSSL_KEY_GEN || !NO_DH */
5109
5110
/**********************
5111
 * Digit/Long functions
5112
 **********************/
5113
5114
/* Set the multi-precision number to be the value of the digit.
5115
 *
5116
 * @param  [out]  a  SP integer to become number.
5117
 * @param  [in]   d  Digit to be set.
5118
 *
5119
 * @return  MP_OKAY on success.
5120
 * @return  MP_VAL when a is NULL.
5121
 */
5122
int sp_set(sp_int* a, sp_int_digit d)
5123
4.18k
{
5124
4.18k
    int err = MP_OKAY;
5125
5126
4.18k
    if (a == NULL) {
5127
0
        err = MP_VAL;
5128
0
    }
5129
4.18k
    if (err == MP_OKAY) {
5130
        /* gcc-11 reports out-of-bounds array access if the byte array backing
5131
         * the sp_int* is smaller than sizeof(sp_int), as occurs when
5132
         * WOLFSSL_SP_SMALL.
5133
         */
5134
4.18k
        PRAGMA_GCC_DIAG_PUSH;
5135
4.18k
        PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"");
5136
4.18k
        a->dp[0] = d;
5137
4.18k
        a->used = d > 0;
5138
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5139
        a->sign = MP_ZPOS;
5140
    #endif
5141
4.18k
        PRAGMA_GCC_DIAG_POP;
5142
4.18k
    }
5143
5144
4.18k
    return err;
5145
4.18k
}
5146
5147
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
5148
/* Set a number into the multi-precision number.
5149
 *
5150
 * Number may be larger than the size of a digit.
5151
 *
5152
 * @param  [out]  a  SP integer to set.
5153
 * @param  [in]   n  Long value to set.
5154
 *
5155
 * @return  MP_OKAY on success.
5156
 * @return  MP_VAL when a is NULL.
5157
 */
5158
int sp_set_int(sp_int* a, unsigned long n)
5159
12
{
5160
12
    int err = MP_OKAY;
5161
5162
12
    if (a == NULL) {
5163
0
        err = MP_VAL;
5164
0
    }
5165
5166
12
    if (err == MP_OKAY) {
5167
    #if SP_WORD_SIZE < SP_ULONG_BITS
5168
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
5169
    #endif
5170
12
            a->dp[0] = (sp_int_digit)n;
5171
12
            a->used = (n != 0);
5172
    #if SP_WORD_SIZE < SP_ULONG_BITS
5173
        }
5174
        else {
5175
            int i;
5176
5177
            for (i = 0; n > 0; i++,n >>= SP_WORD_SIZE) {
5178
                a->dp[i] = (sp_int_digit)n;
5179
            }
5180
            a->used = i;
5181
        }
5182
    #endif
5183
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5184
        a->sign = MP_ZPOS;
5185
    #endif
5186
12
    }
5187
5188
12
    return err;
5189
12
}
5190
#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA  */
5191
5192
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5193
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_DH))
5194
/* Compare a one digit number with a multi-precision number.
5195
 *
5196
 * When a is NULL, MP_LT is returned.
5197
 *
5198
 * @param  [in]  a  SP integer to compare.
5199
 * @param  [in]  d  Digit to compare with.
5200
 *
5201
 * @return  MP_GT when a is greater than d.
5202
 * @return  MP_LT when a is less than d.
5203
 * @return  MP_EQ when a is equals d.
5204
 */
5205
int sp_cmp_d(sp_int* a, sp_int_digit d)
5206
33.3k
{
5207
33.3k
    int ret = MP_EQ;
5208
5209
33.3k
    if (a == NULL) {
5210
0
        ret = MP_LT;
5211
0
    }
5212
33.3k
    else
5213
#ifdef WOLFSSL_SP_INT_NEGATIVE
5214
    if (a->sign == MP_NEG) {
5215
        ret = MP_LT;
5216
    }
5217
    else
5218
#endif
5219
33.3k
    {
5220
        /* special case for zero*/
5221
33.3k
        if (a->used == 0) {
5222
21.7k
            if (d == 0) {
5223
50
                ret = MP_EQ;
5224
50
            }
5225
21.6k
            else {
5226
21.6k
                ret = MP_LT;
5227
21.6k
            }
5228
21.7k
        }
5229
11.5k
        else if (a->used > 1) {
5230
10.1k
            ret = MP_GT;
5231
10.1k
        }
5232
1.42k
        else {
5233
1.42k
            if (a->dp[0] > d) {
5234
737
                ret = MP_GT;
5235
737
            }
5236
691
            else if (a->dp[0] < d) {
5237
12
                ret = MP_LT;
5238
12
            }
5239
1.42k
        }
5240
33.3k
    }
5241
5242
33.3k
    return ret;
5243
33.3k
}
5244
#endif
5245
5246
/* Feature switches derived from the build configuration. Each one is tested
 * by the preprocessor guards around the functions further down this file.
 */

/* WOLFSSL_SP_ADD_D: compile in sp_add_d() and its internal helper. */
#if !defined(NO_PWDBASED) || \
    defined(WOLFSSL_KEY_GEN) || \
    !defined(NO_DH) || \
    !defined(NO_DSA) || \
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(OPENSSL_EXTRA)
    #define WOLFSSL_SP_ADD_D
#endif

/* WOLFSSL_SP_SUB_D: compile in sp_sub_d() and its internal helper. */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    !defined(NO_DH) || \
    defined(HAVE_ECC) || \
    !defined(NO_DSA)
    #define WOLFSSL_SP_SUB_D
#endif

/* WOLFSSL_SP_READ_RADIX_10: also forces the internal add-digit helper to be
 * compiled. Presumably enables reading of base 10 strings - confirm against
 * the radix code elsewhere in the file.
 */
#if defined(WOLFSSL_SP_MATH_ALL) && \
    !defined(NO_RSA) && \
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
    #define WOLFSSL_SP_READ_RADIX_10
#endif

/* WOLFSSL_SP_INVMOD: also forces the internal subtract-digit helper to be
 * compiled. Presumably enables modular inversion - confirm against the
 * inversion code elsewhere in the file.
 */
#if defined(HAVE_ECC) || \
    !defined(NO_DSA) || \
    defined(OPENSSL_EXTRA) || \
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
    #define WOLFSSL_SP_INVMOD
#endif

/* WOLFSSL_SP_INVMOD_MONT_CT: also forces the internal subtract-digit helper
 * to be compiled.
 */
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
    #define WOLFSSL_SP_INVMOD_MONT_CT
#endif

/* WOLFSSL_SP_PRIME_GEN: also forces the internal subtract-digit helper to be
 * compiled.
 */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
    !defined(NO_DH) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
    #define WOLFSSL_SP_PRIME_GEN
#endif
5273
5274
#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
5275
    defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
5276
/* Add a one digit number to the multi-precision number.
5277
 *
5278
 * @param  [in]   a  SP integer be added to.
5279
 * @param  [in]   d  Digit to add.
5280
 * @param  [out]  r  SP integer to store result in.
5281
 *
5282
 * @return  MP_OKAY on success.
5283
 * @return  MP_VAL when result is too large for fixed size dp array.
5284
 */
5285
static int _sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
5286
101
{
5287
101
    int err = MP_OKAY;
5288
101
    int i = 0;
5289
101
    sp_int_digit t;
5290
5291
101
    r->used = a->used;
5292
101
    if (a->used == 0) {
5293
14
        r->used = d > 0;
5294
14
    }
5295
101
    t = a->dp[0] + d;
5296
101
    if (t < a->dp[0]) {
5297
113
        for (++i; i < a->used; i++) {
5298
103
            r->dp[i] = a->dp[i] + 1;
5299
103
            if (r->dp[i] != 0) {
5300
49
               break;
5301
49
            }
5302
103
        }
5303
59
        if (i == a->used) {
5304
10
            if (i < r->size) {
5305
10
                r->used++;
5306
10
                r->dp[i] = 1;
5307
10
            }
5308
0
            else {
5309
0
                err = MP_VAL;
5310
0
            }
5311
10
        }
5312
59
    }
5313
101
    if (err == MP_OKAY) {
5314
101
        r->dp[0] = t;
5315
101
        if (r != a) {
5316
915
            for (++i; i < a->used; i++) {
5317
814
                r->dp[i] = a->dp[i];
5318
814
            }
5319
101
        }
5320
101
    }
5321
5322
101
    return err;
5323
101
}
5324
#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
5325
        * defined(WOLFSSL_SP_READ_RADIX_10) */
5326
5327
#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
5328
    defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
5329
    defined(WOLFSSL_SP_INVMOD_MONT_CT) || defined(WOLFSSL_SP_PRIME_GEN)
5330
/* Sub a one digit number from the multi-precision number.
5331
 *
5332
 * returns MP_OKAY always.
5333
 * @param  [in]   a  SP integer be subtracted from.
5334
 * @param  [in]   d  Digit to subtract.
5335
 * @param  [out]  r  SP integer to store result in.
5336
 */
5337
static void _sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
5338
4.06k
{
5339
4.06k
    int i = 0;
5340
4.06k
    sp_int_digit t;
5341
5342
4.06k
    r->used = a->used;
5343
4.06k
    if (a->used == 0) {
5344
18
        r->dp[0] = 0;
5345
18
    }
5346
4.04k
    else {
5347
4.04k
        t = a->dp[0] - d;
5348
4.04k
        if (t > a->dp[0]) {
5349
887
            for (++i; i < a->used; i++) {
5350
867
                r->dp[i] = a->dp[i] - 1;
5351
867
                if (r->dp[i] != SP_DIGIT_MAX) {
5352
313
                   break;
5353
313
                }
5354
867
            }
5355
333
        }
5356
4.04k
        r->dp[0] = t;
5357
4.04k
        if (r != a) {
5358
7.05k
            for (++i; i < a->used; i++) {
5359
5.46k
                r->dp[i] = a->dp[i];
5360
5.46k
            }
5361
1.59k
        }
5362
4.04k
        sp_clamp(r);
5363
4.04k
    }
5364
4.06k
}
5365
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
5366
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
5367
        * WOLFSSL_SP_PRIME_GEN */
5368
5369
#ifdef WOLFSSL_SP_ADD_D
5370
/* Add a one digit number to the multi-precision number.
5371
 *
5372
 * @param  [in]   a  SP integer be added to.
5373
 * @param  [in]   d  Digit to add.
5374
 * @param  [out]  r  SP integer to store result in.
5375
 *
5376
 * @return  MP_OKAY on success.
5377
 * @return  MP_VAL when result is too large for fixed size dp array.
5378
 */
5379
int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
5380
101
{
5381
101
    int err = MP_OKAY;
5382
5383
    /* Check validity of parameters. */
5384
101
    if ((a == NULL) || (r == NULL)) {
5385
0
        err = MP_VAL;
5386
0
    }
5387
101
    else
5388
101
    {
5389
101
    #ifndef WOLFSSL_SP_INT_NEGATIVE
5390
        /* Positive only so just use internal function. */
5391
101
        err = _sp_add_d(a, d, r);
5392
    #else
5393
        if (a->sign == MP_ZPOS) {
5394
            /* Positive so use interal function. */
5395
            r->sign = MP_ZPOS;
5396
            err = _sp_add_d(a, d, r);
5397
        }
5398
        else if ((a->used > 1) || (a->dp[0] > d)) {
5399
            /* Negative value bigger than digit so subtract digit. */
5400
            r->sign = MP_NEG;
5401
            _sp_sub_d(a, d, r);
5402
        }
5403
        else {
5404
            /* Negative value smaller or equal to digit. */
5405
            r->sign = MP_ZPOS;
5406
            /* Subtract negative value from digit. */
5407
            r->dp[0] = d - a->dp[0];
5408
            /* Result is a digit equal to or greater than zero. */
5409
            r->used = ((r->dp[0] == 0) ? 0 : 1);
5410
        }
5411
    #endif
5412
101
    }
5413
5414
101
    return err;
5415
101
}
5416
#endif /* WOLFSSL_SP_ADD_D */
5417
5418
#ifdef WOLFSSL_SP_SUB_D
5419
/* Sub a one digit number from the multi-precision number.
5420
 *
5421
 * @param  [in]   a  SP integer be subtracted from.
5422
 * @param  [in]   d  Digit to subtract.
5423
 * @param  [out]  r  SP integer to store result in.
5424
 *
5425
 * @return  MP_OKAY on success.
5426
 * @return  MP_VAL when a or r is NULL.
5427
 */
5428
int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
5429
109
{
5430
109
    int err = MP_OKAY;
5431
5432
    /* Check validity of parameters. */
5433
109
    if ((a == NULL) || (r == NULL)) {
5434
0
        err = MP_VAL;
5435
0
    }
5436
109
    else {
5437
109
    #ifndef WOLFSSL_SP_INT_NEGATIVE
5438
        /* Positive only so just use internal function. */
5439
109
        _sp_sub_d(a, d, r);
5440
    #else
5441
        if (a->sign == MP_NEG) {
5442
            /* Subtracting from negative use interal add. */
5443
            r->sign = MP_NEG;
5444
            err = _sp_add_d(a, d, r);
5445
        }
5446
        else if ((a->used > 1) || (a->dp[0] >= d)) {
5447
            /* Positive number greater than digit so add digit. */
5448
            r->sign = MP_ZPOS;
5449
            _sp_sub_d(a, d, r);
5450
        }
5451
        else {
5452
            /* Negative value smaller than digit. */
5453
            r->sign = MP_NEG;
5454
            /* Subtract positive value from digit. */
5455
            r->dp[0] = d - a->dp[0];
5456
            /* Result is a digit equal to or greater than zero. */
5457
            r->used = 1;
5458
        }
5459
    #endif
5460
109
    }
5461
5462
109
    return err;
5463
109
}
5464
#endif /* WOLFSSL_SP_SUB_D */
5465
5466
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5467
    defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
5468
    !defined(NO_DH) || defined(HAVE_ECC) || \
5469
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5470
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
5471
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
5472
/* Multiply a by digit n and put result into r shifting up o digits.
5473
 *   r = (a * n) << (o * SP_WORD_SIZE)
5474
 *
5475
 * @param  [in]   a  SP integer to be multiplied.
5476
 * @param  [in]   n  Number (SP digit) to multiply by.
5477
 * @param  [out]  r  SP integer result.
5478
 * @param  [in]   o  Number of digits to move result up by.
5479
 * @return  MP_OKAY on success.
5480
 * @return  MP_VAL when result is too large for sp_int.
5481
 */
5482
static int _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
5483
81
{
5484
81
    int err = MP_OKAY;
5485
81
    int i;
5486
81
#ifndef SQR_MUL_ASM
5487
81
    sp_int_word t = 0;
5488
#else
5489
    sp_int_digit l = 0;
5490
    sp_int_digit h = 0;
5491
#endif
5492
5493
#ifdef WOLFSSL_SP_SMALL
5494
    for (i = 0; i < o; i++) {
5495
        r->dp[i] = 0;
5496
    }
5497
#else
5498
    /* Don't use the offset. Only when doing small code size div. */
5499
81
    (void)o;
5500
81
#endif
5501
5502
528
    for (i = 0; i < a->used; i++, o++) {
5503
447
    #ifndef SQR_MUL_ASM
5504
447
        t += (sp_int_word)a->dp[i] * n;
5505
447
        r->dp[o] = (sp_int_digit)t;
5506
447
        t >>= SP_WORD_SIZE;
5507
    #else
5508
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], n);
5509
        r->dp[o] = l;
5510
        l = h;
5511
        h = 0;
5512
    #endif
5513
447
    }
5514
5515
81
#ifndef SQR_MUL_ASM
5516
81
    if (t > 0)
5517
#else
5518
    if (l > 0)
5519
#endif
5520
28
    {
5521
28
        if (o == r->size) {
5522
0
            err = MP_VAL;
5523
0
        }
5524
28
        else {
5525
28
        #ifndef SQR_MUL_ASM
5526
28
            r->dp[o++] = (sp_int_digit)t;
5527
        #else
5528
            r->dp[o++] = l;
5529
        #endif
5530
28
        }
5531
28
    }
5532
81
    r->used = o;
5533
81
    sp_clamp(r);
5534
5535
81
    return err;
5536
81
}
5537
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5538
        *  WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
5539
5540
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5541
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
5542
/* Multiply a by digit n and put result into r. r = a * n
5543
 *
5544
 * @param  [in]   a  SP integer to multiply.
5545
 * @param  [in]   n  Digit to multiply by.
5546
 * @param  [out]  r  SP integer to hold result.
5547
 *
5548
 * @return  MP_OKAY on success.
5549
 * @return  MP_VAL when a or b is NULL, or a has maximum number of digits used.
5550
 */
5551
int sp_mul_d(sp_int* a, sp_int_digit d, sp_int* r)
5552
81
{
5553
81
    int err = MP_OKAY;
5554
5555
81
    if ((a == NULL) || (r == NULL)) {
5556
0
        err = MP_VAL;
5557
0
    }
5558
81
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
5559
0
        err = MP_VAL;
5560
0
    }
5561
5562
81
    if (err == MP_OKAY) {
5563
81
        err = _sp_mul_d(a, d, r, 0);
5564
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5565
        if (d == 0) {
5566
            r->sign = MP_ZPOS;
5567
        }
5568
        else {
5569
            r->sign = a->sign;
5570
        }
5571
    #endif
5572
81
    }
5573
5574
81
    return err;
5575
81
}
5576
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
5577
        * (WOLFSSL_KEY_GEN && !NO_RSA) */
5578
5579
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
5580
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5581
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
5582
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
5583
#define WOLFSSL_SP_DIV_D
5584
#endif
5585
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5586
    !defined(NO_DH) || \
5587
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
5588
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
5589
#define WOLFSSL_SP_MOD_D
5590
#endif
5591
5592
#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5593
     (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5594
      !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
5595
    defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
5596
#ifndef SP_ASM_DIV_WORD
5597
/* Divide a two digit number by a digit number and return. (hi | lo) / d
5598
 *
5599
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
5600
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
5601
 * @param  [in]  d   SP integer digit. Number to divide by.
5602
 * @return  The division result.
5603
 */
5604
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
5605
                                          sp_int_digit d)
5606
10.3M
{
5607
#ifdef WOLFSSL_SP_DIV_WORD_HALF
5608
    sp_int_digit r;
5609
5610
    if (hi != 0) {
5611
        sp_int_digit divsz = d >> SP_HALF_SIZE;
5612
        sp_int_digit r2;
5613
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
5614
        sp_int_word trial;
5615
5616
        r = hi / divsz;
5617
        if (r > SP_HALF_MAX) {
5618
            r = SP_HALF_MAX;
5619
        }
5620
        r <<= SP_HALF_SIZE;
5621
        trial = r * (sp_int_word)d;
5622
        while (trial > w) {
5623
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
5624
            trial -= (sp_int_word)d << SP_HALF_SIZE;
5625
        }
5626
        w -= trial;
5627
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divsz;
5628
        trial = r2 * (sp_int_word)d;
5629
        while (trial > w) {
5630
            r2--;
5631
            trial -= d;
5632
        }
5633
        w -= trial;
5634
        r += r2;
5635
        r2 = ((sp_int_digit)w) / d;
5636
        r += r2;
5637
    }
5638
    else {
5639
        r = lo / d;
5640
    }
5641
5642
    return r;
5643
#else
5644
10.3M
    sp_int_word w;
5645
10.3M
    sp_int_digit r;
5646
5647
10.3M
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
5648
10.3M
    w /= d;
5649
10.3M
    r = (sp_int_digit)w;
5650
5651
10.3M
    return r;
5652
10.3M
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
5653
10.3M
}
5654
#endif /* !SP_ASM_DIV_WORD */
5655
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
5656
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
5657
5658
#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
5659
    !defined(WOLFSSL_SP_SMALL)
5660
5661
/* Multipliers used to estimate division by 3 and by 10, per digit size.
 *   SP_DIV_3_CONST  = (maximum digit value) / 3
 *   SP_DIV_10_CONST = (maximum digit value) / 10, rounded down
 */
#if SP_WORD_SIZE == 64
    #define SP_DIV_3_CONST      0x5555555555555555L
    #define SP_DIV_10_CONST     0x1999999999999999L
#elif SP_WORD_SIZE == 32
    #define SP_DIV_3_CONST      0x55555555
    #define SP_DIV_10_CONST     0x19999999
#elif SP_WORD_SIZE == 16
    #define SP_DIV_3_CONST      0x5555
    #define SP_DIV_10_CONST     0x1999
#elif SP_WORD_SIZE == 8
    #define SP_DIV_3_CONST      0x55
    #define SP_DIV_10_CONST     0x19
#endif
5674
5675
/* Divide by 3: r = a / 3 and rem = a % 3
5676
 *
5677
 * @param  [in]   a    SP integer to be divided.
5678
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5679
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5680
 */
5681
static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem)
5682
12.8k
{
5683
12.8k
    int i;
5684
12.8k
#ifndef SQR_MUL_ASM
5685
12.8k
    sp_int_word t;
5686
12.8k
    sp_int_digit tt;
5687
#else
5688
    sp_int_digit l = 0;
5689
    sp_int_digit tt = 0;
5690
    sp_int_digit t;
5691
#endif
5692
12.8k
    sp_int_digit tr = 0;
5693
12.8k
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
5694
12.8k
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
5695
5696
12.8k
    if (r == NULL) {
5697
63.9k
        for (i = a->used - 1; i >= 0; i--) {
5698
51.1k
    #ifndef SQR_MUL_ASM
5699
51.1k
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5700
51.1k
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
5701
51.1k
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
5702
    #else
5703
            t = SP_DIV_3_CONST;
5704
            SP_ASM_MUL(l, tt, a->dp[i], t);
5705
            tt += tr * SP_DIV_3_CONST;
5706
            tr = a->dp[i] - (tt * 3);
5707
    #endif
5708
51.1k
            tr = sp_rem6[tr];
5709
51.1k
        }
5710
12.8k
        *rem = tr;
5711
12.8k
    }
5712
0
    else {
5713
0
        for (i = a->used - 1; i >= 0; i--) {
5714
0
    #ifndef SQR_MUL_ASM
5715
0
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5716
0
            tt = (t * SP_DIV_3_CONST) >> SP_WORD_SIZE;
5717
0
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
5718
    #else
5719
            t = SP_DIV_3_CONST;
5720
            SP_ASM_MUL(l, tt, a->dp[i], t);
5721
            tt += tr * SP_DIV_3_CONST;
5722
            tr = a->dp[i] - (tt * 3);
5723
    #endif
5724
0
            tt += sp_r6[tr];
5725
0
            tr = sp_rem6[tr];
5726
0
            r->dp[i] = tt;
5727
0
        }
5728
0
        r->used = a->used;
5729
0
        sp_clamp(r);
5730
0
        if (rem != NULL) {
5731
0
            *rem = tr;
5732
0
        }
5733
0
    }
5734
12.8k
}
5735
5736
/* Divide by 10: r = a / 10 and rem = a % 10
5737
 *
5738
 * @param  [in]   a    SP integer to be divided.
5739
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5740
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5741
 */
5742
static void _sp_div_10(sp_int* a, sp_int* r, sp_int_digit* rem)
5743
56
{
5744
56
    int i;
5745
56
#ifndef SQR_MUL_ASM
5746
56
    sp_int_word t;
5747
56
    sp_int_digit tt;
5748
#else
5749
    sp_int_digit l = 0;
5750
    sp_int_digit tt = 0;
5751
    sp_int_digit t;
5752
#endif
5753
56
    sp_int_digit tr = 0;
5754
5755
56
    if (r == NULL) {
5756
178
        for (i = a->used - 1; i >= 0; i--) {
5757
122
    #ifndef SQR_MUL_ASM
5758
122
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5759
122
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
5760
122
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
5761
    #else
5762
            t = SP_DIV_10_CONST;
5763
            SP_ASM_MUL(l, tt, a->dp[i], t);
5764
            tt += tr * SP_DIV_10_CONST;
5765
            tr = a->dp[i] - (tt * 10);
5766
    #endif
5767
122
            tr = tr % 10;
5768
122
        }
5769
56
        *rem = tr;
5770
56
    }
5771
0
    else {
5772
0
        for (i = a->used - 1; i >= 0; i--) {
5773
0
    #ifndef SQR_MUL_ASM
5774
0
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5775
0
            tt = (t * SP_DIV_10_CONST) >> SP_WORD_SIZE;
5776
0
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
5777
    #else
5778
            t = SP_DIV_10_CONST;
5779
            SP_ASM_MUL(l, tt, a->dp[i], t);
5780
            tt += tr * SP_DIV_10_CONST;
5781
            tr = a->dp[i] - (tt * 10);
5782
    #endif
5783
0
            tt += tr / 10;
5784
0
            tr = tr % 10;
5785
0
            r->dp[i] = tt;
5786
0
        }
5787
0
        r->used = a->used;
5788
0
        sp_clamp(r);
5789
0
        if (rem != NULL) {
5790
0
            *rem = tr;
5791
0
        }
5792
0
    }
5793
56
}
5794
#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
5795
5796
#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
5797
/* Divide by small number: r = a / d and rem = a % d
5798
 *
5799
 * @param  [in]   a    SP integer to be divided.
5800
 * @param  [in]   d    Digit to divide by.
5801
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5802
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
5803
 */
5804
static void _sp_div_small(sp_int* a, sp_int_digit d, sp_int* r,
5805
                         sp_int_digit* rem)
5806
494k
{
5807
494k
    int i;
5808
494k
#ifndef SQR_MUL_ASM
5809
494k
    sp_int_word t;
5810
494k
    sp_int_digit tt;
5811
#else
5812
    sp_int_digit l = 0;
5813
    sp_int_digit tt = 0;
5814
#endif
5815
494k
    sp_int_digit tr = 0;
5816
494k
    sp_int_digit m;
5817
5818
494k
    if (r == NULL) {
5819
494k
        m = SP_DIGIT_MAX / d;
5820
2.45M
        for (i = a->used - 1; i >= 0; i--) {
5821
1.96M
    #ifndef SQR_MUL_ASM
5822
1.96M
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5823
1.96M
            tt = (t * m) >> SP_WORD_SIZE;
5824
1.96M
            tr = (sp_int_digit)(t - tt * d);
5825
    #else
5826
            SP_ASM_MUL(l, tt, a->dp[i], m);
5827
            tt += tr * m;
5828
            tr = a->dp[i] - (tt * d);
5829
    #endif
5830
1.96M
            tr = tr % d;
5831
1.96M
        }
5832
494k
        *rem = tr;
5833
494k
    }
5834
0
    else {
5835
0
        m = SP_DIGIT_MAX / d;
5836
0
        for (i = a->used - 1; i >= 0; i--) {
5837
0
    #ifndef SQR_MUL_ASM
5838
0
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
5839
0
            tt = (t * m) >> SP_WORD_SIZE;
5840
0
            tr = (sp_int_digit)(t - tt * d);
5841
    #else
5842
            SP_ASM_MUL(l, tt, a->dp[i], m);
5843
            tt += tr * m;
5844
            tr = a->dp[i] - (tt * d);
5845
    #endif
5846
0
            tt += tr / d;
5847
0
            tr = tr % d;
5848
0
            r->dp[i] = tt;
5849
0
        }
5850
0
        r->used = a->used;
5851
0
        sp_clamp(r);
5852
0
        if (rem != NULL) {
5853
0
            *rem = tr;
5854
0
        }
5855
0
    }
5856
494k
}
5857
#endif
5858
5859
#ifdef WOLFSSL_SP_DIV_D
5860
/* Divide a multi-precision number by a digit size number and calculate
5861
 * remainder.
5862
 *   r = a / d; rem = a % d
5863
 *
5864
 * @param  [in]   a    SP integer to be divided.
5865
 * @param  [in]   d    Digit to divide by.
5866
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
5867
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
5868
 *
5869
 * @return  MP_OKAY on success.
5870
 * @return  MP_VAL when a is NULL or d is 0.
5871
 */
5872
int sp_div_d(sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
5873
0
{
5874
0
    int err = MP_OKAY;
5875
5876
0
    if ((a == NULL) || (d == 0)) {
5877
0
        err = MP_VAL;
5878
0
    }
5879
5880
0
    if (err == MP_OKAY) {
5881
0
    #if !defined(WOLFSSL_SP_SMALL)
5882
0
        if (d == 3) {
5883
0
            _sp_div_3(a, r, rem);
5884
0
        }
5885
0
        else if (d == 10) {
5886
0
            _sp_div_10(a, r, rem);
5887
0
        }
5888
0
        else
5889
0
    #endif
5890
0
        if (d <= SP_HALF_MAX) {
5891
0
            _sp_div_small(a, d, r, rem);
5892
0
        }
5893
0
        else
5894
0
        {
5895
0
            int i;
5896
0
        #ifndef SQR_MUL_ASM
5897
0
            sp_int_word w = 0;
5898
        #else
5899
            sp_int_digit l;
5900
            sp_int_digit h = 0;
5901
        #endif
5902
0
            sp_int_digit t;
5903
5904
0
            for (i = a->used - 1; i >= 0; i--) {
5905
0
            #ifndef SQR_MUL_ASM
5906
0
                t = sp_div_word((sp_int_digit)w, a->dp[i], d);
5907
0
                w = (w << SP_WORD_SIZE) | a->dp[i];
5908
0
                w -= (sp_int_word)t * d;
5909
            #else
5910
                l = a->dp[i];
5911
                t = sp_div_word(h, l, d);
5912
                h = l - t * d;
5913
            #endif
5914
0
                if (r != NULL) {
5915
0
                    r->dp[i] = t;
5916
0
                }
5917
0
            }
5918
0
            if (r != NULL) {
5919
0
                r->used = a->used;
5920
0
                sp_clamp(r);
5921
0
            }
5922
5923
0
            if (rem != NULL) {
5924
0
            #ifndef SQR_MUL_ASM
5925
0
                *rem = (sp_int_digit)w;
5926
            #else
5927
                *rem = h;
5928
            #endif
5929
0
            }
5930
0
        }
5931
5932
    #ifdef WOLFSSL_SP_INT_NEGATIVE
5933
        if (r != NULL) {
5934
            r->sign = a->sign;
5935
        }
5936
    #endif
5937
0
    }
5938
5939
0
    return err;
5940
0
}
5941
#endif /* WOLFSSL_SP_DIV_D */
5942
5943
#ifdef WOLFSSL_SP_MOD_D
5944
/* Calculate a modulo the digit d into r: r = a mod d
5945
 *
5946
 * @param  [in]   a  SP integer to reduce.
5947
 * @param  [in]   d  Digit to that is the modulus.
5948
 * @param  [out]  r  Digit that is the result..
5949
 *
5950
 * @return  MP_OKAY on success.
5951
 * @return  MP_VAL when a is NULL or d is 0.
5952
 */
5953
#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
5954
    !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
5955
static
5956
#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
5957
int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
5958
523k
{
5959
523k
    int err = MP_OKAY;
5960
5961
523k
    if ((a == NULL) || (r == NULL) || (d == 0)) {
5962
0
        err = MP_VAL;
5963
0
    }
5964
5965
#if 0
5966
    sp_print(a, "a");
5967
    sp_print_digit(d, "m");
5968
#endif
5969
5970
523k
    if (err == MP_OKAY) {
5971
        /* Check whether d is a power of 2. */
5972
523k
        if ((d & (d - 1)) == 0) {
5973
13.4k
            if (a->used == 0) {
5974
0
                *r = 0;
5975
0
            }
5976
13.4k
            else {
5977
13.4k
                *r = a->dp[0] & (d - 1);
5978
13.4k
            }
5979
13.4k
        }
5980
509k
    #if !defined(WOLFSSL_SP_SMALL)
5981
509k
        else if (d == 3) {
5982
12.8k
            _sp_div_3(a, NULL, r);
5983
12.8k
        }
5984
496k
        else if (d == 10) {
5985
56
            _sp_div_10(a, NULL, r);
5986
56
        }
5987
496k
    #endif
5988
496k
        else if (d <= SP_HALF_MAX) {
5989
494k
            _sp_div_small(a, d, NULL, r);
5990
494k
        }
5991
2.81k
        else {
5992
2.81k
            int i;
5993
2.81k
        #ifndef SQR_MUL_ASM
5994
2.81k
            sp_int_word w = 0;
5995
        #else
5996
            sp_int_digit l;
5997
            sp_int_digit h = 0;
5998
        #endif
5999
2.81k
            sp_int_digit t;
6000
6001
6.11k
            for (i = a->used - 1; i >= 0; i--) {
6002
3.29k
            #ifndef SQR_MUL_ASM
6003
3.29k
                t = sp_div_word((sp_int_digit)w, a->dp[i], d);
6004
3.29k
                w = (w << SP_WORD_SIZE) | a->dp[i];
6005
3.29k
                w -= (sp_int_word)t * d;
6006
            #else
6007
                l = a->dp[i];
6008
                t = sp_div_word(h, l, d);
6009
                h = l - t * d;
6010
            #endif
6011
3.29k
            }
6012
6013
2.81k
        #ifndef SQR_MUL_ASM
6014
2.81k
            *r = (sp_int_digit)w;
6015
        #else
6016
            *r = h;
6017
        #endif
6018
2.81k
        }
6019
6020
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6021
        if (a->sign == MP_NEG) {
6022
            *r = d - *r;
6023
        }
6024
    #endif
6025
523k
    }
6026
6027
#if 0
6028
    sp_print_digit(*r, "rmod");
6029
#endif
6030
6031
523k
    return err;
6032
523k
}
6033
#endif /* WOLFSSL_SP_MOD_D */
6034
6035
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6036
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
6037
 *
6038
 * r = a / 2 (mod m) - constant time (a < m and positive)
6039
 *
6040
 * @param  [in]   a  SP integer to divide.
6041
 * @param  [in]   m  SP integer that is modulus.
6042
 * @param  [out]  r  SP integer to hold result.
6043
 *
6044
 * @return  MP_OKAY on success.
6045
 * @return  MP_VAL when a, m or r is NULL.
6046
 */
6047
int sp_div_2_mod_ct(sp_int* a, sp_int* m, sp_int* r)
6048
{
6049
    int err = MP_OKAY;
6050
6051
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
6052
        err = MP_VAL;
6053
    }
6054
    if ((err == MP_OKAY) && (r->size < m->used + 1)) {
6055
        err = MP_VAL;
6056
    }
6057
6058
    if (err == MP_OKAY) {
6059
    #ifndef SQR_MUL_ASM
6060
        sp_int_word  w = 0;
6061
    #else
6062
        sp_int_digit l = 0;
6063
        sp_int_digit h = 0;
6064
        sp_int_digit t;
6065
    #endif
6066
        sp_int_digit mask;
6067
        int i;
6068
6069
    #if 0
6070
        sp_print(a, "a");
6071
        sp_print(m, "m");
6072
    #endif
6073
6074
        mask = (sp_int_digit)0 - (a->dp[0] & 1);
6075
        for (i = 0; i < m->used; i++) {
6076
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6077
6078
        #ifndef SQR_MUL_ASM
6079
            w         += m->dp[i] & mask;
6080
            w         += a->dp[i] & mask_a;
6081
            r->dp[i]   = (sp_int_digit)w;
6082
            w        >>= DIGIT_BIT;
6083
        #else
6084
            t        = m->dp[i] & mask;
6085
            SP_ASM_ADDC(l, h, t);
6086
            t        = a->dp[i] & mask_a;
6087
            SP_ASM_ADDC(l, h, t);
6088
            r->dp[i] = l;
6089
            l        = h;
6090
            h        = 0;
6091
        #endif
6092
        }
6093
    #ifndef SQR_MUL_ASM
6094
        r->dp[i] = (sp_int_digit)w;
6095
    #else
6096
        r->dp[i] = l;
6097
    #endif
6098
        r->used = i + 1;
6099
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6100
        r->sign = MP_ZPOS;
6101
    #endif
6102
        sp_clamp(r);
6103
        sp_div_2(r, r);
6104
6105
    #if 0
6106
        sp_print(r, "rd2");
6107
    #endif
6108
    }
6109
6110
    return err;
6111
}
6112
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6113
6114
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
6115
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6116
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
6117
/* Divides a by 2 and stores in r: r = a >> 1
6118
 *
6119
 * @param  [in]   a  SP integer to divide.
6120
 * @param  [out]  r  SP integer to hold result.
6121
 *
6122
 * @return  MP_OKAY on success.
6123
 * @return  MP_VAL when a or r is NULL.
6124
 */
6125
#if !(defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
6126
static
6127
#endif
6128
int sp_div_2(sp_int* a, sp_int* r)
6129
393k
{
6130
393k
    int err = MP_OKAY;
6131
6132
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6133
    /* Only when a public API. */
6134
    if ((a == NULL) || (r == NULL)) {
6135
        err = MP_VAL;
6136
    }
6137
#endif
6138
6139
393k
    if (err == MP_OKAY) {
6140
393k
        int i;
6141
6142
393k
        r->used = a->used;
6143
4.57M
        for (i = 0; i < a->used - 1; i++) {
6144
4.18M
            r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
6145
4.18M
        }
6146
393k
        r->dp[i] = a->dp[i] >> 1;
6147
393k
        r->used = i + 1;
6148
393k
        sp_clamp(r);
6149
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6150
        r->sign = a->sign;
6151
    #endif
6152
393k
    }
6153
6154
393k
    return err;
6155
393k
}
6156
#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
6157
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6158
6159
/************************
6160
 * Add/Subtract Functions
6161
 ************************/
6162
6163
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
6164
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
6165
 *
6166
 * @param  [in]   a  SP integer to add to.
6167
 * @param  [in]   b  SP integer to add.
6168
 * @param  [out]  r  SP integer to store result in.
6169
 * @param  [in]   o  Number of digits to offset b.
6170
 *
6171
 * @return  MP_OKAY on success.
6172
 */
6173
static int _sp_add_off(sp_int* a, sp_int* b, sp_int* r, int o)
6174
141k
{
6175
141k
    int i;
6176
141k
    int j;
6177
141k
#ifndef SQR_MUL_ASM
6178
141k
    sp_int_word t = 0;
6179
#else
6180
    sp_int_digit l = 0;
6181
    sp_int_digit h = 0;
6182
    sp_int_digit t = 0;
6183
#endif
6184
6185
#ifdef SP_MATH_NEED_ADD_OFF
6186
    for (i = 0; (i < o) && (i < a->used); i++) {
6187
        r->dp[i] = a->dp[i];
6188
    }
6189
    for (; i < o; i++) {
6190
        r->dp[i] = 0;
6191
    }
6192
#else
6193
141k
    i = 0;
6194
141k
    (void)o;
6195
141k
#endif
6196
6197
2.26M
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
6198
2.12M
    #ifndef SQR_MUL_ASM
6199
2.12M
        t += a->dp[i];
6200
2.12M
        t += b->dp[j];
6201
2.12M
        r->dp[i] = (sp_int_digit)t;
6202
2.12M
        t >>= SP_WORD_SIZE;
6203
    #else
6204
        t = a->dp[i];
6205
        SP_ASM_ADDC(l, h, t);
6206
        t = b->dp[j];
6207
        SP_ASM_ADDC(l, h, t);
6208
        r->dp[i] = l;
6209
        l = h;
6210
        h = 0;
6211
    #endif
6212
2.12M
    }
6213
141k
    for (; i < a->used; i++) {
6214
344
    #ifndef SQR_MUL_ASM
6215
344
        t += a->dp[i];
6216
344
        r->dp[i] = (sp_int_digit)t;
6217
344
        t >>= SP_WORD_SIZE;
6218
    #else
6219
        t = a->dp[i];
6220
        SP_ASM_ADDC(l, h, t);
6221
        r->dp[i] = l;
6222
        l = h;
6223
        h = 0;
6224
    #endif
6225
344
    }
6226
164k
    for (; j < b->used; i++, j++) {
6227
22.8k
    #ifndef SQR_MUL_ASM
6228
22.8k
        t += b->dp[j];
6229
22.8k
        r->dp[i] = (sp_int_digit)t;
6230
22.8k
        t >>= SP_WORD_SIZE;
6231
    #else
6232
        t = b->dp[j];
6233
        SP_ASM_ADDC(l, h, t);
6234
        r->dp[i] = l;
6235
        l = h;
6236
        h = 0;
6237
    #endif
6238
22.8k
    }
6239
141k
    r->used = i;
6240
141k
#ifndef SQR_MUL_ASM
6241
141k
    if (t != 0) {
6242
15.6k
       r->dp[i] = (sp_int_digit)t;
6243
15.6k
       r->used++;
6244
15.6k
    }
6245
#else
6246
    if (l != 0) {
6247
       r->dp[i] = l;
6248
       r->used++;
6249
    }
6250
#endif
6251
6252
141k
    sp_clamp(r);
6253
6254
141k
    return MP_OKAY;
6255
141k
}
6256
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_SP_INVMOD */
6257
6258
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
6259
    !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
6260
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
6261
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
6262
 * a must be greater than b.
6263
 *
6264
 * @param  [in]   a  SP integer to subtract from.
6265
 * @param  [in]   b  SP integer to subtract.
6266
 * @param  [out]  r  SP integer to store result in.
6267
 * @param  [in]   o  Number of digits to offset b.
6268
 *
6269
 * @return  MP_OKAY on success.
6270
 */
6271
static int _sp_sub_off(sp_int* a, sp_int* b, sp_int* r, int o)
6272
214k
{
6273
214k
    int i;
6274
214k
    int j;
6275
214k
#ifndef SQR_MUL_ASM
6276
214k
    sp_int_sword t = 0;
6277
#else
6278
    sp_int_digit l = 0;
6279
    sp_int_digit h = 0;
6280
    sp_int_digit t = 0;
6281
#endif
6282
6283
239k
    for (i = 0; (i < o) && (i < a->used); i++) {
6284
25.2k
        r->dp[i] = a->dp[i];
6285
25.2k
    }
6286
2.26M
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
6287
2.04M
    #ifndef SQR_MUL_ASM
6288
2.04M
        t += a->dp[i];
6289
2.04M
        t -= b->dp[j];
6290
2.04M
        r->dp[i] = (sp_int_digit)t;
6291
2.04M
        t >>= SP_WORD_SIZE;
6292
    #else
6293
        t = a->dp[i];
6294
        SP_ASM_ADDC(l, h, t);
6295
        t = b->dp[j];
6296
        SP_ASM_SUBC(l, h, t);
6297
        r->dp[i] = l;
6298
        l = h;
6299
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6300
    #endif
6301
2.04M
    }
6302
659k
    for (; i < a->used; i++) {
6303
445k
    #ifndef SQR_MUL_ASM
6304
445k
        t += a->dp[i];
6305
445k
        r->dp[i] = (sp_int_digit)t;
6306
445k
        t >>= SP_WORD_SIZE;
6307
    #else
6308
        t = a->dp[i];
6309
        SP_ASM_ADDC(l, h, t);
6310
        r->dp[i] = l;
6311
        l = h;
6312
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6313
    #endif
6314
445k
    }
6315
214k
    r->used = i;
6316
214k
    sp_clamp(r);
6317
6318
214k
    return MP_OKAY;
6319
214k
}
6320
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
6321
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6322
6323
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
6324
/* Add b to a into r: r = a + b
6325
 *
6326
 * @param  [in]   a  SP integer to add to.
6327
 * @param  [in]   b  SP integer to add.
6328
 * @param  [out]  r  SP integer to store result in.
6329
 *
6330
 * @return  MP_OKAY on success.
6331
 * @return  MP_VAL when a, b, or r is NULL.
6332
 */
6333
int sp_add(sp_int* a, sp_int* b, sp_int* r)
6334
154
{
6335
154
    int err = MP_OKAY;
6336
6337
154
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
6338
0
        err = MP_VAL;
6339
0
    }
6340
154
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
6341
1
        err = MP_VAL;
6342
1
    }
6343
154
    if (err == MP_OKAY) {
6344
153
    #ifndef WOLFSSL_SP_INT_NEGATIVE
6345
153
        err = _sp_add_off(a, b, r, 0);
6346
    #else
6347
        if (a->sign == b->sign) {
6348
            r->sign = a->sign;
6349
            err = _sp_add_off(a, b, r, 0);
6350
        }
6351
        else if (_sp_cmp_abs(a, b) != MP_LT) {
6352
            err = _sp_sub_off(a, b, r, 0);
6353
            if (sp_iszero(r)) {
6354
                r->sign = MP_ZPOS;
6355
            }
6356
            else {
6357
                r->sign = a->sign;
6358
            }
6359
        }
6360
        else {
6361
            err = _sp_sub_off(b, a, r, 0);
6362
            if (sp_iszero(r)) {
6363
                r->sign = MP_ZPOS;
6364
            }
6365
            else {
6366
                r->sign = b->sign;
6367
            }
6368
        }
6369
    #endif
6370
153
    }
6371
6372
154
    return err;
6373
154
}
6374
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_SP_INVMOD */
6375
6376
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6377
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
6378
/* Subtract b from a into r: r = a - b
6379
 *
6380
 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
6381
 *
6382
 * @param  [in]   a  SP integer to subtract from.
6383
 * @param  [in]   b  SP integer to subtract.
6384
 * @param  [out]  r  SP integer to store result in.
6385
 *
6386
 * @return  MP_OKAY on success.
6387
 * @return  MP_VAL when a, b, or r is NULL.
6388
 */
6389
int sp_sub(sp_int* a, sp_int* b, sp_int* r)
6390
3.54k
{
6391
3.54k
    int err = MP_OKAY;
6392
6393
3.54k
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
6394
0
        err = MP_VAL;
6395
0
    }
6396
3.54k
    else {
6397
3.54k
    #ifndef WOLFSSL_SP_INT_NEGATIVE
6398
3.54k
        err = _sp_sub_off(a, b, r, 0);
6399
    #else
6400
        if (a->sign != b->sign) {
6401
            r->sign = a->sign;
6402
            err = _sp_add_off(a, b, r, 0);
6403
        }
6404
        else if (_sp_cmp_abs(a, b) != MP_LT) {
6405
            err = _sp_sub_off(a, b, r, 0);
6406
            if (sp_iszero(r)) {
6407
                r->sign = MP_ZPOS;
6408
            }
6409
            else {
6410
                r->sign = a->sign;
6411
            }
6412
        }
6413
        else {
6414
            err = _sp_sub_off(b, a, r, 0);
6415
            if (sp_iszero(r)) {
6416
                r->sign = MP_ZPOS;
6417
            }
6418
            else {
6419
                r->sign = 1 - a->sign;
6420
            }
6421
        }
6422
    #endif
6423
3.54k
    }
6424
6425
3.54k
    return err;
6426
3.54k
}
6427
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6428
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
6429
6430
/****************************
6431
 * Add/Subtract mod functions
6432
 ****************************/
6433
6434
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6435
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
6436
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
6437
/* Add two value and reduce: r = (a + b) % m
6438
 *
6439
 * @param  [in]   a  SP integer to add.
6440
 * @param  [in]   b  SP integer to add with.
6441
 * @param  [in]   m  SP integer that is the modulus.
6442
 * @param  [out]  r  SP integer to hold result.
6443
 *
6444
 * @return  MP_OKAY on success.
6445
 * @return  MP_VAL when a, b, m or r is NULL.
6446
 * @return  MP_MEM when dynamic memory allocation fails.
6447
 */
6448
int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6449
{
6450
    int err = MP_OKAY;
6451
    int used = ((a == NULL) || (b == NULL)) ? 1 :
6452
                   ((a->used >= b->used) ? a->used + 1 : b->used + 1);
6453
    DECL_SP_INT(t, used);
6454
6455
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6456
        err = MP_VAL;
6457
    }
6458
6459
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
6460
#if 0
6461
    if (err == MP_OKAY) {
6462
        sp_print(a, "a");
6463
        sp_print(b, "b");
6464
        sp_print(m, "m");
6465
    }
6466
#endif
6467
6468
    if (err == MP_OKAY) {
6469
        err = sp_add(a, b, t);
6470
    }
6471
    if (err == MP_OKAY) {
6472
        err = sp_mod(t, m, r);
6473
    }
6474
6475
#if 0
6476
    if (err == MP_OKAY) {
6477
        sp_print(r, "rma");
6478
    }
6479
#endif
6480
6481
    FREE_SP_INT(t, NULL);
6482
    return err;
6483
}
6484
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
6485
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
6486
6487
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
6488
    defined(HAVE_ECC))
6489
/* Sub b from a and reduce: r = (a - b) % m
6490
 * Result is always positive.
6491
 *
6492
 * @param  [in]   a  SP integer to subtract from
6493
 * @param  [in]   b  SP integer to subtract.
6494
 * @param  [in]   m  SP integer that is the modulus.
6495
 * @param  [out]  r  SP integer to hold result.
6496
 *
6497
 * @return  MP_OKAY on success.
6498
 * @return  MP_VAL when a, b, m or r is NULL.
6499
 * @return  MP_MEM when dynamic memory allocation fails.
6500
 */
6501
int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6502
{
6503
#ifndef WOLFSSL_SP_INT_NEGATIVE
6504
    int err = MP_OKAY;
6505
    int used = ((a == NULL) || (b == NULL) || (m == NULL)) ? 1 :
6506
                   ((a->used >= m->used) ?
6507
                       ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
6508
                   ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
6509
    DECL_SP_INT_ARRAY(t, used, 2);
6510
6511
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6512
        err = MP_VAL;
6513
    }
6514
6515
#if 0
6516
    if (err == MP_OKAY) {
6517
        sp_print(a, "a");
6518
        sp_print(b, "b");
6519
        sp_print(m, "m");
6520
    }
6521
#endif
6522
6523
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
6524
    if (err == MP_OKAY) {
6525
        if (_sp_cmp(a, m) != MP_LT) {
6526
            err = sp_mod(a, m, t[0]);
6527
            a = t[0];
6528
        }
6529
    }
6530
    if (err == MP_OKAY) {
6531
        if (_sp_cmp(b, m) != MP_LT) {
6532
            err = sp_mod(b, m, t[1]);
6533
            b = t[1];
6534
        }
6535
    }
6536
    if (err == MP_OKAY) {
6537
        if (_sp_cmp(a, b) == MP_LT) {
6538
            err = sp_add(a, m, t[0]);
6539
            a = t[0];
6540
        }
6541
    }
6542
    if (err == MP_OKAY) {
6543
        err = sp_sub(a, b, r);
6544
    }
6545
6546
#if 0
6547
    if (err == MP_OKAY) {
6548
        sp_print(r, "rms");
6549
    }
6550
#endif
6551
6552
    FREE_SP_INT_ARRAY(t, NULL);
6553
    return err;
6554
6555
#else /* WOLFSSL_SP_INT_NEGATIVE */
6556
6557
    int err = MP_OKAY;
6558
    int used = ((a == NULL) || (b == NULL)) ? 1 :
6559
                   ((a->used >= b->used) ? a->used + 1 : b->used + 1);
6560
    DECL_SP_INT(t, used);
6561
6562
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
6563
        err = MP_VAL;
6564
    }
6565
6566
#if 0
6567
    if (err == MP_OKAY) {
6568
        sp_print(a, "a");
6569
        sp_print(b, "b");
6570
        sp_print(m, "m");
6571
    }
6572
#endif
6573
6574
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
6575
    if (err == MP_OKAY) {
6576
        err = sp_sub(a, b, t);
6577
    }
6578
    if (err == MP_OKAY) {
6579
        err = sp_mod(t, m, r);
6580
    }
6581
6582
#if 0
6583
    if (err == MP_OKAY) {
6584
        sp_print(r, "rms");
6585
    }
6586
#endif
6587
6588
    FREE_SP_INT(t, NULL);
6589
    return err;
6590
#endif /* WOLFSSL_SP_INT_NEGATIVE */
6591
}
6592
#endif /* WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || HAVE_ECC) */
6593
6594
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6595
/* Add two value and reduce: r = (a + b) % m
6596
 *
6597
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
6598
 *
6599
 * Assumes a, b, m and r are not NULL.
6600
 * m and r must not be the same pointer.
6601
 *
6602
 * @param  [in]   a  SP integer to add.
6603
 * @param  [in]   b  SP integer to add with.
6604
 * @param  [in]   m  SP integer that is the modulus.
6605
 * @param  [out]  r  SP integer to hold result.
6606
 *
6607
 * @return  MP_OKAY on success.
6608
 */
6609
int sp_addmod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6610
{
6611
    int err = MP_OKAY;
6612
#ifndef SQR_MUL_ASM
6613
    sp_int_sword w;
6614
    sp_int_sword s;
6615
#else
6616
    sp_int_digit wl;
6617
    sp_int_digit wh;
6618
    sp_int_digit sl;
6619
    sp_int_digit sh;
6620
    sp_int_digit t;
6621
#endif
6622
    sp_int_digit mask;
6623
    int i;
6624
6625
    if (r->size < m->used) {
6626
        err = MP_VAL;
6627
    }
6628
    if ((err == MP_OKAY) && (r == m)) {
6629
        err = MP_VAL;
6630
    }
6631
6632
    if (err == MP_OKAY) {
6633
#if 0
6634
        sp_print(a, "a");
6635
        sp_print(b, "b");
6636
        sp_print(m, "m");
6637
#endif
6638
6639
        /* Add a to b into r. Do the subtract of modulus but don't store result.
6640
         * When subtract result is negative, the overflow will be negative.
6641
         * Only need to subtract mod when result is positive - overflow is
6642
         * positive.
6643
         */
6644
    #ifndef SQR_MUL_ASM
6645
        w = 0;
6646
        s = 0;
6647
    #else
6648
        wl = 0;
6649
        wh = 0;
6650
        sl = 0;
6651
        sh = 0;
6652
    #endif
6653
        for (i = 0; i < m->used; i++) {
6654
            /* Values past 'used' are not initialized. */
6655
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6656
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
6657
6658
        #ifndef SQR_MUL_ASM
6659
            w         += a->dp[i] & mask_a;
6660
            w         += b->dp[i] & mask_b;
6661
            r->dp[i]   = (sp_int_digit)w;
6662
            s         += (sp_int_digit)w;
6663
            s         -= m->dp[i];
6664
            s        >>= DIGIT_BIT;
6665
            w        >>= DIGIT_BIT;
6666
        #else
6667
            t = a->dp[i] & mask_a;
6668
            SP_ASM_ADDC(wl, wh, t);
6669
            t = b->dp[i] & mask_b;
6670
            SP_ASM_ADDC(wl, wh, t);
6671
            r->dp[i] = wl;
6672
            SP_ASM_ADDC(sl, sh, wl);
6673
            t = m->dp[i];
6674
            SP_ASM_SUBC(sl, sh, t);
6675
            sl = sh;
6676
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE-1));
6677
            wl = wh;
6678
            wh = 0;
6679
        #endif
6680
        }
6681
    #ifndef SQR_MUL_ASM
6682
        s += (sp_int_digit)w;
6683
        /* s will be positive when subtracting modulus is needed. */
6684
        mask = (sp_int_digit)0 - (s >= 0);
6685
    #else
6686
        SP_ASM_ADDC(sl, sh, wl);
6687
        /* s will be positive when subtracting modulus is needed. */
6688
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
6689
    #endif
6690
6691
        /* Constant time, conditionally, subtract modulus from sum. */
6692
    #ifndef SQR_MUL_ASM
6693
        w = 0;
6694
    #else
6695
        wl = 0;
6696
        wh = 0;
6697
    #endif
6698
        for (i = 0; i < m->used; i++) {
6699
        #ifndef SQR_MUL_ASM
6700
            w         += r->dp[i];
6701
            w         -= m->dp[i] & mask;
6702
            r->dp[i]   = (sp_int_digit)w;
6703
            w        >>= DIGIT_BIT;
6704
        #else
6705
            t = r->dp[i];
6706
            SP_ASM_ADDC(wl, wh, t);
6707
            t = m->dp[i] & mask;
6708
            SP_ASM_SUBC(wl, wh, t);
6709
            r->dp[i] = wl;
6710
            wl = wh;
6711
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
6712
        #endif
6713
        }
6714
        /* Result will always have digits equal to or less than those in
6715
         * modulus. */
6716
        r->used = i;
6717
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6718
        r->sign = MP_ZPOS;
6719
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
6720
        sp_clamp(r);
6721
6722
#if 0
6723
        sp_print(r, "rma");
6724
#endif
6725
    }
6726
6727
    return err;
6728
}
6729
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6730
6731
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
6732
/* Sub b from a and reduce: r = (a - b) % m
6733
 * Result is always positive.
6734
 *
6735
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
6736
 *
6737
 * Assumes a, b, m and r are not NULL.
6738
 * m and r must not be the same pointer.
6739
 *
6740
 * @param  [in]   a  SP integer to subtract from
6741
 * @param  [in]   b  SP integer to subtract.
6742
 * @param  [in]   m  SP integer that is the modulus.
6743
 * @param  [out]  r  SP integer to hold result.
6744
 *
6745
 * @return  MP_OKAY on success.
6746
 */
6747
int sp_submod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
6748
{
6749
    int err = MP_OKAY;
6750
#ifndef SQR_MUL_ASM
6751
    sp_int_sword w;
6752
#else
6753
    sp_int_digit l;
6754
    sp_int_digit h;
6755
    sp_int_digit t;
6756
#endif
6757
    sp_int_digit mask;
6758
    int i;
6759
6760
    if (r->size < m->used + 1) {
6761
        err = MP_VAL;
6762
    }
6763
    if ((err == MP_OKAY) && (r == m)) {
6764
        err = MP_VAL;
6765
    }
6766
6767
    if (err == MP_OKAY) {
6768
#if 0
6769
        sp_print(a, "a");
6770
        sp_print(b, "b");
6771
        sp_print(m, "m");
6772
#endif
6773
6774
        /* In constant time, subtract b from a putting result in r. */
6775
    #ifndef SQR_MUL_ASM
6776
        w = 0;
6777
    #else
6778
        l = 0;
6779
        h = 0;
6780
    #endif
6781
        for (i = 0; i < m->used; i++) {
6782
            /* Values past 'used' are not initialized. */
6783
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
6784
            sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
6785
6786
        #ifndef SQR_MUL_ASM
6787
            w         += a->dp[i] & mask_a;
6788
            w         -= b->dp[i] & mask_b;
6789
            r->dp[i]   = (sp_int_digit)w;
6790
            w        >>= DIGIT_BIT;
6791
        #else
6792
            t = a->dp[i] & mask_a;
6793
            SP_ASM_ADDC(l, h, t);
6794
            t = b->dp[i] & mask_b;
6795
            SP_ASM_SUBC(l, h, t);
6796
            r->dp[i] = l;
6797
            l = h;
6798
            h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
6799
        #endif
6800
        }
6801
        /* When w is negative then we need to add modulus to make result
6802
         * positive. */
6803
    #ifndef SQR_MUL_ASM
6804
        mask = (sp_int_digit)0 - (w < 0);
6805
    #else
6806
        mask = h;
6807
    #endif
6808
        /* Constant time, conditionally, add modulus to difference. */
6809
    #ifndef SQR_MUL_ASM
6810
        w = 0;
6811
    #else
6812
        l = 0;
6813
        h = 0;
6814
    #endif
6815
        for (i = 0; i < m->used; i++) {
6816
        #ifndef SQR_MUL_ASM
6817
            w         += r->dp[i];
6818
            w         += m->dp[i] & mask;
6819
            r->dp[i]   = (sp_int_digit)w;
6820
            w        >>= DIGIT_BIT;
6821
        #else
6822
            t = r->dp[i];
6823
            SP_ASM_ADDC(l, h, t);
6824
            t = m->dp[i] & mask;
6825
            SP_ASM_ADDC(l, h, t);
6826
            r->dp[i] = l;
6827
            l = h;
6828
            h = 0;
6829
        #endif
6830
        }
6831
        r->used = i;
6832
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6833
        r->sign = MP_ZPOS;
6834
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
6835
        sp_clamp(r);
6836
6837
#if 0
6838
        sp_print(r, "rms");
6839
#endif
6840
    }
6841
6842
    return err;
6843
}
6844
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
6845
6846
/********************
6847
 * Shifting functions
6848
 ********************/
6849
6850
#if !defined(NO_DH) || defined(HAVE_ECC) || (defined(WC_RSA_BLINDING) && \
6851
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
6852
/* Left shift the multi-precision number by a number of digits.
6853
 *
6854
 * @param  [in,out]  a  SP integer to shift.
6855
 * @param  [in]      s  Number of digits to shift.
6856
 *
6857
 * @return  MP_OKAY on success.
6858
 * @return  MP_VAL when a is NULL or the result is too big to fit in an SP.
6859
 */
6860
int sp_lshd(sp_int* a, int s)
6861
1
{
6862
1
    int err = MP_OKAY;
6863
6864
1
    if (a == NULL) {
6865
0
        err = MP_VAL;
6866
0
    }
6867
1
    if ((err == MP_OKAY) && (a->used + s > a->size)) {
6868
0
        err = MP_VAL;
6869
0
    }
6870
1
    if (err == MP_OKAY) {
6871
1
        XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
6872
1
        a->used += s;
6873
1
        XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
6874
1
        sp_clamp(a);
6875
1
    }
6876
6877
1
    return err;
6878
1
}
6879
#endif
6880
6881
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6882
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6883
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
6884
/* Left shift the multi-precision number by n bits.
6885
 * Bits may be larger than the word size.
6886
 *
6887
 * @param  [in,out]  a  SP integer to shift.
6888
 * @param  [in]      n  Number of bits to shift left.
6889
 *
6890
 * @return  MP_OKAY on success.
6891
 */
6892
static int sp_lshb(sp_int* a, int n)
6893
312k
{
6894
312k
    int err = MP_OKAY;
6895
6896
312k
    if (a->used != 0) {
6897
312k
        int s = n >> SP_WORD_SHIFT;
6898
312k
        int i;
6899
6900
312k
        if (a->used + s >= a->size) {
6901
0
            err = MP_VAL;
6902
0
        }
6903
312k
        if (err == MP_OKAY) {
6904
312k
            n &= SP_WORD_MASK;
6905
312k
            if (n != 0) {
6906
312k
                sp_int_digit v;
6907
6908
312k
                v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
6909
312k
                a->dp[a->used - 1 + s] = a->dp[a->used - 1] << n;
6910
7.28M
                for (i = a->used - 2; i >= 0; i--) {
6911
6.97M
                    a->dp[i + 1 + s] |= a->dp[i] >> (SP_WORD_SIZE - n);
6912
6.97M
                    a->dp[i     + s] = a->dp[i] << n;
6913
6.97M
                }
6914
312k
                if (v != 0) {
6915
87.3k
                    a->dp[a->used + s] = v;
6916
87.3k
                    a->used++;
6917
87.3k
                }
6918
312k
            }
6919
0
            else if (s > 0) {
6920
0
                for (i = a->used - 1; i >= 0; i--) {
6921
0
                    a->dp[i + s] = a->dp[i];
6922
0
                }
6923
0
            }
6924
312k
            a->used += s;
6925
312k
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
6926
312k
        }
6927
312k
    }
6928
6929
312k
    return err;
6930
312k
}
6931
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6932
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6933
6934
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6935
    !defined(NO_DH) || defined(HAVE_ECC) || \
6936
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
6937
/* Shift a right by n digits into r: r = a >> (n * SP_WORD_SIZE)
6938
 *
6939
 * @param  [in]   a  SP integer to shift.
6940
 * @param  [in]   n  Number of digits to shift.
6941
 * @param  [out]  r  SP integer to store result in.
6942
 */
6943
void sp_rshd(sp_int* a, int c)
6944
30
{
6945
30
    if (a != NULL) {
6946
30
        int i;
6947
30
        int j;
6948
6949
30
        if (c >= a->used) {
6950
13
            _sp_zero(a);
6951
13
        }
6952
17
        else {
6953
124
            for (i = c, j = 0; i < a->used; i++, j++) {
6954
107
                a->dp[j] = a->dp[i];
6955
107
            }
6956
17
            a->used -= c;
6957
17
        }
6958
30
    }
6959
30
}
6960
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
6961
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6962
6963
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6964
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6965
    defined(WOLFSSL_HAVE_SP_DH)
6966
/* Shift a right by n bits into r: r = a >> n
6967
 *
6968
 * @param  [in]   a  SP integer to shift.
6969
 * @param  [in]   n  Number of bits to shift.
6970
 * @param  [out]  r  SP integer to store result in.
6971
 */
6972
void sp_rshb(sp_int* a, int n, sp_int* r)
6973
157k
{
6974
157k
    int i = n >> SP_WORD_SHIFT;
6975
6976
157k
    if (i >= a->used) {
6977
79
        _sp_zero(r);
6978
79
    }
6979
157k
    else {
6980
157k
        int j;
6981
6982
157k
        n &= SP_WORD_SIZE - 1;
6983
157k
        if (n == 0) {
6984
275
            for (j = 0; i < a->used; i++, j++)
6985
216
                r->dp[j] = a->dp[i];
6986
59
            r->used = j;
6987
59
        }
6988
157k
        else if (n > 0) {
6989
2.76M
            for (j = 0; i < a->used-1; i++, j++)
6990
2.60M
                r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
6991
157k
            r->dp[j] = a->dp[i] >> n;
6992
157k
            r->used = j + 1;
6993
157k
            sp_clamp(r);
6994
157k
        }
6995
#ifdef WOLFSSL_SP_INT_NEGATIVE
6996
        if (sp_iszero(r)) {
6997
            r->sign = MP_ZPOS;
6998
        }
6999
        else {
7000
            r->sign = a->sign;
7001
        }
7002
#endif
7003
157k
    }
7004
157k
}
7005
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7006
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
7007
7008
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7009
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7010
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
7011
/* Divide a by d and return the quotient in r and the remainder in rem.
7012
 *   r = a / d; rem = a % d
7013
 *
 * Either of r and rem may be NULL when that result is not needed, but not
 * both.
 *
7014
 * @param  [in]   a    SP integer to be divided.
7015
 * @param  [in]   d    SP integer to divide by.
7016
 * @param  [out]  r    SP integer that is the quotient.
7017
 * @param  [out]  rem  SP integer that is the remainder.
7018
 *
7019
 * @return  MP_OKAY on success.
7020
 * @return  MP_VAL when a or d is NULL, r and rem are both NULL, d is 0, r or
 *          rem is too small to hold its result, or a has no room to be
 *          shifted left for normalization.
7021
 * @return  MP_MEM when dynamic memory allocation fails.
7022
 */
7023
#ifndef WOLFSSL_SP_MATH_ALL
7024
static
7025
#endif
7026
int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
7027
1.18M
{
7028
1.18M
    int err = MP_OKAY;
7029
1.18M
    int ret;
7030
1.18M
    int done = 0;
7031
1.18M
    int i;
7032
1.18M
    int s = 0;
7033
1.18M
    sp_int_digit dt;
7034
1.18M
    sp_int_digit t;
7035
1.18M
    sp_int* sa = NULL;
7036
1.18M
    sp_int* sd = NULL;
7037
1.18M
    sp_int* tr = NULL;
7038
1.18M
    sp_int* trial = NULL;
7039
#ifdef WOLFSSL_SP_INT_NEGATIVE
7040
    int aSign = MP_ZPOS;
7041
    int dSign = MP_ZPOS;
7042
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7043
1.18M
    DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4);
7044
7045
1.18M
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
7046
0
        err = MP_VAL;
7047
0
    }
7048
1.18M
    if ((err == MP_OKAY) && sp_iszero(d)) {
7049
54
        err = MP_VAL;
7050
54
    }
    /* Quotient must have room for a->used - d->used + 2 digits. */
7051
1.18M
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
7052
0
        err = MP_VAL;
7053
0
    }
    /* Remainder must have room for one more digit than the smaller of a and
     * d. */
7054
1.18M
    if ((err == MP_OKAY) && (rem != NULL)) {
7055
1.18M
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
7056
0
            err = MP_VAL;
7057
0
        }
7058
1.18M
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
7059
0
            err = MP_VAL;
7060
0
        }
7061
1.18M
    }
7062
    /* May need to shift number being divided left into a new word. */
7063
1.18M
    if ((err == MP_OKAY) && (a->used == SP_INT_DIGITS)) {
7064
7
        int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
7065
7
        if ((bits != SP_WORD_SIZE) &&
7066
7
                (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
7067
1
            err = MP_VAL;
7068
1
        }
7069
7
    }
7070
7071
#if 0
7072
    if (err == MP_OKAY) {
7073
        sp_print(a, "a");
7074
        sp_print(d, "b");
7075
    }
7076
#endif
7077
7078
1.18M
    if (err == MP_OKAY) {
7079
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7080
        aSign = a->sign;
7081
        dSign = d->sign;
7082
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
7083
7084
        /* Cases that need no long division: |a| < |d|, |a| == |d| and a
         * larger than d but of the same bit length. */
1.18M
        ret = _sp_cmp_abs(a, d);
7085
1.18M
        if (ret == MP_LT) {
7086
170k
            if (rem != NULL) {
7087
170k
                sp_copy(a, rem);
7088
170k
            }
7089
170k
            if (r != NULL) {
7090
0
                sp_set(r, 0);
7091
0
            }
7092
170k
            done = 1;
7093
170k
        }
7094
1.01M
        else if (ret == MP_EQ) {
7095
104
            if (rem != NULL) {
7096
81
                sp_set(rem, 0);
7097
81
            }
7098
104
            if (r != NULL) {
7099
23
                sp_set(r, 1);
7100
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7101
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7102
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
7103
23
            }
7104
104
            done = 1;
7105
104
        }
7106
1.01M
        else if (sp_count_bits(a) == sp_count_bits(d)) {
7107
            /* a is greater than d but same bit length */
7108
5.72k
            if (rem != NULL) {
7109
5.72k
                _sp_sub_off(a, d, rem, 0);
7110
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7111
                rem->sign = aSign;
7112
            #endif
7113
5.72k
            }
7114
5.72k
            if (r != NULL) {
7115
0
                sp_set(r, 1);
7116
            #ifdef WOLFSSL_SP_INT_NEGATIVE
7117
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7118
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
7119
0
            }
7120
5.72k
            done = 1;
7121
5.72k
        }
7122
1.18M
    }
7123
7124
1.18M
    if ((!done) && (err == MP_OKAY)) {
7125
1.00M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7126
1.00M
    !defined(WOLFSSL_SP_NO_MALLOC)
7127
1.00M
        /* rem and r are used as working storage when they are not d (and,
         * for rem, when it is larger than a) so fewer temporaries need to be
         * allocated. */
        int cnt = 4;
7128
1.00M
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
7129
1.00M
            cnt--;
7130
1.00M
        }
7131
1.00M
        if ((r != NULL) && (r != d)) {
7132
475
            cnt--;
7133
475
        }
7134
        /* Macro always has code associated with it and checks err first. */
7135
1.00M
        ALLOC_SP_INT_ARRAY(td, a->used + 1, cnt, err, NULL);
7136
#else
7137
        ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL);
7138
#endif
7139
1.00M
    }
7140
7141
1.18M
    if ((!done) && (err == MP_OKAY)) {
7142
1.00M
        sd    = td[0];
7143
1.00M
        trial = td[1];
7144
1.00M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7145
1.00M
    !defined(WOLFSSL_SP_NO_MALLOC)
7146
1.00M
        /* Same conditions as used when counting temporaries to allocate. */
        i = 2;
7147
1.00M
        sa    = ((rem != NULL) && (rem != d) && (rem->size > a->used)) ? rem :
7148
1.00M
            td[i++];
7149
1.00M
        tr    = ((r != NULL) && (r != d)) ? r : td[i];
7150
#else
7151
        sa    = td[2];
7152
        tr    = td[3];
7153
#endif
7154
7155
1.00M
        sp_init_size(sd, d->used + 1);
7156
1.00M
        sp_init_size(trial, a->used + 1);
7157
1.00M
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
7158
1.00M
    !defined(WOLFSSL_SP_NO_MALLOC)
7159
1.00M
        if (sa != rem) {
7160
480
            sp_init_size(sa, a->used + 1);
7161
480
        }
7162
1.00M
        if (tr != r) {
7163
1.00M
            sp_init_size(tr, a->used - d->used + 2);
7164
1.00M
        }
7165
#else
7166
        sp_init_size(sa, a->used + 1);
7167
        sp_init_size(tr, a->used - d->used + 2);
7168
#endif
7169
7170
        /* s is the number of bits to shift left so that the top bit of d is
         * the top bit of a digit. A value of SP_WORD_SIZE means the bit
         * length of d is already a multiple of the word size: no shift. */
1.00M
        s = sp_count_bits(d);
7171
1.00M
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
7172
1.00M
        sp_copy(a, sa);
7173
1.00M
        if (s != SP_WORD_SIZE) {
7174
156k
            err = sp_lshb(sa, s);
7175
156k
            if (err == MP_OKAY) {
7176
156k
                sp_copy(d, sd);
7177
                /* From here d refers to the shifted copy of the divisor. */
156k
                d = sd;
7178
156k
                err = sp_lshb(sd, s);
7179
156k
            }
7180
156k
        }
7181
1.00M
    }
7182
1.18M
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
7183
#ifdef WOLFSSL_SP_SMALL
7184
        int c;
7185
#else
7186
1.00M
        int j;
7187
1.00M
        int o;
7188
1.00M
    #ifndef SQR_MUL_ASM
7189
1.00M
        sp_int_sword sw;
7190
    #else
7191
        sp_int_digit sl;
7192
        sp_int_digit sh;
7193
        sp_int_digit st;
7194
    #endif
7195
1.00M
#endif /* WOLFSSL_SP_SMALL */
7196
#ifdef WOLFSSL_SP_INT_NEGATIVE
7197
        sa->sign = MP_ZPOS;
7198
        sd->sign = MP_ZPOS;
7199
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7200
7201
        /* NOTE(review): used is set both before and after sp_clear() -
         * presumably so that the digits to be used are zeroed and the count
         * then restored; confirm against sp_clear(). */
1.00M
        tr->used = sa->used - d->used + 1;
7202
1.00M
        sp_clear(tr);
7203
1.00M
        tr->used = sa->used - d->used + 1;
7204
        /* Top digit of divisor - used to estimate each quotient digit. */
1.00M
        dt = d->dp[d->used-1];
7205
7206
        /* Compare the top d->used digits of sa with d, from the top down. */
1.00M
        for (i = d->used - 1; i > 0; i--) {
7207
972k
            if (sa->dp[sa->used - d->used + i] != d->dp[i]) {
7208
972k
                break;
7209
972k
            }
7210
972k
        }
        /* Top of sa is not less than d: subtract d once at the top offset
         * and record a 1 in the top quotient digit. */
7211
1.00M
        if (sa->dp[sa->used - d->used + i] >= d->dp[i]) {
7212
2.58k
            i = sa->used;
7213
2.58k
            _sp_sub_off(sa, d, sa, sa->used - d->used);
7214
            /* Keep the same used so that 0 zeros will be put in. */
7215
2.58k
            sa->used = i;
7216
2.58k
            if (r != NULL) {
7217
22
                tr->dp[sa->used - d->used] = 1;
7218
22
            }
7219
2.58k
        }
        /* One quotient digit per iteration, working down from the top of
         * sa. */
7220
11.3M
        for (i = sa->used - 1; i >= d->used; i--) {
            /* When the top digits are equal use the largest digit value as
             * the estimate rather than calling sp_div_word(). */
7221
10.3M
            if (sa->dp[i] == dt) {
7222
2.09k
                t = SP_DIGIT_MAX;
7223
2.09k
            }
7224
10.3M
            else {
7225
10.3M
                t = sp_div_word(sa->dp[i], sa->dp[i-1], dt);
7226
10.3M
            }
7227
7228
#ifdef WOLFSSL_SP_SMALL
7229
            /* Decrement the estimate until d * t, placed at the current
             * digit offset, is no longer greater than sa. */
            do {
7230
                err = _sp_mul_d(d, t, trial, i - d->used);
7231
                if (err != MP_OKAY) {
7232
                    break;
7233
                }
7234
                c = _sp_cmp_abs(trial, sa);
7235
                if (c == MP_GT) {
7236
                    t--;
7237
                }
7238
            }
7239
            while (c == MP_GT);
7240
7241
            if (err != MP_OKAY) {
7242
                break;
7243
            }
7244
7245
            _sp_sub_off(sa, trial, sa, 0);
7246
            tr->dp[i - d->used] += t;
            /* Addition wrapped: carry into the next quotient digit. */
7247
            if (tr->dp[i - d->used] < t) {
7248
                tr->dp[i + 1 - d->used]++;
7249
            }
7250
#else
7251
            /* Offset into sa at which d * t is compared and subtracted. */
10.3M
            o = i - d->used;
7252
            /* Decrement the estimate until d * t is no longer greater than
             * the d->used + 1 digits of sa starting at offset o. */
12.5M
            do {
7253
12.5M
            #ifndef SQR_MUL_ASM
7254
12.5M
                sp_int_word tw = 0;
7255
            #else
7256
                sp_int_digit tl = 0;
7257
                sp_int_digit th = 0;
7258
            #endif
7259
                /* trial = d * t */
508M
                for (j = 0; j < d->used; j++) {
7260
495M
                #ifndef SQR_MUL_ASM
7261
495M
                    tw += (sp_int_word)d->dp[j] * t;
7262
495M
                    trial->dp[j] = (sp_int_digit)tw;
7263
495M
                    tw >>= SP_WORD_SIZE;
7264
                #else
7265
                    SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
7266
                    trial->dp[j] = tl;
7267
                    tl = th;
7268
                    th = 0;
7269
                #endif
7270
495M
                }
7271
12.5M
              #ifndef SQR_MUL_ASM
7272
12.5M
                trial->dp[j] = (sp_int_digit)tw;
7273
              #else
7274
                trial->dp[j] = tl;
7275
              #endif
7276
7277
                /* Find the highest digit at which trial and sa differ. */
20.0M
                for (j = d->used; j > 0; j--) {
7278
20.0M
                    if (trial->dp[j] != sa->dp[j + o]) {
7279
12.5M
                        break;
7280
12.5M
                    }
7281
20.0M
                }
7282
12.5M
                if (trial->dp[j] > sa->dp[j + o]) {
7283
2.23M
                    t--;
7284
2.23M
                }
7285
12.5M
            }
7286
12.5M
            while (trial->dp[j] > sa->dp[j + o]);
7287
7288
            /* sa -= trial, with trial aligned at digit offset o. */
10.3M
        #ifndef SQR_MUL_ASM
7289
10.3M
            sw = 0;
7290
        #else
7291
            sl = 0;
7292
            sh = 0;
7293
        #endif
7294
441M
            for (j = 0; j <= d->used; j++) {
7295
430M
            #ifndef SQR_MUL_ASM
7296
430M
                sw += sa->dp[j + o];
7297
430M
                sw -= trial->dp[j];
7298
430M
                sa->dp[j + o] = (sp_int_digit)sw;
7299
430M
                sw >>= SP_WORD_SIZE;
7300
            #else
7301
                st = sa->dp[j + o];
7302
                SP_ASM_ADDC(sl, sh, st);
7303
                st = trial->dp[j];
7304
                SP_ASM_SUBC(sl, sh, st);
7305
                sa->dp[j + o] = sl;
7306
                sl = sh;
7307
                sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
7308
            #endif
7309
430M
            }
7310
7311
10.3M
            tr->dp[o] = t;
7312
10.3M
#endif /* WOLFSSL_SP_SMALL */
7313
10.3M
        }
7314
1.00M
        sa->used = i + 1;
7315
7316
        /* What is left in sa is the remainder of the shifted values: shift
         * back down by s bits to get the remainder of a / d. */
1.00M
        if ((err == MP_OKAY) && (rem != NULL)) {
7317
#ifdef WOLFSSL_SP_INT_NEGATIVE
7318
            sa->sign = (sa->used == 0) ? MP_ZPOS : aSign;
7319
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7320
1.00M
            if (s != SP_WORD_SIZE) {
7321
155k
                sp_rshb(sa, s, sa);
7322
155k
            }
7323
1.00M
            sp_copy(sa, rem);
7324
1.00M
            sp_clamp(rem);
7325
#ifdef WOLFSSL_SP_INT_NEGATIVE
7326
            if (sp_iszero(rem)) {
7327
                rem->sign = MP_ZPOS;
7328
            }
7329
#endif
7330
1.00M
        }
7331
1.00M
        if ((err == MP_OKAY) && (r != NULL)) {
7332
474
            sp_copy(tr, r);
7333
474
            sp_clamp(r);
7334
#ifdef WOLFSSL_SP_INT_NEGATIVE
7335
            if (sp_iszero(r)) {
7336
                r->sign = MP_ZPOS;
7337
            }
7338
            else {
7339
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
7340
            }
7341
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7342
474
        }
7343
1.00M
    }
7344
7345
#if 0
7346
    if (err == MP_OKAY) {
7347
        if (rem != NULL) {
7348
            sp_print(rem, "rdr");
7349
        }
7350
        if (r != NULL) {
7351
            sp_print(r, "rdw");
7352
        }
7353
    }
7354
#endif
7355
7356
1.18M
    FREE_SP_INT_ARRAY(td, NULL);
7357
1.18M
    return err;
7358
1.18M
}
7359
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
7360
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7361
7362
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7363
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7364
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
7365
#ifndef FREESCALE_LTC_TFM
7366
/* Calculate the remainder of dividing a by m: r = a mod m.
7367
 *
7368
 * @param  [in]   a  SP integer to reduce.
7369
 * @param  [in]   m  SP integer that is the modulus.
7370
 * @param  [out]  r  SP integer to store result in.
7371
 *
7372
 * @return  MP_OKAY on success.
7373
 * @return  MP_VAL when a, m or r is NULL or m is 0.
7374
 */
7375
int sp_mod(sp_int* a, sp_int* m, sp_int* r)
7376
1.18M
{
7377
1.18M
    int err = MP_OKAY;
7378
#ifdef WOLFSSL_SP_INT_NEGATIVE
7379
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
7380
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7381
7382
1.18M
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
7383
0
        err = MP_VAL;
7384
0
    }
7385
7386
1.18M
#ifndef WOLFSSL_SP_INT_NEGATIVE
7387
1.18M
    if (err == MP_OKAY) {
7388
1.18M
        err = sp_div(a, m, NULL, r);
7389
1.18M
    }
7390
#else
7391
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
7392
    if (err == MP_OKAY) {
7393
        sp_init_size(t, a->used + 1);
7394
        err = sp_div(a, m, NULL, t);
7395
    }
7396
    if (err == MP_OKAY) {
7397
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
7398
            err = sp_add(t, m, r);
7399
        }
7400
        else {
7401
            err = sp_copy(t, r);
7402
        }
7403
    }
7404
7405
    FREE_SP_INT(t, NULL);
7406
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7407
7408
1.18M
    return err;
7409
1.18M
}
7410
#endif /* !FREESCALE_LTC_TFM */
7411
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
7412
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7413
7414
/* START SP_MUL implementations. */
7415
/* This code is generated.
7416
 * To generate:
7417
 *   cd scripts/sp/sp_int
7418
 *   ./gen.sh
7419
 * File sp_mul.c contains code.
7420
 */
7421
7422
#ifdef SQR_MUL_ASM
7423
/* Multiply a by b into r where a and b have same no. digits. r = a * b
7424
 *
7425
 * Optimised code for when number of digits in a and b are the same.
7426
 *
7427
 * @param  [in]   a    SP integer to mulitply.
7428
 * @param  [in]   b    SP integer to mulitply by.
7429
 * @param  [out]  r    SP integer to hod reult.
7430
 *
7431
 * @return  MP_OKAY otherwise.
7432
 * @return  MP_MEM when dynamic memory allocation fails.
7433
 */
7434
static int _sp_mul_nxn(sp_int* a, sp_int* b, sp_int* r)
7435
{
7436
    int err = MP_OKAY;
7437
    int i;
7438
    int j;
7439
    int k;
7440
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7441
    sp_int_digit* t = NULL;
7442
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7443
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7444
    sp_int_digit t[a->used * 2];
7445
#else
7446
    sp_int_digit t[SP_INT_DIGITS];
7447
#endif
7448
7449
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7450
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
7451
        DYNAMIC_TYPE_BIGINT);
7452
    if (t == NULL) {
7453
        err = MP_MEM;
7454
    }
7455
#endif
7456
    if (err == MP_OKAY) {
7457
        sp_int_digit l, h, o;
7458
        sp_int_digit* dp;
7459
7460
        h = 0;
7461
        l = 0;
7462
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7463
        t[0] = h;
7464
        h = 0;
7465
        o = 0;
7466
        for (k = 1; k <= a->used - 1; k++) {
7467
            j = k;
7468
            dp = a->dp;
7469
            for (; j >= 0; dp++, j--) {
7470
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
7471
            }
7472
            t[k] = l;
7473
            l = h;
7474
            h = o;
7475
            o = 0;
7476
        }
7477
        for (; k <= (a->used - 1) * 2; k++) {
7478
            i = k - (b->used - 1);
7479
            dp = &b->dp[b->used - 1];
7480
            for (; i < a->used; i++, dp--) {
7481
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
7482
            }
7483
            t[k] = l;
7484
            l = h;
7485
            h = o;
7486
            o = 0;
7487
        }
7488
        t[k] = l;
7489
        r->used = k + 1;
7490
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7491
        sp_clamp(r);
7492
    }
7493
7494
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7495
    if (t != NULL) {
7496
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7497
    }
7498
#endif
7499
    return err;
7500
}
7501
7502
/* Multiply a by b into r. r = a * b
7503
 *
7504
 * @param  [in]   a    SP integer to mulitply.
7505
 * @param  [in]   b    SP integer to mulitply by.
7506
 * @param  [out]  r    SP integer to hod reult.
7507
 *
7508
 * @return  MP_OKAY otherwise.
7509
 * @return  MP_MEM when dynamic memory allocation fails.
7510
 */
7511
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
7512
{
7513
    int err = MP_OKAY;
7514
    int i;
7515
    int j;
7516
    int k;
7517
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7518
    sp_int_digit* t = NULL;
7519
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7520
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7521
    sp_int_digit t[a->used + b->used];
7522
#else
7523
    sp_int_digit t[SP_INT_DIGITS];
7524
#endif
7525
7526
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7527
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
7528
        DYNAMIC_TYPE_BIGINT);
7529
    if (t == NULL) {
7530
        err = MP_MEM;
7531
    }
7532
#endif
7533
    if (err == MP_OKAY) {
7534
        sp_int_digit l;
7535
        sp_int_digit h;
7536
        sp_int_digit o;
7537
7538
        h = 0;
7539
        l = 0;
7540
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7541
        t[0] = h;
7542
        h = 0;
7543
        o = 0;
7544
        for (k = 1; k <= b->used - 1; k++) {
7545
            i = 0;
7546
            j = k;
7547
            for (; (i < a->used) && (j >= 0); i++, j--) {
7548
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
7549
            }
7550
            t[k] = l;
7551
            l = h;
7552
            h = o;
7553
            o = 0;
7554
        }
7555
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
7556
            j = b->used - 1;
7557
            i = k - j;
7558
            for (; (i < a->used) && (j >= 0); i++, j--) {
7559
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
7560
            }
7561
            t[k] = l;
7562
            l = h;
7563
            h = o;
7564
            o = 0;
7565
        }
7566
        t[k] = l;
7567
        r->used = k + 1;
7568
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7569
        sp_clamp(r);
7570
    }
7571
7572
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7573
    if (t != NULL) {
7574
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7575
    }
7576
#endif
7577
    return err;
7578
}
7579
#else
7580
/* Multiply a by b into r. r = a * b
7581
 *
7582
 * @param  [in]   a    SP integer to mulitply.
7583
 * @param  [in]   b    SP integer to mulitply by.
7584
 * @param  [out]  r    SP integer to hod reult.
7585
 *
7586
 * @return  MP_OKAY otherwise.
7587
 * @return  MP_MEM when dynamic memory allocation fails.
7588
 */
7589
static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
7590
528k
{
7591
528k
    int err = MP_OKAY;
7592
528k
    int i;
7593
528k
    int j;
7594
528k
    int k;
7595
528k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7596
528k
    sp_int_digit* t = NULL;
7597
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
7598
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
7599
    sp_int_digit t[a->used + b->used];
7600
#else
7601
    sp_int_digit t[SP_INT_DIGITS];
7602
#endif
7603
7604
528k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7605
528k
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
7606
528k
        DYNAMIC_TYPE_BIGINT);
7607
528k
    if (t == NULL) {
7608
105
        err = MP_MEM;
7609
105
    }
7610
528k
#endif
7611
528k
    if (err == MP_OKAY) {
7612
528k
        sp_int_word w;
7613
528k
        sp_int_word l;
7614
528k
        sp_int_word h;
7615
    #ifdef SP_WORD_OVERFLOW
7616
        sp_int_word o;
7617
    #endif
7618
7619
528k
        w = (sp_int_word)a->dp[0] * b->dp[0];
7620
528k
        t[0] = (sp_int_digit)w;
7621
528k
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
7622
528k
        h = 0;
7623
    #ifdef SP_WORD_OVERFLOW
7624
        o = 0;
7625
    #endif
7626
10.0M
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
7627
9.50M
            i = k - (b->used - 1);
7628
9.50M
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
7629
9.50M
            j = k - i;
7630
189M
            for (; (i < a->used) && (j >= 0); i++, j--) {
7631
180M
                w = (sp_int_word)a->dp[i] * b->dp[j];
7632
180M
                l += (sp_int_digit)w;
7633
180M
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
7634
            #ifdef SP_WORD_OVERFLOW
7635
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
7636
                l &= SP_MASK;
7637
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
7638
                h &= SP_MASK;
7639
            #endif
7640
180M
            }
7641
9.50M
            t[k] = (sp_int_digit)l;
7642
9.50M
            l >>= SP_WORD_SIZE;
7643
9.50M
            l += (sp_int_digit)h;
7644
9.50M
            h >>= SP_WORD_SIZE;
7645
        #ifdef SP_WORD_OVERFLOW
7646
            h += o & SP_MASK;
7647
            o >>= SP_WORD_SIZE;
7648
        #endif
7649
9.50M
        }
7650
528k
        t[k] = (sp_int_digit)l;
7651
528k
        r->used = k + 1;
7652
528k
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
7653
528k
        sp_clamp(r);
7654
528k
    }
7655
7656
528k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7657
528k
    if (t != NULL) {
7658
528k
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7659
528k
    }
7660
528k
#endif
7661
528k
    return err;
7662
528k
}
7663
#endif
7664
7665
#ifndef WOLFSSL_SP_SMALL
7666
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
7667
#if SP_WORD_SIZE == 64
7668
#ifndef SQR_MUL_ASM
7669
/* Multiply a by b and store in r: r = a * b
7670
 *
7671
 * Long-hand implementation.
7672
 *
7673
 * @param  [in]   a  SP integer to multiply.
7674
 * @param  [in]   b  SP integer to multiply.
7675
 * @param  [out]  r  SP integer result.
7676
 *
7677
 * @return  MP_OKAY on success.
7678
 * @return  MP_MEM when dynamic memory allocation fails.
7679
 */
7680
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
7681
{
7682
    int err = MP_OKAY;
7683
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7684
    sp_int_word* w = NULL;
7685
#else
7686
    sp_int_word w[16];
7687
#endif
7688
    sp_int_digit* da = a->dp;
7689
    sp_int_digit* db = b->dp;
7690
7691
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7692
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
7693
        DYNAMIC_TYPE_BIGINT);
7694
    if (w == NULL) {
7695
        err = MP_MEM;
7696
    }
7697
#endif
7698
7699
    if (err == MP_OKAY) {
7700
        w[0] = (sp_int_word)da[0] * db[0];
7701
        w[1] = (sp_int_word)da[0] * db[1];
7702
        w[2] = (sp_int_word)da[1] * db[0];
7703
        w[3] = (sp_int_word)da[0] * db[2];
7704
        w[4] = (sp_int_word)da[1] * db[1];
7705
        w[5] = (sp_int_word)da[2] * db[0];
7706
        w[6] = (sp_int_word)da[0] * db[3];
7707
        w[7] = (sp_int_word)da[1] * db[2];
7708
        w[8] = (sp_int_word)da[2] * db[1];
7709
        w[9] = (sp_int_word)da[3] * db[0];
7710
        w[10] = (sp_int_word)da[1] * db[3];
7711
        w[11] = (sp_int_word)da[2] * db[2];
7712
        w[12] = (sp_int_word)da[3] * db[1];
7713
        w[13] = (sp_int_word)da[2] * db[3];
7714
        w[14] = (sp_int_word)da[3] * db[2];
7715
        w[15] = (sp_int_word)da[3] * db[3];
7716
7717
        r->dp[0] = w[0];
7718
        w[0] >>= SP_WORD_SIZE;
7719
        w[0] += (sp_int_digit)w[1];
7720
        w[0] += (sp_int_digit)w[2];
7721
        r->dp[1] = w[0];
7722
        w[0] >>= SP_WORD_SIZE;
7723
        w[1] >>= SP_WORD_SIZE;
7724
        w[0] += (sp_int_digit)w[1];
7725
        w[2] >>= SP_WORD_SIZE;
7726
        w[0] += (sp_int_digit)w[2];
7727
        w[0] += (sp_int_digit)w[3];
7728
        w[0] += (sp_int_digit)w[4];
7729
        w[0] += (sp_int_digit)w[5];
7730
        r->dp[2] = w[0];
7731
        w[0] >>= SP_WORD_SIZE;
7732
        w[3] >>= SP_WORD_SIZE;
7733
        w[0] += (sp_int_digit)w[3];
7734
        w[4] >>= SP_WORD_SIZE;
7735
        w[0] += (sp_int_digit)w[4];
7736
        w[5] >>= SP_WORD_SIZE;
7737
        w[0] += (sp_int_digit)w[5];
7738
        w[0] += (sp_int_digit)w[6];
7739
        w[0] += (sp_int_digit)w[7];
7740
        w[0] += (sp_int_digit)w[8];
7741
        w[0] += (sp_int_digit)w[9];
7742
        r->dp[3] = w[0];
7743
        w[0] >>= SP_WORD_SIZE;
7744
        w[6] >>= SP_WORD_SIZE;
7745
        w[0] += (sp_int_digit)w[6];
7746
        w[7] >>= SP_WORD_SIZE;
7747
        w[0] += (sp_int_digit)w[7];
7748
        w[8] >>= SP_WORD_SIZE;
7749
        w[0] += (sp_int_digit)w[8];
7750
        w[9] >>= SP_WORD_SIZE;
7751
        w[0] += (sp_int_digit)w[9];
7752
        w[0] += (sp_int_digit)w[10];
7753
        w[0] += (sp_int_digit)w[11];
7754
        w[0] += (sp_int_digit)w[12];
7755
        r->dp[4] = w[0];
7756
        w[0] >>= SP_WORD_SIZE;
7757
        w[10] >>= SP_WORD_SIZE;
7758
        w[0] += (sp_int_digit)w[10];
7759
        w[11] >>= SP_WORD_SIZE;
7760
        w[0] += (sp_int_digit)w[11];
7761
        w[12] >>= SP_WORD_SIZE;
7762
        w[0] += (sp_int_digit)w[12];
7763
        w[0] += (sp_int_digit)w[13];
7764
        w[0] += (sp_int_digit)w[14];
7765
        r->dp[5] = w[0];
7766
        w[0] >>= SP_WORD_SIZE;
7767
        w[13] >>= SP_WORD_SIZE;
7768
        w[0] += (sp_int_digit)w[13];
7769
        w[14] >>= SP_WORD_SIZE;
7770
        w[0] += (sp_int_digit)w[14];
7771
        w[0] += (sp_int_digit)w[15];
7772
        r->dp[6] = w[0];
7773
        w[0] >>= SP_WORD_SIZE;
7774
        w[15] >>= SP_WORD_SIZE;
7775
        w[0] += (sp_int_digit)w[15];
7776
        r->dp[7] = w[0];
7777
7778
        r->used = 8;
7779
        sp_clamp(r);
7780
    }
7781
7782
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7783
    if (w != NULL) {
7784
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
7785
    }
7786
#endif
7787
    return err;
7788
}
7789
#else /* SQR_MUL_ASM */
7790
/* Multiply a by b and store in r: r = a * b
7791
 *
7792
 * Comba implementation.
7793
 *
7794
 * @param  [in]   a  SP integer to multiply.
7795
 * @param  [in]   b  SP integer to multiply.
7796
 * @param  [out]  r  SP integer result.
7797
 *
7798
 * @return  MP_OKAY on success.
7799
 * @return  MP_MEM when dynamic memory allocation fails.
7800
 */
7801
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
7802
{
7803
    sp_int_digit l = 0;
7804
    sp_int_digit h = 0;
7805
    sp_int_digit o = 0;
7806
    sp_int_digit t[4];
7807
7808
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7809
    t[0] = h;
7810
    h = 0;
7811
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7812
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7813
    t[1] = l;
7814
    l = h;
7815
    h = o;
7816
    o = 0;
7817
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7818
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7819
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7820
    t[2] = l;
7821
    l = h;
7822
    h = o;
7823
    o = 0;
7824
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7825
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7826
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7827
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7828
    t[3] = l;
7829
    l = h;
7830
    h = o;
7831
    o = 0;
7832
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
7833
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
7834
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
7835
    r->dp[4] = l;
7836
    l = h;
7837
    h = o;
7838
    o = 0;
7839
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
7840
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
7841
    r->dp[5] = l;
7842
    l = h;
7843
    h = o;
7844
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
7845
    r->dp[6] = l;
7846
    r->dp[7] = h;
7847
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
7848
    r->used = 8;
7849
    sp_clamp(r);
7850
7851
    return MP_OKAY;
7852
}
7853
#endif /* SQR_MUL_ASM */
7854
#endif /* SP_WORD_SIZE == 64 */
7855
#if SP_WORD_SIZE == 64
7856
#ifdef SQR_MUL_ASM
7857
/* Multiply a by b and store in r: r = a * b
7858
 *
7859
 * Comba implementation.
7860
 *
7861
 * @param  [in]   a  SP integer to multiply.
7862
 * @param  [in]   b  SP integer to multiply.
7863
 * @param  [out]  r  SP integer result.
7864
 *
7865
 * @return  MP_OKAY on success.
7866
 * @return  MP_MEM when dynamic memory allocation fails.
7867
 */
7868
static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r)
7869
{
7870
    sp_int_digit l = 0;
7871
    sp_int_digit h = 0;
7872
    sp_int_digit o = 0;
7873
    sp_int_digit t[6];
7874
7875
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7876
    t[0] = h;
7877
    h = 0;
7878
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7879
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7880
    t[1] = l;
7881
    l = h;
7882
    h = o;
7883
    o = 0;
7884
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7885
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7886
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7887
    t[2] = l;
7888
    l = h;
7889
    h = o;
7890
    o = 0;
7891
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7892
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7893
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7894
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7895
    t[3] = l;
7896
    l = h;
7897
    h = o;
7898
    o = 0;
7899
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
7900
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
7901
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
7902
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
7903
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
7904
    t[4] = l;
7905
    l = h;
7906
    h = o;
7907
    o = 0;
7908
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
7909
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
7910
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
7911
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
7912
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
7913
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
7914
    t[5] = l;
7915
    l = h;
7916
    h = o;
7917
    o = 0;
7918
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
7919
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
7920
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
7921
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
7922
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
7923
    r->dp[6] = l;
7924
    l = h;
7925
    h = o;
7926
    o = 0;
7927
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
7928
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
7929
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
7930
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
7931
    r->dp[7] = l;
7932
    l = h;
7933
    h = o;
7934
    o = 0;
7935
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
7936
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
7937
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
7938
    r->dp[8] = l;
7939
    l = h;
7940
    h = o;
7941
    o = 0;
7942
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
7943
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
7944
    r->dp[9] = l;
7945
    l = h;
7946
    h = o;
7947
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
7948
    r->dp[10] = l;
7949
    r->dp[11] = h;
7950
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
7951
    r->used = 12;
7952
    sp_clamp(r);
7953
7954
    return MP_OKAY;
7955
}
7956
#endif /* SQR_MUL_ASM */
7957
#endif /* SP_WORD_SIZE == 64 */
7958
#if SP_WORD_SIZE == 32
7959
#ifdef SQR_MUL_ASM
7960
/* Multiply a by b and store in r: r = a * b
7961
 *
7962
 * Comba implementation.
7963
 *
7964
 * @param  [in]   a  SP integer to multiply.
7965
 * @param  [in]   b  SP integer to multiply.
7966
 * @param  [out]  r  SP integer result.
7967
 *
7968
 * @return  MP_OKAY on success.
7969
 * @return  MP_MEM when dynamic memory allocation fails.
7970
 */
7971
static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r)
7972
{
7973
    sp_int_digit l = 0;
7974
    sp_int_digit h = 0;
7975
    sp_int_digit o = 0;
7976
    sp_int_digit t[8];
7977
7978
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
7979
    t[0] = h;
7980
    h = 0;
7981
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
7982
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
7983
    t[1] = l;
7984
    l = h;
7985
    h = o;
7986
    o = 0;
7987
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
7988
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
7989
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
7990
    t[2] = l;
7991
    l = h;
7992
    h = o;
7993
    o = 0;
7994
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
7995
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
7996
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
7997
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
7998
    t[3] = l;
7999
    l = h;
8000
    h = o;
8001
    o = 0;
8002
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8003
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8004
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8005
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8006
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8007
    t[4] = l;
8008
    l = h;
8009
    h = o;
8010
    o = 0;
8011
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8012
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8013
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8014
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8015
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8016
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8017
    t[5] = l;
8018
    l = h;
8019
    h = o;
8020
    o = 0;
8021
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8022
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8023
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8024
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8025
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8026
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8027
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8028
    t[6] = l;
8029
    l = h;
8030
    h = o;
8031
    o = 0;
8032
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8033
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8034
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8035
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8036
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8037
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8038
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8039
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8040
    t[7] = l;
8041
    l = h;
8042
    h = o;
8043
    o = 0;
8044
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8045
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8046
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8047
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8048
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8049
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8050
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8051
    r->dp[8] = l;
8052
    l = h;
8053
    h = o;
8054
    o = 0;
8055
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8056
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8057
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8058
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8059
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8060
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8061
    r->dp[9] = l;
8062
    l = h;
8063
    h = o;
8064
    o = 0;
8065
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8066
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8067
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8068
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8069
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8070
    r->dp[10] = l;
8071
    l = h;
8072
    h = o;
8073
    o = 0;
8074
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8075
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8076
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8077
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8078
    r->dp[11] = l;
8079
    l = h;
8080
    h = o;
8081
    o = 0;
8082
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8083
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8084
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8085
    r->dp[12] = l;
8086
    l = h;
8087
    h = o;
8088
    o = 0;
8089
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8090
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8091
    r->dp[13] = l;
8092
    l = h;
8093
    h = o;
8094
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
8095
    r->dp[14] = l;
8096
    r->dp[15] = h;
8097
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
8098
    r->used = 16;
8099
    sp_clamp(r);
8100
8101
    return MP_OKAY;
8102
}
8103
#endif /* SQR_MUL_ASM */
8104
#endif /* SP_WORD_SIZE == 32 */
8105
#if SP_WORD_SIZE == 32
8106
#ifdef SQR_MUL_ASM
8107
/* Multiply a by b and store in r: r = a * b
8108
 *
8109
 * Comba implementation.
8110
 *
8111
 * @param  [in]   a  SP integer to multiply.
8112
 * @param  [in]   b  SP integer to multiply.
8113
 * @param  [out]  r  SP integer result.
8114
 *
8115
 * @return  MP_OKAY on success.
8116
 * @return  MP_MEM when dynamic memory allocation fails.
8117
 */
8118
static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r)
8119
{
8120
    sp_int_digit l = 0;
8121
    sp_int_digit h = 0;
8122
    sp_int_digit o = 0;
8123
    sp_int_digit t[12];
8124
8125
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8126
    t[0] = h;
8127
    h = 0;
8128
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8129
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8130
    t[1] = l;
8131
    l = h;
8132
    h = o;
8133
    o = 0;
8134
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8135
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8136
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8137
    t[2] = l;
8138
    l = h;
8139
    h = o;
8140
    o = 0;
8141
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8142
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8143
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8144
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8145
    t[3] = l;
8146
    l = h;
8147
    h = o;
8148
    o = 0;
8149
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8150
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8151
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8152
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8153
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8154
    t[4] = l;
8155
    l = h;
8156
    h = o;
8157
    o = 0;
8158
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8159
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8160
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8161
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8162
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8163
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8164
    t[5] = l;
8165
    l = h;
8166
    h = o;
8167
    o = 0;
8168
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8169
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8170
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8171
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8172
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8173
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8174
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8175
    t[6] = l;
8176
    l = h;
8177
    h = o;
8178
    o = 0;
8179
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8180
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8181
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8182
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8183
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8184
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8185
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8186
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8187
    t[7] = l;
8188
    l = h;
8189
    h = o;
8190
    o = 0;
8191
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8192
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8193
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8194
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8195
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8196
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8197
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8198
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8199
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8200
    t[8] = l;
8201
    l = h;
8202
    h = o;
8203
    o = 0;
8204
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8205
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8206
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8207
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8208
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8209
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8210
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8211
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8212
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8213
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8214
    t[9] = l;
8215
    l = h;
8216
    h = o;
8217
    o = 0;
8218
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8219
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8220
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8221
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8222
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8223
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8224
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8225
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8226
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8227
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8228
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8229
    t[10] = l;
8230
    l = h;
8231
    h = o;
8232
    o = 0;
8233
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8234
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8235
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8236
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8237
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8238
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8239
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8240
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8241
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8242
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8243
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8244
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8245
    t[11] = l;
8246
    l = h;
8247
    h = o;
8248
    o = 0;
8249
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8250
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8251
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8252
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8253
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8254
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8255
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8256
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8257
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8258
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8259
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8260
    r->dp[12] = l;
8261
    l = h;
8262
    h = o;
8263
    o = 0;
8264
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8265
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8266
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8267
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8268
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8269
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8270
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8271
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8272
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8273
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8274
    r->dp[13] = l;
8275
    l = h;
8276
    h = o;
8277
    o = 0;
8278
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8279
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8280
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8281
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8282
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8283
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8284
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8285
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8286
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8287
    r->dp[14] = l;
8288
    l = h;
8289
    h = o;
8290
    o = 0;
8291
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
8292
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
8293
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
8294
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
8295
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
8296
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
8297
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
8298
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
8299
    r->dp[15] = l;
8300
    l = h;
8301
    h = o;
8302
    o = 0;
8303
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
8304
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
8305
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
8306
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
8307
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
8308
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
8309
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
8310
    r->dp[16] = l;
8311
    l = h;
8312
    h = o;
8313
    o = 0;
8314
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
8315
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
8316
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
8317
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
8318
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
8319
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
8320
    r->dp[17] = l;
8321
    l = h;
8322
    h = o;
8323
    o = 0;
8324
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
8325
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
8326
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
8327
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
8328
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
8329
    r->dp[18] = l;
8330
    l = h;
8331
    h = o;
8332
    o = 0;
8333
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
8334
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
8335
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
8336
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
8337
    r->dp[19] = l;
8338
    l = h;
8339
    h = o;
8340
    o = 0;
8341
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
8342
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
8343
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
8344
    r->dp[20] = l;
8345
    l = h;
8346
    h = o;
8347
    o = 0;
8348
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
8349
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
8350
    r->dp[21] = l;
8351
    l = h;
8352
    h = o;
8353
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
8354
    r->dp[22] = l;
8355
    r->dp[23] = h;
8356
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
8357
    r->used = 24;
8358
    sp_clamp(r);
8359
8360
    return MP_OKAY;
8361
}
8362
#endif /* SQR_MUL_ASM */
8363
#endif /* SP_WORD_SIZE == 32 */
8364
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
8365
8366
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
8367
    #if SP_INT_DIGITS >= 32
8368
/* Multiply a by b and store in r: r = a * b
8369
 *
8370
 * Comba implementation.
8371
 *
 * Fully unrolled for 16-digit operands: digits a->dp[0..15] and b->dp[0..15]
 * are read and 32 result digits are produced one column at a time, where
 * column k is the sum of every a->dp[i] * b->dp[j] with i + j == k.
 * Result digits 0..15 are collected in a temporary and copied into r->dp only
 * after the last read of a->dp and b->dp; digits 16..31 are stored into
 * r->dp[16..31] as soon as they are produced.
 * NOTE(review): the temporary presumably exists so that r may be the same
 * object as a or b - confirm against callers.
 * NOTE(review): a->used and b->used are not examined here; callers are
 * assumed to pass operands that have exactly 16 digits - confirm.
 *
8372
 * @param  [in]   a  SP integer to multiply.
8373
 * @param  [in]   b  SP integer to multiply.
8374
 * @param  [out]  r  SP integer result.
8375
 *
8376
 * @return  MP_OKAY on success.
8377
 * @return  MP_MEM when dynamic memory allocation fails.
 *          Allocation is only performed when WOLFSSL_SMALL_STACK is defined
 *          and WOLFSSL_SP_NO_MALLOC is not.
8378
 */
8379
static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r)
8380
{
8381
    int err = MP_OKAY;
    /* l, h, o: the three accumulator words handed to the SP_ASM_* macros.
     * After each column l is stored as the result digit and the words are
     * rotated down (l = h; h = o; o = 0). */
8382
    sp_int_digit l = 0;
8383
    sp_int_digit h = 0;
8384
    sp_int_digit o = 0;
    /* t holds result digits 0..15 until all inputs have been read. */
8385
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8386
    sp_int_digit* t = NULL;
8387
#else
8388
    sp_int_digit t[16];
8389
#endif
8390
8391
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
     /* Small-stack build: take the 16-digit temporary from the heap. */
8392
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
8393
         DYNAMIC_TYPE_BIGINT);
8394
     if (t == NULL) {
8395
         err = MP_MEM;
8396
     }
8397
#endif
8398
    if (err == MP_OKAY) {
        /* Column 0: a->dp[0] * b->dp[0].
         * Note the argument order: h receives the word that is stored as
         * digit 0, and l is left holding the word carried into column 1.
         * NOTE(review): SP_ASM_MUL is defined elsewhere; presumably its
         * arguments are (low, high, a, b) - confirm. */
8399
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8400
        t[0] = h;
        /* h was only used as a scratch output above; clear it before it
         * becomes the middle accumulator word. */
8401
        h = 0;
        /* Column 1: a->dp[i] * b->dp[1 - i] for i = 0..1.
         * The _NO variant is not passed o.
         * NOTE(review): presumably it is used where the sum cannot carry
         * beyond h - confirm against the macro definitions. */
8402
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8403
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8404
        t[1] = l;
8405
        l = h;
8406
        h = o;
8407
        o = 0;
        /* Column 2: a->dp[i] * b->dp[2 - i] for i = 0..2. */
8408
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8409
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8410
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8411
        t[2] = l;
8412
        l = h;
8413
        h = o;
8414
        o = 0;
        /* Column 3: a->dp[i] * b->dp[3 - i] for i = 0..3. */
8415
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8416
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8417
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8418
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8419
        t[3] = l;
8420
        l = h;
8421
        h = o;
8422
        o = 0;
        /* Column 4: a->dp[i] * b->dp[4 - i] for i = 0..4. */
8423
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8424
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8425
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8426
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8427
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8428
        t[4] = l;
8429
        l = h;
8430
        h = o;
8431
        o = 0;
        /* Column 5: a->dp[i] * b->dp[5 - i] for i = 0..5. */
8432
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8433
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8434
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8435
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8436
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8437
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8438
        t[5] = l;
8439
        l = h;
8440
        h = o;
8441
        o = 0;
        /* Column 6: a->dp[i] * b->dp[6 - i] for i = 0..6. */
8442
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8443
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8444
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8445
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8446
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8447
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8448
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8449
        t[6] = l;
8450
        l = h;
8451
        h = o;
8452
        o = 0;
        /* Column 7: a->dp[i] * b->dp[7 - i] for i = 0..7. */
8453
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8454
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8455
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8456
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8457
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8458
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8459
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8460
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8461
        t[7] = l;
8462
        l = h;
8463
        h = o;
8464
        o = 0;
        /* Column 8: a->dp[i] * b->dp[8 - i] for i = 0..8. */
8465
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8466
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8467
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8468
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8469
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8470
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8471
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8472
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8473
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8474
        t[8] = l;
8475
        l = h;
8476
        h = o;
8477
        o = 0;
        /* Column 9: a->dp[i] * b->dp[9 - i] for i = 0..9. */
8478
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8479
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8480
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8481
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8482
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8483
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8484
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8485
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8486
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8487
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8488
        t[9] = l;
8489
        l = h;
8490
        h = o;
8491
        o = 0;
        /* Column 10: a->dp[i] * b->dp[10 - i] for i = 0..10. */
8492
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8493
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8494
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8495
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8496
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8497
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8498
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8499
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8500
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8501
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8502
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8503
        t[10] = l;
8504
        l = h;
8505
        h = o;
8506
        o = 0;
        /* Column 11: a->dp[i] * b->dp[11 - i] for i = 0..11. */
8507
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8508
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8509
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8510
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8511
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8512
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8513
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8514
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8515
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8516
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8517
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8518
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8519
        t[11] = l;
8520
        l = h;
8521
        h = o;
8522
        o = 0;
        /* Column 12: a->dp[i] * b->dp[12 - i] for i = 0..12. */
8523
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
8524
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8525
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8526
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8527
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8528
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8529
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8530
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8531
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8532
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8533
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8534
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8535
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
8536
        t[12] = l;
8537
        l = h;
8538
        h = o;
8539
        o = 0;
        /* Column 13: a->dp[i] * b->dp[13 - i] for i = 0..13. */
8540
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
8541
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
8542
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8543
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8544
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8545
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8546
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8547
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8548
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8549
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8550
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8551
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8552
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
8553
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
8554
        t[13] = l;
8555
        l = h;
8556
        h = o;
8557
        o = 0;
        /* Column 14: a->dp[i] * b->dp[14 - i] for i = 0..14. */
8558
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
8559
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
8560
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
8561
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8562
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8563
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8564
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8565
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8566
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8567
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8568
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8569
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8570
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
8571
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
8572
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
8573
        t[14] = l;
8574
        l = h;
8575
        h = o;
8576
        o = 0;
        /* Column 15: a->dp[i] * b->dp[15 - i] for i = 0..15 (the longest
         * column, and the last one kept in t). */
8577
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
8578
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
8579
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
8580
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
8581
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
8582
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
8583
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
8584
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
8585
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
8586
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
8587
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
8588
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
8589
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
8590
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
8591
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
8592
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
8593
        t[15] = l;
8594
        l = h;
8595
        h = o;
8596
        o = 0;
        /* From here on digits are written straight into r->dp[16..31];
         * only a->dp[0..15] and b->dp[0..15] are ever read. */
        /* Column 16: a->dp[i] * b->dp[16 - i] for i = 1..15. */
8597
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
8598
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
8599
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
8600
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
8601
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
8602
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
8603
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
8604
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
8605
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
8606
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
8607
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
8608
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
8609
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
8610
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
8611
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
8612
        r->dp[16] = l;
8613
        l = h;
8614
        h = o;
8615
        o = 0;
        /* Column 17: a->dp[i] * b->dp[17 - i] for i = 2..15. */
8616
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
8617
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
8618
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
8619
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
8620
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
8621
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
8622
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
8623
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
8624
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
8625
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
8626
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
8627
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
8628
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
8629
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
8630
        r->dp[17] = l;
8631
        l = h;
8632
        h = o;
8633
        o = 0;
        /* Column 18: a->dp[i] * b->dp[18 - i] for i = 3..15. */
8634
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
8635
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
8636
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
8637
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
8638
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
8639
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
8640
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
8641
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
8642
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
8643
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
8644
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
8645
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
8646
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
8647
        r->dp[18] = l;
8648
        l = h;
8649
        h = o;
8650
        o = 0;
        /* Column 19: a->dp[i] * b->dp[19 - i] for i = 4..15. */
8651
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
8652
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
8653
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
8654
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
8655
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
8656
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
8657
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
8658
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
8659
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
8660
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
8661
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
8662
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
8663
        r->dp[19] = l;
8664
        l = h;
8665
        h = o;
8666
        o = 0;
        /* Column 20: a->dp[i] * b->dp[20 - i] for i = 5..15. */
8667
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
8668
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
8669
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
8670
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
8671
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
8672
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
8673
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
8674
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
8675
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
8676
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
8677
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
8678
        r->dp[20] = l;
8679
        l = h;
8680
        h = o;
8681
        o = 0;
        /* Column 21: a->dp[i] * b->dp[21 - i] for i = 6..15. */
8682
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
8683
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
8684
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
8685
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
8686
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
8687
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
8688
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
8689
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
8690
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
8691
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
8692
        r->dp[21] = l;
8693
        l = h;
8694
        h = o;
8695
        o = 0;
        /* Column 22: a->dp[i] * b->dp[22 - i] for i = 7..15. */
8696
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
8697
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
8698
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
8699
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
8700
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
8701
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
8702
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
8703
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
8704
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
8705
        r->dp[22] = l;
8706
        l = h;
8707
        h = o;
8708
        o = 0;
        /* Column 23: a->dp[i] * b->dp[23 - i] for i = 8..15. */
8709
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
8710
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
8711
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
8712
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
8713
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
8714
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
8715
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
8716
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
8717
        r->dp[23] = l;
8718
        l = h;
8719
        h = o;
8720
        o = 0;
        /* Column 24: a->dp[i] * b->dp[24 - i] for i = 9..15. */
8721
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
8722
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
8723
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
8724
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
8725
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
8726
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
8727
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
8728
        r->dp[24] = l;
8729
        l = h;
8730
        h = o;
8731
        o = 0;
        /* Column 25: a->dp[i] * b->dp[25 - i] for i = 10..15. */
8732
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
8733
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
8734
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
8735
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
8736
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
8737
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
8738
        r->dp[25] = l;
8739
        l = h;
8740
        h = o;
8741
        o = 0;
        /* Column 26: a->dp[i] * b->dp[26 - i] for i = 11..15. */
8742
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
8743
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
8744
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
8745
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
8746
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
8747
        r->dp[26] = l;
8748
        l = h;
8749
        h = o;
8750
        o = 0;
        /* Column 27: a->dp[i] * b->dp[27 - i] for i = 12..15. */
8751
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
8752
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
8753
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
8754
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
8755
        r->dp[27] = l;
8756
        l = h;
8757
        h = o;
8758
        o = 0;
        /* Column 28: a->dp[i] * b->dp[28 - i] for i = 13..15. */
8759
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
8760
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
8761
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
8762
        r->dp[28] = l;
8763
        l = h;
8764
        h = o;
8765
        o = 0;
        /* Column 29: a->dp[i] * b->dp[29 - i] for i = 14..15. */
8766
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
8767
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
8768
        r->dp[29] = l;
8769
        l = h;
8770
        h = o;
        /* Column 30: the single product a->dp[15] * b->dp[15]. o is neither
         * reset nor read from here on; l and h become the top two digits. */
8771
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
8772
        r->dp[30] = l;
8773
        r->dp[31] = h;
        /* All reads of a and b are done: move digits 0..15 into place. */
8774
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
8775
        r->used = 32;
        /* NOTE(review): sp_clamp is defined elsewhere; presumably it lowers
         * r->used past any leading zero digits - confirm. */
8776
        sp_clamp(r);
8777
    }
8778
8779
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* t is heap allocated only in this configuration; it is still NULL here
     * when the allocation above failed. */
8780
    if (t != NULL) {
8781
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
8782
    }
8783
#endif
8784
    return err;
8785
}
8786
    #endif /* SP_INT_DIGITS >= 32 */
8787
8788
    #if SP_INT_DIGITS >= 48
8789
/* Multiply a by b and store in r: r = a * b
8790
 *
8791
 * Comba implementation.
8792
 *
8793
 * @param  [in]   a  SP integer to multiply.
8794
 * @param  [in]   b  SP integer to multiply.
8795
 * @param  [out]  r  SP integer result.
8796
 *
8797
 * @return  MP_OKAY on success.
8798
 * @return  MP_MEM when dynamic memory allocation fails.
8799
 */
8800
static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r)
8801
{
8802
    int err = MP_OKAY;
8803
    sp_int_digit l = 0;
8804
    sp_int_digit h = 0;
8805
    sp_int_digit o = 0;
8806
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8807
    sp_int_digit* t = NULL;
8808
#else
8809
    sp_int_digit t[24];
8810
#endif
8811
8812
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8813
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
8814
         DYNAMIC_TYPE_BIGINT);
8815
     if (t == NULL) {
8816
         err = MP_MEM;
8817
     }
8818
#endif
8819
    if (err == MP_OKAY) {
8820
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
8821
        t[0] = h;
8822
        h = 0;
8823
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
8824
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
8825
        t[1] = l;
8826
        l = h;
8827
        h = o;
8828
        o = 0;
8829
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
8830
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
8831
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
8832
        t[2] = l;
8833
        l = h;
8834
        h = o;
8835
        o = 0;
8836
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
8837
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
8838
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
8839
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
8840
        t[3] = l;
8841
        l = h;
8842
        h = o;
8843
        o = 0;
8844
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
8845
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
8846
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
8847
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
8848
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
8849
        t[4] = l;
8850
        l = h;
8851
        h = o;
8852
        o = 0;
8853
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
8854
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
8855
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
8856
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
8857
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
8858
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
8859
        t[5] = l;
8860
        l = h;
8861
        h = o;
8862
        o = 0;
8863
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
8864
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
8865
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
8866
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
8867
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
8868
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
8869
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
8870
        t[6] = l;
8871
        l = h;
8872
        h = o;
8873
        o = 0;
8874
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
8875
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
8876
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
8877
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
8878
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
8879
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
8880
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
8881
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
8882
        t[7] = l;
8883
        l = h;
8884
        h = o;
8885
        o = 0;
8886
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
8887
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
8888
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
8889
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
8890
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
8891
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
8892
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
8893
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
8894
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
8895
        t[8] = l;
8896
        l = h;
8897
        h = o;
8898
        o = 0;
8899
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
8900
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
8901
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
8902
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
8903
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
8904
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
8905
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
8906
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
8907
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
8908
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
8909
        t[9] = l;
8910
        l = h;
8911
        h = o;
8912
        o = 0;
8913
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
8914
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
8915
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
8916
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
8917
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
8918
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
8919
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
8920
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
8921
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
8922
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
8923
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
8924
        t[10] = l;
8925
        l = h;
8926
        h = o;
8927
        o = 0;
8928
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
8929
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
8930
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
8931
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
8932
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
8933
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
8934
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
8935
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
8936
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
8937
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
8938
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
8939
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
8940
        t[11] = l;
8941
        l = h;
8942
        h = o;
8943
        o = 0;
8944
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
8945
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
8946
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
8947
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
8948
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
8949
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
8950
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
8951
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
8952
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
8953
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
8954
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
8955
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
8956
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
8957
        t[12] = l;
8958
        l = h;
8959
        h = o;
8960
        o = 0;
8961
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
8962
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
8963
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
8964
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
8965
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
8966
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
8967
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
8968
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
8969
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
8970
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
8971
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
8972
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
8973
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
8974
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
8975
        t[13] = l;
8976
        l = h;
8977
        h = o;
8978
        o = 0;
8979
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
8980
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
8981
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
8982
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
8983
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
8984
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
8985
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
8986
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
8987
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
8988
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
8989
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
8990
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
8991
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
8992
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
8993
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
8994
        t[14] = l;
8995
        l = h;
8996
        h = o;
8997
        o = 0;
8998
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
8999
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
9000
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
9001
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
9002
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
9003
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
9004
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
9005
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
9006
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
9007
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
9008
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
9009
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
9010
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
9011
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
9012
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
9013
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
9014
        t[15] = l;
9015
        l = h;
9016
        h = o;
9017
        o = 0;
9018
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
9019
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
9020
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
9021
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
9022
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
9023
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
9024
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
9025
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
9026
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
9027
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
9028
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
9029
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
9030
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
9031
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
9032
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
9033
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
9034
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
9035
        t[16] = l;
9036
        l = h;
9037
        h = o;
9038
        o = 0;
9039
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
9040
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
9041
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
9042
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
9043
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
9044
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
9045
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
9046
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
9047
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
9048
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
9049
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
9050
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
9051
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
9052
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
9053
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
9054
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
9055
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
9056
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
9057
        t[17] = l;
9058
        l = h;
9059
        h = o;
9060
        o = 0;
9061
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
9062
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
9063
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
9064
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
9065
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
9066
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
9067
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
9068
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
9069
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
9070
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
9071
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
9072
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
9073
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
9074
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
9075
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
9076
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
9077
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
9078
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
9079
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
9080
        t[18] = l;
9081
        l = h;
9082
        h = o;
9083
        o = 0;
9084
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
9085
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
9086
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
9087
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
9088
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
9089
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
9090
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
9091
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
9092
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
9093
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
9094
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
9095
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
9096
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
9097
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
9098
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
9099
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
9100
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
9101
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
9102
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
9103
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
9104
        t[19] = l;
9105
        l = h;
9106
        h = o;
9107
        o = 0;
9108
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
9109
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
9110
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
9111
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
9112
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
9113
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
9114
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
9115
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
9116
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
9117
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
9118
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
9119
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
9120
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
9121
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
9122
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
9123
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
9124
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
9125
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
9126
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
9127
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
9128
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
9129
        t[20] = l;
9130
        l = h;
9131
        h = o;
9132
        o = 0;
9133
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
9134
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
9135
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
9136
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
9137
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
9138
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
9139
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
9140
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
9141
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
9142
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
9143
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
9144
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
9145
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
9146
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
9147
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
9148
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
9149
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
9150
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
9151
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
9152
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
9153
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
9154
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
9155
        t[21] = l;
9156
        l = h;
9157
        h = o;
9158
        o = 0;
9159
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
9160
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
9161
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
9162
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
9163
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
9164
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
9165
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
9166
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
9167
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
9168
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
9169
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
9170
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
9171
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
9172
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
9173
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
9174
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
9175
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
9176
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
9177
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
9178
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
9179
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
9180
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
9181
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
9182
        t[22] = l;
9183
        l = h;
9184
        h = o;
9185
        o = 0;
9186
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
9187
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
9188
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
9189
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
9190
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
9191
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
9192
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
9193
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
9194
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
9195
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
9196
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
9197
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
9198
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
9199
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
9200
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
9201
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
9202
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
9203
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
9204
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
9205
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
9206
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
9207
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
9208
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
9209
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
9210
        t[23] = l;
9211
        l = h;
9212
        h = o;
9213
        o = 0;
9214
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
9215
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
9216
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
9217
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
9218
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
9219
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
9220
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
9221
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
9222
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
9223
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
9224
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
9225
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
9226
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
9227
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
9228
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
9229
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
9230
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
9231
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
9232
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
9233
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
9234
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
9235
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
9236
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
9237
        r->dp[24] = l;
9238
        l = h;
9239
        h = o;
9240
        o = 0;
9241
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
9242
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
9243
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
9244
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
9245
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
9246
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
9247
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
9248
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
9249
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
9250
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
9251
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
9252
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
9253
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
9254
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
9255
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
9256
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
9257
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
9258
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
9259
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
9260
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
9261
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
9262
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
9263
        r->dp[25] = l;
9264
        l = h;
9265
        h = o;
9266
        o = 0;
9267
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
9268
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
9269
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
9270
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
9271
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
9272
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
9273
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
9274
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
9275
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
9276
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
9277
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
9278
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
9279
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
9280
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
9281
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
9282
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
9283
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
9284
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
9285
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
9286
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
9287
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
9288
        r->dp[26] = l;
9289
        l = h;
9290
        h = o;
9291
        o = 0;
9292
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
9293
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
9294
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
9295
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
9296
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
9297
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
9298
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
9299
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
9300
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
9301
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
9302
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
9303
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
9304
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
9305
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
9306
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
9307
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
9308
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
9309
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
9310
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
9311
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
9312
        r->dp[27] = l;
9313
        l = h;
9314
        h = o;
9315
        o = 0;
9316
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
9317
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
9318
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
9319
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
9320
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
9321
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
9322
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
9323
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
9324
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
9325
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
9326
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
9327
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
9328
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
9329
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
9330
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
9331
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
9332
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
9333
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
9334
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
9335
        r->dp[28] = l;
9336
        l = h;
9337
        h = o;
9338
        o = 0;
9339
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
9340
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
9341
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
9342
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
9343
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
9344
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
9345
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
9346
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
9347
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
9348
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
9349
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
9350
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
9351
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
9352
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
9353
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
9354
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
9355
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
9356
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
9357
        r->dp[29] = l;
9358
        l = h;
9359
        h = o;
9360
        o = 0;
9361
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
9362
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
9363
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
9364
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
9365
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
9366
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
9367
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
9368
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
9369
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
9370
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
9371
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
9372
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
9373
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
9374
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
9375
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
9376
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
9377
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
9378
        r->dp[30] = l;
9379
        l = h;
9380
        h = o;
9381
        o = 0;
9382
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
9383
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
9384
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
9385
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
9386
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
9387
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
9388
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
9389
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
9390
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
9391
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
9392
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
9393
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
9394
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
9395
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
9396
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
9397
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
9398
        r->dp[31] = l;
9399
        l = h;
9400
        h = o;
9401
        o = 0;
9402
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
9403
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
9404
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
9405
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
9406
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
9407
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
9408
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
9409
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
9410
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
9411
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
9412
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
9413
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
9414
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
9415
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
9416
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
9417
        r->dp[32] = l;
9418
        l = h;
9419
        h = o;
9420
        o = 0;
9421
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
9422
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
9423
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
9424
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
9425
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
9426
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
9427
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
9428
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
9429
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
9430
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
9431
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
9432
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
9433
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
9434
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
9435
        r->dp[33] = l;
9436
        l = h;
9437
        h = o;
9438
        o = 0;
9439
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
9440
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
9441
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
9442
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
9443
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
9444
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
9445
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
9446
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
9447
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
9448
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
9449
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
9450
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
9451
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
9452
        r->dp[34] = l;
9453
        l = h;
9454
        h = o;
9455
        o = 0;
9456
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
9457
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
9458
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
9459
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
9460
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
9461
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
9462
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
9463
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
9464
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
9465
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
9466
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
9467
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
9468
        r->dp[35] = l;
9469
        l = h;
9470
        h = o;
9471
        o = 0;
9472
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
9473
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
9474
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
9475
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
9476
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
9477
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
9478
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
9479
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
9480
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
9481
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
9482
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
9483
        r->dp[36] = l;
9484
        l = h;
9485
        h = o;
9486
        o = 0;
9487
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
9488
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
9489
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
9490
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
9491
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
9492
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
9493
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
9494
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
9495
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
9496
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
9497
        r->dp[37] = l;
9498
        l = h;
9499
        h = o;
9500
        o = 0;
9501
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
9502
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
9503
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
9504
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
9505
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
9506
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
9507
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
9508
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
9509
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
9510
        r->dp[38] = l;
9511
        l = h;
9512
        h = o;
9513
        o = 0;
9514
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
9515
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
9516
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
9517
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
9518
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
9519
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
9520
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
9521
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
9522
        r->dp[39] = l;
9523
        l = h;
9524
        h = o;
9525
        o = 0;
9526
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
9527
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
9528
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
9529
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
9530
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
9531
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
9532
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
9533
        r->dp[40] = l;
9534
        l = h;
9535
        h = o;
9536
        o = 0;
9537
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
9538
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
9539
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
9540
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
9541
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
9542
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
9543
        r->dp[41] = l;
9544
        l = h;
9545
        h = o;
9546
        o = 0;
9547
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
9548
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
9549
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
9550
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
9551
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
9552
        r->dp[42] = l;
9553
        l = h;
9554
        h = o;
9555
        o = 0;
9556
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
9557
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
9558
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
9559
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
9560
        r->dp[43] = l;
9561
        l = h;
9562
        h = o;
9563
        o = 0;
9564
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
9565
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
9566
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
9567
        r->dp[44] = l;
9568
        l = h;
9569
        h = o;
9570
        o = 0;
9571
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
9572
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
9573
        r->dp[45] = l;
9574
        l = h;
9575
        h = o;
9576
        SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
9577
        r->dp[46] = l;
9578
        r->dp[47] = h;
9579
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
9580
        r->used = 48;
9581
        sp_clamp(r);
9582
    }
9583
9584
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9585
    if (t != NULL) {
9586
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9587
    }
9588
#endif
9589
    return err;
9590
}
9591
    #endif /* SP_INT_DIGITS >= 48 */
9592
9593
    #if SP_INT_DIGITS >= 64
9594
/* Multiply a by b and store in r: r = a * b
9595
 *
9596
 * Karatsuba implementaiton.
9597
 *
9598
 * @param  [in]   a  SP integer to multiply.
9599
 * @param  [in]   b  SP integer to multiply.
9600
 * @param  [out]  r  SP integer result.
9601
 *
9602
 * @return  MP_OKAY on success.
9603
 * @return  MP_MEM when dynamic memory allocation fails.
9604
 */
9605
static int _sp_mul_32(sp_int* a, sp_int* b, sp_int* r)
9606
{
9607
    int err = MP_OKAY;
9608
    int i;
9609
    sp_int_digit l;
9610
    sp_int_digit h;
9611
    sp_int* a1;
9612
    sp_int* b1;
9613
    sp_int* z0;
9614
    sp_int* z1;
9615
    sp_int* z2;
9616
    sp_int_digit ca;
9617
    sp_int_digit cb;
9618
    DECL_SP_INT_ARRAY(t, 16, 2);
9619
    DECL_SP_INT_ARRAY(z, 33, 2);
9620
9621
    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
9622
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
9623
    if (err == MP_OKAY) {
9624
        a1 = t[0];
9625
        b1 = t[1];
9626
        z1 = z[0];
9627
        z2 = z[1];
9628
        z0 = r;
9629
9630
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
9631
        a1->used = 16;
9632
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
9633
        b1->used = 16;
9634
9635
        /* z2 = a1 * b1 */
9636
        err = _sp_mul_16(a1, b1, z2);
9637
    }
9638
    if (err == MP_OKAY) {
9639
        l = a1->dp[0];
9640
        h = 0;
9641
        SP_ASM_ADDC(l, h, a->dp[0]);
9642
        a1->dp[0] = l;
9643
        l = h;
9644
        h = 0;
9645
        for (i = 1; i < 16; i++) {
9646
            SP_ASM_ADDC(l, h, a1->dp[i]);
9647
            SP_ASM_ADDC(l, h, a->dp[i]);
9648
            a1->dp[i] = l;
9649
            l = h;
9650
            h = 0;
9651
        }
9652
        ca = l;
9653
        /* b01 = b0 + b1 */
9654
        l = b1->dp[0];
9655
        h = 0;
9656
        SP_ASM_ADDC(l, h, b->dp[0]);
9657
        b1->dp[0] = l;
9658
        l = h;
9659
        h = 0;
9660
        for (i = 1; i < 16; i++) {
9661
            SP_ASM_ADDC(l, h, b1->dp[i]);
9662
            SP_ASM_ADDC(l, h, b->dp[i]);
9663
            b1->dp[i] = l;
9664
            l = h;
9665
            h = 0;
9666
        }
9667
        cb = l;
9668
9669
        /* z0 = a0 * b0 */
9670
        err = _sp_mul_16(a, b, z0);
9671
    }
9672
    if (err == MP_OKAY) {
9673
        /* z1 = (a0 + a1) * (b0 + b1) */
9674
        err = _sp_mul_16(a1, b1, z1);
9675
    }
9676
    if (err == MP_OKAY) {
9677
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
9678
        /* r = z0 */
9679
        /* r += (z1 - z0 - z2) << 16 */
9680
        z1->dp[32] = ca & cb;
9681
        l = 0;
9682
        if (ca) {
9683
            h = 0;
9684
            for (i = 0; i < 16; i++) {
9685
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
9686
                SP_ASM_ADDC(l, h, b1->dp[i]);
9687
                z1->dp[i + 16] = l;
9688
                l = h;
9689
                h = 0;
9690
            }
9691
        }
9692
        z1->dp[32] += l;
9693
        l = 0;
9694
        if (cb) {
9695
            h = 0;
9696
            for (i = 0; i < 16; i++) {
9697
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
9698
                SP_ASM_ADDC(l, h, a1->dp[i]);
9699
                z1->dp[i + 16] = l;
9700
                l = h;
9701
                h = 0;
9702
            }
9703
        }
9704
        z1->dp[32] += l;
9705
        /* z1 = z1 - z0 - z1 */
9706
        l = 0;
9707
        h = 0;
9708
        for (i = 0; i < 32; i++) {
9709
            l += z1->dp[i];
9710
            SP_ASM_SUBC(l, h, z0->dp[i]);
9711
            SP_ASM_SUBC(l, h, z2->dp[i]);
9712
            z1->dp[i] = l;
9713
            l = h;
9714
            h = 0;
9715
        }
9716
        z1->dp[i] += l;
9717
        /* r += z1 << 16 */
9718
        l = 0;
9719
        h = 0;
9720
        for (i = 0; i < 16; i++) {
9721
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
9722
            SP_ASM_ADDC(l, h, z1->dp[i]);
9723
            r->dp[i + 16] = l;
9724
            l = h;
9725
            h = 0;
9726
        }
9727
        for (; i < 33; i++) {
9728
            SP_ASM_ADDC(l, h, z1->dp[i]);
9729
            r->dp[i + 16] = l;
9730
            l = h;
9731
            h = 0;
9732
        }
9733
        /* r += z2 << 32  */
9734
        l = 0;
9735
        h = 0;
9736
        for (i = 0; i < 17; i++) {
9737
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
9738
            SP_ASM_ADDC(l, h, z2->dp[i]);
9739
            r->dp[i + 32] = l;
9740
            l = h;
9741
            h = 0;
9742
        }
9743
        for (; i < 32; i++) {
9744
            SP_ASM_ADDC(l, h, z2->dp[i]);
9745
            r->dp[i + 32] = l;
9746
            l = h;
9747
            h = 0;
9748
        }
9749
        r->used = 64;
9750
        sp_clamp(r);
9751
    }
9752
9753
    FREE_SP_INT_ARRAY(z, NULL);
9754
    FREE_SP_INT_ARRAY(t, NULL);
9755
    return err;
9756
}
9757
    #endif /* SP_INT_DIGITS >= 64 */
9758
9759
    #if SP_INT_DIGITS >= 96
9760
/* Multiply a by b and store in r: r = a * b
9761
 *
9762
 * Karatsuba implementation.
9763
 *
9764
 * @param  [in]   a  SP integer to multiply.
9765
 * @param  [in]   b  SP integer to multiply.
9766
 * @param  [out]  r  SP integer result.
9767
 *
9768
 * @return  MP_OKAY on success.
9769
 * @return  MP_MEM when dynamic memory allocation fails.
9770
 */
9771
static int _sp_mul_48(sp_int* a, sp_int* b, sp_int* r)
9772
{
9773
    int err = MP_OKAY;
9774
    int i;
9775
    sp_int_digit l;
9776
    sp_int_digit h;
9777
    sp_int* a1;
9778
    sp_int* b1;
9779
    sp_int* z0;
9780
    sp_int* z1;
9781
    sp_int* z2;
9782
    sp_int_digit ca;
9783
    sp_int_digit cb;
9784
    DECL_SP_INT_ARRAY(t, 24, 2);
9785
    DECL_SP_INT_ARRAY(z, 49, 2);
9786
9787
    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
9788
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
9789
    if (err == MP_OKAY) {
9790
        a1 = t[0];
9791
        b1 = t[1];
9792
        z1 = z[0];
9793
        z2 = z[1];
9794
        z0 = r;
9795
9796
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
9797
        a1->used = 24;
9798
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
9799
        b1->used = 24;
9800
9801
        /* z2 = a1 * b1 */
9802
        err = _sp_mul_24(a1, b1, z2);
9803
    }
9804
    if (err == MP_OKAY) {
9805
        l = a1->dp[0];
9806
        h = 0;
9807
        SP_ASM_ADDC(l, h, a->dp[0]);
9808
        a1->dp[0] = l;
9809
        l = h;
9810
        h = 0;
9811
        for (i = 1; i < 24; i++) {
9812
            SP_ASM_ADDC(l, h, a1->dp[i]);
9813
            SP_ASM_ADDC(l, h, a->dp[i]);
9814
            a1->dp[i] = l;
9815
            l = h;
9816
            h = 0;
9817
        }
9818
        ca = l;
9819
        /* b01 = b0 + b1 */
9820
        l = b1->dp[0];
9821
        h = 0;
9822
        SP_ASM_ADDC(l, h, b->dp[0]);
9823
        b1->dp[0] = l;
9824
        l = h;
9825
        h = 0;
9826
        for (i = 1; i < 24; i++) {
9827
            SP_ASM_ADDC(l, h, b1->dp[i]);
9828
            SP_ASM_ADDC(l, h, b->dp[i]);
9829
            b1->dp[i] = l;
9830
            l = h;
9831
            h = 0;
9832
        }
9833
        cb = l;
9834
9835
        /* z0 = a0 * b0 */
9836
        err = _sp_mul_24(a, b, z0);
9837
    }
9838
    if (err == MP_OKAY) {
9839
        /* z1 = (a0 + a1) * (b0 + b1) */
9840
        err = _sp_mul_24(a1, b1, z1);
9841
    }
9842
    if (err == MP_OKAY) {
9843
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
9844
        /* r = z0 */
9845
        /* r += (z1 - z0 - z2) << 24 */
9846
        z1->dp[48] = ca & cb;
9847
        l = 0;
9848
        if (ca) {
9849
            h = 0;
9850
            for (i = 0; i < 24; i++) {
9851
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
9852
                SP_ASM_ADDC(l, h, b1->dp[i]);
9853
                z1->dp[i + 24] = l;
9854
                l = h;
9855
                h = 0;
9856
            }
9857
        }
9858
        z1->dp[48] += l;
9859
        l = 0;
9860
        if (cb) {
9861
            h = 0;
9862
            for (i = 0; i < 24; i++) {
9863
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
9864
                SP_ASM_ADDC(l, h, a1->dp[i]);
9865
                z1->dp[i + 24] = l;
9866
                l = h;
9867
                h = 0;
9868
            }
9869
        }
9870
        z1->dp[48] += l;
9871
        /* z1 = z1 - z0 - z1 */
9872
        l = 0;
9873
        h = 0;
9874
        for (i = 0; i < 48; i++) {
9875
            l += z1->dp[i];
9876
            SP_ASM_SUBC(l, h, z0->dp[i]);
9877
            SP_ASM_SUBC(l, h, z2->dp[i]);
9878
            z1->dp[i] = l;
9879
            l = h;
9880
            h = 0;
9881
        }
9882
        z1->dp[i] += l;
9883
        /* r += z1 << 16 */
9884
        l = 0;
9885
        h = 0;
9886
        for (i = 0; i < 24; i++) {
9887
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
9888
            SP_ASM_ADDC(l, h, z1->dp[i]);
9889
            r->dp[i + 24] = l;
9890
            l = h;
9891
            h = 0;
9892
        }
9893
        for (; i < 49; i++) {
9894
            SP_ASM_ADDC(l, h, z1->dp[i]);
9895
            r->dp[i + 24] = l;
9896
            l = h;
9897
            h = 0;
9898
        }
9899
        /* r += z2 << 48  */
9900
        l = 0;
9901
        h = 0;
9902
        for (i = 0; i < 25; i++) {
9903
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
9904
            SP_ASM_ADDC(l, h, z2->dp[i]);
9905
            r->dp[i + 48] = l;
9906
            l = h;
9907
            h = 0;
9908
        }
9909
        for (; i < 48; i++) {
9910
            SP_ASM_ADDC(l, h, z2->dp[i]);
9911
            r->dp[i + 48] = l;
9912
            l = h;
9913
            h = 0;
9914
        }
9915
        r->used = 96;
9916
        sp_clamp(r);
9917
    }
9918
9919
    FREE_SP_INT_ARRAY(z, NULL);
9920
    FREE_SP_INT_ARRAY(t, NULL);
9921
    return err;
9922
}
9923
    #endif /* SP_INT_DIGITS >= 96 */
9924
9925
    #if SP_INT_DIGITS >= 128
9926
/* Multiply a by b and store in r: r = a * b
9927
 *
9928
 * Karatsuba implementation.
9929
 *
 * Digits 0..63 of a and b are read as two 32-digit halves each. Three calls
 * to _sp_mul_32() produce z0, z1 and z2, which are then combined into r.
 * r->used is set to 128 before sp_clamp(r) is called.
 *
9930
 * @param  [in]   a  SP integer to multiply.
9931
 * @param  [in]   b  SP integer to multiply.
9932
 * @param  [out]  r  SP integer result.
9933
 *
9934
 * @return  MP_OKAY on success.
9935
 * @return  MP_MEM when dynamic memory allocation fails.
9936
 */
9937
static int _sp_mul_64(sp_int* a, sp_int* b, sp_int* r)
9938
{
9939
    int err = MP_OKAY;
9940
    int i;
9941
    sp_int_digit l;
9942
    sp_int_digit h;
9943
    sp_int* a1;
9944
    sp_int* b1;
9945
    sp_int* z0;
9946
    sp_int* z1;
9947
    sp_int* z2;
9948
    /* Digits left over from the additions a0 + a1 and b0 + b1. */
    sp_int_digit ca;
9949
    sp_int_digit cb;
9950
    DECL_SP_INT_ARRAY(t, 32, 2);
9951
    DECL_SP_INT_ARRAY(z, 65, 2);
9952
9953
    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
9954
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
9955
    if (err == MP_OKAY) {
9956
        a1 = t[0];
9957
        b1 = t[1];
9958
        z1 = z[0];
9959
        z2 = z[1];
9960
        /* z0 is built directly in the result. */
        z0 = r;
9961
9962
        /* a1 = digits 32..63 of a, b1 = digits 32..63 of b */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
9963
        a1->used = 32;
9964
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
9965
        b1->used = 32;
9966
9967
        /* z2 = a1 * b1 */
9968
        err = _sp_mul_32(a1, b1, z2);
9969
    }
9970
    if (err == MP_OKAY) {
9971
        /* a01 = a0 + a1 (stored in a1, left over digit kept in ca) */
        l = a1->dp[0];
9972
        h = 0;
9973
        SP_ASM_ADDC(l, h, a->dp[0]);
9974
        a1->dp[0] = l;
9975
        l = h;
9976
        h = 0;
9977
        for (i = 1; i < 32; i++) {
9978
            SP_ASM_ADDC(l, h, a1->dp[i]);
9979
            SP_ASM_ADDC(l, h, a->dp[i]);
9980
            a1->dp[i] = l;
9981
            l = h;
9982
            h = 0;
9983
        }
9984
        ca = l;
9985
        /* b01 = b0 + b1 (stored in b1, left over digit kept in cb) */
9986
        l = b1->dp[0];
9987
        h = 0;
9988
        SP_ASM_ADDC(l, h, b->dp[0]);
9989
        b1->dp[0] = l;
9990
        l = h;
9991
        h = 0;
9992
        for (i = 1; i < 32; i++) {
9993
            SP_ASM_ADDC(l, h, b1->dp[i]);
9994
            SP_ASM_ADDC(l, h, b->dp[i]);
9995
            b1->dp[i] = l;
9996
            l = h;
9997
            h = 0;
9998
        }
9999
        cb = l;
10000
10001
        /* z0 = a0 * b0 */
        /* NOTE(review): a and b are passed whole; _sp_mul_32() is presumed to
         * use only their low 32 digits - confirm. */
10002
        err = _sp_mul_32(a, b, z0);
10003
    }
10004
    if (err == MP_OKAY) {
10005
        /* z1 = (a0 + a1) * (b0 + b1) */
10006
        err = _sp_mul_32(a1, b1, z1);
10007
    }
10008
    if (err == MP_OKAY) {
10009
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
10010
        /* r = z0 */
10011
        /* r += (z1 - z0 - z2) << 32 */
        /* z1 was computed from the 32-digit sums only: add the parts that
         * belong to ca and cb. Digit 64 starts as ca * cb. */
10012
        z1->dp[64] = ca & cb;
10013
        l = 0;
        /* ca set: z1 += b01 << 32 */
10014
        if (ca) {
10015
            h = 0;
10016
            for (i = 0; i < 32; i++) {
10017
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
10018
                SP_ASM_ADDC(l, h, b1->dp[i]);
10019
                z1->dp[i + 32] = l;
10020
                l = h;
10021
                h = 0;
10022
            }
10023
        }
10024
        z1->dp[64] += l;
10025
        l = 0;
        /* cb set: z1 += a01 << 32 */
10026
        if (cb) {
10027
            h = 0;
10028
            for (i = 0; i < 32; i++) {
10029
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
10030
                SP_ASM_ADDC(l, h, a1->dp[i]);
10031
                z1->dp[i + 32] = l;
10032
                l = h;
10033
                h = 0;
10034
            }
10035
        }
10036
        z1->dp[64] += l;
10037
        /* z1 = z1 - z0 - z2 */
10038
        l = 0;
10039
        h = 0;
10040
        for (i = 0; i < 64; i++) {
10041
            l += z1->dp[i];
10042
            SP_ASM_SUBC(l, h, z0->dp[i]);
10043
            SP_ASM_SUBC(l, h, z2->dp[i]);
10044
            z1->dp[i] = l;
10045
            l = h;
10046
            h = 0;
10047
        }
10048
        /* i is 64 here: apply what is left in l to the top digit of z1. */
        z1->dp[i] += l;
10049
        /* r += z1 << 32 */
10050
        l = 0;
10051
        h = 0;
10052
        for (i = 0; i < 32; i++) {
10053
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
10054
            SP_ASM_ADDC(l, h, z1->dp[i]);
10055
            r->dp[i + 32] = l;
10056
            l = h;
10057
            h = 0;
10058
        }
10059
        /* r->dp[64..96] are written here without adding any existing value. */
        for (; i < 65; i++) {
10060
            SP_ASM_ADDC(l, h, z1->dp[i]);
10061
            r->dp[i + 32] = l;
10062
            l = h;
10063
            h = 0;
10064
        }
10065
        /* r += z2 << 64  */
10066
        l = 0;
10067
        h = 0;
        /* Add into r->dp[64..96], the digits written by the loop above. */
10068
        for (i = 0; i < 33; i++) {
10069
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
10070
            SP_ASM_ADDC(l, h, z2->dp[i]);
10071
            r->dp[i + 64] = l;
10072
            l = h;
10073
            h = 0;
10074
        }
10075
        /* r->dp[97..127] are written without adding any existing value. */
        for (; i < 64; i++) {
10076
            SP_ASM_ADDC(l, h, z2->dp[i]);
10077
            r->dp[i + 64] = l;
10078
            l = h;
10079
            h = 0;
10080
        }
10081
        r->used = 128;
10082
        sp_clamp(r);
10083
    }
10084
10085
    FREE_SP_INT_ARRAY(z, NULL);
10086
    FREE_SP_INT_ARRAY(t, NULL);
10087
    return err;
10088
}
10089
    #endif /* SP_INT_DIGITS >= 128 */
10090
10091
    #if SP_INT_DIGITS >= 192
10092
/* Multiply a by b and store in r: r = a * b
10093
 *
10094
 * Karatsuba implementation.
10095
 *
 * Digits 0..95 of a and b are read as two 48-digit halves each. Three calls
 * to _sp_mul_48() produce z0, z1 and z2, which are then combined into r.
 * r->used is set to 192 before sp_clamp(r) is called.
 *
10096
 * @param  [in]   a  SP integer to multiply.
10097
 * @param  [in]   b  SP integer to multiply.
10098
 * @param  [out]  r  SP integer result.
10099
 *
10100
 * @return  MP_OKAY on success.
10101
 * @return  MP_MEM when dynamic memory allocation fails.
10102
 */
10103
static int _sp_mul_96(sp_int* a, sp_int* b, sp_int* r)
10104
{
10105
    int err = MP_OKAY;
10106
    int i;
10107
    sp_int_digit l;
10108
    sp_int_digit h;
10109
    sp_int* a1;
10110
    sp_int* b1;
10111
    sp_int* z0;
10112
    sp_int* z1;
10113
    sp_int* z2;
10114
    /* Digits left over from the additions a0 + a1 and b0 + b1. */
    sp_int_digit ca;
10115
    sp_int_digit cb;
10116
    DECL_SP_INT_ARRAY(t, 48, 2);
10117
    DECL_SP_INT_ARRAY(z, 97, 2);
10118
10119
    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
10120
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
10121
    if (err == MP_OKAY) {
10122
        a1 = t[0];
10123
        b1 = t[1];
10124
        z1 = z[0];
10125
        z2 = z[1];
10126
        /* z0 is built directly in the result. */
        z0 = r;
10127
10128
        /* a1 = digits 48..95 of a, b1 = digits 48..95 of b */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
10129
        a1->used = 48;
10130
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
10131
        b1->used = 48;
10132
10133
        /* z2 = a1 * b1 */
10134
        err = _sp_mul_48(a1, b1, z2);
10135
    }
10136
    if (err == MP_OKAY) {
10137
        /* a01 = a0 + a1 (stored in a1, left over digit kept in ca) */
        l = a1->dp[0];
10138
        h = 0;
10139
        SP_ASM_ADDC(l, h, a->dp[0]);
10140
        a1->dp[0] = l;
10141
        l = h;
10142
        h = 0;
10143
        for (i = 1; i < 48; i++) {
10144
            SP_ASM_ADDC(l, h, a1->dp[i]);
10145
            SP_ASM_ADDC(l, h, a->dp[i]);
10146
            a1->dp[i] = l;
10147
            l = h;
10148
            h = 0;
10149
        }
10150
        ca = l;
10151
        /* b01 = b0 + b1 (stored in b1, left over digit kept in cb) */
10152
        l = b1->dp[0];
10153
        h = 0;
10154
        SP_ASM_ADDC(l, h, b->dp[0]);
10155
        b1->dp[0] = l;
10156
        l = h;
10157
        h = 0;
10158
        for (i = 1; i < 48; i++) {
10159
            SP_ASM_ADDC(l, h, b1->dp[i]);
10160
            SP_ASM_ADDC(l, h, b->dp[i]);
10161
            b1->dp[i] = l;
10162
            l = h;
10163
            h = 0;
10164
        }
10165
        cb = l;
10166
10167
        /* z0 = a0 * b0 */
        /* Only digits 0..47 of a and b are read by _sp_mul_48(). */
10168
        err = _sp_mul_48(a, b, z0);
10169
    }
10170
    if (err == MP_OKAY) {
10171
        /* z1 = (a0 + a1) * (b0 + b1) */
10172
        err = _sp_mul_48(a1, b1, z1);
10173
    }
10174
    if (err == MP_OKAY) {
10175
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
10176
        /* r = z0 */
10177
        /* r += (z1 - z0 - z2) << 48 */
        /* z1 was computed from the 48-digit sums only: add the parts that
         * belong to ca and cb. Digit 96 starts as ca * cb. */
10178
        z1->dp[96] = ca & cb;
10179
        l = 0;
        /* ca set: z1 += b01 << 48 */
10180
        if (ca) {
10181
            h = 0;
10182
            for (i = 0; i < 48; i++) {
10183
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
10184
                SP_ASM_ADDC(l, h, b1->dp[i]);
10185
                z1->dp[i + 48] = l;
10186
                l = h;
10187
                h = 0;
10188
            }
10189
        }
10190
        z1->dp[96] += l;
10191
        l = 0;
        /* cb set: z1 += a01 << 48 */
10192
        if (cb) {
10193
            h = 0;
10194
            for (i = 0; i < 48; i++) {
10195
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
10196
                SP_ASM_ADDC(l, h, a1->dp[i]);
10197
                z1->dp[i + 48] = l;
10198
                l = h;
10199
                h = 0;
10200
            }
10201
        }
10202
        z1->dp[96] += l;
10203
        /* z1 = z1 - z0 - z2 */
10204
        l = 0;
10205
        h = 0;
10206
        for (i = 0; i < 96; i++) {
10207
            l += z1->dp[i];
10208
            SP_ASM_SUBC(l, h, z0->dp[i]);
10209
            SP_ASM_SUBC(l, h, z2->dp[i]);
10210
            z1->dp[i] = l;
10211
            l = h;
10212
            h = 0;
10213
        }
10214
        /* i is 96 here: apply what is left in l to the top digit of z1. */
        z1->dp[i] += l;
10215
        /* r += z1 << 48 */
10216
        l = 0;
10217
        h = 0;
10218
        for (i = 0; i < 48; i++) {
10219
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
10220
            SP_ASM_ADDC(l, h, z1->dp[i]);
10221
            r->dp[i + 48] = l;
10222
            l = h;
10223
            h = 0;
10224
        }
10225
        /* r->dp[96..144] are written here without adding any existing value. */
        for (; i < 97; i++) {
10226
            SP_ASM_ADDC(l, h, z1->dp[i]);
10227
            r->dp[i + 48] = l;
10228
            l = h;
10229
            h = 0;
10230
        }
10231
        /* r += z2 << 96  */
10232
        l = 0;
10233
        h = 0;
        /* Add into r->dp[96..144], the digits written by the loop above. */
10234
        for (i = 0; i < 49; i++) {
10235
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
10236
            SP_ASM_ADDC(l, h, z2->dp[i]);
10237
            r->dp[i + 96] = l;
10238
            l = h;
10239
            h = 0;
10240
        }
10241
        /* r->dp[145..191] are written without adding any existing value. */
        for (; i < 96; i++) {
10242
            SP_ASM_ADDC(l, h, z2->dp[i]);
10243
            r->dp[i + 96] = l;
10244
            l = h;
10245
            h = 0;
10246
        }
10247
        r->used = 192;
10248
        sp_clamp(r);
10249
    }
10250
10251
    FREE_SP_INT_ARRAY(z, NULL);
10252
    FREE_SP_INT_ARRAY(t, NULL);
10253
    return err;
10254
}
10255
    #endif /* SP_INT_DIGITS >= 192 */
10256
10257
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
10258
#endif /* !WOLFSSL_SP_SMALL */
10259
10260
/* Multiply a by b and store in r: r = a * b
10261
 *
 * A zero operand gives a zero result. Otherwise a fixed-size implementation
 * is used when one is compiled in and both operands have exactly that many
 * digits; failing that, _sp_mul_nxn() is used when SQR_MUL_ASM is defined and
 * the operands have the same number of digits, and _sp_mul() in all other
 * cases.
 *
10262
 * @param  [in]   a  SP integer to multiply.
10263
 * @param  [in]   b  SP integer to multiply.
10264
 * @param  [out]  r  SP integer result.
10265
 *
10266
 * @return  MP_OKAY on success.
10267
 * @return  MP_VAL when a, b or r is NULL; or the result will be too big for
10268
 *          fixed data length (a->used + b->used is greater than r->size).
10269
 * @return  MP_MEM when dynamic memory allocation fails.
10270
 */
10271
int sp_mul(sp_int* a, sp_int* b, sp_int* r)
10272
578k
{
10273
578k
    int err = MP_OKAY;
10274
#ifdef WOLFSSL_SP_INT_NEGATIVE
10275
    int sign = MP_ZPOS;
10276
#endif
10277
10278
578k
    /* Validate parameters. */
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
10279
0
        err = MP_VAL;
10280
0
    }
10281
10282
    /* Result must have room for a->used + b->used digits. */
10283
578k
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
10284
2
        err = MP_VAL;
10285
2
    }
10286
10287
#if 0
10288
    if (err == MP_OKAY) {
10289
        sp_print(a, "a");
10290
        sp_print(b, "b");
10291
    }
10292
#endif
10293
10294
578k
    if (err == MP_OKAY) {
10295
    #ifdef WOLFSSL_SP_INT_NEGATIVE
10296
        /* Sign of the result is the XOR of the operand signs. */
        sign = a->sign ^ b->sign;
10297
    #endif
10298
10299
578k
        /* Anything multiplied by zero is zero. */
        if ((a->used == 0) || (b->used == 0)) {
10300
49.6k
            _sp_zero(r);
10301
49.6k
        }
10302
528k
        else
10303
528k
#ifndef WOLFSSL_SP_SMALL
10304
        /* Fixed-size implementations, each selected only when both operands
         * have exactly the stated number of digits. */
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
10305
#if SP_WORD_SIZE == 64
10306
        if ((a->used == 4) && (b->used == 4)) {
10307
            err = _sp_mul_4(a, b, r);
10308
        }
10309
        else
10310
#endif /* SP_WORD_SIZE == 64 */
10311
#if SP_WORD_SIZE == 64
10312
#ifdef SQR_MUL_ASM
10313
        if ((a->used == 6) && (b->used == 6)) {
10314
            err = _sp_mul_6(a, b, r);
10315
        }
10316
        else
10317
#endif /* SQR_MUL_ASM */
10318
#endif /* SP_WORD_SIZE == 64 */
10319
#if SP_WORD_SIZE == 32
10320
#ifdef SQR_MUL_ASM
10321
        if ((a->used == 8) && (b->used == 8)) {
10322
            err = _sp_mul_8(a, b, r);
10323
        }
10324
        else
10325
#endif /* SQR_MUL_ASM */
10326
#endif /* SP_WORD_SIZE == 32 */
10327
#if SP_WORD_SIZE == 32
10328
#ifdef SQR_MUL_ASM
10329
        if ((a->used == 12) && (b->used == 12)) {
10330
            err = _sp_mul_12(a, b, r);
10331
        }
10332
        else
10333
#endif /* SQR_MUL_ASM */
10334
#endif /* SP_WORD_SIZE == 32 */
10335
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10336
        /* Larger fixed sizes, available only when SP_INT_DIGITS can hold the
         * double-length result. */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10337
    #if SP_INT_DIGITS >= 32
10338
        if ((a->used == 16) && (b->used == 16)) {
10339
            err = _sp_mul_16(a, b, r);
10340
        }
10341
        else
10342
    #endif /* SP_INT_DIGITS >= 32 */
10343
    #if SP_INT_DIGITS >= 48
10344
        if ((a->used == 24) && (b->used == 24)) {
10345
            err = _sp_mul_24(a, b, r);
10346
        }
10347
        else
10348
    #endif /* SP_INT_DIGITS >= 48 */
10349
    #if SP_INT_DIGITS >= 64
10350
        if ((a->used == 32) && (b->used == 32)) {
10351
            err = _sp_mul_32(a, b, r);
10352
        }
10353
        else
10354
    #endif /* SP_INT_DIGITS >= 64 */
10355
    #if SP_INT_DIGITS >= 96
10356
        if ((a->used == 48) && (b->used == 48)) {
10357
            err = _sp_mul_48(a, b, r);
10358
        }
10359
        else
10360
    #endif /* SP_INT_DIGITS >= 96 */
10361
    #if SP_INT_DIGITS >= 128
10362
        if ((a->used == 64) && (b->used == 64)) {
10363
            err = _sp_mul_64(a, b, r);
10364
        }
10365
        else
10366
    #endif /* SP_INT_DIGITS >= 128 */
10367
    #if SP_INT_DIGITS >= 192
10368
        if ((a->used == 96) && (b->used == 96)) {
10369
            err = _sp_mul_96(a, b, r);
10370
        }
10371
        else
10372
    #endif /* SP_INT_DIGITS >= 192 */
10373
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
10374
528k
#endif /* !WOLFSSL_SP_SMALL */
10375
10376
#ifdef SQR_MUL_ASM
10377
        /* Operands have the same number of digits. */
        if (a->used == b->used) {
10378
            err = _sp_mul_nxn(a, b, r);
10379
        }
10380
        else
10381
#endif
10382
528k
        /* General case. */
        {
10383
528k
            err = _sp_mul(a, b, r);
10384
528k
        }
10385
578k
    }
10386
10387
#ifdef WOLFSSL_SP_INT_NEGATIVE
10388
    if (err == MP_OKAY) {
10389
        /* A zero result is always positive. */
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
10390
    }
10391
#endif
10392
10393
#if 0
10394
    if (err == MP_OKAY) {
10395
        sp_print(r, "rmul");
10396
    }
10397
#endif
10398
10399
578k
    return err;
10400
578k
}
10401
/* END SP_MUL implementations. */
10402
10403
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
10404
    defined(WOLFCRYPT_HAVE_ECCSI) || \
10405
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
10406
/* Multiply a by b mod m and store in r: r = (a * b) mod m
10407
 *
 * The product is computed with sp_mul() and reduced with sp_mod(). When r is
 * the same object as m the product is placed in a temporary first.
 *
10408
 * @param  [in]   a  SP integer to multiply.
10409
 * @param  [in]   b  SP integer to multiply.
10410
 * @param  [in]   m  SP integer that is the modulus.
10411
 * @param  [out]  r  SP integer result.
10412
 *
10413
 * @return  MP_OKAY on success.
10414
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
10415
 *          fixed data length.
10416
 * @return  MP_MEM when dynamic memory allocation fails.
10417
 */
10418
int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
10419
578k
{
10420
578k
    int err = MP_OKAY;
10421
10422
578k
    /* Validate parameters. */
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
10423
0
        err = MP_VAL;
10424
0
    }
10425
578k
    /* Fail when r does not have room for a->used + b->used digits. */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
10426
17
        err = MP_VAL;
10427
17
    }
10428
10429
578k
    if (err == MP_OKAY) {
10430
578k
        /* Multiply into a temporary so that m is not overwritten before the
         * reduction when r is m.
         * NOTE(review): the size comparison below is the same one already
         * turned into MP_VAL above, so only (r == m) can select this branch;
         * the check above also rejects (r == m) with a small r even though
         * the temporary could hold the product - confirm which behaviour is
         * intended. */
        if ((r == m) || (r->size < a->used + b->used)) {
10431
0
            DECL_SP_INT(t, ((a == NULL) || (b == NULL)) ? 1 :
10432
0
                a->used + b->used);
10433
0
            ALLOC_SP_INT(t, a->used + b->used, err, NULL);
10434
0
            if (err == MP_OKAY) {
10435
0
                err = sp_init_size(t, a->used + b->used);
10436
0
            }
10437
0
            /* t = a * b */
            if (err == MP_OKAY) {
10438
0
                err = sp_mul(a, b, t);
10439
0
            }
10440
0
            /* r = t mod m */
            if (err == MP_OKAY) {
10441
0
                err = sp_mod(t, m, r);
10442
0
            }
10443
10444
0
            FREE_SP_INT(t, NULL);
10445
0
        }
10446
578k
        /* r is not m and is big enough: multiply into r, reduce in place. */
        else {
10447
578k
            err = sp_mul(a, b, r);
10448
578k
            if (err == MP_OKAY) {
10449
577k
                err = sp_mod(r, m, r);
10450
577k
            }
10451
578k
        }
10452
578k
    }
10453
578k
    return err;
10454
578k
}
10455
#endif
10456
10457
#ifdef WOLFSSL_SP_INVMOD
10458
/* Calculates the multiplicative inverse in the field.
10459
 *
 * Halving-and-subtracting loop on u and v with coefficients b and c kept
 * reduced modulo mm. For an even modulus the inverse of m modulo a is found
 * first and then converted: a^-1 mod m = m - ((m*c - 1) / a).
 *
10460
 * @param  [in]   a  SP integer to find inverse of.
10461
 * @param  [in]   m  SP integer this is the modulus.
10462
 * @param  [out]  r  SP integer to hold result. r cannot be m.
10463
 *
10464
 * @return  MP_OKAY on success.
10465
 * @return  MP_VAL when a, m or r is NULL; r is m; r->size is less than
 *          2 * m->used; a or m is zero; a and m are even; no inverse is
 *          found; or
10466
 *          m is negative.
10467
 * @return  MP_MEM when dynamic memory allocation fails.
10468
 */
10469
int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
10470
1.10k
{
10471
1.10k
    int err = MP_OKAY;
10472
1.10k
    sp_int* u = NULL;
10473
1.10k
    sp_int* v = NULL;
10474
1.10k
    sp_int* b = NULL;
10475
1.10k
    /* Modulus used inside the loop: m when m is odd, a when m is even. */
    sp_int* mm;
10476
1.10k
    int evenMod = 0;
10477
1.10k
    DECL_SP_INT_ARRAY(t, (m == NULL) ? 1 : (m->used + 1), 3);
10478
1.10k
    DECL_SP_INT(c, (m == NULL) ? 1 : (2 * m->used + 1));
10479
10480
1.10k
    /* Validate parameters - result cannot be the modulus. */
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
10481
0
        err = MP_VAL;
10482
0
    }
10483
1.10k
    /* r must have room for 2 * m->used digits. */
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
10484
9
        err = MP_VAL;
10485
9
    }
10486
10487
#ifdef WOLFSSL_SP_INT_NEGATIVE
10488
    /* Negative modulus is not supported. */
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
10489
        err = MP_VAL;
10490
    }
10491
#endif
10492
10493
1.10k
    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
10494
1.10k
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
10495
1.10k
    if (err == MP_OKAY) {
10496
1.06k
        u = t[0];
10497
1.06k
        v = t[1];
10498
1.06k
        b = t[2];
10499
        /* c allocated separately and larger for even mod case. */
10500
10501
1.06k
        /* When |a| >= |m|: reduce a into r and continue with r as a. */
        if (_sp_cmp_abs(a, m) != MP_LT) {
10502
272
            err = sp_mod(a, m, r);
10503
272
            a = r;
10504
272
        }
10505
1.06k
    }
10506
10507
#ifdef WOLFSSL_SP_INT_NEGATIVE
10508
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
10509
        /* Make 'a' positive */
10510
        err = sp_add(m, a, r);
10511
        a = r;
10512
    }
10513
#endif
10514
10515
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
10516
1.10k
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
10517
49
        err = MP_VAL;
10518
49
    }
10519
    /* r*2*x != n*2*y + 1 for integer x,y */
10520
1.10k
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
10521
11
        err = MP_VAL;
10522
11
    }
10523
10524
    /* 1*1 = 0*m + 1  */
10525
1.10k
    if ((err == MP_OKAY) && sp_isone(a)) {
10526
31
        sp_set(r, 1);
10527
31
    }
10528
1.07k
    else if (err != MP_OKAY) {
        /* Error already recorded: nothing to calculate. */
10529
120
    }
10530
955
    else {
10531
955
        /* NOTE(review): the return values of sp_init_size(), sp_copy() and
         * sp_mod() in this setup are not checked - confirm they cannot fail
         * for temporaries of these sizes. */
        sp_init_size(u, m->used + 1);
10532
955
        sp_init_size(v, m->used + 1);
10533
955
        sp_init_size(b, m->used + 1);
10534
955
        sp_init_size(c, 2 * m->used + 1);
10535
10536
955
        if (sp_iseven(m)) {
10537
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
            /* Work modulo a: u = a, v = m mod a. */
10538
573
            mm = a;
10539
573
            sp_copy(a, u);
10540
573
            sp_mod(m, a, v);
10541
            /* v == 0 when a divides m evenly - no inverse.  */
10542
573
            if (sp_iszero(v)) {
10543
                /* Force u to no inverse answer. */
10544
20
                sp_set(u, 0);
10545
20
            }
10546
573
            evenMod = 1;
10547
573
        }
10548
382
        else {
10549
382
            /* Odd modulus: u = m, v = a. */
            mm = m;
10550
382
            sp_copy(m, u);
10551
382
            sp_copy(a, v);
10552
382
        }
10553
955
        /* b = 0, c = 1 */
        _sp_zero(b);
10554
955
        sp_set(c, 1);
10555
10556
299k
        /* Loop until v is one (c is the answer) or u is zero (no answer). */
        while (!sp_isone(v) && !sp_iszero(u)) {
10557
298k
            /* u even: u /= 2, b = b / 2 mod mm (add mm first when b odd). */
            if (sp_iseven(u)) {
10558
114k
                sp_div_2(u, u);
10559
114k
                if (sp_isodd(b)) {
10560
57.1k
                    _sp_add_off(b, mm, b, 0);
10561
57.1k
                }
10562
114k
                sp_div_2(b, b);
10563
114k
            }
10564
183k
            /* v even: v /= 2, c = c / 2 mod mm (add mm first when c odd). */
            else if (sp_iseven(v)) {
10565
82.2k
                sp_div_2(v, v);
10566
82.2k
                if (sp_isodd(c)) {
10567
40.8k
                    _sp_add_off(c, mm, c, 0);
10568
40.8k
                }
10569
82.2k
                sp_div_2(c, c);
10570
82.2k
            }
10571
101k
            /* u >= v: u -= v, b -= c (add mm to b first when b < c). */
            else if (_sp_cmp(u, v) != MP_LT) {
10572
57.9k
                _sp_sub_off(u, v, u, 0);
10573
57.9k
                if (_sp_cmp(b, c) == MP_LT) {
10574
21.7k
                    _sp_add_off(b, mm, b, 0);
10575
21.7k
                }
10576
57.9k
                _sp_sub_off(b, c, b, 0);
10577
57.9k
            }
10578
43.2k
            /* u < v: v -= u, c -= b (add mm to c first when c < b). */
            else {
10579
43.2k
                _sp_sub_off(v, u, v, 0);
10580
43.2k
                if (_sp_cmp(c, b) == MP_LT) {
10581
21.3k
                    _sp_add_off(c, mm, c, 0);
10582
21.3k
                }
10583
43.2k
                _sp_sub_off(c, b, c, 0);
10584
43.2k
            }
10585
298k
        }
10586
955
        /* u reached zero: no inverse. */
        if (sp_iszero(u)) {
10587
154
            err = MP_VAL;
10588
154
        }
10589
801
        else if (evenMod) {
10590
            /* Finish operation.
10591
             *    a^-1 mod m = m + ((1 - m*c) / a)
10592
             * => a^-1 mod m = m - ((m*c - 1) / a)
10593
             */
10594
499
            err = sp_mul(c, m, c);
10595
499
            if (err == MP_OKAY) {
10596
498
                _sp_sub_d(c, 1, c);
10597
498
                err = sp_div(c, a, c, NULL);
10598
498
            }
10599
499
            if (err == MP_OKAY) {
10600
497
                /* NOTE(review): return value of sp_sub() is not checked. */
                sp_sub(m, c, r);
10601
497
            }
10602
499
        }
10603
302
        else {
10604
302
            /* Odd modulus: c is the result. */
            err = sp_copy(c, r);
10605
302
        }
10606
955
    }
10607
10608
1.10k
    FREE_SP_INT(c, NULL);
10609
1.10k
    FREE_SP_INT_ARRAY(t, NULL);
10610
1.10k
    return err;
10611
1.10k
}
10612
#endif /* WOLFSSL_SP_INVMOD */
10613
10614
#ifdef WOLFSSL_SP_INVMOD_MONT_CT
10615
10616
#define CT_INV_MOD_PRE_CNT      8
10617
10618
/* Calculates the multiplicative inverse in the field - constant time.
10619
 *
10620
 * Modulus (m) must be a prime and greater than 2.
10621
 *
 * The result is a raised to the power e = m - 2, computed with Montgomery
 * reductions. A table of CT_INV_MOD_PRE_CNT values is built where entry i is
 * a raised to (2^(i+1) - 1), so that a run of up to CT_INV_MOD_PRE_CNT set
 * bits in e costs a single multiplication.
 *
10622
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
10623
 * @param  [in]   m   SP integer this is the modulus.
10624
 * @param  [out]  r   SP integer to hold result.
10625
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
10626
 *
10627
 * @return  MP_OKAY on success.
10628
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
10629
 * @return  MP_MEM when dynamic memory allocation fails.
10630
 */
10631
int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp)
10632
{
10633
    int err = MP_OKAY;
10634
    int i;
10635
    /* Number of consecutive set bits of e seen and not yet multiplied in. */
    int j = 0;
10636
    sp_int* t = NULL;
10637
    sp_int* e = NULL;
10638
    /* Table entries plus two more for t and e. */
    DECL_SP_INT_ARRAY(pre, (m == NULL) ? 1 : m->used * 2 + 1,
10639
                                                        CT_INV_MOD_PRE_CNT + 2);
10640
10641
    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
10642
        err = MP_VAL;
10643
    }
10644
10645
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
10646
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
10647
                                              (m->used == 1 && m->dp[0] < 3))) {
10648
        err = MP_VAL;
10649
    }
10650
10651
    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
10652
    if (err == MP_OKAY) {
10653
        t = pre[CT_INV_MOD_PRE_CNT + 0];
10654
        e = pre[CT_INV_MOD_PRE_CNT + 1];
10655
        /* NOTE(review): return values of sp_init_size() are not checked. */
        sp_init_size(t, m->used * 2 + 1);
10656
        sp_init_size(e, m->used * 2 + 1);
10657
10658
        /* pre[0] = a, pre[i] = pre[i-1]^2 * a, each Montgomery reduced. */
        sp_init_size(pre[0], m->used * 2 + 1);
10659
        err = sp_copy(a, pre[0]);
10660
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
10661
            sp_init_size(pre[i], m->used * 2 + 1);
10662
            err = sp_sqr(pre[i-1], pre[i]);
10663
            if (err == MP_OKAY) {
10664
                err = _sp_mont_red(pre[i], m, mp);
10665
            }
10666
            if (err == MP_OKAY) {
10667
                err = sp_mul(pre[i], a, pre[i]);
10668
            }
10669
            if (err == MP_OKAY) {
10670
                err = _sp_mont_red(pre[i], m, mp);
10671
            }
10672
        }
10673
    }
10674
10675
    if (err == MP_OKAY) {
10676
        /* e = m - 2 */
        _sp_sub_d(m, 2, e);
10677
        /* Count the run of set bits at the top of e, stopping at the first
         * clear bit or after CT_INV_MOD_PRE_CNT bits. */
        for (i = sp_count_bits(e)-1, j = 0; i >= 0; i--, j++) {
10678
              if ((!sp_is_bit_set(e, i)) || (j == CT_INV_MOD_PRE_CNT)) {
10679
                  break;
10680
              }
10681
        }
10682
        /* Start with the table entry for that run of j set bits. */
        err = sp_copy(pre[j-1], t);
10683
        /* Process the remaining bits of e from i down to 0. */
        for (j = 0; (err == MP_OKAY) && (i >= 0); i--) {
10684
            int set = sp_is_bit_set(e, i);
10685
10686
            /* Multiply in the pending run when it has reached the table size
             * or has been ended by a clear bit. */
            if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) {
10687
                err = sp_mul(t, pre[j-1], t);
10688
                if (err == MP_OKAY) {
10689
                    err = _sp_mont_red(t, m, mp);
10690
                }
10691
                j = 0;
10692
            }
10693
            /* One squaring for every bit of e. */
            if (err == MP_OKAY) {
10694
                err = sp_sqr(t, t);
10695
                if (err == MP_OKAY) {
10696
                    err = _sp_mont_red(t, m, mp);
10697
                }
10698
            }
10699
            /* Extend the pending run when this bit is set. */
            j += set;
10700
        }
10701
    }
10702
    if (err == MP_OKAY) {
10703
        /* Multiply in any run still pending, placing the result in r. */
        if (j > 0) {
10704
            err = sp_mul(t, pre[j-1], r);
10705
            if (err == MP_OKAY) {
10706
                err = _sp_mont_red(r, m, mp);
10707
            }
10708
        }
10709
        else {
10710
            err = sp_copy(t, r);
10711
        }
10712
    }
10713
10714
    FREE_SP_INT_ARRAY(pre, NULL);
10715
    return err;
10716
}
10717
10718
#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
10719
10720
10721
/**************************
10722
 * Exponentiation functions
10723
 **************************/
10724
10725
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
10726
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
10727
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10728
 * Process the exponent one bit at a time.
10729
 * Is constant time and can be cache attack resistant.
10730
 *
10731
 * @param  [in]   b     SP integer that is the base.
10732
 * @param  [in]   e     SP integer that is the exponent.
10733
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10734
 *                      count of bits in b.
10735
 * @param  [in]   m     SP integer that is the modulus.
10736
 * @param  [out]  r     SP integer to hold result.
10737
 *
10738
 * @return  MP_OKAY on success.
10739
 * @return  MP_MEM when dynamic memory allocation fails.
10740
 */
10741
static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r)
10742
2.94k
{
10743
2.94k
    int i;
10744
2.94k
    int err = MP_OKAY;
10745
2.94k
    int done = 0;
10746
2.94k
    int j;
10747
2.94k
    int y;
10748
2.94k
    int seenTopBit = 0;
10749
#ifdef WC_NO_CACHE_RESISTANT
10750
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
10751
#else
10752
2.94k
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
10753
2.94k
#endif
10754
10755
#ifdef WC_NO_CACHE_RESISTANT
10756
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
10757
#else
10758
2.94k
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
10759
2.94k
#endif
10760
2.94k
    if (err == MP_OKAY) {
10761
2.87k
        sp_init_size(t[0], 2 * m->used + 1);
10762
2.87k
        sp_init_size(t[1], 2 * m->used + 1);
10763
2.87k
    #ifndef WC_NO_CACHE_RESISTANT
10764
2.87k
        sp_init_size(t[2], 2 * m->used + 1);
10765
2.87k
    #endif
10766
10767
        /* Ensure base is less than exponent. */
10768
2.87k
        if (_sp_cmp_abs(b, m) != MP_LT) {
10769
0
            err = sp_mod(b, m, t[0]);
10770
0
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
10771
0
                sp_set(r, 0);
10772
0
                done = 1;
10773
0
            }
10774
0
        }
10775
2.87k
        else {
10776
2.87k
            err = sp_copy(b, t[0]);
10777
2.87k
        }
10778
2.87k
    }
10779
10780
2.94k
    if ((!done) && (err == MP_OKAY)) {
10781
        /* t[0] is dummy value and t[1] is result */
10782
2.87k
        err = sp_copy(t[0], t[1]);
10783
10784
577k
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
10785
#ifdef WC_NO_CACHE_RESISTANT
10786
            /* Square real result if seen the top bit. */
10787
            err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]);
10788
            if (err == MP_OKAY) {
10789
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10790
                j = y & seenTopBit;
10791
                seenTopBit |= y;
10792
                /* Multiply real result if bit is set and seen the top bit. */
10793
                err = sp_mulmod(t[j], b, m, t[j]);
10794
            }
10795
#else
10796
            /* Square real result if seen the top bit. */
10797
574k
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10798
574k
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
10799
574k
                    t[2]);
10800
574k
            err = sp_sqrmod(t[2], m, t[2]);
10801
574k
            sp_copy(t[2],
10802
574k
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10803
574k
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
10804
574k
            if (err == MP_OKAY) {
10805
574k
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10806
574k
                j = y & seenTopBit;
10807
574k
                seenTopBit |= y;
10808
                /* Multiply real result if bit is set and seen the top bit. */
10809
574k
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10810
574k
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
10811
574k
                        t[2]);
10812
574k
                err = sp_mulmod(t[2], b, m, t[2]);
10813
574k
                sp_copy(t[2],
10814
574k
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10815
574k
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
10816
574k
            }
10817
574k
#endif
10818
574k
        }
10819
2.87k
    }
10820
2.94k
    if ((!done) && (err == MP_OKAY)) {
10821
2.55k
        err = sp_copy(t[1], r);
10822
2.55k
    }
10823
10824
2.94k
    FREE_SP_INT_ARRAY(t, NULL);
10825
2.94k
    return err;
10826
2.94k
}
10827
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
10828
        * WOLFSSL_HAVE_SP_DH */
10829
10830
#if defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
10831
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))
10832
#ifndef WC_NO_HARDEN
10833
#if !defined(WC_NO_CACHE_RESISTANT)
10834
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10835
 * Process the exponent one bit at a time with base in montgomery form.
10836
 * Is constant time and cache attack resistant.
10837
 *
10838
 * @param  [in]   b     SP integer that is the base.
10839
 * @param  [in]   e     SP integer that is the exponent.
10840
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10841
 *                      count of bits in b.
10842
 * @param  [in]   m     SP integer that is the modulus.
10843
 * @param  [out]  r     SP integer to hold result.
10844
 *
10845
 * @return  MP_OKAY on success.
10846
 * @return  MP_MEM when dynamic memory allocation fails.
10847
 */
10848
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
10849
                               sp_int* r)
10850
{
10851
    int i;
10852
    int err = MP_OKAY;
10853
    int done = 0;
10854
    int j;
10855
    int y;
10856
    int seenTopBit = 0;
10857
    sp_int_digit mp;
10858
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
10859
10860
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
10861
    if (err == MP_OKAY) {
10862
        sp_init_size(t[0], m->used * 2 + 1);
10863
        sp_init_size(t[1], m->used * 2 + 1);
10864
        sp_init_size(t[2], m->used * 2 + 1);
10865
        sp_init_size(t[3], m->used * 2 + 1);
10866
10867
        /* Ensure base is less than exponent. */
10868
        if (_sp_cmp_abs(b, m) != MP_LT) {
10869
            err = sp_mod(b, m, t[0]);
10870
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
10871
                sp_set(r, 0);
10872
                done = 1;
10873
            }
10874
        }
10875
        else {
10876
            err = sp_copy(b, t[0]);
10877
        }
10878
    }
10879
10880
10881
    if ((!done) && (err == MP_OKAY)) {
10882
        err = sp_mont_setup(m, &mp);
10883
        if (err == MP_OKAY) {
10884
            err = sp_mont_norm(t[1], m);
10885
        }
10886
        if (err == MP_OKAY) {
10887
            /* Convert to montgomery form. */
10888
            err = sp_mulmod(t[0], t[1], m, t[0]);
10889
        }
10890
        if (err == MP_OKAY) {
10891
            /* t[0] is fake working value and t[1] is real working value. */
10892
            sp_copy(t[0], t[1]);
10893
            /* Montgomert form of base to multiply by. */
10894
            sp_copy(t[0], t[2]);
10895
        }
10896
10897
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
10898
            /* Square real working value if seen the top bit. */
10899
            sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10900
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])),
10901
                    t[3]);
10902
            err = sp_sqr(t[3], t[3]);
10903
            if (err == MP_OKAY) {
10904
                err = _sp_mont_red(t[3], m, mp);
10905
            }
10906
            sp_copy(t[3],
10907
                    (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
10908
                              ((size_t)t[1] & sp_off_on_addr[seenTopBit  ])));
10909
            if (err == MP_OKAY) {
10910
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
10911
                j = y & seenTopBit;
10912
                seenTopBit |= y;
10913
                /* Multiply real value if bit is set and seen the top bit. */
10914
                sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10915
                                  ((size_t)t[1] & sp_off_on_addr[j  ])),
10916
                        t[3]);
10917
                err = sp_mul(t[3], t[2], t[3]);
10918
                if (err == MP_OKAY) {
10919
                    err = _sp_mont_red(t[3], m, mp);
10920
                }
10921
                sp_copy(t[3],
10922
                        (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
10923
                                  ((size_t)t[1] & sp_off_on_addr[j  ])));
10924
            }
10925
        }
10926
        if (err == MP_OKAY) {
10927
            /* Convert from montgomery form. */
10928
            err = _sp_mont_red(t[1], m, mp);
10929
            /* Reduction implementation returns number to range < m. */
10930
        }
10931
    }
10932
    if ((!done) && (err == MP_OKAY)) {
10933
        err = sp_copy(t[1], r);
10934
    }
10935
10936
    FREE_SP_INT_ARRAY(t, NULL);
10937
    return err;
10938
}
10939
#else
10940
10941
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
10942
#define SP_ALLOC
10943
10944
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
10945
 * Creates a window of precalculated exponents with base in montgomery form.
10946
 * Is constant time but NOT cache attack resistant.
10947
 *
10948
 * @param  [in]   b     SP integer that is the base.
10949
 * @param  [in]   e     SP integer that is the exponent.
10950
 * @param  [in]   bits  Number of bits in base to use. May be greater than
10951
 *                      count of bits in b.
10952
 * @param  [in]   m     SP integer that is the modulus.
10953
 * @param  [out]  r     SP integer to hold result.
10954
 *
10955
 * @return  MP_OKAY on success.
10956
 * @return  MP_MEM when dynamic memory allocation fails.
10957
 */
10958
static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
10959
                               sp_int* r)
10960
{
10961
    int i;
10962
    int j;
10963
    int c;
10964
    int y;
10965
    int winBits;
10966
    int preCnt;
10967
    int err = MP_OKAY;
10968
    int done = 0;
10969
    sp_int_digit mp;
10970
    sp_int_digit n;
10971
    sp_int_digit mask;
10972
    sp_int* tr = NULL;
10973
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
10974
10975
    if (bits > 450) {
10976
        winBits = 6;
10977
    }
10978
    else if (bits <= 21) {
10979
        winBits = 1;
10980
    }
10981
    else if (bits <= 36) {
10982
        winBits = 3;
10983
    }
10984
    else if (bits <= 140) {
10985
        winBits = 4;
10986
    }
10987
    else {
10988
        winBits = 5;
10989
    }
10990
    preCnt = 1 << winBits;
10991
    mask = preCnt - 1;
10992
10993
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
10994
    if (err == MP_OKAY) {
10995
        tr = t[preCnt];
10996
10997
        for (i = 0; i < preCnt; i++) {
10998
            sp_init_size(t[i], m->used * 2 + 1);
10999
        }
11000
        sp_init_size(tr, m->used * 2 + 1);
11001
11002
        /* Ensure base is less than exponent. */
11003
        if (_sp_cmp_abs(b, m) != MP_LT) {
11004
            err = sp_mod(b, m, t[1]);
11005
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
11006
                sp_set(r, 0);
11007
                done = 1;
11008
            }
11009
        }
11010
        else {
11011
            err = sp_copy(b, t[1]);
11012
        }
11013
    }
11014
11015
    if ((!done) && (err == MP_OKAY)) {
11016
        err = sp_mont_setup(m, &mp);
11017
        if (err == MP_OKAY) {
11018
            /* Norm value is 1 in montgomery form. */
11019
            err = sp_mont_norm(t[0], m);
11020
        }
11021
        if (err == MP_OKAY) {
11022
            /* Convert base to montgomery form. */
11023
            err = sp_mulmod(t[1], t[0], m, t[1]);
11024
        }
11025
11026
        /* Pre-calculate values */
11027
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
11028
            if ((i & 1) == 0) {
11029
                err = sp_sqr(t[i/2], t[i]);
11030
            }
11031
            else {
11032
                err = sp_mul(t[i-1], t[1], t[i]);
11033
            }
11034
            if (err == MP_OKAY) {
11035
                err = _sp_mont_red(t[i], m, mp);
11036
            }
11037
        }
11038
11039
        if (err == MP_OKAY) {
11040
            /* Bits from the top that - possibly left over. */
11041
            i = (bits - 1) >> SP_WORD_SHIFT;
11042
            n = e->dp[i--];
11043
            c = bits & (SP_WORD_SIZE - 1);
11044
            if (c == 0) {
11045
                c = SP_WORD_SIZE;
11046
            }
11047
            c -= bits % winBits;
11048
            y = (int)(n >> c);
11049
            n <<= SP_WORD_SIZE - c;
11050
            /* Copy window number for top bits. */
11051
            sp_copy(t[y], tr);
11052
            for (; (i >= 0) || (c >= winBits); ) {
11053
                if (c == 0) {
11054
                    /* Bits up to end of digit */
11055
                    n = e->dp[i--];
11056
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
11057
                    n <<= winBits;
11058
                    c = SP_WORD_SIZE - winBits;
11059
                }
11060
                else if (c < winBits) {
11061
                    /* Bits to end of digit and part of next */
11062
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
11063
                    n = e->dp[i--];
11064
                    c = winBits - c;
11065
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11066
                    n <<= c;
11067
                    c = SP_WORD_SIZE - c;
11068
                }
11069
                else {
11070
                    /* Bits from middle of digit */
11071
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11072
                    n <<= winBits;
11073
                    c -= winBits;
11074
                }
11075
11076
                /* Square for number of bits in window. */
11077
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
11078
                    err = sp_sqr(tr, tr);
11079
                    if (err == MP_OKAY) {
11080
                        err = _sp_mont_red(tr, m, mp);
11081
                    }
11082
                }
11083
                /* Multiply by window number for next set of bits. */
11084
                if (err == MP_OKAY) {
11085
                    err = sp_mul(tr, t[y], tr);
11086
                }
11087
                if (err == MP_OKAY) {
11088
                    err = _sp_mont_red(tr, m, mp);
11089
                }
11090
            }
11091
        }
11092
11093
        if (err == MP_OKAY) {
11094
            /* Convert from montgomery form. */
11095
            err = _sp_mont_red(tr, m, mp);
11096
            /* Reduction implementation returns number to range < m. */
11097
        }
11098
    }
11099
    if ((!done) && (err == MP_OKAY)) {
11100
        err = sp_copy(tr, r);
11101
    }
11102
11103
    FREE_SP_INT_ARRAY(t, NULL);
11104
    return err;
11105
}
11106
11107
#undef SP_ALLOC
11108
11109
#endif /* !WC_NO_CACHE_RESISTANT */
11110
#endif /* !WC_NO_HARDEN */
11111
11112
#if SP_WORD_SIZE <= 16
11113
    #define EXP2_WINSIZE    2
11114
#elif SP_WORD_SIZE <= 32
11115
    #define EXP2_WINSIZE    3
11116
#elif SP_WORD_SIZE <= 64
11117
    #define EXP2_WINSIZE    4
11118
#elif SP_WORD_SIZE <= 128
11119
    #define EXP2_WINSIZE    5
11120
#endif
11121
11122
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
11123
 * Is constant time and cache attack resistant.
11124
 *
11125
 * @param  [in]   e       SP integer that is the exponent.
11126
 * @param  [in]   digits  Number of digits in base to use. May be greater than
11127
 *                        count of bits in b.
11128
 * @param  [in]   m       SP integer that is the modulus.
11129
 * @param  [out]  r       SP integer to hold result.
11130
 *
11131
 * @return  MP_OKAY on success.
11132
 * @return  MP_MEM when dynamic memory allocation fails.
11133
 */
11134
static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r)
11135
{
11136
    int i = 0;
11137
    int j;
11138
    int c = 0;
11139
    int y;
11140
    int err = MP_OKAY;
11141
    sp_int* t = NULL;
11142
    sp_int* tr = NULL;
11143
    sp_int_digit mp = 0, n = 0;
11144
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
11145
11146
#if 0
11147
    sp_print_int(2, "a");
11148
    sp_print(e, "b");
11149
    sp_print(m, "m");
11150
#endif
11151
11152
    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
11153
    if (err == MP_OKAY) {
11154
        t  = d[0];
11155
        tr = d[1];
11156
11157
        sp_init_size(t, m->used * 2 + 1);
11158
        sp_init_size(tr, m->used * 2 + 1);
11159
11160
        if (m->used > 1) {
11161
            err = sp_mont_setup(m, &mp);
11162
            if (err == MP_OKAY) {
11163
                /* Norm value is 1 in montgomery form. */
11164
                err = sp_mont_norm(tr, m);
11165
            }
11166
            if (err == MP_OKAY) {
11167
                err = sp_mul_2d(m, 1 << EXP2_WINSIZE, t);
11168
            }
11169
        }
11170
        else {
11171
            err = sp_set(tr, 1);
11172
        }
11173
11174
        if (err == MP_OKAY) {
11175
            /* Bits from the top. */
11176
            i = digits - 1;
11177
            n = e->dp[i--];
11178
            c = SP_WORD_SIZE;
11179
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
11180
            c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
11181
            if (c != SP_WORD_SIZE) {
11182
                y = (int)(n >> c);
11183
                n <<= SP_WORD_SIZE - c;
11184
            }
11185
            else
11186
#endif
11187
            {
11188
                y = 0;
11189
            }
11190
11191
            /* Multiply montgomery representation of 1 by 2 ^ top */
11192
            err = sp_mul_2d(tr, y, tr);
11193
        }
11194
        if ((err == MP_OKAY) && (m->used > 1)) {
11195
            err = sp_add(tr, t, tr);
11196
        }
11197
        if (err == MP_OKAY) {
11198
            err = sp_mod(tr, m, tr);
11199
        }
11200
        if (err == MP_OKAY) {
11201
            for (; (i >= 0) || (c >= EXP2_WINSIZE); ) {
11202
                if (c == 0) {
11203
                    /* Bits up to end of digit */
11204
                    n = e->dp[i--];
11205
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
11206
                    n <<= EXP2_WINSIZE;
11207
                    c = SP_WORD_SIZE - EXP2_WINSIZE;
11208
                }
11209
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
11210
                else if (c < EXP2_WINSIZE) {
11211
                    /* Bits to end of digit and part of next */
11212
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
11213
                    n = e->dp[i--];
11214
                    c = EXP2_WINSIZE - c;
11215
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11216
                    n <<= c;
11217
                    c = SP_WORD_SIZE - c;
11218
                }
11219
#endif
11220
                else {
11221
                    /* Bits from middle of digit */
11222
                    y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) &
11223
                              ((1 << EXP2_WINSIZE) - 1));
11224
                    n <<= EXP2_WINSIZE;
11225
                    c -= EXP2_WINSIZE;
11226
                }
11227
11228
                /* Square for number of bits in window. */
11229
                for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
11230
                    err = sp_sqr(tr, tr);
11231
                    if (err != MP_OKAY) {
11232
                        break;
11233
                    }
11234
                    if (m->used > 1) {
11235
                        err = _sp_mont_red(tr, m, mp);
11236
                    }
11237
                    else {
11238
                        err = sp_mod(tr, m, tr);
11239
                    }
11240
                }
11241
11242
                if (err == MP_OKAY) {
11243
                    /* then multiply by 2^y */
11244
                    err = sp_mul_2d(tr, y, tr);
11245
                }
11246
                if ((err == MP_OKAY) && (m->used > 1)) {
11247
                    /* Add in value to make mod operation take same time */
11248
                    err = sp_add(tr, t, tr);
11249
                }
11250
                if (err == MP_OKAY) {
11251
                    err = sp_mod(tr, m, tr);
11252
                }
11253
                if (err != MP_OKAY) {
11254
                    break;
11255
                }
11256
            }
11257
        }
11258
11259
        if ((err == MP_OKAY) && (m->used > 1)) {
11260
            /* Convert from montgomery form. */
11261
            err = _sp_mont_red(tr, m, mp);
11262
            /* Reduction implementation returns number to range < m. */
11263
        }
11264
    }
11265
    if (err == MP_OKAY) {
11266
        err = sp_copy(tr, r);
11267
    }
11268
11269
#if 0
11270
    sp_print(r, "rme");
11271
#endif
11272
11273
    FREE_SP_INT_ARRAY(d, NULL);
11274
    return err;
11275
}
11276
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
11277
11278
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
11279
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
11280
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11281
 *
11282
 * @param  [in]   b     SP integer that is the base.
11283
 * @param  [in]   e     SP integer that is the exponent.
11284
 * @param  [in]   bits  Number of bits in base to use. May be greater than
11285
 *                      count of bits in b.
11286
 * @param  [in]   m     SP integer that is the modulus.
11287
 * @param  [out]  r     SP integer to hold result.
11288
 *
11289
 * @return  MP_OKAY on success.
11290
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11291
 * @return  MP_MEM when dynamic memory allocation fails.
11292
 */
11293
int sp_exptmod_ex(sp_int* b, sp_int* e, int digits, sp_int* m, sp_int* r)
11294
3.41k
{
11295
3.41k
    int err = MP_OKAY;
11296
3.41k
    int done = 0;
11297
3.41k
    int mBits = sp_count_bits(m);
11298
3.41k
    int bBits = sp_count_bits(b);
11299
3.41k
    int eBits = sp_count_bits(e);
11300
11301
3.41k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11302
0
        err = MP_VAL;
11303
0
    }
11304
11305
#if 0
11306
    if (err == MP_OKAY) {
11307
        sp_print(b, "a");
11308
        sp_print(e, "b");
11309
        sp_print(m, "m");
11310
    }
11311
#endif
11312
11313
    /* Check for invalid modulus. */
11314
3.41k
    if ((err == MP_OKAY) && sp_iszero(m)) {
11315
69
        err = MP_VAL;
11316
69
    }
11317
#ifdef WOLFSSL_SP_INT_NEGATIVE
11318
    /* Check for unsupported negative values of exponent and modulus. */
11319
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
11320
        err = MP_VAL;
11321
    }
11322
#endif
11323
11324
    /* Check for degenerate cases. */
11325
3.41k
    if ((err == MP_OKAY) && sp_isone(m)) {
11326
14
        sp_set(r, 0);
11327
14
        done = 1;
11328
14
    }
11329
3.41k
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
11330
46
        sp_set(r, 1);
11331
46
        done = 1;
11332
46
    }
11333
11334
    /* Check whether base needs to be reduced. */
11335
3.41k
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
11336
175
        if ((r == e) || (r == m)) {
11337
0
            err = MP_VAL;
11338
0
        }
11339
175
        if (err == MP_OKAY) {
11340
175
            err = sp_mod(b, m, r);
11341
175
        }
11342
175
        if (err == MP_OKAY) {
11343
174
            b = r;
11344
174
        }
11345
175
    }
11346
    /* Check for degenerate case of base. */
11347
3.41k
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
11348
84
        sp_set(r, 0);
11349
84
        done = 1;
11350
84
    }
11351
11352
    /* Ensure SP integers have space for intermediate values. */
11353
3.41k
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
11354
23
        err = MP_VAL;
11355
23
    }
11356
11357
3.41k
    if ((!done) && (err == MP_OKAY)) {
11358
        /* Use code optimized for specific sizes if possible */
11359
3.17k
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
11360
3.17k
    (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
11361
3.17k
    #ifndef WOLFSSL_SP_NO_2048
11362
3.17k
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
11363
3.17k
            (eBits <= 1024)) {
11364
53
            err = sp_ModExp_1024(b, e, m, r);
11365
53
            done = 1;
11366
53
        }
11367
3.12k
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
11368
3.12k
                 (eBits <= 2048)) {
11369
60
            err = sp_ModExp_2048(b, e, m, r);
11370
60
            done = 1;
11371
60
        }
11372
3.06k
        else
11373
3.06k
    #endif
11374
3.06k
    #ifndef WOLFSSL_SP_NO_3072
11375
3.06k
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
11376
3.06k
            (eBits <= 1536)) {
11377
43
            err = sp_ModExp_1536(b, e, m, r);
11378
43
            done = 1;
11379
43
        }
11380
3.01k
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
11381
3.01k
                 (eBits <= 3072)) {
11382
33
            err = sp_ModExp_3072(b, e, m, r);
11383
33
            done = 1;
11384
33
        }
11385
2.98k
        else
11386
2.98k
    #endif
11387
2.98k
    #ifdef WOLFSSL_SP_4096
11388
2.98k
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
11389
2.98k
            (eBits <= 4096)) {
11390
37
            err = sp_ModExp_4096(b, e, m, r);
11391
37
            done = 1;
11392
37
        }
11393
2.94k
        else
11394
2.94k
    #endif
11395
2.94k
#endif
11396
2.94k
        {
11397
2.94k
        }
11398
3.17k
    }
11399
3.41k
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH)
11400
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
11401
    defined(NO_DH)
11402
    if ((!done) && (err == MP_OKAY))
11403
        err = sp_exptmod_nct(b, e, m, r);
11404
    }
11405
#else
11406
#if defined(WOLFSSL_SP_MATH_ALL)
11407
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
11408
         mp_isodd(m)) {
11409
        /* Use the generic base 2 implementation. */
11410
        err = _sp_exptmod_base_2(e, digits, m, r);
11411
    }
11412
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
11413
    #ifndef WC_NO_HARDEN
11414
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
11415
    #else
11416
        err = sp_exptmod_nct(b, e, m, r);
11417
    #endif
11418
    }
11419
    else
11420
#endif /* WOLFSSL_SP_MATH_ALL */
11421
3.41k
    if ((!done) && (err == MP_OKAY)) {
11422
        /* Otherwise use the generic implementation. */
11423
2.94k
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
11424
2.94k
    }
11425
3.41k
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
11426
#else
11427
    if ((!done) && (err == MP_OKAY)) {
11428
        err = MP_VAL;
11429
    }
11430
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
11431
11432
3.41k
    (void)mBits;
11433
3.41k
    (void)bBits;
11434
3.41k
    (void)eBits;
11435
3.41k
    (void)digits;
11436
11437
#if 0
11438
    if (err == MP_OKAY) {
11439
        sp_print(r, "rme");
11440
    }
11441
#endif
11442
3.41k
    return err;
11443
3.41k
}
11444
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
11445
11446
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
11447
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
11448
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11449
 *
11450
 * @param  [in]   b  SP integer that is the base.
11451
 * @param  [in]   e  SP integer that is the exponent.
11452
 * @param  [in]   m  SP integer that is the modulus.
11453
 * @param  [out]  r  SP integer to hold result.
11454
 *
11455
 * @return  MP_OKAY on success.
11456
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11457
 * @return  MP_MEM when dynamic memory allocation fails.
11458
 */
11459
int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11460
3.33k
{
11461
3.33k
    int err = MP_OKAY;
11462
11463
3.33k
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11464
0
        err = MP_VAL;
11465
0
    }
11466
3.33k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
11467
3.33k
    if (err == MP_OKAY) {
11468
3.33k
        err = sp_exptmod_ex(b, e, e->used, m, r);
11469
3.33k
    }
11470
3.33k
    RESTORE_VECTOR_REGISTERS();
11471
3.33k
    return err;
11472
3.33k
}
11473
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
11474
        * WOLFSSL_HAVE_SP_DH */
11475
11476
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
11477
#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
11478
11479
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
11480
#define SP_ALLOC
11481
11482
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11483
 * Creates a window of precalculated exponents with base in montgomery form.
11484
 * Sliding window and is NOT constant time.
11485
 *
11486
 * @param  [in]   b     SP integer that is the base.
11487
 * @param  [in]   e     SP integer that is the exponent.
11488
 * @param  [in]   bits  Number of bits in base to use. May be greater than
11489
 *                      count of bits in b.
11490
 * @param  [in]   m     SP integer that is the modulus.
11491
 * @param  [out]  r     SP integer to hold result.
11492
 *
11493
 * @return  MP_OKAY on success.
11494
 * @return  MP_MEM when dynamic memory allocation fails.
11495
 */
11496
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11497
0
{
11498
0
    int i = 0;
11499
0
    int j = 0;
11500
0
    int c = 0;
11501
0
    int y = 0;
11502
0
    int bits;
11503
0
    int winBits;
11504
0
    int preCnt;
11505
0
    int err = MP_OKAY;
11506
0
    int done = 0;
11507
0
    sp_int* tr = NULL;
11508
0
    sp_int* bm = NULL;
11509
0
    sp_int_digit mask;
11510
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
11511
0
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
11512
11513
0
    bits = sp_count_bits(e);
11514
11515
0
    if (bits > 450) {
11516
0
        winBits = 6;
11517
0
    }
11518
0
    else if (bits <= 21) {
11519
0
        winBits = 1;
11520
0
    }
11521
0
    else if (bits <= 36) {
11522
0
        winBits = 3;
11523
0
    }
11524
0
    else if (bits <= 140) {
11525
0
        winBits = 4;
11526
0
    }
11527
0
    else {
11528
0
        winBits = 5;
11529
0
    }
11530
0
    preCnt = 1 << (winBits - 1);
11531
0
    mask = preCnt - 1;
11532
11533
0
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL);
11534
0
    if (err == MP_OKAY) {
11535
        /* Initialize window numbers and temporary result. */
11536
0
        tr = t[preCnt + 0];
11537
0
        bm = t[preCnt + 1];
11538
11539
0
        for (i = 0; i < preCnt; i++) {
11540
0
            sp_init_size(t[i], m->used * 2 + 1);
11541
0
        }
11542
0
        sp_init_size(tr, m->used * 2 + 1);
11543
0
        sp_init_size(bm, m->used * 2 + 1);
11544
11545
        /* Ensure base is less than exponent. */
11546
0
        if (_sp_cmp_abs(b, m) != MP_LT) {
11547
0
            err = sp_mod(b, m, bm);
11548
0
            if ((err == MP_OKAY) && sp_iszero(bm)) {
11549
0
                sp_set(r, 0);
11550
0
                done = 1;
11551
0
            }
11552
0
        }
11553
0
        else {
11554
0
            err = sp_copy(b, bm);
11555
0
        }
11556
0
    }
11557
11558
0
    if ((!done) && (err == MP_OKAY)) {
11559
0
        sp_int_digit mp;
11560
0
        sp_int_digit n;
11561
11562
0
        err = sp_mont_setup(m, &mp);
11563
0
        if (err == MP_OKAY) {
11564
0
            err = sp_mont_norm(t[0], m);
11565
0
        }
11566
0
        if (err == MP_OKAY) {
11567
0
            err = sp_mulmod(bm, t[0], m, bm);
11568
0
        }
11569
0
        if (err == MP_OKAY) {
11570
0
            err = sp_copy(bm, t[0]);
11571
0
        }
11572
0
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
11573
0
            err = sp_sqr(t[0], t[0]);
11574
0
            if (err == MP_OKAY) {
11575
0
                err = _sp_mont_red(t[0], m, mp);
11576
0
            }
11577
0
        }
11578
0
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
11579
0
            err = sp_mul(t[i-1], bm, t[i]);
11580
0
            if (err == MP_OKAY) {
11581
0
                err = _sp_mont_red(t[i], m, mp);
11582
0
            }
11583
0
        }
11584
11585
0
        if (err == MP_OKAY) {
11586
            /* Find the top bit. */
11587
0
            i = (bits - 1) >> SP_WORD_SHIFT;
11588
0
            n = e->dp[i--];
11589
0
            c = bits % SP_WORD_SIZE;
11590
0
            if (c == 0) {
11591
0
                c = SP_WORD_SIZE;
11592
0
            }
11593
            /* Put top bit at highest offset in digit. */
11594
0
            n <<= SP_WORD_SIZE - c;
11595
11596
0
            if (bits >= winBits) {
11597
                /* Top bit set. Copy from window. */
11598
0
                if (c < winBits) {
11599
                    /* Bits to end of digit and part of next */
11600
0
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11601
0
                    n = e->dp[i--];
11602
0
                    c = winBits - c;
11603
0
                    y |= (int)(n >> (SP_WORD_SIZE - c));
11604
0
                    n <<= c;
11605
0
                    c = SP_WORD_SIZE - c;
11606
0
                }
11607
0
                else {
11608
                    /* Bits from middle of digit */
11609
0
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
11610
0
                    n <<= winBits;
11611
0
                    c -= winBits;
11612
0
                }
11613
0
                err = sp_copy(t[y], tr);
11614
0
            }
11615
0
            else {
11616
                /* 1 in Montgomery form. */
11617
0
                err = sp_mont_norm(tr, m);
11618
0
            }
11619
0
            while (err == MP_OKAY) {
11620
                /* Sqaure until we find bit that is 1 or there's less than a
11621
                 * window of bits left.
11622
                 */
11623
0
                while (err == MP_OKAY && ((i >= 0) || (c >= winBits))) {
11624
0
                    sp_int_digit n2 = n;
11625
0
                    int c2 = c;
11626
0
                    int i2 = i;
11627
11628
                    /* Make sure n2 has bits from the right digit. */
11629
0
                    if (c2 == 0) {
11630
0
                        n2 = e->dp[i2--];
11631
0
                        c2 = SP_WORD_SIZE;
11632
0
                    }
11633
                    /* Mask off the next bit. */
11634
0
                    y = (int)((n2 >> (SP_WORD_SIZE - 1)) & 1);
11635
0
                    if (y == 1) {
11636
0
                        break;
11637
0
                    }
11638
11639
                    /* Square and update position. */
11640
0
                    err = sp_sqr(tr, tr);
11641
0
                    if (err == MP_OKAY) {
11642
0
                        err = _sp_mont_red(tr, m, mp);
11643
0
                    }
11644
0
                    n = n2 << 1;
11645
0
                    c = c2 - 1;
11646
0
                    i = i2;
11647
0
                }
11648
11649
0
                if (err == MP_OKAY) {
11650
                    /* Check we have enough bits left for a window. */
11651
0
                    if ((i < 0) && (c < winBits)) {
11652
0
                        break;
11653
0
                    }
11654
11655
0
                    if (c == 0) {
11656
                        /* Bits up to end of digit */
11657
0
                        n = e->dp[i--];
11658
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11659
0
                        n <<= winBits;
11660
0
                        c = SP_WORD_SIZE - winBits;
11661
0
                    }
11662
0
                    else if (c < winBits) {
11663
                        /* Bits to end of digit and part of next */
11664
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11665
0
                        n = e->dp[i--];
11666
0
                        c = winBits - c;
11667
0
                        y |= (int)(n >> (SP_WORD_SIZE - c));
11668
0
                        n <<= c;
11669
0
                        c = SP_WORD_SIZE - c;
11670
0
                    }
11671
0
                    else {
11672
                        /* Bits from middle of digit */
11673
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
11674
0
                        n <<= winBits;
11675
0
                        c -= winBits;
11676
0
                    }
11677
0
                    y &= mask;
11678
0
                }
11679
11680
                /* Square for number of bits in window. */
11681
0
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
11682
0
                    err = sp_sqr(tr, tr);
11683
0
                    if (err == MP_OKAY) {
11684
0
                        err = _sp_mont_red(tr, m, mp);
11685
0
                    }
11686
0
                }
11687
                /* Multiply by window number for next set of bits. */
11688
0
                if (err == MP_OKAY) {
11689
0
                    err = sp_mul(tr, t[y], tr);
11690
0
                }
11691
0
                if (err == MP_OKAY) {
11692
0
                    err = _sp_mont_red(tr, m, mp);
11693
0
                }
11694
0
            }
11695
0
            if ((err == MP_OKAY) && (c > 0)) {
11696
                /* Handle remaining bits.
11697
                 * Window values have top bit set and can't be used. */
11698
0
                n = e->dp[0];
11699
0
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
11700
0
                    err = sp_sqr(tr, tr);
11701
0
                    if (err == MP_OKAY) {
11702
0
                        err = _sp_mont_red(tr, m, mp);
11703
0
                    }
11704
0
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
11705
0
                        err = sp_mul(tr, bm, tr);
11706
0
                        if (err == MP_OKAY) {
11707
0
                            err = _sp_mont_red(tr, m, mp);
11708
0
                        }
11709
0
                    }
11710
0
                }
11711
0
            }
11712
0
        }
11713
11714
0
        if (err == MP_OKAY) {
11715
            /* Convert from montgomery form. */
11716
0
            err = _sp_mont_red(tr, m, mp);
11717
            /* Reduction implementation returns number to range < m. */
11718
0
        }
11719
0
    }
11720
0
    if ((!done) && (err == MP_OKAY)) {
11721
0
        err = sp_copy(tr, r);
11722
0
    }
11723
11724
0
    FREE_SP_INT_ARRAY(t, NULL);
11725
0
    return err;
11726
0
}
11727
11728
#undef SP_ALLOC
11729
11730
#else
11731
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11732
 * Non-constant time implementation.
11733
 *
11734
 * @param  [in]   b  SP integer that is the base.
11735
 * @param  [in]   e  SP integer that is the exponent.
11736
 * @param  [in]   m  SP integer that is the modulus.
11737
 * @param  [out]  r  SP integer to hold result.
11738
 *
11739
 * @return  MP_OKAY on success.
11740
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11741
 * @return  MP_MEM when dynamic memory allocation fails.
11742
 */
11743
static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11744
{
11745
    int i;
11746
    int err = MP_OKAY;
11747
    int done = 0;
11748
    int y = 0;
11749
    int bits = sp_count_bits(e);
11750
    sp_int_digit mp;
11751
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
11752
11753
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
11754
    if (err == MP_OKAY) {
11755
        sp_init_size(t[0], m->used * 2 + 1);
11756
        sp_init_size(t[1], m->used * 2 + 1);
11757
11758
        /* Ensure base is less than exponent. */
11759
        if (_sp_cmp_abs(b, m) != MP_LT) {
11760
            err = sp_mod(b, m, t[0]);
11761
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
11762
                sp_set(r, 0);
11763
                done = 1;
11764
            }
11765
        }
11766
        else {
11767
            err = sp_copy(b, t[0]);
11768
        }
11769
    }
11770
11771
    if ((!done) && (err == MP_OKAY)) {
11772
        err = sp_mont_setup(m, &mp);
11773
        if (err == MP_OKAY) {
11774
            err = sp_mont_norm(t[1], m);
11775
        }
11776
        if (err == MP_OKAY) {
11777
            /* Convert to montgomery form. */
11778
            err = sp_mulmod(t[0], t[1], m, t[0]);
11779
        }
11780
        if (err == MP_OKAY) {
11781
            /* Montgomert form of base to multiply by. */
11782
            sp_copy(t[0], t[1]);
11783
        }
11784
11785
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
11786
            err = sp_sqr(t[0], t[0]);
11787
            if (err == MP_OKAY) {
11788
                err = _sp_mont_red(t[0], m, mp);
11789
            }
11790
            if (err == MP_OKAY) {
11791
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
11792
                if (y != 0) {
11793
                    err = sp_mul(t[0], t[1], t[0]);
11794
                    if (err == MP_OKAY) {
11795
                        err = _sp_mont_red(t[0], m, mp);
11796
                    }
11797
                }
11798
            }
11799
        }
11800
        if (err == MP_OKAY) {
11801
            /* Convert from montgomery form. */
11802
            err = _sp_mont_red(t[0], m, mp);
11803
            /* Reduction implementation returns number to range < m. */
11804
        }
11805
    }
11806
    if ((!done) && (err == MP_OKAY)) {
11807
        err = sp_copy(t[0], r);
11808
    }
11809
11810
    FREE_SP_INT_ARRAY(t, NULL);
11811
    return err;
11812
}
11813
#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
11814
11815
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
11816
 * Non-constant time implementation.
11817
 *
11818
 * @param  [in]   b  SP integer that is the base.
11819
 * @param  [in]   e  SP integer that is the exponent.
11820
 * @param  [in]   m  SP integer that is the modulus.
11821
 * @param  [out]  r  SP integer to hold result.
11822
 *
11823
 * @return  MP_OKAY on success.
11824
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
11825
 * @return  MP_MEM when dynamic memory allocation fails.
11826
 */
11827
int sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
11828
0
{
11829
0
    int err = MP_OKAY;
11830
11831
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
11832
0
        err = MP_VAL;
11833
0
    }
11834
11835
#if 0
11836
    if (err == MP_OKAY) {
11837
        sp_print(b, "a");
11838
        sp_print(e, "b");
11839
        sp_print(m, "m");
11840
    }
11841
#endif
11842
11843
0
    if (err != MP_OKAY) {
11844
0
    }
11845
    /* Handle special cases. */
11846
0
    else if (sp_iszero(m)) {
11847
0
        err = MP_VAL;
11848
0
    }
11849
#ifdef WOLFSSL_SP_INT_NEGATIVE
11850
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
11851
        err = MP_VAL;
11852
    }
11853
#endif
11854
0
    else if (sp_isone(m)) {
11855
0
        sp_set(r, 0);
11856
0
    }
11857
0
    else if (sp_iszero(e)) {
11858
0
        sp_set(r, 1);
11859
0
    }
11860
0
    else if (sp_iszero(b)) {
11861
0
        sp_set(r, 0);
11862
0
    }
11863
    /* Ensure SP integers have space for intermediate values. */
11864
0
    else if (m->used * 2 >= r->size) {
11865
0
        err = MP_VAL;
11866
0
    }
11867
0
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
11868
0
    else if (mp_iseven(m)) {
11869
0
        err = _sp_exptmod_ex(b, e, e->used * SP_WORD_SIZE, m, r);
11870
0
    }
11871
0
#endif
11872
0
    else {
11873
0
        err = _sp_exptmod_nct(b, e, m, r);
11874
0
    }
11875
11876
#if 0
11877
    if (err == MP_OKAY) {
11878
        sp_print(r, "rme");
11879
    }
11880
#endif
11881
11882
0
    return err;
11883
0
}
11884
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
11885
11886
/***************
11887
 * 2^e functions
11888
 ***************/
11889
11890
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
11891
/* Divide by 2^e: r = a >> e and rem = bits shifted out
11892
 *
11893
 * @param  [in]   a    SP integer to divide.
11894
 * @param  [in]   e    Exponent bits (dividing by 2^e).
11895
 * @param  [in]   m    SP integer that is the modulus.
11896
 * @param  [out]  r    SP integer to hold result.
11897
 * @param  [out]  rem  SP integer to hold remainder.
11898
 *
11899
 * @return  MP_OKAY on success.
11900
 * @return  MP_VAL when a is NULL.
11901
 */
11902
int sp_div_2d(sp_int* a, int e, sp_int* r, sp_int* rem)
11903
{
11904
    int err = MP_OKAY;
11905
11906
    if (a == NULL) {
11907
        err = MP_VAL;
11908
    }
11909
11910
    if (err == MP_OKAY) {
11911
        int remBits = sp_count_bits(a) - e;
11912
11913
        if (remBits <= 0) {
11914
            /* Shifting down by more bits than in number. */
11915
            _sp_zero(r);
11916
            sp_copy(a, rem);
11917
        }
11918
        else {
11919
            if (rem != NULL) {
11920
                /* Copy a in to remainder. */
11921
                err = sp_copy(a, rem);
11922
            }
11923
            /* Shift a down by into result. */
11924
            sp_rshb(a, e, r);
11925
            if (rem != NULL) {
11926
                /* Set used and mask off top digit of remainder. */
11927
                rem->used = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
11928
                e &= SP_WORD_MASK;
11929
                if (e > 0) {
11930
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
11931
                }
11932
                sp_clamp(rem);
11933
            #ifdef WOLFSSL_SP_INT_NEGATIVE
11934
                rem->sign = MP_ZPOS;
11935
            #endif
11936
            }
11937
        }
11938
    }
11939
11940
    return err;
11941
}
11942
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
11943
11944
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
11945
/* The bottom e bits: r = a & ((1 << e) - 1)
11946
 *
11947
 * @param  [in]   a  SP integer to reduce.
11948
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
11949
 * @param  [out]  r  SP integer to hold result.
11950
 *
11951
 * @return  MP_OKAY on success.
11952
 * @return  MP_VAL when a or r is NULL.
11953
 */
11954
int sp_mod_2d(sp_int* a, int e, sp_int* r)
11955
{
11956
    int err = MP_OKAY;
11957
11958
    if ((a == NULL) || (r == NULL)) {
11959
        err = MP_VAL;
11960
    }
11961
11962
    if (err == MP_OKAY) {
11963
        int digits = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
11964
        if (a != r) {
11965
            XMEMCPY(r->dp, a->dp, digits * sizeof(sp_int_digit));
11966
            r->used = a->used;
11967
        #ifdef WOLFSSL_SP_INT_NEGATIVE
11968
            r->sign = a->sign;
11969
        #endif
11970
        }
11971
    #ifndef WOLFSSL_SP_INT_NEGATIVE
11972
        if (digits <= a->used)
11973
    #else
11974
        if ((a->sign != MP_ZPOS) || (digits <= a->used))
11975
    #endif
11976
        {
11977
        #ifdef WOLFSSL_SP_INT_NEGATIVE
11978
            if (a->sign == MP_NEG) {
11979
                int i;
11980
                sp_int_digit carry = 0;
11981
11982
                /* Negate value. */
11983
                for (i = 0; i < r->used; i++) {
11984
                    sp_int_digit next = r->dp[i] > 0;
11985
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
11986
                    carry |= next;
11987
                }
11988
                for (; i < digits; i++) {
11989
                    r->dp[i] = (sp_int_digit)0 - carry;
11990
                }
11991
                r->sign = MP_ZPOS;
11992
            }
11993
        #endif
11994
            /* Set used and mask off top digit of result. */
11995
            r->used = digits;
11996
            e &= SP_WORD_MASK;
11997
            if (e > 0) {
11998
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
11999
            }
12000
            sp_clamp(r);
12001
        }
12002
    }
12003
12004
    return err;
12005
}
12006
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
12007
12008
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
12009
    !defined(NO_DH))
12010
/* Multiply by 2^e: r = a << e
12011
 *
12012
 * @param  [in]   a  SP integer to multiply.
12013
 * @param  [in]   e  Multiplier bits (multiplier equals 2^e).
12014
 * @param  [out]  r  SP integer to hold result.
12015
 *
12016
 * @return  MP_OKAY on success.
12017
 * @return  MP_VAL when a or r is NULL, or result is too big for fixed data
12018
 *          length.
12019
 */
12020
int sp_mul_2d(sp_int* a, int e, sp_int* r)
12021
{
12022
    int err = MP_OKAY;
12023
12024
    if ((a == NULL) || (r == NULL)) {
12025
        err = MP_VAL;
12026
    }
12027
12028
    if ((err == MP_OKAY) && (sp_count_bits(a) + e > r->size * SP_WORD_SIZE)) {
12029
        err = MP_VAL;
12030
    }
12031
12032
    if (err == MP_OKAY) {
12033
        /* Copy a into r as left shift function works on the number. */
12034
        if (a != r) {
12035
            err = sp_copy(a, r);
12036
        }
12037
    }
12038
12039
    if (err == MP_OKAY) {
12040
#if 0
12041
        sp_print(a, "a");
12042
        sp_print_int(e, "n");
12043
#endif
12044
        err = sp_lshb(r, e);
12045
#if 0
12046
        sp_print(r, "rsl");
12047
#endif
12048
    }
12049
12050
    return err;
12051
}
12052
#endif /* WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH) */
12053
12054
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12055
    defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
12056
12057
/* START SP_SQR implementations */
12058
/* This code is generated.
12059
 * To generate:
12060
 *   cd scripts/sp/sp_int
12061
 *   ./gen.sh
12062
 * File sp_sqr.c contains code.
12063
 */
12064
12065
#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
12066
#ifdef SQR_MUL_ASM
12067
/* Square a and store in r. r = a * a
12068
 *
12069
 * @param  [in]   a  SP integer to square.
12070
 * @param  [out]  r  SP integer result.
12071
 *
12072
 * @return  MP_OKAY on success.
12073
 * @return  MP_MEM when dynamic memory allocation fails.
12074
 */
12075
static int _sp_sqr(sp_int* a, sp_int* r)
12076
{
12077
    int err = MP_OKAY;
12078
    int i;
12079
    int j;
12080
    int k;
12081
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12082
    sp_int_digit* t = NULL;
12083
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
12084
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
12085
    sp_int_digit t[a->used * 2];
12086
#else
12087
    sp_int_digit t[SP_INT_DIGITS];
12088
#endif
12089
12090
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12091
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
12092
        DYNAMIC_TYPE_BIGINT);
12093
    if (t == NULL) {
12094
        err = MP_MEM;
12095
    }
12096
#endif
12097
    if ((err == MP_OKAY) && (a->used <= 1)) {
12098
        sp_int_digit l, h;
12099
12100
        h = 0;
12101
        l = 0;
12102
        SP_ASM_SQR(h, l, a->dp[0]);
12103
        t[0] = h;
12104
        t[1] = l;
12105
    }
12106
    else if (err == MP_OKAY) {
12107
        sp_int_digit l, h, o;
12108
12109
        h = 0;
12110
        l = 0;
12111
        SP_ASM_SQR(h, l, a->dp[0]);
12112
        t[0] = h;
12113
        h = 0;
12114
        o = 0;
12115
        for (k = 1; k < (a->used + 1) / 2; k++) {
12116
            i = k;
12117
            j = k - 1;
12118
            for (; (j >= 0); i++, j--) {
12119
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12120
            }
12121
            t[k * 2 - 1] = l;
12122
            l = h;
12123
            h = o;
12124
            o = 0;
12125
12126
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
12127
            i = k + 1;
12128
            j = k - 1;
12129
            for (; (j >= 0); i++, j--) {
12130
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12131
            }
12132
            t[k * 2] = l;
12133
            l = h;
12134
            h = o;
12135
            o = 0;
12136
        }
12137
        for (; k < a->used; k++) {
12138
            i = k;
12139
            j = k - 1;
12140
            for (; (i < a->used); i++, j--) {
12141
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12142
            }
12143
            t[k * 2 - 1] = l;
12144
            l = h;
12145
            h = o;
12146
            o = 0;
12147
12148
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
12149
            i = k + 1;
12150
            j = k - 1;
12151
            for (; (i < a->used); i++, j--) {
12152
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
12153
            }
12154
            t[k * 2] = l;
12155
            l = h;
12156
            h = o;
12157
            o = 0;
12158
        }
12159
        t[k * 2 - 1] = l;
12160
    }
12161
12162
    if (err == MP_OKAY) {
12163
        r->used = a->used * 2;
12164
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
12165
        sp_clamp(r);
12166
    }
12167
12168
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12169
    if (t != NULL) {
12170
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
12171
    }
12172
#endif
12173
    return err;
12174
}
12175
#else /* !SQR_MUL_ASM */
12176
/* Square a and store in r. r = a * a
12177
 *
12178
 * @param  [in]   a  SP integer to square.
12179
 * @param  [out]  r  SP integer result.
12180
 *
12181
 * @return  MP_OKAY on success.
12182
 * @return  MP_MEM when dynamic memory allocation fails.
12183
 */
12184
static int _sp_sqr(sp_int* a, sp_int* r)
12185
526k
{
12186
526k
    int err = MP_OKAY;
12187
526k
    int i;
12188
526k
    int j;
12189
526k
    int k;
12190
526k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12191
526k
    sp_int_digit* t = NULL;
12192
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
12193
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
12194
    sp_int_digit t[a->used * 2];
12195
#else
12196
    sp_int_digit t[SP_INT_DIGITS];
12197
#endif
12198
12199
526k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12200
526k
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
12201
526k
        DYNAMIC_TYPE_BIGINT);
12202
526k
    if (t == NULL) {
12203
134
        err = MP_MEM;
12204
134
    }
12205
526k
#endif
12206
526k
    if (err == MP_OKAY) {
12207
526k
        sp_int_word w;
12208
526k
        sp_int_word l;
12209
526k
        sp_int_word h;
12210
    #ifdef SP_WORD_OVERFLOW
12211
        sp_int_word o;
12212
    #endif
12213
12214
526k
        w = (sp_int_word)a->dp[0] * a->dp[0];
12215
526k
        t[0] = (sp_int_digit)w;
12216
526k
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
12217
526k
        h = 0;
12218
    #ifdef SP_WORD_OVERFLOW
12219
        o = 0;
12220
    #endif
12221
11.4M
        for (k = 1; k <= (a->used - 1) * 2; k++) {
12222
10.9M
            i = k / 2;
12223
10.9M
            j = k - i;
12224
10.9M
            if (i == j) {
12225
5.47M
                w = (sp_int_word)a->dp[i] * a->dp[j];
12226
5.47M
                l += (sp_int_digit)w;
12227
5.47M
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12228
            #ifdef SP_WORD_OVERFLOW
12229
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12230
                l &= SP_MASK;
12231
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12232
                h &= SP_MASK;
12233
            #endif
12234
5.47M
            }
12235
128M
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
12236
117M
                w = (sp_int_word)a->dp[i] * a->dp[j];
12237
117M
                l += (sp_int_digit)w;
12238
117M
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12239
            #ifdef SP_WORD_OVERFLOW
12240
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12241
                l &= SP_MASK;
12242
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12243
                h &= SP_MASK;
12244
            #endif
12245
117M
                l += (sp_int_digit)w;
12246
117M
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
12247
            #ifdef SP_WORD_OVERFLOW
12248
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
12249
                l &= SP_MASK;
12250
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
12251
                h &= SP_MASK;
12252
            #endif
12253
117M
            }
12254
10.9M
            t[k] = (sp_int_digit)l;
12255
10.9M
            l >>= SP_WORD_SIZE;
12256
10.9M
            l += (sp_int_digit)h;
12257
10.9M
            h >>= SP_WORD_SIZE;
12258
        #ifdef SP_WORD_OVERFLOW
12259
            h += o & SP_MASK;
12260
            o >>= SP_WORD_SIZE;
12261
        #endif
12262
10.9M
        }
12263
526k
        t[k] = (sp_int_digit)l;
12264
526k
        r->used = k + 1;
12265
526k
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
12266
526k
        sp_clamp(r);
12267
526k
    }
12268
12269
526k
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12270
526k
    if (t != NULL) {
12271
526k
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
12272
526k
    }
12273
526k
#endif
12274
526k
    return err;
12275
526k
}
12276
#endif /* SQR_MUL_ASM */
12277
#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
12278
12279
#ifndef WOLFSSL_SP_SMALL
12280
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12281
#if SP_WORD_SIZE == 64
12282
#ifndef SQR_MUL_ASM
12283
/* Square a and store in r. r = a * a
12284
 *
12285
 * Long-hand implementation.
12286
 *
12287
 * @param  [in]   a  SP integer to square.
12288
 * @param  [out]  r  SP integer result.
12289
 *
12290
 * @return  MP_OKAY on success.
12291
 * @return  MP_MEM when dynamic memory allocation fails.
12292
 */
12293
static int _sp_sqr_4(sp_int* a, sp_int* r)
12294
{
12295
    int err = MP_OKAY;
12296
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12297
    sp_int_word* w = NULL;
12298
#else
12299
    sp_int_word w[10];
12300
#endif
12301
    sp_int_digit* da = a->dp;
12302
12303
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12304
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
12305
        DYNAMIC_TYPE_BIGINT);
12306
    if (w == NULL) {
12307
        err = MP_MEM;
12308
    }
12309
#endif
12310
12311
12312
    if (err == MP_OKAY) {
12313
        w[0] = (sp_int_word)da[0] * da[0];
12314
        w[1] = (sp_int_word)da[0] * da[1];
12315
        w[2] = (sp_int_word)da[0] * da[2];
12316
        w[3] = (sp_int_word)da[1] * da[1];
12317
        w[4] = (sp_int_word)da[0] * da[3];
12318
        w[5] = (sp_int_word)da[1] * da[2];
12319
        w[6] = (sp_int_word)da[1] * da[3];
12320
        w[7] = (sp_int_word)da[2] * da[2];
12321
        w[8] = (sp_int_word)da[2] * da[3];
12322
        w[9] = (sp_int_word)da[3] * da[3];
12323
12324
        r->dp[0] = w[0];
12325
        w[0] >>= SP_WORD_SIZE;
12326
        w[0] += (sp_int_digit)w[1];
12327
        w[0] += (sp_int_digit)w[1];
12328
        r->dp[1] = w[0];
12329
        w[0] >>= SP_WORD_SIZE;
12330
        w[1] >>= SP_WORD_SIZE;
12331
        w[0] += (sp_int_digit)w[1];
12332
        w[0] += (sp_int_digit)w[1];
12333
        w[0] += (sp_int_digit)w[2];
12334
        w[0] += (sp_int_digit)w[2];
12335
        w[0] += (sp_int_digit)w[3];
12336
        r->dp[2] = w[0];
12337
        w[0] >>= SP_WORD_SIZE;
12338
        w[2] >>= SP_WORD_SIZE;
12339
        w[0] += (sp_int_digit)w[2];
12340
        w[0] += (sp_int_digit)w[2];
12341
        w[3] >>= SP_WORD_SIZE;
12342
        w[0] += (sp_int_digit)w[3];
12343
        w[0] += (sp_int_digit)w[4];
12344
        w[0] += (sp_int_digit)w[4];
12345
        w[0] += (sp_int_digit)w[5];
12346
        w[0] += (sp_int_digit)w[5];
12347
        r->dp[3] = w[0];
12348
        w[0] >>= SP_WORD_SIZE;
12349
        w[4] >>= SP_WORD_SIZE;
12350
        w[0] += (sp_int_digit)w[4];
12351
        w[0] += (sp_int_digit)w[4];
12352
        w[5] >>= SP_WORD_SIZE;
12353
        w[0] += (sp_int_digit)w[5];
12354
        w[0] += (sp_int_digit)w[5];
12355
        w[0] += (sp_int_digit)w[6];
12356
        w[0] += (sp_int_digit)w[6];
12357
        w[0] += (sp_int_digit)w[7];
12358
        r->dp[4] = w[0];
12359
        w[0] >>= SP_WORD_SIZE;
12360
        w[6] >>= SP_WORD_SIZE;
12361
        w[0] += (sp_int_digit)w[6];
12362
        w[0] += (sp_int_digit)w[6];
12363
        w[7] >>= SP_WORD_SIZE;
12364
        w[0] += (sp_int_digit)w[7];
12365
        w[0] += (sp_int_digit)w[8];
12366
        w[0] += (sp_int_digit)w[8];
12367
        r->dp[5] = w[0];
12368
        w[0] >>= SP_WORD_SIZE;
12369
        w[8] >>= SP_WORD_SIZE;
12370
        w[0] += (sp_int_digit)w[8];
12371
        w[0] += (sp_int_digit)w[8];
12372
        w[0] += (sp_int_digit)w[9];
12373
        r->dp[6] = w[0];
12374
        w[0] >>= SP_WORD_SIZE;
12375
        w[9] >>= SP_WORD_SIZE;
12376
        w[0] += (sp_int_digit)w[9];
12377
        r->dp[7] = w[0];
12378
12379
        r->used = 8;
12380
        sp_clamp(r);
12381
    }
12382
12383
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12384
    if (w != NULL) {
12385
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
12386
    }
12387
#endif
12388
    return err;
12389
}
12390
#else /* SQR_MUL_ASM */
12391
/* Square a and store in r. r = a * a
12392
 *
12393
 * Comba implementation.
 * Unrolled for exactly 4 digits: reads a->dp[0..3], writes the 8 result
 * digits r->dp[0..7], sets r->used to 8 and then calls sp_clamp().
 * Column k accumulates the terms a->dp[i] * a->dp[j] with i + j == k; each
 * i < j pair appears once in the code.
 * NOTE(review): the SP_ASM_* macros are defined elsewhere; presumably the
 * MUL_ADD2 forms add the cross product twice - confirm against definitions.
 * The low 4 result digits are buffered in t and copied into r only after the
 * last read of a->dp.
 * NOTE(review): presumably this is what allows r to alias a - confirm.
12394
 *
12395
 * @param  [in]   a  SP integer to square.
12396
 * @param  [out]  r  SP integer result.
12397
 *
12398
 * @return  MP_OKAY always - it is the only value this variant returns.
12399
 * @return  MP_MEM is never returned here: no dynamic memory is allocated.
12400
 */
12401
static int _sp_sqr_4(sp_int* a, sp_int* r)
12402
{
12403
    sp_int_digit l = 0;
12404
    sp_int_digit h = 0;
12405
    sp_int_digit o = 0;
12406
    sp_int_digit t[4]; /* low half of the result, copied to r at the end */
12407
12408
    SP_ASM_SQR(h, l, a->dp[0]);
12409
    t[0] = h; /* result digit 0 (via t); taken from h, l carries on */
12410
    h = 0;
12411
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12412
    t[1] = l; /* result digit 1 (via t) */
12413
    l = h; /* move accumulator (l, h, o) down one digit for the next column */
12414
    h = o;
12415
    o = 0;
12416
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12417
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12418
    t[2] = l; /* result digit 2 (via t) */
12419
    l = h;
12420
    h = o;
12421
    o = 0;
12422
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12423
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12424
    t[3] = l; /* result digit 3 (via t) */
12425
    l = h;
12426
    h = o;
12427
    o = 0;
12428
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12429
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12430
    r->dp[4] = l; /* result digit 4; index is above the digits read from a */
12431
    l = h;
12432
    h = o;
12433
    o = 0;
12434
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
12435
    r->dp[5] = l; /* result digit 5 */
12436
    l = h;
12437
    h = o;
12438
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
12439
    r->dp[6] = l; /* result digit 6 */
12440
    r->dp[7] = h; /* result digit 7 */
12441
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit)); /* now store digits 0..3 */
12442
    r->used = 8;
12443
    sp_clamp(r); /* defined elsewhere; presumably trims leading zeros - confirm */
12444
12445
    return MP_OKAY;
12446
}
12447
#endif /* SQR_MUL_ASM */
12448
#endif /* SP_WORD_SIZE == 64 */
12449
#if SP_WORD_SIZE == 64
12450
#ifdef SQR_MUL_ASM
12451
/* Square a and store in r. r = a * a
12452
 *
12453
 * Comba implementation.
 * Unrolled for exactly 6 digits: reads a->dp[0..5], writes the 12 result
 * digits r->dp[0..11], sets r->used to 12 and then calls sp_clamp().
 * Column k accumulates the terms a->dp[i] * a->dp[j] with i + j == k; each
 * i < j pair appears once in the code. Columns with three cross products sum
 * them in (tl, th, to) first and fold that in with SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined elsewhere; presumably
 * MUL_ADD2 and ADD_DBL_3 supply the doubling of the cross terms - confirm.
 * The low 6 result digits are buffered in t and copied into r only after the
 * last read of a->dp.
 * NOTE(review): presumably this is what allows r to alias a - confirm.
12454
 *
12455
 * @param  [in]   a  SP integer to square.
12456
 * @param  [out]  r  SP integer result.
12457
 *
12458
 * @return  MP_OKAY always - it is the only value this variant returns.
12459
 * @return  MP_MEM is never returned here: no dynamic memory is allocated.
12460
 */
12461
static int _sp_sqr_6(sp_int* a, sp_int* r)
12462
{
12463
    sp_int_digit l = 0;
12464
    sp_int_digit h = 0;
12465
    sp_int_digit o = 0;
12466
    sp_int_digit tl = 0;
12467
    sp_int_digit th = 0;
12468
    sp_int_digit to;
12469
    sp_int_digit t[6]; /* low half of the result, copied to r at the end */
12470
12471
    /* This function is enclosed in '#if SP_WORD_SIZE == 64', so the 32-bit
     * condition below cannot hold and 'to' is not pre-zeroed here. Its first
     * use is as an operand of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro writes all three outputs - confirm. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12472
    to = 0;
12473
#endif
12474
12475
    SP_ASM_SQR(h, l, a->dp[0]);
12476
    t[0] = h; /* result digit 0 (via t); taken from h, l carries on */
12477
    h = 0;
12478
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12479
    t[1] = l; /* result digit 1 (via t) */
12480
    l = h; /* move accumulator (l, h, o) down one digit for the next column */
12481
    h = o;
12482
    o = 0;
12483
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12484
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12485
    t[2] = l; /* result digit 2 (via t) */
12486
    l = h;
12487
    h = o;
12488
    o = 0;
12489
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12490
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12491
    t[3] = l; /* result digit 3 (via t) */
12492
    l = h;
12493
    h = o;
12494
    o = 0;
12495
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12496
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12497
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12498
    t[4] = l; /* result digit 4 (via t) */
12499
    l = h;
12500
    h = o;
12501
    o = 0;
12502
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12503
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12504
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12505
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12506
    t[5] = l; /* result digit 5 (via t) */
12507
    l = h;
12508
    h = o;
12509
    o = 0;
12510
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
12511
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
12512
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12513
    r->dp[6] = l; /* result digit 6; index is above the digits read from a */
12514
    l = h;
12515
    h = o;
12516
    o = 0;
12517
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
12518
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
12519
    r->dp[7] = l; /* result digit 7 */
12520
    l = h;
12521
    h = o;
12522
    o = 0;
12523
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
12524
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12525
    r->dp[8] = l; /* result digit 8 */
12526
    l = h;
12527
    h = o;
12528
    o = 0;
12529
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
12530
    r->dp[9] = l; /* result digit 9 */
12531
    l = h;
12532
    h = o;
12533
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
12534
    r->dp[10] = l; /* result digit 10 */
12535
    r->dp[11] = h; /* result digit 11 */
12536
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit)); /* now store digits 0..5 */
12537
    r->used = 12;
12538
    sp_clamp(r); /* defined elsewhere; presumably trims leading zeros - confirm */
12539
12540
    return MP_OKAY;
12541
}
12542
#endif /* SQR_MUL_ASM */
12543
#endif /* SP_WORD_SIZE == 64 */
12544
#if SP_WORD_SIZE == 32
12545
#ifdef SQR_MUL_ASM
12546
/* Square a and store in r. r = a * a
12547
 *
12548
 * Comba implementation.
 * Unrolled for exactly 8 digits: reads a->dp[0..7], writes the 16 result
 * digits r->dp[0..15], sets r->used to 16 and then calls sp_clamp().
 * Column k accumulates the terms a->dp[i] * a->dp[j] with i + j == k; each
 * i < j pair appears once in the code. Columns with three or more cross
 * products sum them in (tl, th, to) first and fold that in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined elsewhere; presumably
 * MUL_ADD2 and ADD_DBL_3 supply the doubling of the cross terms - confirm.
 * The low 8 result digits are buffered in t and copied into r only after the
 * last read of a->dp.
 * NOTE(review): presumably this is what allows r to alias a - confirm.
12549
 *
12550
 * @param  [in]   a  SP integer to square.
12551
 * @param  [out]  r  SP integer result.
12552
 *
12553
 * @return  MP_OKAY always - it is the only value this variant returns.
12554
 * @return  MP_MEM is never returned here: no dynamic memory is allocated.
12555
 */
12556
static int _sp_sqr_8(sp_int* a, sp_int* r)
12557
{
12558
    sp_int_digit l = 0;
12559
    sp_int_digit h = 0;
12560
    sp_int_digit o = 0;
12561
    sp_int_digit tl = 0;
12562
    sp_int_digit th = 0;
12563
    sp_int_digit to;
12564
    sp_int_digit t[8]; /* low half of the result, copied to r at the end */
12565
12566
    /* 'to' is pre-zeroed only for the ARM Thumb build; otherwise its first use
     * is as an operand of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro writes all three outputs - confirm. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12567
    to = 0;
12568
#endif
12569
12570
    SP_ASM_SQR(h, l, a->dp[0]);
12571
    t[0] = h; /* result digit 0 (via t); taken from h, l carries on */
12572
    h = 0;
12573
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12574
    t[1] = l; /* result digit 1 (via t) */
12575
    l = h; /* move accumulator (l, h, o) down one digit for the next column */
12576
    h = o;
12577
    o = 0;
12578
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12579
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12580
    t[2] = l; /* result digit 2 (via t) */
12581
    l = h;
12582
    h = o;
12583
    o = 0;
12584
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12585
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12586
    t[3] = l; /* result digit 3 (via t) */
12587
    l = h;
12588
    h = o;
12589
    o = 0;
12590
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12591
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12592
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12593
    t[4] = l; /* result digit 4 (via t) */
12594
    l = h;
12595
    h = o;
12596
    o = 0;
12597
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12598
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12599
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12600
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12601
    t[5] = l; /* result digit 5 (via t) */
12602
    l = h;
12603
    h = o;
12604
    o = 0;
12605
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12606
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12607
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12608
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12609
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12610
    t[6] = l; /* result digit 6 (via t) */
12611
    l = h;
12612
    h = o;
12613
    o = 0;
12614
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12615
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12616
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12617
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12618
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12619
    t[7] = l; /* result digit 7 (via t) */
12620
    l = h;
12621
    h = o;
12622
    o = 0;
12623
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
12624
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12625
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12626
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12627
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12628
    r->dp[8] = l; /* result digit 8; index is above the digits read from a */
12629
    l = h;
12630
    h = o;
12631
    o = 0;
12632
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
12633
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12634
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12635
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12636
    r->dp[9] = l; /* result digit 9 */
12637
    l = h;
12638
    h = o;
12639
    o = 0;
12640
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
12641
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
12642
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
12643
    r->dp[10] = l; /* result digit 10 */
12644
    l = h;
12645
    h = o;
12646
    o = 0;
12647
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
12648
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
12649
    r->dp[11] = l; /* result digit 11 */
12650
    l = h;
12651
    h = o;
12652
    o = 0;
12653
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
12654
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
12655
    r->dp[12] = l; /* result digit 12 */
12656
    l = h;
12657
    h = o;
12658
    o = 0;
12659
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
12660
    r->dp[13] = l; /* result digit 13 */
12661
    l = h;
12662
    h = o;
12663
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
12664
    r->dp[14] = l; /* result digit 14 */
12665
    r->dp[15] = h; /* result digit 15 */
12666
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit)); /* now store digits 0..7 */
12667
    r->used = 16;
12668
    sp_clamp(r); /* defined elsewhere; presumably trims leading zeros - confirm */
12669
12670
    return MP_OKAY;
12671
}
12672
#endif /* SQR_MUL_ASM */
12673
#endif /* SP_WORD_SIZE == 32 */
12674
#if SP_WORD_SIZE == 32
12675
#ifdef SQR_MUL_ASM
12676
/* Square a and store in r. r = a * a
12677
 *
12678
 * Comba implementation.
 * Unrolled for exactly 12 digits: reads a->dp[0..11], writes the 24 result
 * digits r->dp[0..23], sets r->used to 24 and then calls sp_clamp().
 * Column k accumulates the terms a->dp[i] * a->dp[j] with i + j == k; each
 * i < j pair appears once in the code. Columns with three or more cross
 * products sum them in (tl, th, to) first and fold that in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined elsewhere; presumably
 * MUL_ADD2 and ADD_DBL_3 supply the doubling of the cross terms - confirm.
 * The low 12 result digits are buffered in t and copied into r only after
 * the last read of a->dp.
 * NOTE(review): presumably this is what allows r to alias a - confirm.
12679
 *
12680
 * @param  [in]   a  SP integer to square.
12681
 * @param  [out]  r  SP integer result.
12682
 *
12683
 * @return  MP_OKAY always - it is the only value this variant returns.
12684
 * @return  MP_MEM is never returned here: no dynamic memory is allocated.
12685
 */
12686
static int _sp_sqr_12(sp_int* a, sp_int* r)
12687
{
12688
    sp_int_digit l = 0;
12689
    sp_int_digit h = 0;
12690
    sp_int_digit o = 0;
12691
    sp_int_digit tl = 0;
12692
    sp_int_digit th = 0;
12693
    sp_int_digit to;
12694
    sp_int_digit t[12]; /* low half of the result, copied to r at the end */
12695
12696
    /* 'to' is pre-zeroed only for the ARM Thumb build; otherwise its first use
     * is as an operand of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro writes all three outputs - confirm. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12697
    to = 0;
12698
#endif
12699
12700
    SP_ASM_SQR(h, l, a->dp[0]);
12701
    t[0] = h; /* result digit 0 (via t); taken from h, l carries on */
12702
    h = 0;
12703
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12704
    t[1] = l; /* result digit 1 (via t) */
12705
    l = h; /* move accumulator (l, h, o) down one digit for the next column */
12706
    h = o;
12707
    o = 0;
12708
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12709
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12710
    t[2] = l; /* result digit 2 (via t) */
12711
    l = h;
12712
    h = o;
12713
    o = 0;
12714
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12715
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12716
    t[3] = l; /* result digit 3 (via t) */
12717
    l = h;
12718
    h = o;
12719
    o = 0;
12720
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12721
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12722
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12723
    t[4] = l; /* result digit 4 (via t) */
12724
    l = h;
12725
    h = o;
12726
    o = 0;
12727
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12728
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12729
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12730
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12731
    t[5] = l; /* result digit 5 (via t) */
12732
    l = h;
12733
    h = o;
12734
    o = 0;
12735
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12736
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12737
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12738
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12739
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12740
    t[6] = l; /* result digit 6 (via t) */
12741
    l = h;
12742
    h = o;
12743
    o = 0;
12744
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12745
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12746
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12747
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12748
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12749
    t[7] = l; /* result digit 7 (via t) */
12750
    l = h;
12751
    h = o;
12752
    o = 0;
12753
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
12754
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
12755
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12756
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12757
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12758
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12759
    t[8] = l; /* result digit 8 (via t) */
12760
    l = h;
12761
    h = o;
12762
    o = 0;
12763
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
12764
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
12765
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
12766
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12767
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12768
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12769
    t[9] = l; /* result digit 9 (via t) */
12770
    l = h;
12771
    h = o;
12772
    o = 0;
12773
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
12774
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
12775
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
12776
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
12777
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
12778
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
12779
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12780
    t[10] = l; /* result digit 10 (via t) */
12781
    l = h;
12782
    h = o;
12783
    o = 0;
12784
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
12785
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
12786
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
12787
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
12788
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
12789
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
12790
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12791
    t[11] = l; /* result digit 11 (via t) */
12792
    l = h;
12793
    h = o;
12794
    o = 0;
12795
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
12796
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
12797
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
12798
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
12799
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
12800
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
12801
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12802
    r->dp[12] = l; /* result digit 12; index is above the digits read from a */
12803
    l = h;
12804
    h = o;
12805
    o = 0;
12806
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
12807
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
12808
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
12809
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
12810
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
12811
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12812
    r->dp[13] = l; /* result digit 13 */
12813
    l = h;
12814
    h = o;
12815
    o = 0;
12816
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
12817
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
12818
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
12819
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
12820
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
12821
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12822
    r->dp[14] = l; /* result digit 14 */
12823
    l = h;
12824
    h = o;
12825
    o = 0;
12826
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
12827
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
12828
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
12829
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
12830
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12831
    r->dp[15] = l; /* result digit 15 */
12832
    l = h;
12833
    h = o;
12834
    o = 0;
12835
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
12836
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
12837
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
12838
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
12839
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12840
    r->dp[16] = l; /* result digit 16 */
12841
    l = h;
12842
    h = o;
12843
    o = 0;
12844
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
12845
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
12846
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
12847
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12848
    r->dp[17] = l; /* result digit 17 */
12849
    l = h;
12850
    h = o;
12851
    o = 0;
12852
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
12853
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
12854
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
12855
    r->dp[18] = l; /* result digit 18 */
12856
    l = h;
12857
    h = o;
12858
    o = 0;
12859
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
12860
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
12861
    r->dp[19] = l; /* result digit 19 */
12862
    l = h;
12863
    h = o;
12864
    o = 0;
12865
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
12866
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
12867
    r->dp[20] = l; /* result digit 20 */
12868
    l = h;
12869
    h = o;
12870
    o = 0;
12871
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
12872
    r->dp[21] = l; /* result digit 21 */
12873
    l = h;
12874
    h = o;
12875
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
12876
    r->dp[22] = l; /* result digit 22 */
12877
    r->dp[23] = h; /* result digit 23 */
12878
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit)); /* now store digits 0..11 */
12879
    r->used = 24;
12880
    sp_clamp(r); /* defined elsewhere; presumably trims leading zeros - confirm */
12881
12882
    return MP_OKAY;
12883
}
12884
#endif /* SQR_MUL_ASM */
12885
#endif /* SP_WORD_SIZE == 32 */
12886
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12887
12888
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12889
    #if SP_INT_DIGITS >= 32
12890
/* Square a and store in r. r = a * a
12891
 *
12892
 * Comba implementation.
 * Unrolled for exactly 16 digits: reads a->dp[0..15], writes the 32 result
 * digits r->dp[0..31], sets r->used to 32 and then calls sp_clamp().
 * Column k accumulates the terms a->dp[i] * a->dp[j] with i + j == k; each
 * i < j pair appears once in the code. Columns with three or more cross
 * products sum them in (tl, th, to) first and fold that in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined elsewhere; presumably
 * MUL_ADD2 and ADD_DBL_3 supply the doubling of the cross terms - confirm.
 * The low 16 result digits are buffered in t and copied into r only after
 * the last read of a->dp.
 * NOTE(review): presumably this is what allows r to alias a - confirm.
 * When the scratch allocation fails r is left untouched: every write to r is
 * inside the 'err == MP_OKAY' branch.
12893
 *
12894
 * @param  [in]   a  SP integer to square.
12895
 * @param  [out]  r  SP integer result.
12896
 *
12897
 * @return  MP_OKAY on success.
12898
 * @return  MP_MEM when dynamic memory allocation fails. Only possible when
 *          WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not;
 *          in other builds t is a local array and nothing is allocated.
12899
 */
12900
static int _sp_sqr_16(sp_int* a, sp_int* r)
12901
{
12902
    int err = MP_OKAY;
12903
    sp_int_digit l = 0;
12904
    sp_int_digit h = 0;
12905
    sp_int_digit o = 0;
12906
    sp_int_digit tl = 0;
12907
    sp_int_digit th = 0;
12908
    sp_int_digit to;
12909
    /* t holds the low 16 result digits until they are copied to r. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12910
    sp_int_digit* t = NULL;
12911
#else
12912
    sp_int_digit t[16];
12913
#endif
12914
12915
    /* 'to' is pre-zeroed only for the ARM Thumb build; otherwise its first use
     * is as an operand of SP_ASM_MUL_SET.
     * NOTE(review): presumably that macro writes all three outputs - confirm. */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
12916
    to = 0;
12917
#endif
12918
12919
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12920
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
12921
         DYNAMIC_TYPE_BIGINT);
12922
     if (t == NULL) {
12923
         err = MP_MEM;
12924
     }
12925
#endif
12926
    if (err == MP_OKAY) {
12927
        SP_ASM_SQR(h, l, a->dp[0]);
12928
        t[0] = h; /* result digit 0 (via t); taken from h, l carries on */
12929
        h = 0;
12930
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
12931
        t[1] = l; /* result digit 1 (via t) */
12932
        l = h; /* move accumulator (l, h, o) down one digit for next column */
12933
        h = o;
12934
        o = 0;
12935
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
12936
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
12937
        t[2] = l; /* result digit 2 (via t) */
12938
        l = h;
12939
        h = o;
12940
        o = 0;
12941
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
12942
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
12943
        t[3] = l; /* result digit 3 (via t) */
12944
        l = h;
12945
        h = o;
12946
        o = 0;
12947
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
12948
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
12949
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
12950
        t[4] = l; /* result digit 4 (via t) */
12951
        l = h;
12952
        h = o;
12953
        o = 0;
12954
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
12955
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
12956
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
12957
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12958
        t[5] = l; /* result digit 5 (via t) */
12959
        l = h;
12960
        h = o;
12961
        o = 0;
12962
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
12963
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
12964
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
12965
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
12966
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12967
        t[6] = l; /* result digit 6 (via t) */
12968
        l = h;
12969
        h = o;
12970
        o = 0;
12971
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
12972
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
12973
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
12974
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
12975
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12976
        t[7] = l; /* result digit 7 (via t) */
12977
        l = h;
12978
        h = o;
12979
        o = 0;
12980
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
12981
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
12982
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
12983
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
12984
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
12985
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12986
        t[8] = l; /* result digit 8 (via t) */
12987
        l = h;
12988
        h = o;
12989
        o = 0;
12990
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
12991
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
12992
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
12993
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
12994
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
12995
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
12996
        t[9] = l; /* result digit 9 (via t) */
12997
        l = h;
12998
        h = o;
12999
        o = 0;
13000
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
13001
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
13002
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
13003
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
13004
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
13005
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
13006
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13007
        t[10] = l; /* result digit 10 (via t) */
13008
        l = h;
13009
        h = o;
13010
        o = 0;
13011
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
13012
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
13013
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
13014
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
13015
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
13016
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
13017
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13018
        t[11] = l; /* result digit 11 (via t) */
13019
        l = h;
13020
        h = o;
13021
        o = 0;
13022
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
13023
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
13024
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
13025
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
13026
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
13027
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
13028
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
13029
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13030
        t[12] = l; /* result digit 12 (via t) */
13031
        l = h;
13032
        h = o;
13033
        o = 0;
13034
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
13035
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
13036
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
13037
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
13038
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
13039
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
13040
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
13041
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13042
        t[13] = l; /* result digit 13 (via t) */
13043
        l = h;
13044
        h = o;
13045
        o = 0;
13046
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
13047
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
13048
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
13049
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
13050
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
13051
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
13052
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
13053
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
13054
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13055
        t[14] = l; /* result digit 14 (via t) */
13056
        l = h;
13057
        h = o;
13058
        o = 0;
13059
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
13060
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
13061
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
13062
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
13063
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
13064
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
13065
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
13066
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
13067
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13068
        t[15] = l; /* result digit 15 (via t) */
13069
        l = h;
13070
        h = o;
13071
        o = 0;
13072
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
13073
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
13074
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
13075
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
13076
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
13077
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
13078
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
13079
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
13080
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13081
        r->dp[16] = l; /* result digit 16; index is above digits read from a */
13082
        l = h;
13083
        h = o;
13084
        o = 0;
13085
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
13086
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
13087
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
13088
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
13089
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
13090
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
13091
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
13092
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13093
        r->dp[17] = l; /* result digit 17 */
13094
        l = h;
13095
        h = o;
13096
        o = 0;
13097
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
13098
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
13099
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
13100
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
13101
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
13102
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
13103
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
13104
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13105
        r->dp[18] = l; /* result digit 18 */
13106
        l = h;
13107
        h = o;
13108
        o = 0;
13109
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
13110
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
13111
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
13112
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
13113
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
13114
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
13115
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13116
        r->dp[19] = l; /* result digit 19 */
13117
        l = h;
13118
        h = o;
13119
        o = 0;
13120
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
13121
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
13122
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
13123
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
13124
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
13125
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
13126
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13127
        r->dp[20] = l; /* result digit 20 */
13128
        l = h;
13129
        h = o;
13130
        o = 0;
13131
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
13132
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
13133
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
13134
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
13135
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
13136
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13137
        r->dp[21] = l; /* result digit 21 */
13138
        l = h;
13139
        h = o;
13140
        o = 0;
13141
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
13142
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
13143
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
13144
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
13145
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
13146
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13147
        r->dp[22] = l; /* result digit 22 */
13148
        l = h;
13149
        h = o;
13150
        o = 0;
13151
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
13152
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
13153
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
13154
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
13155
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13156
        r->dp[23] = l; /* result digit 23 */
13157
        l = h;
13158
        h = o;
13159
        o = 0;
13160
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
13161
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
13162
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
13163
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
13164
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13165
        r->dp[24] = l; /* result digit 24 */
13166
        l = h;
13167
        h = o;
13168
        o = 0;
13169
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
13170
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
13171
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
13172
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13173
        r->dp[25] = l; /* result digit 25 */
13174
        l = h;
13175
        h = o;
13176
        o = 0;
13177
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
13178
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
13179
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
13180
        r->dp[26] = l; /* result digit 26 */
13181
        l = h;
13182
        h = o;
13183
        o = 0;
13184
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
13185
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
13186
        r->dp[27] = l; /* result digit 27 */
13187
        l = h;
13188
        h = o;
13189
        o = 0;
13190
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
13191
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
13192
        r->dp[28] = l; /* result digit 28 */
13193
        l = h;
13194
        h = o;
13195
        o = 0;
13196
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
13197
        r->dp[29] = l; /* result digit 29 */
13198
        l = h;
13199
        h = o;
13200
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
13201
        r->dp[30] = l; /* result digit 30 */
13202
        r->dp[31] = h; /* result digit 31 */
13203
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit)); /* store digits 0..15 */
13204
        r->used = 32;
13205
        sp_clamp(r); /* defined elsewhere; presumably trims leading zeros */
13206
    }
13207
13208
    /* Release the scratch buffer on both the success and the failure path. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13209
    if (t != NULL) {
13210
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
13211
    }
13212
#endif
13213
    return err;
13214
}
13215
    #endif /* SP_INT_DIGITS >= 32 */
13216
13217
    #if SP_INT_DIGITS >= 48
13218
/* Square a and store in r. r = a * a
13219
 *
13220
 * Comba implementation.
13221
 *
 * Fully unrolled for a 24-digit operand: reads a->dp[0..23] and produces the
 * 48 result digits one column at a time. Columns 0..23 are staged in the
 * temporary t and copied into r->dp at the end; columns 24..47 are stored
 * directly into r->dp. r->used is set to 48 and the result is then clamped.
 *
 * When WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not, t is
 * allocated with XMALLOC and released before returning; otherwise it is a
 * local array.
 *
13222
 * @param  [in]   a  SP integer to square.
13223
 * @param  [out]  r  SP integer result.
13224
 *
13225
 * @return  MP_OKAY on success.
13226
 * @return  MP_MEM when dynamic memory allocation fails.
13227
 */
13228
static int _sp_sqr_24(sp_int* a, sp_int* r)
13229
{
13230
    int err = MP_OKAY;
13231
    /* l, h, o: running column accumulator, shifted down one word (l = h,
     * h = o, o = 0) after each result digit is stored. The SP_ASM_* macros are
     * defined elsewhere - presumably l/h/o are the low/high/overflow words. */
    sp_int_digit l = 0;
13232
    sp_int_digit h = 0;
13233
    sp_int_digit o = 0;
13234
    /* tl, th, to: second accumulator used for the cross products of the wider
     * columns; it is folded into l/h/o with SP_ASM_ADD_DBL_3 (presumably added
     * doubled, since each cross product occurs twice in a square). */
    sp_int_digit tl = 0;
13235
    sp_int_digit th = 0;
13236
    sp_int_digit to;
13237
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13238
    sp_int_digit* t = NULL;
13239
#else
13240
    sp_int_digit t[24];
13241
#endif
13242
13243
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
13244
    /* Explicit initialization only for this build configuration; in other
     * builds 'to' is first written by SP_ASM_MUL_SET (assumed - confirm). */
    to = 0;
13245
#endif
13246
13247
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13248
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
13249
         DYNAMIC_TYPE_BIGINT);
13250
     if (t == NULL) {
13251
         err = MP_MEM;
13252
     }
13253
#endif
13254
    if (err == MP_OKAY) {
13255
        /* Columns 0..23: result digits are stored in t, not in r. */
        SP_ASM_SQR(h, l, a->dp[0]);
13256
        t[0] = h;
13257
        h = 0;
13258
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
13259
        t[1] = l;
13260
        l = h;
13261
        h = o;
13262
        o = 0;
13263
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
13264
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
13265
        t[2] = l;
13266
        l = h;
13267
        h = o;
13268
        o = 0;
13269
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
13270
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
13271
        t[3] = l;
13272
        l = h;
13273
        h = o;
13274
        o = 0;
13275
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
13276
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
13277
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
13278
        t[4] = l;
13279
        l = h;
13280
        h = o;
13281
        o = 0;
13282
        /* From column 5 up to column 41 the cross products are summed in
         * tl/th/to and folded in with a single SP_ASM_ADD_DBL_3. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
13283
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
13284
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
13285
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13286
        t[5] = l;
13287
        l = h;
13288
        h = o;
13289
        o = 0;
13290
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
13291
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
13292
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
13293
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
13294
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13295
        t[6] = l;
13296
        l = h;
13297
        h = o;
13298
        o = 0;
13299
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
13300
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
13301
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
13302
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
13303
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13304
        t[7] = l;
13305
        l = h;
13306
        h = o;
13307
        o = 0;
13308
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
13309
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
13310
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
13311
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
13312
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
13313
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13314
        t[8] = l;
13315
        l = h;
13316
        h = o;
13317
        o = 0;
13318
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
13319
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
13320
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
13321
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
13322
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
13323
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13324
        t[9] = l;
13325
        l = h;
13326
        h = o;
13327
        o = 0;
13328
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
13329
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
13330
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
13331
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
13332
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
13333
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
13334
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13335
        t[10] = l;
13336
        l = h;
13337
        h = o;
13338
        o = 0;
13339
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
13340
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
13341
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
13342
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
13343
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
13344
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
13345
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13346
        t[11] = l;
13347
        l = h;
13348
        h = o;
13349
        o = 0;
13350
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
13351
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
13352
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
13353
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
13354
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
13355
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
13356
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
13357
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13358
        t[12] = l;
13359
        l = h;
13360
        h = o;
13361
        o = 0;
13362
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
13363
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
13364
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
13365
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
13366
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
13367
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
13368
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
13369
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13370
        t[13] = l;
13371
        l = h;
13372
        h = o;
13373
        o = 0;
13374
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
13375
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
13376
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
13377
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
13378
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
13379
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
13380
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
13381
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
13382
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13383
        t[14] = l;
13384
        l = h;
13385
        h = o;
13386
        o = 0;
13387
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
13388
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
13389
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
13390
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
13391
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
13392
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
13393
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
13394
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
13395
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13396
        t[15] = l;
13397
        l = h;
13398
        h = o;
13399
        o = 0;
13400
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
13401
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
13402
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
13403
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
13404
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
13405
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
13406
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
13407
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
13408
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
13409
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13410
        t[16] = l;
13411
        l = h;
13412
        h = o;
13413
        o = 0;
13414
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
13415
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
13416
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
13417
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
13418
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
13419
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
13420
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
13421
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
13422
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
13423
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13424
        t[17] = l;
13425
        l = h;
13426
        h = o;
13427
        o = 0;
13428
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
13429
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
13430
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
13431
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
13432
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
13433
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
13434
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
13435
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
13436
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
13437
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
13438
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13439
        t[18] = l;
13440
        l = h;
13441
        h = o;
13442
        o = 0;
13443
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
13444
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
13445
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
13446
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
13447
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
13448
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
13449
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
13450
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
13451
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
13452
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
13453
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13454
        t[19] = l;
13455
        l = h;
13456
        h = o;
13457
        o = 0;
13458
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
13459
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
13460
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
13461
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
13462
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
13463
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
13464
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
13465
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
13466
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
13467
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
13468
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
13469
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13470
        t[20] = l;
13471
        l = h;
13472
        h = o;
13473
        o = 0;
13474
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
13475
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
13476
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
13477
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
13478
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
13479
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
13480
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
13481
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
13482
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
13483
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
13484
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
13485
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13486
        t[21] = l;
13487
        l = h;
13488
        h = o;
13489
        o = 0;
13490
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
13491
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
13492
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
13493
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
13494
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
13495
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
13496
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
13497
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
13498
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
13499
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
13500
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
13501
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
13502
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13503
        t[22] = l;
13504
        l = h;
13505
        h = o;
13506
        o = 0;
13507
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
13508
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
13509
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
13510
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
13511
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
13512
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
13513
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
13514
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
13515
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
13516
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
13517
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
13518
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
13519
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13520
        t[23] = l;
13521
        l = h;
13522
        h = o;
13523
        o = 0;
13524
        /* Columns 24..47: result digits are written directly into r->dp.
         * Only a->dp[0..23] is read, so these stores do not overlap the
         * operand digits still being used. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
13525
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
13526
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
13527
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
13528
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
13529
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
13530
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
13531
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
13532
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
13533
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
13534
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
13535
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
13536
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13537
        r->dp[24] = l;
13538
        l = h;
13539
        h = o;
13540
        o = 0;
13541
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
13542
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
13543
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
13544
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
13545
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
13546
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
13547
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
13548
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
13549
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
13550
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
13551
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
13552
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13553
        r->dp[25] = l;
13554
        l = h;
13555
        h = o;
13556
        o = 0;
13557
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
13558
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
13559
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
13560
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
13561
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
13562
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
13563
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
13564
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
13565
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
13566
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
13567
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
13568
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13569
        r->dp[26] = l;
13570
        l = h;
13571
        h = o;
13572
        o = 0;
13573
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
13574
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
13575
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
13576
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
13577
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
13578
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
13579
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
13580
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
13581
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
13582
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
13583
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13584
        r->dp[27] = l;
13585
        l = h;
13586
        h = o;
13587
        o = 0;
13588
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
13589
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
13590
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
13591
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
13592
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
13593
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
13594
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
13595
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
13596
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
13597
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
13598
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13599
        r->dp[28] = l;
13600
        l = h;
13601
        h = o;
13602
        o = 0;
13603
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
13604
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
13605
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
13606
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
13607
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
13608
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
13609
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
13610
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
13611
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
13612
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13613
        r->dp[29] = l;
13614
        l = h;
13615
        h = o;
13616
        o = 0;
13617
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
13618
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
13619
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
13620
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
13621
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
13622
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
13623
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
13624
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
13625
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
13626
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13627
        r->dp[30] = l;
13628
        l = h;
13629
        h = o;
13630
        o = 0;
13631
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
13632
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
13633
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
13634
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
13635
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
13636
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
13637
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
13638
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
13639
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13640
        r->dp[31] = l;
13641
        l = h;
13642
        h = o;
13643
        o = 0;
13644
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
13645
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
13646
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
13647
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
13648
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
13649
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
13650
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
13651
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
13652
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13653
        r->dp[32] = l;
13654
        l = h;
13655
        h = o;
13656
        o = 0;
13657
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
13658
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
13659
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
13660
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
13661
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
13662
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
13663
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
13664
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13665
        r->dp[33] = l;
13666
        l = h;
13667
        h = o;
13668
        o = 0;
13669
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
13670
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
13671
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
13672
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
13673
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
13674
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
13675
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
13676
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13677
        r->dp[34] = l;
13678
        l = h;
13679
        h = o;
13680
        o = 0;
13681
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
13682
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
13683
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
13684
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
13685
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
13686
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
13687
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13688
        r->dp[35] = l;
13689
        l = h;
13690
        h = o;
13691
        o = 0;
13692
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
13693
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
13694
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
13695
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
13696
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
13697
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
13698
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13699
        r->dp[36] = l;
13700
        l = h;
13701
        h = o;
13702
        o = 0;
13703
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
13704
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
13705
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
13706
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
13707
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
13708
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13709
        r->dp[37] = l;
13710
        l = h;
13711
        h = o;
13712
        o = 0;
13713
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
13714
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
13715
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
13716
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
13717
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
13718
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13719
        r->dp[38] = l;
13720
        l = h;
13721
        h = o;
13722
        o = 0;
13723
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
13724
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
13725
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
13726
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
13727
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13728
        r->dp[39] = l;
13729
        l = h;
13730
        h = o;
13731
        o = 0;
13732
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
13733
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
13734
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
13735
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
13736
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13737
        r->dp[40] = l;
13738
        l = h;
13739
        h = o;
13740
        o = 0;
13741
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
13742
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
13743
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
13744
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
13745
        r->dp[41] = l;
13746
        l = h;
13747
        h = o;
13748
        o = 0;
13749
        /* The last, narrow columns add each cross product straight into
         * l/h/o with SP_ASM_MUL_ADD2 instead of going through tl/th/to. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
13750
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
13751
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
13752
        r->dp[42] = l;
13753
        l = h;
13754
        h = o;
13755
        o = 0;
13756
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
13757
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
13758
        r->dp[43] = l;
13759
        l = h;
13760
        h = o;
13761
        o = 0;
13762
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
13763
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
13764
        r->dp[44] = l;
13765
        l = h;
13766
        h = o;
13767
        o = 0;
13768
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
13769
        r->dp[45] = l;
13770
        l = h;
13771
        h = o;
13772
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
13773
        r->dp[46] = l;
13774
        r->dp[47] = h;
13775
        /* Move the staged low 24 digits into the result. */
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
13776
        r->used = 48;
13777
        sp_clamp(r);
13778
    }
13779
13780
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13781
    /* Release the heap-allocated staging buffer (only in this build mode). */
    if (t != NULL) {
13782
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
13783
    }
13784
#endif
13785
    return err;
13786
}
13787
    #endif /* SP_INT_DIGITS >= 48 */
13788
13789
    #if SP_INT_DIGITS >= 64
13790
/* Square a and store in r. r = a * a
13791
 *
13792
 * Karatsuba implementation.
13793
 *
13794
 * @param  [in]   a  SP integer to square.
13795
 * @param  [out]  r  SP integer result.
13796
 *
13797
 * @return  MP_OKAY on success.
13798
 * @return  MP_MEM when dynamic memory allocation fails.
13799
 */
13800
static int _sp_sqr_32(sp_int* a, sp_int* r)
13801
{
13802
    int err = MP_OKAY;
13803
    int i;
13804
    sp_int_digit l;
13805
    sp_int_digit h;
13806
    sp_int* z0;
13807
    sp_int* z1;
13808
    sp_int* z2;
13809
    sp_int_digit ca;
13810
    DECL_SP_INT(a1, 16);
13811
    DECL_SP_INT_ARRAY(z, 33, 2);
13812
13813
    ALLOC_SP_INT(a1, 16, err, NULL);
13814
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
13815
    if (err == MP_OKAY) {
13816
        z1 = z[0];
13817
        z2 = z[1];
13818
        z0 = r;
13819
13820
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
13821
        a1->used = 16;
13822
13823
        /* z2 = a1 ^ 2 */
13824
        err = _sp_sqr_16(a1, z2);
13825
    }
13826
    if (err == MP_OKAY) {
13827
        l = 0;
13828
        h = 0;
13829
        for (i = 0; i < 16; i++) {
13830
            SP_ASM_ADDC(l, h, a1->dp[i]);
13831
            SP_ASM_ADDC(l, h, a->dp[i]);
13832
            a1->dp[i] = l;
13833
            l = h;
13834
            h = 0;
13835
        }
13836
        ca = l;
13837
13838
        /* z0 = a0 ^ 2 */
13839
        err = _sp_sqr_16(a, z0);
13840
    }
13841
    if (err == MP_OKAY) {
13842
        /* z1 = (a0 + a1) ^ 2 */
13843
        err = _sp_sqr_16(a1, z1);
13844
    }
13845
    if (err == MP_OKAY) {
13846
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
13847
        /* r = z0 */
13848
        /* r += (z1 - z0 - z2) << 16 */
13849
        z1->dp[32] = ca;
13850
        l = 0;
13851
        if (ca) {
13852
            l = z1->dp[0 + 16];
13853
            h = 0;
13854
            SP_ASM_ADDC(l, h, a1->dp[0]);
13855
            SP_ASM_ADDC(l, h, a1->dp[0]);
13856
            z1->dp[0 + 16] = l;
13857
            l = h;
13858
            h = 0;
13859
            for (i = 1; i < 16; i++) {
13860
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
13861
                SP_ASM_ADDC(l, h, a1->dp[i]);
13862
                SP_ASM_ADDC(l, h, a1->dp[i]);
13863
                z1->dp[i + 16] = l;
13864
                l = h;
13865
                h = 0;
13866
            }
13867
        }
13868
        z1->dp[32] += l;
13869
        /* z1 = z1 - z0 - z1 */
13870
        l = z1->dp[0];
13871
        h = 0;
13872
        SP_ASM_SUBC(l, h, z0->dp[0]);
13873
        SP_ASM_SUBC(l, h, z2->dp[0]);
13874
        z1->dp[0] = l;
13875
        l = h;
13876
        h = 0;
13877
        for (i = 1; i < 32; i++) {
13878
            l += z1->dp[i];
13879
            SP_ASM_SUBC(l, h, z0->dp[i]);
13880
            SP_ASM_SUBC(l, h, z2->dp[i]);
13881
            z1->dp[i] = l;
13882
            l = h;
13883
            h = 0;
13884
        }
13885
        z1->dp[i] += l;
13886
        /* r += z1 << 16 */
13887
        l = 0;
13888
        h = 0;
13889
        for (i = 0; i < 16; i++) {
13890
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
13891
            SP_ASM_ADDC(l, h, z1->dp[i]);
13892
            r->dp[i + 16] = l;
13893
            l = h;
13894
            h = 0;
13895
        }
13896
        for (; i < 33; i++) {
13897
            SP_ASM_ADDC(l, h, z1->dp[i]);
13898
            r->dp[i + 16] = l;
13899
            l = h;
13900
            h = 0;
13901
        }
13902
        /* r += z2 << 32  */
13903
        l = 0;
13904
        h = 0;
13905
        for (i = 0; i < 17; i++) {
13906
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
13907
            SP_ASM_ADDC(l, h, z2->dp[i]);
13908
            r->dp[i + 32] = l;
13909
            l = h;
13910
            h = 0;
13911
        }
13912
        for (; i < 32; i++) {
13913
            SP_ASM_ADDC(l, h, z2->dp[i]);
13914
            r->dp[i + 32] = l;
13915
            l = h;
13916
            h = 0;
13917
        }
13918
        r->used = 64;
13919
        sp_clamp(r);
13920
    }
13921
13922
    FREE_SP_INT_ARRAY(z, NULL);
13923
    FREE_SP_INT(a1, NULL);
13924
    return err;
13925
}
13926
    #endif /* SP_INT_DIGITS >= 64 */
13927
13928
    #if SP_INT_DIGITS >= 96
13929
/* Square a and store in r. r = a * a
13930
 *
13931
 * Karatsuba implementation.
13932
 *
13933
 * @param  [in]   a  SP integer to square.
13934
 * @param  [out]  r  SP integer result.
13935
 *
13936
 * @return  MP_OKAY on success.
13937
 * @return  MP_MEM when dynamic memory allocation fails.
13938
 */
13939
static int _sp_sqr_48(sp_int* a, sp_int* r)
13940
{
13941
    int err = MP_OKAY;
13942
    int i;
13943
    sp_int_digit l;
13944
    sp_int_digit h;
13945
    sp_int* z0;
13946
    sp_int* z1;
13947
    sp_int* z2;
13948
    sp_int_digit ca;
13949
    DECL_SP_INT(a1, 24);
13950
    DECL_SP_INT_ARRAY(z, 49, 2);
13951
13952
    ALLOC_SP_INT(a1, 24, err, NULL);
13953
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
13954
    if (err == MP_OKAY) {
13955
        z1 = z[0];
13956
        z2 = z[1];
13957
        z0 = r;
13958
13959
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
13960
        a1->used = 24;
13961
13962
        /* z2 = a1 ^ 2 */
13963
        err = _sp_sqr_24(a1, z2);
13964
    }
13965
    if (err == MP_OKAY) {
13966
        l = 0;
13967
        h = 0;
13968
        for (i = 0; i < 24; i++) {
13969
            SP_ASM_ADDC(l, h, a1->dp[i]);
13970
            SP_ASM_ADDC(l, h, a->dp[i]);
13971
            a1->dp[i] = l;
13972
            l = h;
13973
            h = 0;
13974
        }
13975
        ca = l;
13976
13977
        /* z0 = a0 ^ 2 */
13978
        err = _sp_sqr_24(a, z0);
13979
    }
13980
    if (err == MP_OKAY) {
13981
        /* z1 = (a0 + a1) ^ 2 */
13982
        err = _sp_sqr_24(a1, z1);
13983
    }
13984
    if (err == MP_OKAY) {
13985
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
13986
        /* r = z0 */
13987
        /* r += (z1 - z0 - z2) << 24 */
13988
        z1->dp[48] = ca;
13989
        l = 0;
13990
        if (ca) {
13991
            l = z1->dp[0 + 24];
13992
            h = 0;
13993
            SP_ASM_ADDC(l, h, a1->dp[0]);
13994
            SP_ASM_ADDC(l, h, a1->dp[0]);
13995
            z1->dp[0 + 24] = l;
13996
            l = h;
13997
            h = 0;
13998
            for (i = 1; i < 24; i++) {
13999
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
14000
                SP_ASM_ADDC(l, h, a1->dp[i]);
14001
                SP_ASM_ADDC(l, h, a1->dp[i]);
14002
                z1->dp[i + 24] = l;
14003
                l = h;
14004
                h = 0;
14005
            }
14006
        }
14007
        z1->dp[48] += l;
14008
        /* z1 = z1 - z0 - z1 */
14009
        l = z1->dp[0];
14010
        h = 0;
14011
        SP_ASM_SUBC(l, h, z0->dp[0]);
14012
        SP_ASM_SUBC(l, h, z2->dp[0]);
14013
        z1->dp[0] = l;
14014
        l = h;
14015
        h = 0;
14016
        for (i = 1; i < 48; i++) {
14017
            l += z1->dp[i];
14018
            SP_ASM_SUBC(l, h, z0->dp[i]);
14019
            SP_ASM_SUBC(l, h, z2->dp[i]);
14020
            z1->dp[i] = l;
14021
            l = h;
14022
            h = 0;
14023
        }
14024
        z1->dp[i] += l;
14025
        /* r += z1 << 16 */
14026
        l = 0;
14027
        h = 0;
14028
        for (i = 0; i < 24; i++) {
14029
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
14030
            SP_ASM_ADDC(l, h, z1->dp[i]);
14031
            r->dp[i + 24] = l;
14032
            l = h;
14033
            h = 0;
14034
        }
14035
        for (; i < 49; i++) {
14036
            SP_ASM_ADDC(l, h, z1->dp[i]);
14037
            r->dp[i + 24] = l;
14038
            l = h;
14039
            h = 0;
14040
        }
14041
        /* r += z2 << 48  */
14042
        l = 0;
14043
        h = 0;
14044
        for (i = 0; i < 25; i++) {
14045
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
14046
            SP_ASM_ADDC(l, h, z2->dp[i]);
14047
            r->dp[i + 48] = l;
14048
            l = h;
14049
            h = 0;
14050
        }
14051
        for (; i < 48; i++) {
14052
            SP_ASM_ADDC(l, h, z2->dp[i]);
14053
            r->dp[i + 48] = l;
14054
            l = h;
14055
            h = 0;
14056
        }
14057
        r->used = 96;
14058
        sp_clamp(r);
14059
    }
14060
14061
    FREE_SP_INT_ARRAY(z, NULL);
14062
    FREE_SP_INT(a1, NULL);
14063
    return err;
14064
}
14065
    #endif /* SP_INT_DIGITS >= 96 */
14066
14067
    #if SP_INT_DIGITS >= 128
14068
/* Square a and store in r. r = a * a
14069
 *
14070
 * Karatsuba implementation.
14071
 *
14072
 * @param  [in]   a  SP integer to square.
14073
 * @param  [out]  r  SP integer result.
14074
 *
14075
 * @return  MP_OKAY on success.
14076
 * @return  MP_MEM when dynamic memory allocation fails.
14077
 */
14078
static int _sp_sqr_64(sp_int* a, sp_int* r)
14079
{
14080
    int err = MP_OKAY;
14081
    int i;
14082
    sp_int_digit l;
14083
    sp_int_digit h;
14084
    sp_int* z0;
14085
    sp_int* z1;
14086
    sp_int* z2;
14087
    sp_int_digit ca;
14088
    DECL_SP_INT(a1, 32);
14089
    DECL_SP_INT_ARRAY(z, 65, 2);
14090
14091
    ALLOC_SP_INT(a1, 32, err, NULL);
14092
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
14093
    if (err == MP_OKAY) {
14094
        z1 = z[0];
14095
        z2 = z[1];
14096
        z0 = r;
14097
14098
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
14099
        a1->used = 32;
14100
14101
        /* z2 = a1 ^ 2 */
14102
        err = _sp_sqr_32(a1, z2);
14103
    }
14104
    if (err == MP_OKAY) {
14105
        l = 0;
14106
        h = 0;
14107
        for (i = 0; i < 32; i++) {
14108
            SP_ASM_ADDC(l, h, a1->dp[i]);
14109
            SP_ASM_ADDC(l, h, a->dp[i]);
14110
            a1->dp[i] = l;
14111
            l = h;
14112
            h = 0;
14113
        }
14114
        ca = l;
14115
14116
        /* z0 = a0 ^ 2 */
14117
        err = _sp_sqr_32(a, z0);
14118
    }
14119
    if (err == MP_OKAY) {
14120
        /* z1 = (a0 + a1) ^ 2 */
14121
        err = _sp_sqr_32(a1, z1);
14122
    }
14123
    if (err == MP_OKAY) {
14124
        /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
14125
        /* r = z0 */
14126
        /* r += (z1 - z0 - z2) << 32 */
14127
        z1->dp[64] = ca;
14128
        l = 0;
14129
        if (ca) {
14130
            l = z1->dp[0 + 32];
14131
            h = 0;
14132
            SP_ASM_ADDC(l, h, a1->dp[0]);
14133
            SP_ASM_ADDC(l, h, a1->dp[0]);
14134
            z1->dp[0 + 32] = l;
14135
            l = h;
14136
            h = 0;
14137
            for (i = 1; i < 32; i++) {
14138
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
14139
                SP_ASM_ADDC(l, h, a1->dp[i]);
14140
                SP_ASM_ADDC(l, h, a1->dp[i]);
14141
                z1->dp[i + 32] = l;
14142
                l = h;
14143
                h = 0;
14144
            }
14145
        }
14146
        z1->dp[64] += l;
14147
        /* z1 = z1 - z0 - z1 */
14148
        l = z1->dp[0];
14149
        h = 0;
14150
        SP_ASM_SUBC(l, h, z0->dp[0]);
14151
        SP_ASM_SUBC(l, h, z2->dp[0]);
14152
        z1->dp[0] = l;
14153
        l = h;
14154
        h = 0;
14155
        for (i = 1; i < 64; i++) {
14156
            l += z1->dp[i];
14157
            SP_ASM_SUBC(l, h, z0->dp[i]);
14158
            SP_ASM_SUBC(l, h, z2->dp[i]);
14159
            z1->dp[i] = l;
14160
            l = h;
14161
            h = 0;
14162
        }
14163
        z1->dp[i] += l;
14164
        /* r += z1 << 16 */
14165
        l = 0;
14166
        h = 0;
14167
        for (i = 0; i < 32; i++) {
14168
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
14169
            SP_ASM_ADDC(l, h, z1->dp[i]);
14170
            r->dp[i + 32] = l;
14171
            l = h;
14172
            h = 0;
14173
        }
14174
        for (; i < 65; i++) {
14175
            SP_ASM_ADDC(l, h, z1->dp[i]);
14176
            r->dp[i + 32] = l;
14177
            l = h;
14178
            h = 0;
14179
        }
14180
        /* r += z2 << 64  */
14181
        l = 0;
14182
        h = 0;
14183
        for (i = 0; i < 33; i++) {
14184
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
14185
            SP_ASM_ADDC(l, h, z2->dp[i]);
14186
            r->dp[i + 64] = l;
14187
            l = h;
14188
            h = 0;
14189
        }
14190
        for (; i < 64; i++) {
14191
            SP_ASM_ADDC(l, h, z2->dp[i]);
14192
            r->dp[i + 64] = l;
14193
            l = h;
14194
            h = 0;
14195
        }
14196
        r->used = 128;
14197
        sp_clamp(r);
14198
    }
14199
14200
    FREE_SP_INT_ARRAY(z, NULL);
14201
    FREE_SP_INT(a1, NULL);
14202
    return err;
14203
}
14204
    #endif /* SP_INT_DIGITS >= 128 */
14205
14206
    #if SP_INT_DIGITS >= 192
14207
/* Square a and store in r. r = a * a
14208
 *
14209
 * Karatsuba implementation.
14210
 *
14211
 * @param  [in]   a  SP integer to square.
14212
 * @param  [out]  r  SP integer result.
14213
 *
14214
 * @return  MP_OKAY on success.
14215
 * @return  MP_MEM when dynamic memory allocation fails.
14216
 */
14217
static int _sp_sqr_96(sp_int* a, sp_int* r)
14218
{
14219
    int err = MP_OKAY;
14220
    int i;
14221
    sp_int_digit l;
14222
    sp_int_digit h;
14223
    sp_int* z0;
14224
    sp_int* z1;
14225
    sp_int* z2;
14226
    sp_int_digit ca;
14227
    DECL_SP_INT(a1, 48);
14228
    DECL_SP_INT_ARRAY(z, 97, 2);
14229
14230
    ALLOC_SP_INT(a1, 48, err, NULL);
14231
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
14232
    if (err == MP_OKAY) {
14233
        z1 = z[0];
14234
        z2 = z[1];
14235
        z0 = r;
14236
14237
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
14238
        a1->used = 48;
14239
14240
        /* z2 = a1 ^ 2 */
14241
        err = _sp_sqr_48(a1, z2);
14242
    }
14243
    if (err == MP_OKAY) {
14244
        l = 0;
14245
        h = 0;
14246
        for (i = 0; i < 48; i++) {
14247
            SP_ASM_ADDC(l, h, a1->dp[i]);
14248
            SP_ASM_ADDC(l, h, a->dp[i]);
14249
            a1->dp[i] = l;
14250
            l = h;
14251
            h = 0;
14252
        }
14253
        ca = l;
14254
14255
        /* z0 = a0 ^ 2 */
14256
        err = _sp_sqr_48(a, z0);
14257
    }
14258
    if (err == MP_OKAY) {
14259
        /* z1 = (a0 + a1) ^ 2 */
14260
        err = _sp_sqr_48(a1, z1);
14261
    }
14262
    if (err == MP_OKAY) {
14263
        /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
14264
        /* r = z0 */
14265
        /* r += (z1 - z0 - z2) << 48 */
14266
        z1->dp[96] = ca;
14267
        l = 0;
14268
        if (ca) {
14269
            l = z1->dp[0 + 48];
14270
            h = 0;
14271
            SP_ASM_ADDC(l, h, a1->dp[0]);
14272
            SP_ASM_ADDC(l, h, a1->dp[0]);
14273
            z1->dp[0 + 48] = l;
14274
            l = h;
14275
            h = 0;
14276
            for (i = 1; i < 48; i++) {
14277
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
14278
                SP_ASM_ADDC(l, h, a1->dp[i]);
14279
                SP_ASM_ADDC(l, h, a1->dp[i]);
14280
                z1->dp[i + 48] = l;
14281
                l = h;
14282
                h = 0;
14283
            }
14284
        }
14285
        z1->dp[96] += l;
14286
        /* z1 = z1 - z0 - z1 */
14287
        l = z1->dp[0];
14288
        h = 0;
14289
        SP_ASM_SUBC(l, h, z0->dp[0]);
14290
        SP_ASM_SUBC(l, h, z2->dp[0]);
14291
        z1->dp[0] = l;
14292
        l = h;
14293
        h = 0;
14294
        for (i = 1; i < 96; i++) {
14295
            l += z1->dp[i];
14296
            SP_ASM_SUBC(l, h, z0->dp[i]);
14297
            SP_ASM_SUBC(l, h, z2->dp[i]);
14298
            z1->dp[i] = l;
14299
            l = h;
14300
            h = 0;
14301
        }
14302
        z1->dp[i] += l;
14303
        /* r += z1 << 16 */
14304
        l = 0;
14305
        h = 0;
14306
        for (i = 0; i < 48; i++) {
14307
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
14308
            SP_ASM_ADDC(l, h, z1->dp[i]);
14309
            r->dp[i + 48] = l;
14310
            l = h;
14311
            h = 0;
14312
        }
14313
        for (; i < 97; i++) {
14314
            SP_ASM_ADDC(l, h, z1->dp[i]);
14315
            r->dp[i + 48] = l;
14316
            l = h;
14317
            h = 0;
14318
        }
14319
        /* r += z2 << 96  */
14320
        l = 0;
14321
        h = 0;
14322
        for (i = 0; i < 49; i++) {
14323
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
14324
            SP_ASM_ADDC(l, h, z2->dp[i]);
14325
            r->dp[i + 96] = l;
14326
            l = h;
14327
            h = 0;
14328
        }
14329
        for (; i < 96; i++) {
14330
            SP_ASM_ADDC(l, h, z2->dp[i]);
14331
            r->dp[i + 96] = l;
14332
            l = h;
14333
            h = 0;
14334
        }
14335
        r->used = 192;
14336
        sp_clamp(r);
14337
    }
14338
14339
    FREE_SP_INT_ARRAY(z, NULL);
14340
    FREE_SP_INT(a1, NULL);
14341
    return err;
14342
}
14343
    #endif /* SP_INT_DIGITS >= 192 */
14344
14345
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
14346
#endif /* !WOLFSSL_SP_SMALL */
14347
14348
/* Square a and store in r. r = a * a
14349
 *
14350
 * @param  [in]   a  SP integer to square.
14351
 * @param  [out]  r  SP integer result.
14352
 *
14353
 * @return  MP_OKAY on success.
14354
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
14355
 *          data length.
14356
 * @return  MP_MEM when dynamic memory allocation fails.
14357
 */
14358
int sp_sqr(sp_int* a, sp_int* r)
14359
598k
{
14360
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
14361
    return sp_mul(a, a, r);
14362
#else
14363
598k
    int err = MP_OKAY;
14364
14365
598k
    if ((a == NULL) || (r == NULL)) {
14366
0
        err = MP_VAL;
14367
0
    }
14368
    /* Need extra digit during calculation. */
14369
598k
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
14370
13
        err = MP_VAL;
14371
13
    }
14372
14373
#if 0
14374
    if (err == MP_OKAY) {
14375
        sp_print(a, "a");
14376
    }
14377
#endif
14378
14379
598k
    if (err == MP_OKAY) {
14380
598k
        if (a->used == 0) {
14381
72.2k
            _sp_zero(r);
14382
72.2k
        }
14383
526k
    else
14384
526k
#ifndef WOLFSSL_SP_SMALL
14385
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
14386
#if SP_WORD_SIZE == 64
14387
        if (a->used == 4) {
14388
            err = _sp_sqr_4(a, r);
14389
        }
14390
        else
14391
#endif /* SP_WORD_SIZE == 64 */
14392
#if SP_WORD_SIZE == 64
14393
#ifdef SQR_MUL_ASM
14394
        if (a->used == 6) {
14395
            err = _sp_sqr_6(a, r);
14396
        }
14397
        else
14398
#endif /* SQR_MUL_ASM */
14399
#endif /* SP_WORD_SIZE == 64 */
14400
#if SP_WORD_SIZE == 32
14401
#ifdef SQR_MUL_ASM
14402
        if (a->used == 8) {
14403
            err = _sp_sqr_8(a, r);
14404
        }
14405
        else
14406
#endif /* SQR_MUL_ASM */
14407
#endif /* SP_WORD_SIZE == 32 */
14408
#if SP_WORD_SIZE == 32
14409
#ifdef SQR_MUL_ASM
14410
        if (a->used == 12) {
14411
            err = _sp_sqr_12(a, r);
14412
        }
14413
        else
14414
#endif /* SQR_MUL_ASM */
14415
#endif /* SP_WORD_SIZE == 32 */
14416
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
14417
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
14418
    #if SP_INT_DIGITS >= 32
14419
        if (a->used == 16) {
14420
            err = _sp_sqr_16(a, r);
14421
        }
14422
        else
14423
    #endif /* SP_INT_DIGITS >= 32 */
14424
    #if SP_INT_DIGITS >= 48
14425
        if (a->used == 24) {
14426
            err = _sp_sqr_24(a, r);
14427
        }
14428
        else
14429
    #endif /* SP_INT_DIGITS >= 48 */
14430
    #if SP_INT_DIGITS >= 64
14431
        if (a->used == 32) {
14432
            err = _sp_sqr_32(a, r);
14433
        }
14434
        else
14435
    #endif /* SP_INT_DIGITS >= 64 */
14436
    #if SP_INT_DIGITS >= 96
14437
        if (a->used == 48) {
14438
            err = _sp_sqr_48(a, r);
14439
        }
14440
        else
14441
    #endif /* SP_INT_DIGITS >= 96 */
14442
    #if SP_INT_DIGITS >= 128
14443
        if (a->used == 64) {
14444
            err = _sp_sqr_64(a, r);
14445
        }
14446
        else
14447
    #endif /* SP_INT_DIGITS >= 128 */
14448
    #if SP_INT_DIGITS >= 192
14449
        if (a->used == 96) {
14450
            err = _sp_sqr_96(a, r);
14451
        }
14452
        else
14453
    #endif /* SP_INT_DIGITS >= 192 */
14454
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
14455
526k
#endif /* !WOLFSSL_SP_SMALL */
14456
526k
        {
14457
526k
            err = _sp_sqr(a, r);
14458
526k
        }
14459
598k
    }
14460
14461
#ifdef WOLFSSL_SP_INT_NEGATIVE
14462
    if (err == MP_OKAY) {
14463
        r->sign = MP_ZPOS;
14464
    }
14465
#endif
14466
14467
#if 0
14468
    if (err == MP_OKAY) {
14469
        sp_print(r, "rsqr");
14470
    }
14471
#endif
14472
14473
598k
    return err;
14474
598k
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
14475
598k
}
14476
/* END SP_SQR implementations */
14477
14478
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
14479
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
14480
14481
#if (!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
14482
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH)
14483
/* Square a mod m and store in r: r = (a * a) mod m
14484
 *
14485
 * @param  [in]   a  SP integer to square.
14486
 * @param  [in]   m  SP integer that is the modulus.
14487
 * @param  [out]  r  SP integer result.
14488
 *
14489
 * @return  MP_OKAY on success.
14490
 * @return  MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
14491
 *          for fixed data length.
14492
 * @return  MP_MEM when dynamic memory allocation fails.
14493
 */
14494
int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
14495
598k
{
14496
598k
    int err = MP_OKAY;
14497
14498
598k
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
14499
0
        err = MP_VAL;
14500
0
    }
14501
598k
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
14502
13
        err = MP_VAL;
14503
13
    }
14504
14505
598k
    if (err == MP_OKAY) {
14506
598k
        err = sp_sqr(a, r);
14507
598k
    }
14508
598k
    if (err == MP_OKAY) {
14509
598k
        err = sp_mod(r, m, r);
14510
598k
    }
14511
14512
598k
    return err;
14513
598k
}
14514
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
14515
14516
/**********************
14517
 * Montgomery functions
14518
 **********************/
14519
14520
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
14521
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
14522
/* Reduce a number in montgomery form.
14523
 *
14524
 * Assumes a and m are not NULL and m is not 0.
14525
 *
14526
 * @param  [in,out]  a   SP integer to Montgomery reduce.
14527
 * @param  [in]      m   SP integer that is the modulus.
14528
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
14529
 *
14530
 * @return  MP_OKAY on success.
14531
 */
14532
static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
14533
0
{
14534
0
#if !defined(SQR_MUL_ASM)
14535
0
    int i;
14536
0
    int bits;
14537
0
    sp_int_word w;
14538
0
    sp_int_digit mu;
14539
14540
#if 0
14541
    sp_print(a, "a");
14542
    sp_print(m, "m");
14543
#endif
14544
14545
0
    bits = sp_count_bits(m);
14546
14547
0
    for (i = a->used; i < m->used * 2; i++) {
14548
0
        a->dp[i] = 0;
14549
0
    }
14550
14551
0
    if (m->used == 1) {
14552
0
        mu = mp * a->dp[0];
14553
0
        w = a->dp[0];
14554
0
        w += (sp_int_word)mu * m->dp[0];
14555
0
        a->dp[0] = (sp_int_digit)w;
14556
0
        w >>= SP_WORD_SIZE;
14557
0
        w += a->dp[1];
14558
0
        a->dp[1] = (sp_int_digit)w;
14559
0
        w >>= SP_WORD_SIZE;
14560
0
        a->dp[2] = (sp_int_digit)w;
14561
0
        a->used = 3;
14562
        /* mp is SP_WORD_SIZE */
14563
0
        bits = SP_WORD_SIZE;
14564
0
    }
14565
0
    else {
14566
0
        sp_int_digit mask = (sp_int_digit)
14567
0
                            ((1UL << (bits & (SP_WORD_SIZE - 1))) - 1);
14568
0
        sp_int_word o = 0;
14569
0
        for (i = 0; i < m->used; i++) {
14570
0
            int j;
14571
14572
0
            mu = mp * a->dp[i];
14573
0
            if ((i == m->used - 1) && (mask != 0)) {
14574
0
                mu &= mask;
14575
0
            }
14576
0
            w = a->dp[i];
14577
0
            w += (sp_int_word)mu * m->dp[0];
14578
0
            a->dp[i] = (sp_int_digit)w;
14579
0
            w >>= SP_WORD_SIZE;
14580
0
            for (j = 1; j < m->used - 1; j++) {
14581
0
                w += a->dp[i + j];
14582
0
                w += (sp_int_word)mu * m->dp[j];
14583
0
                a->dp[i + j] = (sp_int_digit)w;
14584
0
                w >>= SP_WORD_SIZE;
14585
0
            }
14586
0
            w += o;
14587
0
            w += a->dp[i + j];
14588
0
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
14589
0
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
14590
0
            a->dp[i + j] = (sp_int_digit)w;
14591
0
            w >>= SP_WORD_SIZE;
14592
0
            o += w;
14593
0
        }
14594
0
        o += a->dp[m->used * 2 - 1];
14595
0
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
14596
0
        o >>= SP_WORD_SIZE;
14597
0
        a->dp[m->used * 2] = (sp_int_digit)o;
14598
0
        a->used = m->used * 2 + 1;
14599
0
    }
14600
14601
0
    sp_clamp(a);
14602
0
    sp_rshb(a, bits, a);
14603
14604
0
    if (_sp_cmp_abs(a, m) != MP_LT) {
14605
0
        _sp_sub_off(a, m, a, 0);
14606
0
    }
14607
14608
#if 0
14609
    sp_print(a, "rr");
14610
#endif
14611
14612
0
    return MP_OKAY;
14613
#else /* !SQR_MUL_ASM */
14614
    int i;
14615
    int j;
14616
    int bits;
14617
    sp_int_digit mu;
14618
    sp_int_digit o;
14619
    sp_int_digit mask;
14620
14621
    bits = sp_count_bits(m);
14622
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
14623
14624
    for (i = a->used; i < m->used * 2; i++) {
14625
        a->dp[i] = 0;
14626
    }
14627
14628
    if (m->used <= 1) {
14629
    #ifndef SQR_MUL_ASM
14630
        sp_int_word w;
14631
    #else
14632
        sp_int_digit l;
14633
        sp_int_digit h;
14634
        sp_int_digit t;
14635
    #endif
14636
14637
        mu = mp * a->dp[0];
14638
    #ifndef SQR_MUL_ASM
14639
        w = a->dp[0];
14640
        w += (sp_int_word)mu * m->dp[0];
14641
        a->dp[0] = (sp_int_digit)w;
14642
        w >>= SP_WORD_SIZE;
14643
        w += a->dp[1];
14644
        a->dp[1] = (sp_int_digit)w;
14645
        w >>= SP_WORD_SIZE;
14646
        a->dp[2] = (sp_int_digit)w;
14647
    #else
14648
        l = a->dp[0];
14649
        h = 0;
14650
        t = m->dp[0];
14651
        SP_ASM_MUL_ADD_NO(l, h, mu, t);
14652
        a->dp[0] = l;
14653
        l = h;
14654
        h = 0;
14655
        t = a->dp[1];
14656
        SP_ASM_ADDC(l, h, t);
14657
        a->dp[1] = l;
14658
        a->dp[2] = h;
14659
    #endif
14660
        a->used = m->used * 2 + 1;
14661
        /* mp is SP_WORD_SIZE */
14662
        bits = SP_WORD_SIZE;
14663
    }
14664
#ifndef WOLFSSL_HAVE_SP_ECC
14665
#if SP_WORD_SIZE == 64
14666
    else if ((m->used == 4) && (mask == 0)) {
14667
        sp_int_digit l;
14668
        sp_int_digit h;
14669
        sp_int_digit o2;
14670
14671
        l = 0;
14672
        h = 0;
14673
        o = 0;
14674
        o2 = 0;
14675
        for (i = 0; i < 4; i++) {
14676
            mu = mp * a->dp[0];
14677
            l = a->dp[0];
14678
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
14679
            l = h;
14680
            h = 0;
14681
            SP_ASM_ADDC(l, h, a->dp[1]);
14682
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
14683
            a->dp[0] = l;
14684
            l = h;
14685
            h = 0;
14686
            SP_ASM_ADDC(l, h, a->dp[2]);
14687
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
14688
            a->dp[1] = l;
14689
            l = h;
14690
            h = o2;
14691
            o2 = 0;
14692
            SP_ASM_ADDC_REG(l, h, o);
14693
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
14694
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
14695
            a->dp[2] = l;
14696
            o = h;
14697
            l = h;
14698
            h = 0;
14699
        }
14700
        h = o2;
14701
        SP_ASM_ADDC(l, h, a->dp[7]);
14702
        a->dp[3] = l;
14703
        a->dp[4] = h;
14704
        a->used = 5;
14705
14706
        sp_clamp(a);
14707
14708
        if (_sp_cmp_abs(a, m) != MP_LT) {
14709
            sp_sub(a, m, a);
14710
        }
14711
14712
        return MP_OKAY;
14713
    }
14714
    else if ((m->used == 6) && (mask == 0)) {
14715
        sp_int_digit l;
14716
        sp_int_digit h;
14717
        sp_int_digit o2;
14718
14719
        l = 0;
14720
        h = 0;
14721
        o = 0;
14722
        o2 = 0;
14723
        for (i = 0; i < 6; i++) {
14724
            mu = mp * a->dp[0];
14725
            l = a->dp[0];
14726
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
14727
            l = h;
14728
            h = 0;
14729
            SP_ASM_ADDC(l, h, a->dp[1]);
14730
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
14731
            a->dp[0] = l;
14732
            l = h;
14733
            h = 0;
14734
            SP_ASM_ADDC(l, h, a->dp[2]);
14735
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
14736
            a->dp[1] = l;
14737
            l = h;
14738
            h = 0;
14739
            SP_ASM_ADDC(l, h, a->dp[3]);
14740
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
14741
            a->dp[2] = l;
14742
            l = h;
14743
            h = 0;
14744
            SP_ASM_ADDC(l, h, a->dp[4]);
14745
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
14746
            a->dp[3] = l;
14747
            l = h;
14748
            h = o2;
14749
            o2 = 0;
14750
            SP_ASM_ADDC_REG(l, h, o);
14751
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
14752
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
14753
            a->dp[4] = l;
14754
            o = h;
14755
            l = h;
14756
            h = 0;
14757
        }
14758
        h = o2;
14759
        SP_ASM_ADDC(l, h, a->dp[11]);
14760
        a->dp[5] = l;
14761
        a->dp[6] = h;
14762
        a->used = 7;
14763
14764
        sp_clamp(a);
14765
14766
        if (_sp_cmp_abs(a, m) != MP_LT) {
14767
            sp_sub(a, m, a);
14768
        }
14769
14770
        return MP_OKAY;
14771
    }
14772
#elif SP_WORD_SIZE == 32
14773
    else if ((m->used <= 12) && (mask == 0)) {
14774
        sp_int_digit l;
14775
        sp_int_digit h;
14776
        sp_int_digit o2;
14777
        sp_int_digit* ad;
14778
        sp_int_digit* md;
14779
14780
        o = 0;
14781
        o2 = 0;
14782
        ad = a->dp;
14783
        for (i = 0; i < m->used; i++) {
14784
            md = m->dp;
14785
            mu = mp * ad[0];
14786
            l = ad[0];
14787
            h = 0;
14788
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14789
            l = h;
14790
            for (j = 1; j + 1 < m->used - 1; j += 2) {
14791
                h = 0;
14792
                SP_ASM_ADDC(l, h, ad[j]);
14793
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14794
                ad[j - 1] = l;
14795
                l = 0;
14796
                SP_ASM_ADDC(h, l, ad[j + 1]);
14797
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
14798
                ad[j] = h;
14799
            }
14800
            for (; j < m->used - 1; j++) {
14801
                h = 0;
14802
                SP_ASM_ADDC(l, h, ad[j]);
14803
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14804
                ad[j - 1] = l;
14805
                l = h;
14806
            }
14807
            h = o2;
14808
            o2 = 0;
14809
            SP_ASM_ADDC_REG(l, h, o);
14810
            SP_ASM_ADDC(l, h, ad[i + j]);
14811
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
14812
            ad[j - 1] = l;
14813
            o = h;
14814
        }
14815
        l = o;
14816
        h = o2;
14817
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
14818
        a->dp[m->used  - 1] = l;
14819
        a->dp[m->used] = h;
14820
        a->used = m->used + 1;
14821
14822
        sp_clamp(a);
14823
14824
        if (_sp_cmp_abs(a, m) != MP_LT) {
14825
            sp_sub(a, m, a);
14826
        }
14827
14828
        return MP_OKAY;
14829
    }
14830
#endif /* SP_WORD_SIZE == 64 | 32 */
14831
#endif /* WOLFSSL_HAVE_SP_ECC */
14832
    else {
14833
        sp_int_digit l;
14834
        sp_int_digit h;
14835
        sp_int_digit o2;
14836
        sp_int_digit* ad;
14837
        sp_int_digit* md;
14838
14839
        o = 0;
14840
        o2 = 0;
14841
        ad = a->dp;
14842
        for (i = 0; i < m->used; i++, ad++) {
14843
            md = m->dp;
14844
            mu = mp * ad[0];
14845
            if ((i == m->used - 1) && (mask != 0)) {
14846
                mu &= mask;
14847
            }
14848
            l = ad[0];
14849
            h = 0;
14850
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14851
            ad[0] = l;
14852
            l = h;
14853
            for (j = 1; j + 1 < m->used - 1; j += 2) {
14854
                h = 0;
14855
                SP_ASM_ADDC(l, h, ad[j + 0]);
14856
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14857
                ad[j + 0] = l;
14858
                l = 0;
14859
                SP_ASM_ADDC(h, l, ad[j + 1]);
14860
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
14861
                ad[j + 1] = h;
14862
            }
14863
            for (; j < m->used - 1; j++) {
14864
                h = 0;
14865
                SP_ASM_ADDC(l, h, ad[j]);
14866
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
14867
                ad[j] = l;
14868
                l = h;
14869
            }
14870
            h = o2;
14871
            o2 = 0;
14872
            SP_ASM_ADDC_REG(l, h, o);
14873
            SP_ASM_ADDC(l, h, ad[j]);
14874
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
14875
            ad[j] = l;
14876
            o = h;
14877
        }
14878
        l = o;
14879
        h = o2;
14880
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
14881
        a->dp[m->used * 2 - 1] = l;
14882
        a->dp[m->used * 2] = h;
14883
        a->used = m->used * 2 + 1;
14884
    }
14885
14886
    sp_clamp(a);
14887
    sp_rshb(a, bits, a);
14888
14889
    if (_sp_cmp_abs(a, m) != MP_LT) {
14890
        sp_sub(a, m, a);
14891
    }
14892
14893
    return MP_OKAY;
14894
#endif /* !SQR_MUL_ASM */
14895
0
}
14896
14897
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14898
    (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
14899
/* Reduce a number in montgomery form.
14900
 *
14901
 * @param  [in,out]  a   SP integer to Montgomery reduce.
14902
 * @param  [in]      m   SP integer that is the modulus.
14903
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
14904
 *
14905
 * @return  MP_OKAY on success.
14906
 * @return  MP_VAL when a or m is NULL or m is zero.
14907
 */
14908
int sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
14909
0
{
14910
0
    int err;
14911
14912
0
    if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
14913
0
        err = MP_VAL;
14914
0
    }
14915
0
    else if (a->size < m->used * 2 + 1) {
14916
0
        err = MP_VAL;
14917
0
    }
14918
0
    else {
14919
0
        err = _sp_mont_red(a, m, mp);
14920
0
    }
14921
14922
0
    return err;
14923
0
}
14924
#endif
14925
14926
/* Calculate the bottom digit of the inverse of negative m.
14927
 *
14928
 * Used when performing Montgomery Reduction.
14929
 *
14930
 * @param  [in]   m   SP integer that is the modulus.
14931
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
14932
 *
14933
 * @return  MP_OKAY on success.
14934
 * @return  MP_VAL when m or rho is NULL.
14935
 */
14936
int sp_mont_setup(sp_int* m, sp_int_digit* rho)
14937
834
{
14938
834
    int err = MP_OKAY;
14939
14940
834
    if ((m == NULL) || (rho == NULL)) {
14941
0
        err = MP_VAL;
14942
0
    }
14943
834
    if ((err == MP_OKAY) && !sp_isodd(m)) {
14944
0
        err = MP_VAL;
14945
0
    }
14946
14947
834
    if (err == MP_OKAY) {
14948
834
        sp_int_digit x;
14949
834
        sp_int_digit b;
14950
14951
834
        b = m->dp[0];
14952
834
        x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
14953
834
        x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
14954
834
    #if SP_WORD_SIZE >= 16
14955
834
        x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
14956
834
    #if SP_WORD_SIZE >= 32
14957
834
        x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
14958
834
    #if SP_WORD_SIZE >= 64
14959
834
        x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
14960
834
    #endif /* SP_WORD_SIZE >= 64 */
14961
834
    #endif /* SP_WORD_SIZE >= 32 */
14962
834
    #endif /* SP_WORD_SIZE >= 16 */
14963
14964
        /* rho = -1/m mod b, subtract x (unsigned) from 0, assign negative */
14965
834
        *rho = (sp_int_digit)((sp_int_digit)0 - (sp_sint_digit)x);
14966
834
    }
14967
14968
834
    return err;
14969
834
}
14970
14971
/* Calculate the normalization value of m.
14972
 *   norm = 2^k - m, where k is the number of bits in m
14973
 *
14974
 * @param  [out]  norm   SP integer that normalises numbers into Montgomery
14975
 *                       form.
14976
 * @param  [in]   m      SP integer that is the modulus.
14977
 *
14978
 * @return  MP_OKAY on success.
14979
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximual.
14980
 */
14981
int sp_mont_norm(sp_int* norm, sp_int* m)
14982
1.71k
{
14983
1.71k
    int err = MP_OKAY;
14984
1.71k
    int bits = 0;
14985
14986
1.71k
    if ((norm == NULL) || (m == NULL)) {
14987
0
        err = MP_VAL;
14988
0
    }
14989
1.71k
    if (err == MP_OKAY) {
14990
1.71k
        bits = sp_count_bits(m);
14991
1.71k
        if (bits == m->size * SP_WORD_SIZE) {
14992
0
            err = MP_VAL;
14993
0
        }
14994
1.71k
    }
14995
1.71k
    if (err == MP_OKAY) {
14996
1.71k
        if (bits < SP_WORD_SIZE) {
14997
0
            bits = SP_WORD_SIZE;
14998
0
        }
14999
1.71k
        _sp_zero(norm);
15000
1.71k
        sp_set_bit(norm, bits);
15001
1.71k
        err = sp_sub(norm, m, norm);
15002
1.71k
    }
15003
1.71k
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
15004
0
        norm->dp[0] %= m->dp[0];
15005
0
    }
15006
1.71k
    if (err == MP_OKAY) {
15007
1.71k
        sp_clamp(norm);
15008
1.71k
    }
15009
15010
1.71k
    return err;
15011
1.71k
}
15012
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
15013
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
15014
15015
/*********************************
15016
 * To and from binary and strings.
15017
 *********************************/
15018
15019
/* Calculate the number of 8-bit values required to represent the
15020
 * multi-precision number.
15021
 *
15022
 * When a is NULL, return s 0.
15023
 *
15024
 * @param  [in]  a  SP integer.
15025
 *
15026
 * @return  The count of 8-bit values.
15027
 */
15028
int sp_unsigned_bin_size(const sp_int* a)
15029
50.6k
{
15030
50.6k
    int cnt = 0;
15031
15032
50.6k
    if (a != NULL) {
15033
50.6k
        cnt = (sp_count_bits(a) + 7) / 8;
15034
50.6k
    }
15035
15036
50.6k
    return cnt;
15037
50.6k
}
15038
15039
/* Convert a number as an array of bytes in big-endian format to a
15040
 * multi-precision number.
15041
 *
15042
 * @param  [out]  a     SP integer.
15043
 * @param  [in]   in    Array of bytes.
15044
 * @param  [in]   inSz  Number of data bytes in array.
15045
 *
15046
 * @return  MP_OKAY on success.
15047
 * @return  MP_VAL when the number is too big to fit in an SP.
15048
 */
15049
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
15050
22.8k
{
15051
22.8k
    int err = MP_OKAY;
15052
15053
22.8k
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
15054
0
        err = MP_VAL;
15055
0
    }
15056
15057
22.8k
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
15058
70
        err = MP_VAL;
15059
70
    }
15060
15061
#ifndef LITTLE_ENDIAN_ORDER
15062
    if (err == MP_OKAY) {
15063
        int i;
15064
        int j;
15065
        int s;
15066
15067
        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15068
15069
    #ifndef WOLFSSL_SP_INT_DIGIT_ALIGN
15070
        for (i = inSz-1,j = 0; i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF,j++) {
15071
            a->dp[j] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
15072
        }
15073
    #else
15074
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
15075
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0);
15076
        #if SP_WORD_SIZE >= 16
15077
            a->dp[j] |= ((sp_int_digit)in[i - 1] <<  8);
15078
        #endif
15079
        #if SP_WORD_SIZE >= 32
15080
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
15081
                        ((sp_int_digit)in[i - 3] << 24);
15082
        #endif
15083
        #if SP_WORD_SIZE >= 64
15084
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
15085
                        ((sp_int_digit)in[i - 5] << 40) |
15086
                        ((sp_int_digit)in[i - 6] << 48) |
15087
                        ((sp_int_digit)in[i - 7] << 56);
15088
        #endif
15089
            j++;
15090
        }
15091
    #endif
15092
        if (i >= 0) {
15093
            a->dp[a->used - 1] = 0;
15094
            for (s = 0; i >= 0; i--,s += 8) {
15095
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
15096
            }
15097
        }
15098
15099
        sp_clamp(a);
15100
    }
15101
#else
15102
22.8k
    if (err == MP_OKAY) {
15103
22.7k
        int i;
15104
22.7k
        int j;
15105
15106
22.7k
        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15107
15108
196k
        for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
15109
173k
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0);
15110
173k
        #if SP_WORD_SIZE >= 16
15111
173k
            a->dp[j] |= ((sp_int_digit)in[i - 1] <<  8);
15112
173k
        #endif
15113
173k
        #if SP_WORD_SIZE >= 32
15114
173k
            a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
15115
173k
                        ((sp_int_digit)in[i - 3] << 24);
15116
173k
        #endif
15117
173k
        #if SP_WORD_SIZE >= 64
15118
173k
            a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
15119
173k
                        ((sp_int_digit)in[i - 5] << 40) |
15120
173k
                        ((sp_int_digit)in[i - 6] << 48) |
15121
173k
                        ((sp_int_digit)in[i - 7] << 56);
15122
173k
        #endif
15123
173k
            j++;
15124
173k
        }
15125
15126
22.7k
    #if SP_WORD_SIZE >= 16
15127
22.7k
        if (i >= 0) {
15128
12.2k
            byte *d = (byte*)a->dp;
15129
15130
12.2k
            a->dp[a->used - 1] = 0;
15131
12.2k
            switch (i) {
15132
432
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
15133
776
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
15134
1.46k
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
15135
2.14k
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
15136
2.68k
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
15137
4.73k
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
15138
12.2k
                case 0: d[inSz - 1 - 0] = in[0];
15139
12.2k
            }
15140
12.2k
        }
15141
22.7k
    #endif
15142
15143
22.7k
        sp_clamp(a);
15144
22.7k
    }
15145
22.8k
#endif /* LITTLE_ENDIAN_ORDER */
15146
15147
22.8k
    return err;
15148
22.8k
}
15149
15150
/* Convert the multi-precision number to an array of bytes in big-endian format.
15151
 *
15152
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15153
 * to calculate the number of bytes required.
15154
 *
15155
 * @param  [in]   a    SP integer.
15156
 * @param  [out]  out  Array to put encoding into.
15157
 *
15158
 * @return  MP_OKAY on success.
15159
 * @return  MP_VAL when a or out is NULL.
15160
 */
15161
int sp_to_unsigned_bin(sp_int* a, byte* out)
15162
6.51k
{
15163
6.51k
    return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
15164
6.51k
}
15165
15166
/* Convert the multi-precision number to an array of bytes in big-endian format.
15167
 *
15168
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15169
 * to calculate the number of bytes required.
15170
 * Front-pads the output array with zeros make number the size of the array.
15171
 *
15172
 * @param  [in]   a      SP integer.
15173
 * @param  [out]  out    Array to put encoding into.
15174
 * @param  [in]   outSz  Size of the array in bytes.
15175
 *
15176
 * @return  MP_OKAY on success.
15177
 * @return  MP_VAL when a or out is NULL.
15178
 */
15179
int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
15180
26.8k
{
15181
26.8k
    int err = MP_OKAY;
15182
15183
26.8k
    if ((a == NULL) || (out == NULL)) {
15184
2.65k
        err = MP_VAL;
15185
2.65k
    }
15186
26.8k
    if (err == MP_OKAY) {
15187
24.2k
        int j = outSz - 1;
15188
15189
24.2k
        if (!sp_iszero(a)) {
15190
22.8k
            int i;
15191
163k
            for (i = 0; (j >= 0) && (i < a->used); i++) {
15192
140k
                int b;
15193
1.17M
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
15194
1.05M
                    out[j--] = (byte)(a->dp[i] >> b);
15195
1.05M
                    if (j < 0) {
15196
20.6k
                        break;
15197
20.6k
                    }
15198
1.05M
                }
15199
140k
            }
15200
22.8k
        }
15201
6.88M
        for (; j >= 0; j--) {
15202
6.86M
            out[j] = 0;
15203
6.86M
        }
15204
24.2k
    }
15205
15206
26.8k
    return err;
15207
26.8k
}
15208
15209
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
15210
/* Store the number in big-endian format in array at an offset.
15211
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
15212
 * to calculate the number of bytes required.
15213
 *
15214
 * @param  [in]   o    Offset into array o start encoding.
15215
 * @param  [in]   a    SP integer.
15216
 * @param  [out]  out  Array to put encoding into.
15217
 *
15218
 * @return  Index of next byte after data.
15219
 * @return  MP_VAL when a or out is NULL.
15220
 */
15221
int sp_to_unsigned_bin_at_pos(int o, sp_int*a, unsigned char* out)
15222
{
15223
    int ret = sp_to_unsigned_bin(a, out + o);
15224
15225
    if (ret == MP_OKAY) {
15226
        ret = o + sp_unsigned_bin_size(a);
15227
    }
15228
15229
    return ret;
15230
}
15231
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
15232
15233
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15234
    defined(HAVE_ECC) || !defined(NO_DSA)
15235
/* Convert hexadecimal number as string in big-endian format to a
15236
 * multi-precision number.
15237
 *
15238
 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
15239
 *
15240
 * @param  [out]  a   SP integer.
15241
 * @param  [in]   in  NUL terminated string.
15242
 *
15243
 * @return  MP_OKAY on success.
15244
 * @return  MP_VAL when radix not supported, value is negative, or a character
15245
 *          is not valid.
15246
 */
15247
static int _sp_read_radix_16(sp_int* a, const char* in)
15248
116k
{
15249
116k
    int  err = MP_OKAY;
15250
116k
    int  i;
15251
116k
    int  s = 0;
15252
116k
    int  j = 0;
15253
15254
#ifdef WOLFSSL_SP_INT_NEGATIVE
15255
    if (*in == '-') {
15256
        a->sign = MP_NEG;
15257
        in++;
15258
    }
15259
#endif
15260
15261
229k
    while (*in == '0') {
15262
112k
        in++;
15263
112k
    }
15264
15265
116k
    a->dp[0] = 0;
15266
6.19M
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
15267
6.07M
        int ch = (int)HexCharToByte(in[i]);
15268
6.07M
        if (ch < 0) {
15269
605
            err = MP_VAL;
15270
605
            break;
15271
605
        }
15272
15273
6.07M
        if (s == SP_WORD_SIZE) {
15274
338k
            j++;
15275
338k
            if (j >= a->size) {
15276
89
                err = MP_VAL;
15277
89
                break;
15278
89
            }
15279
338k
            s = 0;
15280
338k
            a->dp[j] = 0;
15281
338k
        }
15282
15283
6.07M
        a->dp[j] |= ((sp_int_digit)ch) << s;
15284
6.07M
        s += 4;
15285
6.07M
    }
15286
15287
116k
    if (err == MP_OKAY) {
15288
115k
        a->used = j + 1;
15289
115k
        sp_clamp(a);
15290
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15291
        if (sp_iszero(a)) {
15292
            a->sign = MP_ZPOS;
15293
        }
15294
    #endif
15295
115k
    }
15296
116k
    return err;
15297
116k
}
15298
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * HAVE_ECC || !NO_DSA */
15299
15300
#ifdef WOLFSSL_SP_READ_RADIX_10
15301
/* Convert decimal number as string in big-endian format to a multi-precision
15302
 * number.
15303
 *
15304
 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
15305
 *
15306
 * @param  [out]  a   SP integer.
15307
 * @param  [in]   in  NUL terminated string.
15308
 *
15309
 * @return  MP_OKAY on success.
15310
 * @return  MP_VAL when radix not supported, value is negative, or a character
15311
 *          is not valid.
15312
 */
15313
static int _sp_read_radix_10(sp_int* a, const char* in)
15314
{
15315
    int  err = MP_OKAY;
15316
    int  i;
15317
    int  len;
15318
    char ch;
15319
15320
    _sp_zero(a);
15321
#ifdef WOLFSSL_SP_INT_NEGATIVE
15322
    if (*in == '-') {
15323
        a->sign = MP_NEG;
15324
        in++;
15325
    }
15326
#endif /* WOLFSSL_SP_INT_NEGATIVE */
15327
15328
    while (*in == '0') {
15329
        in++;
15330
    }
15331
15332
    len = (int)XSTRLEN(in);
15333
    for (i = 0; i < len; i++) {
15334
        ch = in[i];
15335
        if ((ch >= '0') && (ch <= '9')) {
15336
            ch -= '0';
15337
        }
15338
        else {
15339
            err = MP_VAL;
15340
            break;
15341
        }
15342
        err = _sp_mul_d(a, 10, a, 0);
15343
        if (err != MP_OKAY) {
15344
            break;
15345
        }
15346
        err = _sp_add_d(a, ch, a);
15347
        if (err != MP_OKAY) {
15348
            break;
15349
        }
15350
    }
15351
#ifdef WOLFSSL_SP_INT_NEGATIVE
15352
    if ((err == MP_OKAY) && sp_iszero(a)) {
15353
        a->sign = MP_ZPOS;
15354
    }
15355
#endif
15356
15357
    return err;
15358
}
15359
#endif /* WOLFSSL_SP_READ_RADIX_10 */
15360
15361
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
15362
    !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || !defined(NO_DSA)
15363
/* Convert a number as string in big-endian format to a big number.
15364
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
15365
 *
15366
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
15367
 *
15368
 * @param  [out]  a      SP integer.
15369
 * @param  [in]   in     NUL terminated string.
15370
 * @param  [in]   radix  Number of values in a digit.
15371
 *
15372
 * @return  MP_OKAY on success.
15373
 * @return  MP_VAL when a or in is NULL, radix not supported, value is negative,
15374
 *          or a character is not valid.
15375
 */
15376
int sp_read_radix(sp_int* a, const char* in, int radix)
15377
117k
{
15378
117k
    int err = MP_OKAY;
15379
15380
117k
    if ((a == NULL) || (in == NULL)) {
15381
0
        err = MP_VAL;
15382
0
    }
15383
15384
117k
    if (err == MP_OKAY) {
15385
117k
    #ifndef WOLFSSL_SP_INT_NEGATIVE
15386
117k
        if (*in == '-') {
15387
1.67k
            err = MP_VAL;
15388
1.67k
        }
15389
116k
        else
15390
116k
    #endif
15391
116k
        if (radix == 16) {
15392
116k
            err = _sp_read_radix_16(a, in);
15393
116k
        }
15394
    #ifdef WOLFSSL_SP_READ_RADIX_10
15395
        else if (radix == 10) {
15396
            err = _sp_read_radix_10(a, in);
15397
        }
15398
    #endif
15399
0
        else {
15400
0
            err = MP_VAL;
15401
0
        }
15402
117k
    }
15403
15404
117k
    return err;
15405
117k
}
15406
#endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * HAVE_ECC || !NO_DSA */
15407
15408
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15409
    defined(WC_MP_TO_RADIX)
15410
15411
/* Put the big-endian, hex string encoding of a into str.
15412
 *
15413
 * Assumes str is large enough for result.
15414
 * Use sp_radix_size() to calculate required length.
15415
 *
15416
 * @param  [in]   a    SP integer to convert.
15417
 * @param  [out]  str  String to hold hex string result.
15418
 *
15419
 * @return  MP_OKAY on success.
15420
 * @return  MP_VAL when a or str is NULL.
15421
 */
15422
int sp_tohex(sp_int* a, char* str)
15423
15.8k
{
15424
15.8k
    int err = MP_OKAY;
15425
15.8k
    int i;
15426
15.8k
    int j;
15427
15428
15.8k
    if ((a == NULL) || (str == NULL)) {
15429
0
        err = MP_VAL;
15430
0
    }
15431
15.8k
    if (err == MP_OKAY) {
15432
        /* quick out if its zero */
15433
15.8k
        if (sp_iszero(a) == MP_YES) {
15434
1.18k
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
15435
1.18k
            *str++ = '0';
15436
1.18k
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15437
1.18k
            *str++ = '0';
15438
1.18k
            *str = '\0';
15439
1.18k
        }
15440
14.6k
        else {
15441
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15442
            if (a->sign == MP_NEG) {
15443
                *str = '-';
15444
                str++;
15445
            }
15446
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
15447
15448
14.6k
            i = a->used - 1;
15449
14.6k
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
15450
            /* Find highest non-zero byte in most-significant word. */
15451
57.3k
            for (j = SP_WORD_SIZE - 8; j >= 0; j -= 8) {
15452
57.3k
                if (((a->dp[i] >> j) & 0xff) != 0) {
15453
14.6k
                    break;
15454
14.6k
                }
15455
42.6k
                else if (j == 0) {
15456
0
                    j = SP_WORD_SIZE - 8;
15457
0
                    --i;
15458
0
                }
15459
57.3k
            }
15460
            /* Start with high nibble of byte. */
15461
14.6k
            j += 4;
15462
    #else
15463
            /* Find highest non-zero nibble in most-significant word. */
15464
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
15465
                if (((a->dp[i] >> j) & 0xf) != 0) {
15466
                    break;
15467
                }
15468
                else if (j == 0) {
15469
                    j = SP_WORD_SIZE - 4;
15470
                    --i;
15471
                }
15472
            }
15473
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15474
            /* Most-significant word. */
15475
163k
            for (; j >= 0; j -= 4) {
15476
149k
                *(str++) = ByteToHex((byte)(a->dp[i] >> j));
15477
149k
            }
15478
106k
            for (--i; i >= 0; i--) {
15479
1.56M
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
15480
1.46M
                    *(str++) = (byte)ByteToHex((byte)(a->dp[i] >> j));
15481
1.46M
                }
15482
91.8k
            }
15483
14.6k
            *str = '\0';
15484
14.6k
        }
15485
15.8k
    }
15486
15487
15.8k
    return err;
15488
15.8k
}
15489
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15490
15491
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15492
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
15493
    defined(WC_MP_TO_RADIX)
15494
/* Put the big-endian, decimal string encoding of a into str.
15495
 *
15496
 * Assumes str is large enough for result.
15497
 * Use sp_radix_size() to calculate required length.
15498
 *
15499
 * @param  [in]   a    SP integer to convert.
15500
 * @param  [out]  str  String to hold hex string result.
15501
 *
15502
 * @return  MP_OKAY on success.
15503
 * @return  MP_VAL when a or str is NULL.
15504
 * @return  MP_MEM when dynamic memory allocation fails.
15505
 */
15506
int sp_todecimal(sp_int* a, char* str)
15507
0
{
15508
0
    int err = MP_OKAY;
15509
0
    int i;
15510
0
    int j;
15511
0
    sp_int_digit d;
15512
15513
0
    if ((a == NULL) || (str == NULL)) {
15514
0
        err = MP_VAL;
15515
0
    }
15516
    /* quick out if its zero */
15517
0
    else if (sp_iszero(a) == MP_YES) {
15518
0
        *str++ = '0';
15519
0
        *str = '\0';
15520
0
    }
15521
0
    else {
15522
0
        DECL_SP_INT(t, a->used + 1);
15523
15524
0
        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
15525
0
        if (err == MP_OKAY) {
15526
0
            err = sp_copy(a, t);
15527
0
        }
15528
0
        if (err == MP_OKAY) {
15529
15530
        #ifdef WOLFSSL_SP_INT_NEGATIVE
15531
            if (a->sign == MP_NEG) {
15532
                *str = '-';
15533
                str++;
15534
            }
15535
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
15536
15537
0
            i = 0;
15538
0
            while (!sp_iszero(t)) {
15539
0
                sp_div_d(t, 10, t, &d);
15540
0
                str[i++] = (char)('0' + d);
15541
0
            }
15542
0
            str[i] = '\0';
15543
15544
0
            for (j = 0; j <= (i - 1) / 2; j++) {
15545
0
                int c = (unsigned char)str[j];
15546
0
                str[j] = str[i - 1 - j];
15547
0
                str[i - 1 - j] = (char)c;
15548
0
            }
15549
0
        }
15550
15551
0
        FREE_SP_INT(t, NULL);
15552
0
    }
15553
15554
0
    return err;
15555
0
}
15556
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * WOLFSSL_KEY_GEN || HAVE_COMP_KEY || WC_MP_TO_RADIX */
15557
15558
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15559
    defined(WC_MP_TO_RADIX)
15560
/* Put the string version, big-endian, of a in str using the given radix.
15561
 *
15562
 * @param  [in]   a      SP integer to convert.
15563
 * @param  [out]  str    String to hold hex string result.
15564
 * @param  [in]   radix  Base of character.
15565
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
15566
 *
15567
 * @return  MP_OKAY on success.
15568
 * @return  MP_VAL when a or str is NULL, or radix not supported.
15569
 */
15570
int sp_toradix(sp_int* a, char* str, int radix)
15571
0
{
15572
0
    int err = MP_OKAY;
15573
15574
0
    if ((a == NULL) || (str == NULL)) {
15575
0
        err = MP_VAL;
15576
0
    }
15577
0
    else if (radix == MP_RADIX_HEX) {
15578
0
        err = sp_tohex(a, str);
15579
0
    }
15580
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
15581
0
    defined(HAVE_COMP_KEY)
15582
0
    else if (radix == MP_RADIX_DEC) {
15583
0
        err = sp_todecimal(a, str);
15584
0
    }
15585
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
15586
0
    else {
15587
0
        err = MP_VAL;
15588
0
    }
15589
15590
0
    return err;
15591
0
}
15592
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15593
15594
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
15595
    defined(WC_MP_TO_RADIX)
15596
/* Calculate the length of the string version, big-endian, of a using the given
15597
 * radix.
15598
 *
15599
 * @param  [in]   a      SP integer to convert.
15600
 * @param  [in]   radix  Base of character.
15601
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
15602
 * @param  [out]  size   The number of characters in encoding.
15603
 *
15604
 * @return  MP_OKAY on success.
15605
 * @return  MP_VAL when a or size is NULL, or radix not supported.
15606
 */
15607
int sp_radix_size(sp_int* a, int radix, int* size)
15608
0
{
15609
0
    int err = MP_OKAY;
15610
15611
0
    if ((a == NULL) || (size == NULL)) {
15612
0
        err = MP_VAL;
15613
0
    }
15614
0
    else if (radix == MP_RADIX_HEX) {
15615
0
        if (a->used == 0) {
15616
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
15617
            /* 00 and '\0' */
15618
0
            *size = 2 + 1;
15619
        #else
15620
            /* Zero and '\0' */
15621
            *size = 1 + 1;
15622
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15623
0
        }
15624
0
        else {
15625
0
            int nibbles = (sp_count_bits(a) + 3) / 4;
15626
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
15627
0
            if (nibbles & 1) {
15628
0
                nibbles++;
15629
0
            }
15630
0
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
15631
        #ifdef WOLFSSL_SP_INT_NEGATIVE
15632
            if (a->sign == MP_NEG) {
15633
                nibbles++;
15634
            }
15635
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
15636
            /* One more for \0 */
15637
0
            *size = nibbles + 1;
15638
0
        }
15639
0
    }
15640
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
15641
0
    defined(HAVE_COMP_KEY)
15642
0
    else if (radix == MP_RADIX_DEC) {
15643
0
        int i;
15644
0
        sp_int_digit d;
15645
15646
        /* quick out if its zero */
15647
0
        if (sp_iszero(a) == MP_YES) {
15648
            /* Zero and '\0' */
15649
0
            *size = 1 + 1;
15650
0
        }
15651
0
        else {
15652
0
            DECL_SP_INT(t, a->used + 1);
15653
15654
0
            ALLOC_SP_INT(t, a->used + 1, err, NULL);
15655
0
            if (err == MP_OKAY) {
15656
0
        #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15657
0
                t->size = a->used + 1;
15658
0
        #endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */
15659
0
                err = sp_copy(a, t);
15660
0
            }
15661
15662
0
            if (err == MP_OKAY) {
15663
15664
0
                for (i = 0; !sp_iszero(t); i++) {
15665
0
                    sp_div_d(t, 10, t, &d);
15666
0
                }
15667
            #ifdef WOLFSSL_SP_INT_NEGATIVE
15668
                if (a->sign == MP_NEG) {
15669
                    i++;
15670
                }
15671
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
15672
                /* One more for \0 */
15673
0
                *size = i + 1;
15674
0
            }
15675
15676
0
            FREE_SP_INT(t, NULL);
15677
0
        }
15678
0
    }
15679
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
15680
0
    else {
15681
0
        err = MP_VAL;
15682
0
    }
15683
15684
0
    return err;
15685
0
}
15686
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
15687
15688
/***************************************
15689
 * Prime number generation and checking.
15690
 ***************************************/
15691
15692
#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
15693
    !defined(NO_DSA)) && !defined(WC_NO_RNG)
15694
/* Generate a random prime for RSA only.
15695
 *
15696
 * @param  [out]  r     SP integer to hold result.
15697
 * @param  [in]   len   Number of bytes in prime.
15698
 * @param  [in]   rng   Random number generator.
15699
 * @param  [in]   heap  Heap hint. Unused.
15700
 *
15701
 * @return  MP_OKAY on success
15702
 * @return  MP_VAL when r or rng is NULL, length is not supported or random
15703
 *          number generator fails.
15704
 */
15705
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
15706
207
{
15707
207
    static const int USE_BBS = 1;
15708
207
    int   err = MP_OKAY;
15709
207
    int   type = 0;
15710
207
    int   isPrime = MP_NO;
15711
#ifdef WOLFSSL_SP_MATH_ALL
15712
    int   bits = 0;
15713
#endif /* WOLFSSL_SP_MATH_ALL */
15714
15715
207
    (void)heap;
15716
15717
    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
15718
207
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
15719
11
        err = MP_VAL;
15720
11
    }
15721
15722
207
    if (err == MP_OKAY) {
15723
        /* get type */
15724
196
        if (len < 0) {
15725
0
            type = USE_BBS;
15726
0
            len = -len;
15727
0
        }
15728
15729
196
    #ifndef WOLFSSL_SP_MATH_ALL
15730
        /* For minimal maths, support only what's in SP and needed for DH. */
15731
196
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
15732
196
        if (len == 32) {
15733
171
        }
15734
25
        else
15735
25
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
15736
        /* Generate RSA primes that are half the modulus length. */
15737
25
    #ifndef WOLFSSL_SP_NO_3072
15738
25
        if ((len != 128) && (len != 192))
15739
    #else
15740
        if (len != 128)
15741
    #endif /* WOLFSSL_SP_NO_3072 */
15742
25
        {
15743
25
            err = MP_VAL;
15744
25
        }
15745
196
    #endif /* !WOLFSSL_SP_MATH_ALL */
15746
15747
    #ifdef WOLFSSL_SP_INT_NEGATIVE
15748
        r->sign = MP_ZPOS;
15749
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
15750
196
        r->used = (len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
15751
    #ifdef WOLFSSL_SP_MATH_ALL
15752
        bits = (len * 8) & SP_WORD_MASK;
15753
    #endif /* WOLFSSL_SP_MATH_ALL */
15754
196
    }
15755
15756
    /* Assume the candidate is probably prime and then test until
15757
     * it is proven composite. */
15758
12.9k
    while (err == MP_OKAY && isPrime == MP_NO) {
15759
#ifdef SHOW_GEN
15760
        printf(".");
15761
        fflush(stdout);
15762
#endif /* SHOW_GEN */
15763
        /* generate value */
15764
12.7k
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
15765
12.7k
        if (err != 0) {
15766
57
            err = MP_VAL;
15767
57
            break;
15768
57
        }
15769
15770
        /* munge bits */
15771
#ifndef LITTLE_ENDIAN_ORDER
15772
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
15773
#else
15774
12.7k
        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
15775
12.7k
#endif /* LITTLE_ENDIAN_ORDER */
15776
12.7k
        r->dp[0]              |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
15777
15778
#ifndef LITTLE_ENDIAN_ORDER
15779
        if (((len * 8) & SP_WORD_MASK) != 0) {
15780
            r->dp[r->used-1] >>= SP_WORD_SIZE - ((len * 8) & SP_WORD_MASK);
15781
        }
15782
#endif /* LITTLE_ENDIAN_ORDER */
15783
#ifdef WOLFSSL_SP_MATH_ALL
15784
        if (bits > 0) {
15785
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
15786
        }
15787
#endif /* WOLFSSL_SP_MATH_ALL */
15788
15789
        /* test */
15790
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
15791
         * of a 1024-bit candidate being a false positive, when it is our
15792
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
15793
         * Using 8 because we've always used 8 */
15794
12.7k
        sp_prime_is_prime_ex(r, 8, &isPrime, rng);
15795
12.7k
    }
15796
15797
207
    return err;
15798
207
}
15799
#endif /* WOLFSSL_KEY_GEN && (!NO_RSA || !NO_DH || !NO_DSA) && !WC_NO_RNG */
15800
15801
#ifdef WOLFSSL_SP_PRIME_GEN
15802
/* Miller-Rabin test of "a" to the base of "b" as described in
15803
 * HAC pp. 139 Algorithm 4.24
15804
 *
15805
 * Sets result to 0 if definitely composite or 1 if probably prime.
15806
 * Randomly the chance of error is no more than 1/4 and often
15807
 * very much lower.
15808
 *
15809
 * @param  [in]   a       SP integer to check.
15810
 * @param  [in]   b       SP integer that is a small prime.
15811
 * @param  [out]  result  MP_YES when number is likey prime.
15812
 *                        MP_NO otherwise.
15813
 * @param  [in]   n1      SP integer temporary.
15814
 * @param  [in]   y       SP integer temporary.
15815
 * @param  [in]   r       SP integer temporary.
15816
 *
15817
 * @return  MP_OKAY on success.
15818
 * @return  MP_MEM when dynamic memory allocation fails.
15819
 */
15820
static int sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result,
15821
                                    sp_int* n1, sp_int* y, sp_int* r)
15822
1.95k
{
15823
1.95k
    int s;
15824
1.95k
    int j;
15825
1.95k
    int err = MP_OKAY;
15826
15827
    /* default */
15828
1.95k
    *result = MP_NO;
15829
15830
    /* ensure b > 1 */
15831
1.95k
    if (sp_cmp_d(b, 1) == MP_GT) {
15832
        /* get n1 = a - 1 */
15833
1.95k
        (void)sp_copy(a, n1);
15834
1.95k
        _sp_sub_d(n1, 1, n1);
15835
        /* set 2**s * r = n1 */
15836
1.95k
        (void)sp_copy(n1, r);
15837
15838
        /* count the number of least significant bits
15839
         * which are zero
15840
         */
15841
1.95k
        s = sp_cnt_lsb(r);
15842
15843
        /* now divide n - 1 by 2**s */
15844
1.95k
        sp_rshb(r, s, r);
15845
15846
        /* compute y = b**r mod a */
15847
1.95k
        err = sp_exptmod(b, r, a, y);
15848
15849
1.95k
        if (err == MP_OKAY) {
15850
            /* probably prime until shown otherwise */
15851
1.60k
            *result = MP_YES;
15852
15853
            /* if y != 1 and y != n1 do */
15854
1.60k
            if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
15855
1.27k
                j = 1;
15856
                /* while j <= s-1 and y != n1 */
15857
25.1k
                while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
15858
23.8k
                    err = sp_sqrmod(y, a, y);
15859
23.8k
                    if (err != MP_OKAY) {
15860
7
                        break;
15861
7
                    }
15862
15863
                    /* if y == 1 then composite */
15864
23.8k
                    if (sp_cmp_d(y, 1) == MP_EQ) {
15865
0
                        *result = MP_NO;
15866
0
                        break;
15867
0
                    }
15868
23.8k
                    ++j;
15869
23.8k
                }
15870
15871
                /* if y != n1 then composite */
15872
1.27k
                if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
15873
1.08k
                    *result = MP_NO;
15874
1.08k
                }
15875
1.27k
            }
15876
1.60k
        }
15877
1.95k
    }
15878
15879
1.95k
    return err;
15880
1.95k
}
15881
15882
/* Miller-Rabin test of "a" to the base of "b" as described in
15883
 * HAC pp. 139 Algorithm 4.24
15884
 *
15885
 * Sets result to 0 if definitely composite or 1 if probably prime.
15886
 * Randomly the chance of error is no more than 1/4 and often
15887
 * very much lower.
15888
 *
15889
 * @param  [in]   a       SP integer to check.
15890
 * @param  [in]   b       SP integer that is a small prime.
15891
 * @param  [out]  result  MP_YES when number is likey prime.
15892
 *                        MP_NO otherwise.
15893
 *
15894
 * @return  MP_OKAY on success.
15895
 * @return  MP_MEM when dynamic memory allocation fails.
15896
 */
15897
static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result)
15898
0
{
15899
0
    int err = MP_OKAY;
15900
0
    sp_int *n1;
15901
0
    sp_int *y;
15902
0
    sp_int *r;
15903
0
    DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3);
15904
15905
0
    ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL);
15906
0
    if (err == MP_OKAY) {
15907
0
        n1 = t[0];
15908
0
        y  = t[1];
15909
0
        r  = t[2];
15910
15911
        /* Only 'y' needs to be twice as big. */
15912
0
        sp_init_size(n1, a->used * 2 + 1);
15913
0
        sp_init_size(y, a->used * 2 + 1);
15914
0
        sp_init_size(r, a->used * 2 + 1);
15915
15916
0
        err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
15917
15918
0
        sp_clear(n1);
15919
0
        sp_clear(y);
15920
0
        sp_clear(r);
15921
0
    }
15922
15923
0
    FREE_SP_INT_ARRAY(t, NULL);
15924
0
    return err;
15925
0
}
15926
15927
#if SP_WORD_SIZE == 8
15928
/* Number of pre-computed primes. First n primes - fitting in a digit.
 * With 8-bit digits that is every prime from 2 up to 0xFB.
 * sp_prime_is_prime() also uses this count as the upper limit on the number
 * of Miller-Rabin iterations it accepts. */
15929
#define SP_PRIME_SIZE      54
15930
15931
/* Small primes in ascending order. Used for the single-digit table lookup,
 * for trial division and, in sp_prime_is_prime(), as Miller-Rabin bases. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
15932
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
15933
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
15934
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
15935
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
15936
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
15937
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
15938
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
15939
};
15940
#else
15941
/* Number of pre-computed primes. First n primes.
 * sp_prime_is_prime() also uses this count as the upper limit on the number
 * of Miller-Rabin iterations it accepts. */
15942
516k
#define SP_PRIME_SIZE      256
15943
15944
/* The first 256 primes (2 .. 0x0653), in ascending order. Used for the
 * single-digit table lookup, for trial division and, in sp_prime_is_prime(),
 * as Miller-Rabin bases. */
15945
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
15946
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
15947
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
15948
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
15949
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
15950
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
15951
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
15952
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
15953
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
15954
15955
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
15956
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
15957
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
15958
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
15959
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
15960
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
15961
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
15962
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
15963
15964
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
15965
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
15966
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
15967
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
15968
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
15969
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
15970
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
15971
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
15972
15973
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
15974
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
15975
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
15976
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
15977
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
15978
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
15979
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
15980
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
15981
};
15982
#endif
15983
15984
/* Check whether a is prime.
15985
 * Checks against a number of small primes and does t iterations of
15986
 * Miller-Rabin.
15987
 *
15988
 * @param  [in]   a       SP integer to check.
15989
 * @param  [in]   t       Number of iterations of Miller-Rabin test to perform.
15990
 * @param  [out]  result  MP_YES when number is prime.
15991
 *                        MP_NO otherwise.
15992
 *
15993
 * @return  MP_OKAY on success.
15994
 * @return  MP_VAL when a or result is NULL, or t is out of range.
15995
 * @return  MP_MEM when dynamic memory allocation fails.
15996
 */
15997
int sp_prime_is_prime(sp_int* a, int t, int* result)
15998
0
{
15999
0
    int         err = MP_OKAY;
16000
0
    int         i;
16001
0
    int         haveRes = 0;
16002
0
    sp_int_digit d;
16003
0
    DECL_SP_INT(b, 2);
16004
16005
0
    if ((a == NULL) || (result == NULL)) {
16006
0
        if (result != NULL) {
16007
0
            *result = MP_NO;
16008
0
        }
16009
0
        err = MP_VAL;
16010
0
    }
16011
16012
0
    if ((err == MP_OKAY) && ((t <= 0) || (t > SP_PRIME_SIZE))) {
16013
0
        *result = MP_NO;
16014
0
        err = MP_VAL;
16015
0
    }
16016
16017
0
    if ((err == MP_OKAY) && sp_isone(a)) {
16018
0
        *result = MP_NO;
16019
0
        haveRes = 1;
16020
0
    }
16021
16022
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16023
16024
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
16025
        /* check against primes table */
16026
0
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16027
0
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
16028
0
                *result = MP_YES;
16029
0
                haveRes = 1;
16030
0
                break;
16031
0
            }
16032
0
        }
16033
0
    }
16034
16035
0
    if ((err == MP_OKAY) && (!haveRes)) {
16036
        /* do trial division */
16037
0
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16038
0
            err = sp_mod_d(a, sp_primes[i], &d);
16039
0
            if ((err != MP_OKAY) || (d == 0)) {
16040
0
                *result = MP_NO;
16041
0
                haveRes = 1;
16042
0
                break;
16043
0
            }
16044
0
        }
16045
0
    }
16046
16047
0
    if ((err == MP_OKAY) && (!haveRes)) {
16048
0
        ALLOC_SP_INT(b, 1, err, NULL);
16049
0
        if (err == MP_OKAY) {
16050
            /* now do 't' miller rabins */
16051
0
            sp_init_size(b, 1);
16052
0
            for (i = 0; i < t; i++) {
16053
0
                sp_set(b, sp_primes[i]);
16054
0
                err = sp_prime_miller_rabin(a, b, result);
16055
0
                if ((err != MP_OKAY) || (*result == MP_NO)) {
16056
0
                    break;
16057
0
                }
16058
0
            }
16059
0
        }
16060
0
     }
16061
16062
0
     RESTORE_VECTOR_REGISTERS();
16063
16064
0
     FREE_SP_INT(b, NULL);
16065
0
     return err;
16066
0
}
16067
16068
/* Check whether a is prime.
16069
 * Checks against a number of small primes and does t iterations of
16070
 * Miller-Rabin.
16071
 *
16072
 * @param  [in]   a       SP integer to check.
16073
 * @param  [in]   t       Number of iterations of Miller-Rabin test to perform.
16074
 * @param  [out]  result  MP_YES when number is prime.
16075
 *                        MP_NO otherwise.
16076
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
16077
 *
16078
 * @return  MP_OKAY on success.
16079
 * @return  MP_VAL when a, result or rng is NULL.
16080
 * @return  MP_MEM when dynamic memory allocation fails.
16081
 */
16082
int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
16083
12.7k
{
16084
12.7k
    int err = MP_OKAY;
16085
12.7k
    int ret = MP_YES;
16086
12.7k
    int haveRes = 0;
16087
12.7k
    int i;
16088
12.7k
#ifndef WC_NO_RNG
16089
12.7k
    sp_int *b = NULL;
16090
12.7k
    sp_int *c = NULL;
16091
12.7k
    sp_int *n1 = NULL;
16092
12.7k
    sp_int *y = NULL;
16093
12.7k
    sp_int *r = NULL;
16094
12.7k
#endif /* WC_NO_RNG */
16095
16096
12.7k
    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
16097
0
        err = MP_VAL;
16098
0
    }
16099
16100
#ifdef WOLFSSL_SP_INT_NEGATIVE
16101
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
16102
        err = MP_VAL;
16103
    }
16104
#endif
16105
16106
12.7k
    if ((err == MP_OKAY) && sp_isone(a)) {
16107
0
        ret = MP_NO;
16108
0
        haveRes = 1;
16109
0
    }
16110
16111
12.7k
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16112
16113
12.7k
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
16114
        /* check against primes table */
16115
0
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16116
0
            if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
16117
0
                ret = MP_YES;
16118
0
                haveRes = 1;
16119
0
                break;
16120
0
            }
16121
0
        }
16122
0
    }
16123
16124
12.7k
    if ((err == MP_OKAY) && (!haveRes)) {
16125
12.7k
        sp_int_digit d;
16126
16127
        /* do trial division */
16128
516k
        for (i = 0; i < SP_PRIME_SIZE; i++) {
16129
514k
            err = sp_mod_d(a, sp_primes[i], &d);
16130
514k
            if ((err != MP_OKAY) || (d == 0)) {
16131
11.1k
                ret = MP_NO;
16132
11.1k
                haveRes = 1;
16133
11.1k
                break;
16134
11.1k
            }
16135
514k
        }
16136
12.7k
    }
16137
16138
12.7k
#ifndef WC_NO_RNG
16139
    /* now do a miller rabin with up to t random numbers, this should
16140
     * give a (1/4)^t chance of a false prime. */
16141
12.7k
    if ((err == MP_OKAY) && (!haveRes)) {
16142
1.54k
        int bits = sp_count_bits(a);
16143
1.54k
        word32 baseSz = (bits + 7) / 8;
16144
1.54k
        DECL_SP_INT_ARRAY(ds, a->used + 1, 3);
16145
1.54k
        DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
16146
16147
1.54k
        ALLOC_SP_INT_ARRAY(ds, a->used + 1, 3, err, NULL);
16148
1.54k
        ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
16149
1.54k
        if (err == MP_OKAY) {
16150
1.50k
            b  = ds[0];
16151
1.50k
            c  = ds[1];
16152
1.50k
            n1 = ds[2];
16153
1.50k
            y  = d[0];
16154
1.50k
            r  = d[1];
16155
16156
            /* Only 'y' needs to be twice as big. */
16157
1.50k
            sp_init_size(b , a->used + 1);
16158
1.50k
            sp_init_size(c , a->used + 1);
16159
1.50k
            sp_init_size(n1, a->used + 1);
16160
1.50k
            sp_init_size(y , a->used * 2 + 1);
16161
1.50k
            sp_init_size(r , a->used * 2 + 1);
16162
16163
1.50k
            _sp_sub_d(a, 2, c);
16164
16165
1.50k
            bits &= SP_WORD_MASK;
16166
16167
3.06k
            while (t > 0) {
16168
3.00k
                err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
16169
3.00k
                if (err != MP_OKAY) {
16170
8
                    break;
16171
8
                }
16172
2.99k
                b->used = a->used;
16173
16174
            #ifdef BIG_ENDIAN_ORDER
16175
                if (((baseSz * 8) & SP_WORD_MASK) != 0) {
16176
                    b->dp[b->used-1] >>=
16177
                        SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
16178
                }
16179
            #endif /* LITTLE_ENDIAN_ORDER */
16180
16181
                /* Ensure the top word has no more bits than necessary. */
16182
2.99k
                if (bits > 0) {
16183
0
                    b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
16184
0
                    sp_clamp(b);
16185
0
                }
16186
16187
2.99k
                if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
16188
1.04k
                    continue;
16189
1.04k
                }
16190
16191
1.95k
                err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
16192
1.95k
                if ((err != MP_OKAY) || (ret == MP_NO)) {
16193
1.42k
                    break;
16194
1.42k
                }
16195
16196
522
                t--;
16197
522
            }
16198
16199
1.50k
            sp_clear(n1);
16200
1.50k
            sp_clear(y);
16201
1.50k
            sp_clear(r);
16202
1.50k
            sp_clear(b);
16203
1.50k
            sp_clear(c);
16204
1.50k
        }
16205
16206
1.54k
        FREE_SP_INT_ARRAY(d, NULL);
16207
1.54k
        FREE_SP_INT_ARRAY(ds, NULL);
16208
1.54k
    }
16209
#else
16210
    (void)t;
16211
#endif /* !WC_NO_RNG */
16212
16213
12.7k
    if (result != NULL) {
16214
12.7k
        *result = ret;
16215
12.7k
    }
16216
16217
12.7k
    RESTORE_VECTOR_REGISTERS();
16218
16219
12.7k
    return err;
16220
12.7k
}
16221
#endif /* WOLFSSL_SP_PRIME_GEN */
16222
16223
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
16224
16225
/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
16226
 *
16227
 * a and b are positive integers.
16228
 *
16229
 * @param  [in]   a  SP integer of first operand.
16230
 * @param  [in]   b  SP integer of second operand.
16231
 * @param  [out]  r  SP integer to hold result.
16232
 *
16233
 * @return  MP_OKAY on success.
16234
 * @return  MP_VAL when a, b or r is NULL or too large.
16235
 * @return  MP_MEM when dynamic memory allocation fails.
16236
 */
16237
int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
16238
566
{
16239
566
    int err = MP_OKAY;
16240
16241
566
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
16242
0
        err = MP_VAL;
16243
0
    }
16244
566
    else if (a->used >= SP_INT_DIGITS || b->used >= SP_INT_DIGITS) {
16245
2
        err = MP_VAL;
16246
2
    }
16247
564
    else if (sp_iszero(a)) {
16248
        /* GCD of 0 and 0 is undefined as all integers divide 0. */
16249
23
        if (sp_iszero(b)) {
16250
10
            err = MP_VAL;
16251
10
        }
16252
13
        else {
16253
13
            err = sp_copy(b, r);
16254
13
        }
16255
23
    }
16256
541
    else if (sp_iszero(b)) {
16257
16
        err = sp_copy(a, r);
16258
16
    }
16259
525
    else {
16260
525
        sp_int* u = NULL;
16261
525
        sp_int* v = NULL;
16262
525
        sp_int* t = NULL;
16263
525
        int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
16264
525
        DECL_SP_INT_ARRAY(d, used, 3);
16265
16266
525
        SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16267
16268
525
        ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
16269
16270
525
        if (err == MP_OKAY) {
16271
514
            u = d[0];
16272
514
            v = d[1];
16273
514
            t = d[2];
16274
514
            sp_init_size(u, used);
16275
514
            sp_init_size(v, used);
16276
514
            sp_init_size(t, used);
16277
16278
514
            if (_sp_cmp(a, b) != MP_LT) {
16279
230
                sp_copy(b, u);
16280
                /* First iteration - u = a, v = b */
16281
230
                if (b->used == 1) {
16282
196
                    err = sp_mod_d(a, b->dp[0], &v->dp[0]);
16283
196
                    if (err == MP_OKAY) {
16284
196
                        v->used = (v->dp[0] != 0);
16285
196
                    }
16286
196
                }
16287
34
                else {
16288
34
                    err = sp_mod(a, b, v);
16289
34
                }
16290
230
            }
16291
284
            else {
16292
284
                sp_copy(a, u);
16293
                /* First iteration - u = b, v = a */
16294
284
                if (a->used == 1) {
16295
232
                    err = sp_mod_d(b, a->dp[0], &v->dp[0]);
16296
232
                    if (err == MP_OKAY) {
16297
232
                        v->used = (v->dp[0] != 0);
16298
232
                    }
16299
232
                }
16300
52
                else {
16301
52
                    err = sp_mod(b, a, v);
16302
52
                }
16303
284
            }
16304
514
        }
16305
16306
525
        if (err == MP_OKAY) {
16307
#ifdef WOLFSSL_SP_INT_NEGATIVE
16308
            u->sign = MP_ZPOS;
16309
            v->sign = MP_ZPOS;
16310
#endif /* WOLFSSL_SP_INT_NEGATIVE */
16311
16312
13.0k
            while (!sp_iszero(v)) {
16313
12.5k
                if (v->used == 1) {
16314
8.02k
                    err = sp_mod_d(u, v->dp[0], &t->dp[0]);
16315
8.02k
                    if (err == MP_OKAY) {
16316
8.02k
                        t->used = (t->dp[0] != 0);
16317
8.02k
                    }
16318
8.02k
                }
16319
4.54k
                else {
16320
4.54k
                    err = sp_mod(u, v, t);
16321
4.54k
                }
16322
12.5k
                if (err != MP_OKAY) {
16323
7
                    break;
16324
7
                }
16325
12.5k
                sp_copy(v, u);
16326
12.5k
                sp_copy(t, v);
16327
12.5k
            }
16328
514
            if (err == MP_OKAY)
16329
507
                err = sp_copy(u, r);
16330
514
        }
16331
16332
525
        FREE_SP_INT_ARRAY(d, NULL);
16333
16334
525
        RESTORE_VECTOR_REGISTERS();
16335
525
    }
16336
16337
566
    return err;
16338
566
}
16339
16340
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
16341
16342
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
16343
    (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
16344
16345
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
16346
 *
16347
 * a and b are positive integers.
16348
 *
16349
 * @param  [in]   a  SP integer of first operand.
16350
 * @param  [in]   b  SP integer of second operand.
16351
 * @param  [out]  r  SP integer to hold result.
16352
 *
16353
 * @return  MP_OKAY on success.
16354
 * @return  MP_VAL when a, b or r is NULL; or a or b is zero.
16355
 * @return  MP_MEM when dynamic memory allocation fails.
16356
 */
16357
int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
16358
{
16359
    int err = MP_OKAY;
16360
    int used = ((a == NULL) || (b == NULL)) ? 1 :
16361
                   (a->used >= b->used ? a->used + 1: b->used + 1);
16362
    DECL_SP_INT_ARRAY(t, used, 2);
16363
16364
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
16365
        err = MP_VAL;
16366
    }
16367
16368
    /* LCM of 0 and any number is undefined as 0 is not in the set of values
16369
     * being used.
16370
     */
16371
    if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
16372
        err = MP_VAL;
16373
    }
16374
16375
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
16376
16377
    if (err == MP_OKAY) {
16378
        sp_init_size(t[0], used);
16379
        sp_init_size(t[1], used);
16380
16381
        SAVE_VECTOR_REGISTERS(err = _svr_ret;);
16382
16383
        if (err == MP_OKAY)
16384
            err = sp_gcd(a, b, t[0]);
16385
16386
        if (err == MP_OKAY) {
16387
            if (_sp_cmp_abs(a, b) == MP_GT) {
16388
                err = sp_div(a, t[0], t[1], NULL);
16389
                if (err == MP_OKAY) {
16390
                    err = sp_mul(b, t[1], r);
16391
                }
16392
            }
16393
            else {
16394
                err = sp_div(b, t[0], t[1], NULL);
16395
                if (err == MP_OKAY) {
16396
                    err = sp_mul(a, t[1], r);
16397
                }
16398
            }
16399
        }
16400
16401
        RESTORE_VECTOR_REGISTERS();
16402
    }
16403
16404
    FREE_SP_INT_ARRAY(t, NULL);
16405
    return err;
16406
}
16407
16408
#endif /* !NO_RSA && WOLFSSL_KEY_GEN &&
        * (!WC_RSA_BLINDING || HAVE_FIPS || HAVE_SELFTEST) */
16409
16410
/* Returns the run time settings.
16411
 *
16412
 * @return  Settings value.
16413
 */
16414
word32 CheckRunTimeSettings(void)
16415
0
{
16416
0
    /* CTC_SETTINGS is defined outside this file; the value returned is the
     * one this file was compiled with. */
    return CTC_SETTINGS;
16417
0
}
16418
16419
/* Returns the fast math settings.
16420
 *
16421
 * @return  Setting - number of bits in a digit.
16422
 */
16423
word32 CheckRunTimeFastMath(void)
16424
0
{
16425
0
    /* SP_WORD_SIZE is the digit width in bits that this file was compiled
     * with - the same value that selects the small primes table above. */
    return SP_WORD_SIZE;
16426
0
}
16427
16428
#ifdef WOLFSSL_CHECK_MEM_ZERO
16429
/* Add an MP to check.
16430
 *
16431
 * @param [in] name  Name of address to check.
16432
 * @param [in] mp    mp_int that needs to be checked.
16433
 */
16434
void sp_memzero_add(const char* name, mp_int* mp)
16435
{
16436
    wc_MemZero_Add(name, mp->dp, mp->size * sizeof(sp_digit));
16437
}
16438
16439
/* Check the memory in the data pointer for memory that must be zero.
16440
 *
16441
 * @param [in] mp    mp_int that needs to be checked.
16442
 */
16443
void sp_memzero_check(mp_int* mp)
16444
{
16445
    wc_MemZero_Check(mp->dp, mp->size * sizeof(sp_digit));
16446
}
16447
#endif /* WOLFSSL_CHECK_MEM_ZERO */
16448
16449
16450
#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */