Coverage Report

Created: 2025-07-23 06:53

/src/wolfssl/wolfcrypt/src/sp_int.c
Line
Count
Source (jump to first uncovered line)
1
/* sp_int.c
2
 *
3
 * Copyright (C) 2006-2025 wolfSSL Inc.
4
 *
5
 * This file is part of wolfSSL.
6
 *
7
 * wolfSSL is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * wolfSSL is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20
 */
21
22
/* Implementation by Sean Parkinson. */
23
24
/*
25
DESCRIPTION
26
This library provides single precision (SP) integer math functions.
27
28
*/
29
30
#include <wolfssl/wolfcrypt/libwolfssl_sources.h>
31
32
#if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
33
34
#ifdef NO_INLINE
35
    #include <wolfssl/wolfcrypt/misc.h>
36
#else
37
    #define WOLFSSL_MISC_INCLUDED
38
    #include <wolfcrypt/src/misc.c>
39
#endif
40
41
/* SP Build Options:
42
 * WOLFSSL_HAVE_SP_RSA:         Enable SP RSA support
43
 * WOLFSSL_HAVE_SP_DH:          Enable SP DH support
44
 * WOLFSSL_HAVE_SP_ECC:         Enable SP ECC support
45
 * WOLFSSL_SP_MATH:             Use only single precision math and algorithms
46
 *      it supports (no fastmath tfm.c or normal integer.c)
47
 * WOLFSSL_SP_MATH_ALL          Implementation of all MP functions
48
 *      (replacement for tfm.c and integer.c)
49
 * WOLFSSL_SP_SMALL:            Use smaller version of code and avoid large
50
 *      stack variables
51
 * WOLFSSL_SP_NO_MALLOC:        Always use stack, no heap XMALLOC/XFREE allowed
52
 * WOLFSSL_SP_NO_2048:          Disable RSA/DH 2048-bit support
53
 * WOLFSSL_SP_NO_3072:          Disable RSA/DH 3072-bit support
54
 * WOLFSSL_SP_4096:             Enable RSA/DH 4096-bit support
55
 * WOLFSSL_SP_NO_256            Disable ECC 256-bit SECP256R1 support
56
 * WOLFSSL_SP_384               Enable ECC 384-bit SECP384R1 support
57
 * WOLFSSL_SP_521               Enable ECC 521-bit SECP521R1 support
58
 * WOLFSSL_SP_ASM               Enable assembly speedups (detect platform)
59
 * WOLFSSL_SP_X86_64_ASM        Enable Intel x64 assembly implementation
60
 * WOLFSSL_SP_ARM32_ASM         Enable Aarch32 assembly implementation
61
 * WOLFSSL_SP_ARM64_ASM         Enable Aarch64 assembly implementation
62
 * WOLFSSL_SP_ARM_CORTEX_M_ASM  Enable Cortex-M assembly implementation
63
 * WOLFSSL_SP_ARM_THUMB_ASM     Enable ARM Thumb assembly implementation
64
 *      (used with -mthumb)
65
 * WOLFSSL_SP_X86_64            Enable Intel x86 64-bit assembly speedups
66
 * WOLFSSL_SP_X86               Enable Intel x86 assembly speedups
67
 * WOLFSSL_SP_ARM64             Enable Aarch64 assembly speedups
68
 * WOLFSSL_SP_ARM32             Enable ARM32 assembly speedups
69
 * WOLFSSL_SP_ARM32_UDIV        Enable word divide asm that uses UDIV instr
70
 * WOLFSSL_SP_ARM_THUMB         Enable ARM Thumb assembly speedups
71
 *                              (explicitly uses register 'r7')
72
 * WOLFSSL_SP_PPC64             Enable PPC64 assembly speedups
73
 * WOLFSSL_SP_PPC               Enable PPC assembly speedups
74
 * WOLFSSL_SP_MIPS64            Enable MIPS64 assembly speedups
75
 * WOLFSSL_SP_MIPS              Enable MIPS assembly speedups
76
 * WOLFSSL_SP_RISCV64           Enable RISCV64 assembly speedups
77
 * WOLFSSL_SP_RISCV32           Enable RISCV32 assembly speedups
78
 * WOLFSSL_SP_S390X             Enable S390X assembly speedups
79
 * SP_WORD_SIZE                 Force 32 or 64 bit mode
80
 * WOLFSSL_SP_NONBLOCK          Enables "non blocking" mode for SP math, which
81
 *      will return FP_WOULDBLOCK for long operations and function must be
82
 *      called again until complete.
83
 * WOLFSSL_SP_FAST_NCT_EXPTMOD  Enables the faster non-constant time modular
84
 *      exponentiation implementation.
85
 * WOLFSSL_SP_INT_NEGATIVE      Enables negative values to be used.
86
 * WOLFSSL_SP_INT_DIGIT_ALIGN   Enable when unaligned access of sp_int_digit
87
 *                              pointer is not allowed.
88
 * WOLFSSL_SP_NO_DYN_STACK      Disable use of dynamic stack items.
89
 *                              Dynamic arrays used when not small stack.
90
 * WOLFSSL_SP_FAST_MODEXP       Allow fast mod_exp with small C code
91
 * WOLFSSL_SP_LOW_MEM           Use algorithms that use less memory.
92
 */
93
94
/* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os.
 * Workaround: drop the option for every clang major version >= 12. The test
 * below does not look at the optimization level, so the option is removed
 * even when -Os is not in use. */
95
#if defined(__clang__) && defined(__clang_major__) && \
96
    (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
97
    #undef WOLFSSL_SP_SMALL
98
#endif
99
100
#include <wolfssl/wolfcrypt/sp_int.h>
101
102
#ifdef WOLFSSL_SP_DYN_STACK
103
/* We are statically declaring a variable smaller than sp_int.
104
 * We track available memory in the 'size' field.
105
 * Disable warnings of sp_int being partly outside array bounds of variable.
 * NOTE(review): the diagnostic state is pushed here; the matching pop is not
 * visible in this part of the file - confirm it exists further down.
106
 */
107
    PRAGMA_GCC_DIAG_PUSH
108
    PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"")
109
#endif
110
111
#if defined(WOLFSSL_LINUXKM) && !defined(WOLFSSL_SP_ASM)
112
    /* Linux kernel module build without SP assembly: force off unneeded
     * vector register save/restore by redefining both macros as no-ops. */
113
    #undef SAVE_VECTOR_REGISTERS
114
    #define SAVE_VECTOR_REGISTERS(fail_clause) WC_DO_NOTHING
115
    #undef RESTORE_VECTOR_REGISTERS
116
    #define RESTORE_VECTOR_REGISTERS() WC_DO_NOTHING
117
#endif
118
119
/* DECL_SP_INT: Declare one variable of type 'sp_int'.
 *   n  Name of the variable; usable as an sp_int* in every variant.
 *   s  Number of digits required; only the WOLFSSL_SP_DYN_STACK variant
 *      uses it.
 */
119
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
120
    !defined(WOLFSSL_SP_NO_MALLOC)
121
    /* Heap variant: declare a NULL pointer that will be assigned a value on
     * XMALLOC. */
122
    #define DECL_SP_INT(n, s)   \
123
        sp_int* n = NULL
124
#else
125
    #ifdef WOLFSSL_SP_DYN_STACK
126
        /* Declare digit array n##d on the stack with the required data size
         * and point n at it. */
127
        #define DECL_SP_INT(n, s)                       \
128
0
            sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s)]; \
129
0
            sp_int* (n) = (sp_int*)n##d
130
    #else
131
        /* Declare a single sp_int on the stack; as a one element array, n
         * decays to sp_int*. */
132
        #define DECL_SP_INT(n, s)               \
133
            sp_int n[1]
134
    #endif
135
#endif
137
138
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size.
 *   n    Variable to set up (see DECL_SP_INT).
 *   s    Number of digits required; more than SP_INT_DIGITS sets err to
 *        MP_VAL.
 *   err  Error variable; nothing is done unless it is MP_OKAY on entry.
 *   h    Heap hint passed to XMALLOC; not used by the stack variant.
 */
139
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
140
    !defined(WOLFSSL_SP_NO_MALLOC)
141
    /* Dynamically allocate just enough data to support size.
     * err is set to MP_MEM when XMALLOC returns NULL. */
142
    #define ALLOC_SP_INT(n, s, err, h)                                         \
143
    do {                                                                       \
144
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
145
            (err) = MP_VAL;                                                    \
146
        }                                                                      \
147
        if ((err) == MP_OKAY) {                                                \
148
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h),                      \
149
                DYNAMIC_TYPE_BIGINT);                                          \
150
            if ((n) == NULL) {                                                 \
151
                (err) = MP_MEM;                                                \
152
            }                                                                  \
153
        }                                                                      \
154
    }                                                                          \
155
    while (0)
156
157
    /* Dynamically allocate just enough data to support size - and set size.
     * The size field is only written when err is still MP_OKAY afterwards. */
158
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
159
    do {                                                                       \
160
        ALLOC_SP_INT(n, s, err, h);                                            \
161
        if ((err) == MP_OKAY) {                                                \
162
            (n)->size = (sp_size_t)(s);                                        \
163
        }                                                                      \
164
    }                                                                          \
165
    while (0)
166
#else
167
    /* Array declared on stack - check size is valid.
     * Only err is touched; n and h are not referenced. SP_INT_DIGITS is cast
     * to int for the comparison in this variant. */
168
    #define ALLOC_SP_INT(n, s, err, h)                                         \
169
0
    do {                                                                       \
170
0
        if (((err) == MP_OKAY) && ((s) > (int)SP_INT_DIGITS)) {                \
171
0
            (err) = MP_VAL;                                                    \
172
0
        }                                                                      \
173
0
    }                                                                          \
174
0
    while (0)
175
176
    /* Array declared on stack - set the size field.
     * The size field is only written when the size check passed. */
177
    #define ALLOC_SP_INT_SIZE(n, s, err, h)                                    \
178
0
    do {                                                                       \
179
0
        ALLOC_SP_INT(n, s, err, h);                                            \
180
0
        if ((err) == MP_OKAY) {                                                \
181
0
            (n)->size = (sp_size_t)(s);                                        \
182
0
        }                                                                      \
183
0
    }                                                                          \
184
0
    while (0)
185
#endif
186
187
/* FREE_SP_INT: Free an 'sp_int' variable.
 *   n  Variable to dispose of.
 *   h  Heap hint passed to XFREE; not used by the stack variant.
 */
188
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
189
    !defined(WOLFSSL_SP_NO_MALLOC)
190
    /* Free dynamically allocated data. XFREE is skipped when n is NULL.
     * n itself is not reset to NULL afterwards. */
191
    #define FREE_SP_INT(n, h)                   \
192
    do {                                        \
193
        if ((n) != NULL) {                      \
194
            XFREE(n, h, DYNAMIC_TYPE_BIGINT);   \
195
        }                                       \
196
    }                                           \
197
    while (0)
198
#else
199
    /* Nothing to do as declared on stack. */
200
0
    #define FREE_SP_INT(n, h) WC_DO_NOTHING
201
#endif
202
203
204
/* Declare a variable that will be assigned a value on XMALLOC.
 * Declares the data pointer n##d (initially NULL) and the array n of c
 * sp_int pointers. n##_dummy_var exists only so that its initializer can
 * run XMEMSET to zero the pointer array as part of the declarations.
 * s is not used here. */
205
#define DECL_DYN_SP_INT_ARRAY(n, s, c)               \
206
0
    sp_int* n##d = NULL;                             \
207
0
    sp_int* (n)[c];                                  \
208
0
    void *n ## _dummy_var = XMEMSET(n, 0, sizeof(n))
209
210
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'.
 *   n  Name of the array of sp_int pointers; backing data is named n##d.
 *   s  Number of digits per entry; only the WOLFSSL_SP_DYN_STACK variant
 *      uses it.
 *   c  Number of entries in the array.
 */
211
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
212
    !defined(WOLFSSL_SP_NO_MALLOC)
213
    /* Declare a variable that will be assigned a value on XMALLOC. */
214
    #define DECL_SP_INT_ARRAY(n, s, c)  \
215
        DECL_DYN_SP_INT_ARRAY(n, s, c)
216
#elif defined(WOLFSSL_SP_DYN_STACK)
217
    /* Declare a variable on the stack with the required data size.
     * One digit array holds the data for all c entries. */
218
    #define DECL_SP_INT_ARRAY(n, s, c)                    \
219
0
        sp_int_digit n##d[MP_INT_SIZEOF_DIGITS(s) * (c)]; \
220
0
        sp_int* (n)[c] = { NULL, }
221
#else
222
    /* Declare a variable on the stack.
     * c whole sp_int objects back the array; the pointer array itself is
     * left uninitialized by this declaration. */
223
    #define DECL_SP_INT_ARRAY(n, s, c)      \
224
        sp_int n##d[c];                     \
225
        sp_int* (n)[c]
226
#endif
227
228
/* Dynamically allocate just enough data to support multiple sp_ints of the
229
 * required size. Use pointers into data to make up array and set sizes.
 *   n    Array declared with DECL_DYN_SP_INT_ARRAY.
 *   s    Number of digits per entry; more than SP_INT_DIGITS sets err to
 *        MP_VAL.
 *   c    Number of entries.
 *   err  Error variable; nothing is done unless it is MP_OKAY on entry.
 *        Set to MP_MEM when XMALLOC returns NULL.
 *   h    Heap hint passed to XMALLOC.
 * A single XMALLOC of MP_INT_SIZEOF(s) * c bytes backs every entry; entry i
 * is located with MP_INT_NEXT from entry i-1.
230
 */
231
0
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)                                \
232
0
do {                                                                           \
233
0
    (void)n ## _dummy_var;                                                     \
234
0
    if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                         \
235
0
        (err) = MP_VAL;                                                        \
236
0
    }                                                                          \
237
0
    if ((err) == MP_OKAY) {                                                    \
238
0
        n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h),                   \
239
0
                                                         DYNAMIC_TYPE_BIGINT); \
240
0
        if (n##d == NULL) {                                                    \
241
0
            (err) = MP_MEM;                                                    \
242
0
        }                                                                      \
243
0
        else {                                                                 \
244
0
            int n##ii;                                                         \
245
0
            (n)[0] = n##d;                                                     \
246
0
            (n)[0]->size = (sp_size_t)(s);                                     \
247
0
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
248
0
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
249
0
                (n)[n##ii]->size = (sp_size_t)(s);                             \
250
0
            }                                                                  \
251
0
        }                                                                      \
252
0
    }                                                                          \
253
0
}                                                                              \
254
0
while (0)
255
256
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size.
 *   n    Array declared with DECL_SP_INT_ARRAY.
 *   s    Number of digits per entry; more than SP_INT_DIGITS sets err to
 *        MP_VAL.
 *   c    Number of entries.
 *   err  Error variable; nothing is done unless it is MP_OKAY on entry.
 *   h    Heap hint; only the dynamically allocating variant uses it.
 */
257
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
258
    !defined(WOLFSSL_SP_NO_MALLOC)
259
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
260
        ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
261
#elif defined(WOLFSSL_SP_DYN_STACK)
262
    /* Data declared on stack that supports multiple sp_ints of the
263
     * required size. Use pointers into data to make up array and set sizes.
     * The size field is written through an sp_int_minimal pointer.
264
     */
265
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
266
0
    do {                                                                       \
267
0
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
268
0
            (err) = MP_VAL;                                                    \
269
0
        }                                                                      \
270
0
        if ((err) == MP_OKAY) {                                                \
271
0
            int n##ii;                                                         \
272
0
            (n)[0] = (sp_int*)n##d;                                            \
273
0
            ((sp_int_minimal*)(n)[0])->size = (sp_size_t)(s);                  \
274
0
            for (n##ii = 1; n##ii < (int)(c); n##ii++) {                       \
275
0
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s);                     \
276
0
                ((sp_int_minimal*)(n)[n##ii])->size = (sp_size_t)(s);          \
277
0
            }                                                                  \
278
0
        }                                                                      \
279
0
    }                                                                          \
280
0
    while (0)
281
#else
282
    /* Data declared on stack that supports multiple sp_ints of the
283
     * required size. Set into array and set sizes.
     * Entry i points at the i-th element of the n##d array.
284
     */
285
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h)                                \
286
    do {                                                                       \
287
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) {                     \
288
            (err) = MP_VAL;                                                    \
289
        }                                                                      \
290
        if ((err) == MP_OKAY) {                                                \
291
            int n##ii;                                                         \
292
            for (n##ii = 0; n##ii < (int)(c); n##ii++) {                       \
293
                (n)[n##ii] = &n##d[n##ii];                                     \
294
                (n)[n##ii]->size = (sp_size_t)(s);                             \
295
            }                                                                  \
296
        }                                                                      \
297
    }                                                                          \
298
    while (0)
299
#endif
300
301
/* Free data variable that was dynamically allocated.
 * Releases the single n##d allocation backing the array; XFREE is skipped
 * when n##d is NULL. Neither n##d nor the pointers in n are reset. */
302
0
#define FREE_DYN_SP_INT_ARRAY(n, h)             \
303
0
do {                                            \
304
0
    if (n##d != NULL) {                         \
305
0
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT);    \
306
0
    }                                           \
307
0
}                                               \
308
0
while (0)
309
310
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'.
 *   n  Array to dispose of.
 *   h  Heap hint; only the dynamically allocating variant uses it.
 */
311
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
312
    !defined(WOLFSSL_SP_NO_MALLOC)
313
    #define FREE_SP_INT_ARRAY(n, h)                 \
314
        FREE_DYN_SP_INT_ARRAY(n, h)
315
#else
316
    /* Nothing to do as data declared on stack. */
317
0
    #define FREE_SP_INT_ARRAY(n, h) WC_DO_NOTHING
318
#endif
319
320
321
#ifndef WOLFSSL_NO_ASM
322
    /* Map the __asm__ / __volatile__ spellings used by the assembly macros
     * onto the keywords used for the IAR compiler. */
    #ifdef __IAR_SYSTEMS_ICC__
323
        #define __asm__        asm
324
        #define __volatile__   volatile
325
    #endif /* __IAR_SYSTEMS_ICC__ */
326
    /* Same mapping for the Keil compiler, which uses __asm. */
    #ifdef __KEIL__
327
        #define __asm__        __asm
328
        #define __volatile__   volatile
329
    #endif /* __KEIL__ */
330
331
    #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
332
/*
333
 * CPU: x86_64
334
 */
335
336
#ifndef _MSC_VER
337
/* Multiply va by vb and store double size result in: vh | vl
 * vb is loaded into rax and multiplied by va with mulq; rax is then copied
 * to vl and rdx to vh. rax, rdx and the flags are declared as clobbered.
 * vl and vh are "+r" operands although their previous values are not read. */
338
#define SP_ASM_MUL(vl, vh, va, vb)                       \
339
0
    __asm__ __volatile__ (                               \
340
0
        "movq %[b], %%rax \n\t"                    \
341
0
        "mulq %[a]    \n\t"                    \
342
0
        "movq %%rax, %[l] \n\t"                    \
343
0
        "movq %%rdx, %[h] \n\t"                    \
344
0
        : [h] "+r" (vh), [l] "+r" (vl)                   \
345
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
346
0
        : "%rax", "%rdx", "cc"                           \
347
0
    )
348
/* Multiply va by vb and store double size result in: vo | vh | vl
 * vo is always set to zero; vl and vh receive rax and rdx after the mulq.
 * Unlike the sibling macros, va and vb use the "m" constraint here, so both
 * are passed to the assembly as memory operands. */
349
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
350
0
    __asm__ __volatile__ (                               \
351
0
        "movq %[b], %%rax \n\t"                    \
352
0
        "mulq %[a]    \n\t"                    \
353
0
        "movq $0   , %[o] \n\t"                    \
354
0
        "movq %%rax, %[l] \n\t"                    \
355
0
        "movq %%rdx, %[h] \n\t"                    \
356
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
357
0
        : [a] "m" (va), [b] "m" (vb)                     \
358
0
        : "%rax", "%rdx", "cc"                           \
359
0
    )
360
/* Multiply va by vb and add double size result into: vo | vh | vl
 * rax (low product word) is added to vl, rdx (high product word) is added
 * with carry to vh, and the remaining carry is added to vo. */
361
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
362
0
    __asm__ __volatile__ (                               \
363
0
        "movq %[b], %%rax \n\t"                    \
364
0
        "mulq %[a]    \n\t"                    \
365
0
        "addq %%rax, %[l] \n\t"                    \
366
0
        "adcq %%rdx, %[h] \n\t"                    \
367
0
        "adcq $0   , %[o] \n\t"                    \
368
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
369
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
370
0
        : "%rax", "%rdx", "cc"                           \
371
0
    )
372
/* Multiply va by vb and add double size result into: vh | vl
 * There is no third word: a carry out of the addition into vh is not
 * captured. */
373
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
374
0
    __asm__ __volatile__ (                               \
375
0
        "movq %[b], %%rax \n\t"                    \
376
0
        "mulq %[a]    \n\t"                    \
377
0
        "addq %%rax, %[l] \n\t"                    \
378
0
        "adcq %%rdx, %[h] \n\t"                    \
379
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
380
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
381
0
        : "%rax", "%rdx", "cc"                           \
382
0
    )
383
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * The product is computed once; the add/adc/adc sequence on rax and rdx is
 * then performed two times, with the carry going into vo each time. */
384
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
385
0
    __asm__ __volatile__ (                               \
386
0
        "movq %[b], %%rax \n\t"                    \
387
0
        "mulq %[a]    \n\t"                    \
388
0
        "addq %%rax, %[l] \n\t"                    \
389
0
        "adcq %%rdx, %[h] \n\t"                    \
390
0
        "adcq $0   , %[o] \n\t"                    \
391
0
        "addq %%rax, %[l] \n\t"                    \
392
0
        "adcq %%rdx, %[h] \n\t"                    \
393
0
        "adcq $0   , %[o] \n\t"                    \
394
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
395
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
396
0
        : "%rax", "%rdx", "cc"                           \
397
0
    )
398
/* Multiply va by vb and add double size result twice into: vo | vh | vl
399
 * Assumes first add will not overflow vh | vl
 * Only the second add propagates its carry into vo; a carry out of the
 * first add would be lost.
400
 */
401
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
402
0
    __asm__ __volatile__ (                               \
403
0
        "movq %[b], %%rax \n\t"                    \
404
0
        "mulq %[a]    \n\t"                    \
405
0
        "addq %%rax, %[l] \n\t"                    \
406
0
        "adcq %%rdx, %[h] \n\t"                    \
407
0
        "addq %%rax, %[l] \n\t"                    \
408
0
        "adcq %%rdx, %[h] \n\t"                    \
409
0
        "adcq $0   , %[o] \n\t"                    \
410
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
411
0
        : [a] "rm" (va), [b] "rm" (vb)                   \
412
0
        : "%rax", "%rdx", "cc"                           \
413
0
    )
414
/* Square va and store double size result in: vh | vl
 * va is loaded into rax and multiplied by itself (mulq %%rax); rax is then
 * copied to vl and rdx to vh. */
415
#define SP_ASM_SQR(vl, vh, va)                           \
416
0
    __asm__ __volatile__ (                               \
417
0
        "movq %[a], %%rax \n\t"                    \
418
0
        "mulq %%rax   \n\t"                    \
419
0
        "movq %%rax, %[l] \n\t"                    \
420
0
        "movq %%rdx, %[h] \n\t"                    \
421
0
        : [h] "+r" (vh), [l] "+r" (vl)                   \
422
0
        : [a] "rm" (va)                                  \
423
0
        : "%rax", "%rdx", "cc"                           \
424
0
    )
425
/* Square va and add double size result into: vo | vh | vl
 * rax is added to vl, rdx is added with carry to vh, and the remaining
 * carry is added to vo. */
426
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
427
0
    __asm__ __volatile__ (                               \
428
0
        "movq %[a], %%rax \n\t"                    \
429
0
        "mulq %%rax   \n\t"                    \
430
0
        "addq %%rax, %[l] \n\t"                    \
431
0
        "adcq %%rdx, %[h] \n\t"                    \
432
0
        "adcq $0   , %[o] \n\t"                    \
433
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
434
0
        : [a] "rm" (va)                                  \
435
0
        : "%rax", "%rdx", "cc"                           \
436
0
    )
437
/* Square va and add double size result into: vh | vl
 * There is no third word: a carry out of the addition into vh is not
 * captured. */
438
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
439
0
    __asm__ __volatile__ (                               \
440
0
        "movq %[a], %%rax \n\t"                    \
441
0
        "mulq %%rax   \n\t"                    \
442
0
        "addq %%rax, %[l] \n\t"                    \
443
0
        "adcq %%rdx, %[h] \n\t"                    \
444
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
445
0
        : [a] "rm" (va)                                  \
446
0
        : "%rax", "%rdx", "cc"                           \
447
0
    )
448
/* Add va into: vh | vl
 * va is added to vl and the carry is added to vh; a carry out of vh is not
 * captured. va may be a register or a memory operand. */
449
#define SP_ASM_ADDC(vl, vh, va)                          \
450
0
    __asm__ __volatile__ (                               \
451
0
        "addq %[a], %[l]  \n\t"                    \
452
0
        "adcq $0  , %[h]  \n\t"                    \
453
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
454
0
        : [a] "rm" (va)                                  \
455
0
        : "cc"                                           \
456
0
    )
457
/* Add va, variable in a register, into: vh | vl
 * Same instructions as SP_ASM_ADDC but va is constrained to a register. */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
458
0
    __asm__ __volatile__ (                               \
459
0
        "addq %[a], %[l]  \n\t"                    \
460
0
        "adcq $0  , %[h]  \n\t"                    \
461
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
462
0
        : [a] "r" (va)                                   \
463
0
        : "cc"                                           \
464
0
    )
465
/* Sub va from: vh | vl
 * va is subtracted from vl and the borrow is subtracted from vh; a borrow
 * out of vh is not captured. va may be a register or a memory operand. */
466
#define SP_ASM_SUBB(vl, vh, va)                          \
467
0
    __asm__ __volatile__ (                               \
468
0
        "subq %[a], %[l]  \n\t"                    \
469
0
        "sbbq $0  , %[h]  \n\t"                    \
470
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
471
0
        : [a] "rm" (va)                                  \
472
0
        : "cc"                                           \
473
0
    )
474
/* Sub va, variable in a register, from: vh | vl
 * Same instructions as SP_ASM_SUBB but va is constrained to a register. */
475
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
476
0
    __asm__ __volatile__ (                               \
477
0
        "subq %[a], %[l]  \n\t"                    \
478
0
        "sbbq $0  , %[h]  \n\t"                    \
479
0
        : [l] "+r" (vl), [h] "+r" (vh)                   \
480
0
        : [a] "r" (va)                                   \
481
0
        : "cc"                                           \
482
0
    )
483
/* Add two times vc | vb | va into vo | vh | vl
 * Implemented as two identical add/adc/adc passes rather than a doubling.
 * All three inputs are constrained to registers; a carry out of vo is not
 * captured. */
484
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
485
0
    __asm__ __volatile__ (                               \
486
0
        "addq %[a], %[l]  \n\t"                    \
487
0
        "adcq %[b], %[h]  \n\t"                    \
488
0
        "adcq %[c], %[o]  \n\t"                    \
489
0
        "addq %[a], %[l]  \n\t"                    \
490
0
        "adcq %[b], %[h]  \n\t"                    \
491
0
        "adcq %[c], %[o]  \n\t"                    \
492
0
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
493
0
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
494
0
        : "cc"                                           \
495
0
    )
496
/* Index of highest bit set.
 * Uses the bsr instruction on va and writes the result to vi.
 * NOTE(review): per the Intel SDM, bsr leaves its destination undefined when
 * the source is zero - presumably callers never pass va == 0; confirm. */
497
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
498
0
    __asm__ __volatile__ (                               \
499
0
        "bsr  %[a], %[i]  \n\t"                    \
500
0
        : [i] "=r" (vi)                                  \
501
0
        : [a] "r" (va)                                   \
502
0
        : "cc"                                           \
503
0
    )
504
#else
505
#include <intrin.h>
506
507
/* Multiply va by vb and store double size result in: vh | vl
 * MSVC variant: the value returned by _umul128 is stored in vl and the
 * other half of the product is written through &vh.
 * Expands to a bare assignment expression (no do/while wrapper). */
508
#define SP_ASM_MUL(vl, vh, va, vb)                       \
509
    vl = _umul128(va, vb, &vh)
510
511
/* Multiply va by vb and store double size result in: vo | vh | vl
 * MSVC variant: the product is stored with _umul128 and vo is set to 0. */
512
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
513
    do {                                                 \
514
        vl = _umul128(va, vb, &vh);                      \
515
        vo = 0;                                          \
516
    }                                                    \
517
    while (0)
518
519
/* Multiply va by vb and add double size result into: vo | vh | vl
 * MSVC variant: vtl/vth hold the product and c passes the carry from one
 * _addcarry_u64 call to the next; the carry out of vo is discarded.
 * The locals vtl, vth and c are declared inside the expansion, so arguments
 * spelled with those names would be shadowed. */
520
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
521
    do {                                                 \
522
        unsigned __int64 vtl, vth;                       \
523
        unsigned char c;                                 \
524
        vtl = _umul128(va, vb, &vth);                    \
525
        c = _addcarry_u64(0, vl, vtl, &vl);              \
526
        c = _addcarry_u64(c, vh, vth, &vh);              \
527
            _addcarry_u64(c, vo,   0, &vo);              \
528
    }                                                    \
529
    while (0)
530
531
/* Multiply va by vb and add double size result into: vh | vl
 * MSVC variant: the carry returned by the second _addcarry_u64 call is
 * discarded - there is no third word. */
532
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
533
    do {                                                 \
534
        unsigned __int64 vtl, vth;                       \
535
        unsigned char c;                                 \
536
        vtl = _umul128(va, vb, &vth);                    \
537
        c = _addcarry_u64(0, vl, vtl, &vl);              \
538
            _addcarry_u64(c, vh, vth, &vh);              \
539
    }                                                    \
540
    while (0)
541
542
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * MSVC variant: the product is computed once into vtl/vth and the three
 * step carry chain is run two times. */
543
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
544
    do {                                                 \
545
        unsigned __int64 vtl, vth;                       \
546
        unsigned char c;                                 \
547
        vtl = _umul128(va, vb, &vth);                    \
548
        c = _addcarry_u64(0, vl, vtl, &vl);              \
549
        c = _addcarry_u64(c, vh, vth, &vh);              \
550
            _addcarry_u64(c, vo,   0, &vo);              \
551
        c = _addcarry_u64(0, vl, vtl, &vl);              \
552
        c = _addcarry_u64(c, vh, vth, &vh);              \
553
            _addcarry_u64(c, vo,   0, &vo);              \
554
    }                                                    \
555
    while (0)
556
/* Multiply va by vb and add double size result twice into: vo | vh | vl
557
 * Assumes first add will not overflow vh | vl
 * MSVC variant: the carry out of the first add into vh is discarded; only
 * the second add propagates its carry into vo.
558
 */
559
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
560
    do {                                                 \
561
        unsigned __int64 vtl, vth;                       \
562
        unsigned char c;                                 \
563
        vtl = _umul128(va, vb, &vth);                    \
564
        c = _addcarry_u64(0, vl, vtl, &vl);              \
565
            _addcarry_u64(c, vh, vth, &vh);              \
566
        c = _addcarry_u64(0, vl, vtl, &vl);              \
567
        c = _addcarry_u64(c, vh, vth, &vh);              \
568
            _addcarry_u64(c, vo,   0, &vo);              \
569
    }                                                    \
570
    while (0)
571
572
 /* Square va and store double size result in: vh | vl
  * MSVC variant: _umul128 is called with va as both factors.
  * Expands to a bare assignment expression (no do/while wrapper). */
573
#define SP_ASM_SQR(vl, vh, va)                           \
574
    vl = _umul128(va, va, &vh)
575
576
/* Square va and add double size result into: vo | vh | vl
 * MSVC variant: vtl/vth hold the square and c passes the carry from one
 * _addcarry_u64 call to the next; the carry out of vo is discarded. */
577
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
578
    do {                                                 \
579
        unsigned __int64 vtl, vth;                       \
580
        unsigned char c;                                 \
581
        vtl = _umul128(va, va, &vth);                    \
582
        c = _addcarry_u64(0, vl, vtl, &vl);              \
583
        c = _addcarry_u64(c, vh, vth, &vh);              \
584
            _addcarry_u64(c, vo,   0, &vo);              \
585
    }                                                    \
586
    while (0)
587
588
/* Square va and add double size result into: vh | vl
 * MSVC variant: the carry returned by the second _addcarry_u64 call is
 * discarded - there is no third word. */
589
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
590
    do {                                                 \
591
        unsigned __int64 vtl, vth;                       \
592
        unsigned char c;                                 \
593
        vtl = _umul128(va, va, &vth);                    \
594
        c = _addcarry_u64(0, vl, vtl, &vl);              \
595
            _addcarry_u64(c, vh, vth, &vh);              \
596
    }                                                    \
597
    while (0)
598
599
/* Add with carry: (vh | vl) += va
 *
 * va is added to vl and the resulting carry is added to vh. A carry out of
 * vh is discarded.
 */
#define SP_ASM_ADDC(vl, vh, va)                                          \
    do {                                                                 \
        unsigned char sp_cy_ = _addcarry_u64(0, vl, va, &vl);            \
        (void)_addcarry_u64(sp_cy_, vh, 0, &vh);                         \
    } while (0)
607
608
/* Add with carry, register operand variant: (vh | vl) += va
 *
 * With intrinsics there is no operand-placement distinction, so this expands
 * to the same two-step add as SP_ASM_ADDC.
 */
#define SP_ASM_ADDC_REG(vl, vh, va)                                      \
    do {                                                                 \
        unsigned char sp_cy_ = _addcarry_u64(0, vl, va, &vl);            \
        (void)_addcarry_u64(sp_cy_, vh, 0, &vh);                         \
    } while (0)
616
617
/* Subtract with borrow: (vh | vl) -= va
 *
 * va is subtracted from vl and the resulting borrow is subtracted from vh.
 * A borrow out of vh is discarded.
 */
#define SP_ASM_SUBB(vl, vh, va)                                          \
    do {                                                                 \
        unsigned char sp_bw_ = _subborrow_u64(0, vl, va, &vl);           \
        (void)_subborrow_u64(sp_bw_, vh, 0, &vh);                        \
    } while (0)
625
626
/* Doubled three-word add: (vo | vh | vl) += 2 * (vc | vb | va)
 *
 * Implemented as two identical add passes, each with its own carry chain
 * from vl through vh into vo. A carry out of vo is discarded.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)                         \
    do {                                                                 \
        unsigned char sp_cy_;                                            \
        /* First pass. */                                                \
        sp_cy_ = _addcarry_u64(0, vl, va, &vl);                          \
        sp_cy_ = _addcarry_u64(sp_cy_, vh, vb, &vh);                     \
        (void)_addcarry_u64(sp_cy_, vo, vc, &vo);                        \
        /* Second pass. */                                               \
        sp_cy_ = _addcarry_u64(0, vl, va, &vl);                          \
        sp_cy_ = _addcarry_u64(sp_cy_, vh, vb, &vh);                     \
        (void)_addcarry_u64(sp_cy_, vo, vc, &vo);                        \
    } while (0)
638
/* Index of highest bit set.
 *
 * Stores in vi the bit index found by _BitScanReverse64 for va. The
 * intrinsic's return value (which reports whether any bit was set) is
 * ignored, so vi is only meaningful when va is non-zero.
 */
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                                    \
    do {                                                                 \
        unsigned long sp_bit_;                                           \
        (void)_BitScanReverse64(&sp_bit_, va);                           \
        vi = sp_bit_;                                                    \
    } while (0)
646
#endif
647
648
#if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
649
    _MSC_VER >= 1920)
650
/* Divide a two digit number by a digit number and return. (hi | lo) / d
651
 *
652
 * Using divq instruction on Intel x64.
653
 *
654
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
655
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
656
 * @param  [in]  d   SP integer digit. Number to divide by.
657
 * @return  The division result.
658
 */
659
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
660
                                          sp_int_digit d)
661
0
{
662
0
#ifndef _MSC_VER
663
0
    __asm__ __volatile__ (
664
0
        "divq %2"
665
0
        : "+a" (lo)
666
0
        : "d" (hi), "r" (d)
667
0
        : "cc"
668
0
    );
669
0
    return lo;
670
#elif defined(_MSC_VER) && _MSC_VER >= 1920
671
    return _udiv128(hi, lo, d, NULL);
672
#endif
673
0
}
674
#define SP_ASM_DIV_WORD
675
#endif
676
677
#define SP_INT_ASM_AVAILABLE
678
679
    #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
680
681
    #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
682
/*
683
 * CPU: x86
684
 */
685
686
/* Multiply va by vb and store double size result in: vh | vl
 *
 * x86 32-bit: vb is loaded into eax, mull multiplies eax by va, and the
 * edx:eax product is copied into vh:vl. eax and edx are declared clobbered.
 *
 * vl  Receives the low word of the product.
 * vh  Receives the high word of the product.
 * va  First multiplicand (register or memory operand).
 * vb  Second multiplicand (register or memory operand).
 */
687
#define SP_ASM_MUL(vl, vh, va, vb)                       \
688
    __asm__ __volatile__ (                               \
689
        "movl %[b], %%eax \n\t"                    \
690
        "mull %[a]    \n\t"                    \
691
        "movl %%eax, %[l] \n\t"                    \
692
        "movl %%edx, %[h] \n\t"                    \
693
        : [h] "+r" (vh), [l] "+r" (vl)                   \
694
        : [a] "rm" (va), [b] "rm" (vb)                   \
695
        : "eax", "edx", "cc"                             \
696
    )
697
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * x86 32-bit: same multiply as SP_ASM_MUL, and vo is set to zero.
 * va and vb use memory-only ("m") constraints here, unlike the "rm"
 * constraints of the other x86 multiply macros.
 *
 * vl  Receives the low word of the product.
 * vh  Receives the high word of the product.
 * vo  Receives zero (overflow word).
 */
698
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
699
    __asm__ __volatile__ (                               \
700
        "movl %[b], %%eax \n\t"                    \
701
        "mull %[a]    \n\t"                    \
702
        "movl $0   , %[o] \n\t"                    \
703
        "movl %%eax, %[l] \n\t"                    \
704
        "movl %%edx, %[h] \n\t"                    \
705
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
706
        : [a] "m" (va), [b] "m" (vb)                     \
707
        : "eax", "edx", "cc"                             \
708
    )
709
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * x86 32-bit: the edx:eax product is added to vh:vl with addl/adcl and the
 * final carry is added into vo with adcl $0.
 */
710
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
711
    __asm__ __volatile__ (                               \
712
        "movl %[b], %%eax \n\t"                    \
713
        "mull %[a]    \n\t"                    \
714
        "addl %%eax, %[l] \n\t"                    \
715
        "adcl %%edx, %[h] \n\t"                    \
716
        "adcl $0   , %[o] \n\t"                    \
717
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
718
        : [a] "rm" (va), [b] "rm" (vb)                   \
719
        : "eax", "edx", "cc"                             \
720
    )
721
/* Multiply va by vb and add double size result into: vh | vl
 *
 * x86 32-bit: the edx:eax product is added to vh:vl with addl/adcl. There is
 * no overflow word, so a carry out of vh is not recorded.
 */
722
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
723
    __asm__ __volatile__ (                               \
724
        "movl %[b], %%eax \n\t"                    \
725
        "mull %[a]    \n\t"                    \
726
        "addl %%eax, %[l] \n\t"                    \
727
        "adcl %%edx, %[h] \n\t"                    \
728
        : [l] "+r" (vl), [h] "+r" (vh)                   \
729
        : [a] "rm" (va), [b] "rm" (vb)                   \
730
        : "eax", "edx", "cc"                             \
731
    )
732
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * x86 32-bit: the product is computed once (edx:eax) and the
 * addl/adcl/adcl $0 sequence is executed twice, each time propagating the
 * carry through vh into vo.
 */
733
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
734
    __asm__ __volatile__ (                               \
735
        "movl %[b], %%eax \n\t"                    \
736
        "mull %[a]    \n\t"                    \
737
        "addl %%eax, %[l] \n\t"                    \
738
        "adcl %%edx, %[h] \n\t"                    \
739
        "adcl $0   , %[o] \n\t"                    \
740
        "addl %%eax, %[l] \n\t"                    \
741
        "adcl %%edx, %[h] \n\t"                    \
742
        "adcl $0   , %[o] \n\t"                    \
743
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
744
        : [a] "rm" (va), [b] "rm" (vb)                   \
745
        : "eax", "edx", "cc"                             \
746
    )
747
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * x86 32-bit: the first addl/adcl pair has no following adcl into vo, so a
 * carry out of vh from the first add is lost. Only the second add propagates
 * its carry into vo.
 */
750
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
751
    __asm__ __volatile__ (                               \
752
        "movl %[b], %%eax \n\t"                    \
753
        "mull %[a]    \n\t"                    \
754
        "addl %%eax, %[l] \n\t"                    \
755
        "adcl %%edx, %[h] \n\t"                    \
756
        "addl %%eax, %[l] \n\t"                    \
757
        "adcl %%edx, %[h] \n\t"                    \
758
        "adcl $0   , %[o] \n\t"                    \
759
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
760
        : [a] "rm" (va), [b] "rm" (vb)                   \
761
        : "eax", "edx", "cc"                             \
762
    )
763
/* Square va and store double size result in: vh | vl
 *
 * x86 32-bit: va is loaded into eax and multiplied by itself with
 * mull %eax; the edx:eax result is copied into vh:vl.
 */
764
#define SP_ASM_SQR(vl, vh, va)                           \
765
    __asm__ __volatile__ (                               \
766
        "movl %[a], %%eax \n\t"                    \
767
        "mull %%eax   \n\t"                    \
768
        "movl %%eax, %[l] \n\t"                    \
769
        "movl %%edx, %[h] \n\t"                    \
770
        : [h] "+r" (vh), [l] "+r" (vl)                   \
771
        : [a] "rm" (va)                                  \
772
        : "eax", "edx", "cc"                             \
773
    )
774
/* Square va and add double size result into: vo | vh | vl
 *
 * x86 32-bit: the edx:eax square is added to vh:vl and the final carry is
 * added into vo.
 */
775
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
776
    __asm__ __volatile__ (                               \
777
        "movl %[a], %%eax \n\t"                    \
778
        "mull %%eax   \n\t"                    \
779
        "addl %%eax, %[l] \n\t"                    \
780
        "adcl %%edx, %[h] \n\t"                    \
781
        "adcl $0   , %[o] \n\t"                    \
782
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
783
        : [a] "rm" (va)                                  \
784
        : "eax", "edx", "cc"                             \
785
    )
786
/* Square va and add double size result into: vh | vl
 *
 * x86 32-bit: the edx:eax square is added to vh:vl. There is no overflow
 * word, so a carry out of vh is not recorded.
 */
787
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
788
    __asm__ __volatile__ (                               \
789
        "movl %[a], %%eax \n\t"                    \
790
        "mull %%eax   \n\t"                    \
791
        "addl %%eax, %[l] \n\t"                    \
792
        "adcl %%edx, %[h] \n\t"                    \
793
        : [l] "+r" (vl), [h] "+r" (vh)                   \
794
        : [a] "rm" (va)                                  \
795
        : "eax", "edx", "cc"                             \
796
    )
797
/* Add va into: vh | vl
 *
 * x86 32-bit: addl adds va (register or memory) to vl and adcl $0 adds the
 * carry to vh. A carry out of vh is not recorded.
 */
798
#define SP_ASM_ADDC(vl, vh, va)                          \
799
    __asm__ __volatile__ (                               \
800
        "addl %[a], %[l]  \n\t"                    \
801
        "adcl $0  , %[h]  \n\t"                    \
802
        : [l] "+r" (vl), [h] "+r" (vh)                   \
803
        : [a] "rm" (va)                                  \
804
        : "cc"                                           \
805
    )
806
/* Add va, variable in a register, into: vh | vl
 *
 * Same instruction sequence as SP_ASM_ADDC, but va is constrained to a
 * register ("r") rather than register-or-memory ("rm").
 */
#define SP_ASM_ADDC_REG(vl, vh, va)                      \
807
    __asm__ __volatile__ (                               \
808
        "addl %[a], %[l]  \n\t"                    \
809
        "adcl $0  , %[h]  \n\t"                    \
810
        : [l] "+r" (vl), [h] "+r" (vh)                   \
811
        : [a] "r" (va)                                   \
812
        : "cc"                                           \
813
    )
814
/* Sub va from: vh | vl
 *
 * x86 32-bit: subl subtracts va (register or memory) from vl and sbbl $0
 * subtracts the borrow from vh. A borrow out of vh is not recorded.
 */
815
#define SP_ASM_SUBB(vl, vh, va)                          \
816
    __asm__ __volatile__ (                               \
817
        "subl %[a], %[l]  \n\t"                    \
818
        "sbbl $0  , %[h]  \n\t"                    \
819
        : [l] "+r" (vl), [h] "+r" (vh)                   \
820
        : [a] "rm" (va)                                  \
821
        : "cc"                                           \
822
    )
823
/* Sub va, variable in a register, from: vh | vl
 *
 * Same instruction sequence as SP_ASM_SUBB, but va is constrained to a
 * register ("r") rather than register-or-memory ("rm").
 */
824
#define SP_ASM_SUBB_REG(vl, vh, va)                      \
825
    __asm__ __volatile__ (                               \
826
        "subl %[a], %[l]  \n\t"                    \
827
        "sbbl $0  , %[h]  \n\t"                    \
828
        : [l] "+r" (vl), [h] "+r" (vh)                   \
829
        : [a] "r" (va)                                   \
830
        : "cc"                                           \
831
    )
832
/* Add two times vc | vb | va into vo | vh | vl
 *
 * x86 32-bit: the addl/adcl/adcl sequence is executed twice; each pass
 * starts a fresh carry chain at vl and propagates through vh into vo.
 * A carry out of vo is not recorded. All three inputs must be in registers.
 */
833
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
834
    __asm__ __volatile__ (                               \
835
        "addl %[a], %[l]  \n\t"                    \
836
        "adcl %[b], %[h]  \n\t"                    \
837
        "adcl %[c], %[o]  \n\t"                    \
838
        "addl %[a], %[l]  \n\t"                    \
839
        "adcl %[b], %[h]  \n\t"                    \
840
        "adcl %[c], %[o]  \n\t"                    \
841
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
842
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
843
        : "cc"                                           \
844
    )
845
/* Index of highest bit set.
 *
 * x86 32-bit: uses bsr to write the bit index of the most significant set
 * bit of va into vi.
 * NOTE(review): bsr leaves its destination undefined for a zero source, so
 * callers presumably only pass non-zero va - confirm.
 */
846
#define SP_ASM_HI_BIT_SET_IDX(va, vi)                    \
847
    __asm__ __volatile__ (                               \
848
        "bsr  %[a], %[i]  \n\t"                    \
849
        : [i] "=r" (vi)                                  \
850
        : [a] "r" (va)                                   \
851
        : "cc"                                           \
852
    )
853
854
#ifndef WOLFSSL_SP_DIV_WORD_HALF
855
/* Divide a two digit number by a digit number and return. (hi | lo) / d
856
 *
857
 * Using divl instruction on Intel x64.
858
 *
859
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
860
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
861
 * @param  [in]  d   SP integer digit. Number to divide by.
862
 * @return  The division result.
863
 */
864
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
865
                                          sp_int_digit d)
866
{
867
    __asm__ __volatile__ (
868
        "divl %2"
869
        : "+a" (lo)
870
        : "d" (hi), "r" (d)
871
        : "cc"
872
    );
873
    return lo;
874
}
875
#define SP_ASM_DIV_WORD
876
#endif
877
878
#define SP_INT_ASM_AVAILABLE
879
880
    #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
881
882
    #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
883
/*
884
 * CPU: Aarch64
885
 */
886
887
/* Multiply va by vb and store double size result in: vh | vl
 *
 * AArch64: mul produces the low 64 bits of the product in vl and umulh the
 * high 64 bits in vh. Both inputs must be in registers.
 */
888
#define SP_ASM_MUL(vl, vh, va, vb)                       \
889
    __asm__ __volatile__ (                               \
890
        "mul  %[l], %[a], %[b]  \n\t"            \
891
        "umulh  %[h], %[a], %[b]  \n\t"            \
892
        : [h] "+r" (vh), [l] "+r" (vl)                   \
893
        : [a] "r" (va), [b] "r" (vb)                     \
894
        : "cc"                                           \
895
    )
896
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * AArch64: the low half of the product is computed into scratch register x8
 * and moved into vl after umulh has written vh; vo is set to zero from xzr.
 * x8 is declared clobbered.
 */
897
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
898
    __asm__ __volatile__ (                               \
899
        "mul  x8, %[a], %[b]    \n\t"            \
900
        "umulh  %[h], %[a], %[b]  \n\t"            \
901
        "mov  %[l], x8    \n\t"            \
902
        "mov  %[o], xzr   \n\t"            \
903
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
904
        : [a] "r" (va), [b] "r" (vb)                     \
905
        : "x8", "cc"                                     \
906
    )
907
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * AArch64: the product is formed in scratch registers x9:x8, added to vh:vl
 * with adds/adcs, and the final carry is added into vo with adc.
 * x8 and x9 are declared clobbered.
 */
908
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
909
    __asm__ __volatile__ (                               \
910
        "mul  x8, %[a], %[b]    \n\t"            \
911
        "umulh  x9, %[a], %[b]    \n\t"            \
912
        "adds %[l], %[l], x8    \n\t"            \
913
        "adcs %[h], %[h], x9    \n\t"            \
914
        "adc  %[o], %[o], xzr   \n\t"            \
915
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
916
        : [a] "r" (va), [b] "r" (vb)                     \
917
        : "x8", "x9", "cc"                               \
918
    )
919
/* Multiply va by vb and add double size result into: vh | vl
 *
 * AArch64: the x9:x8 product is added to vh:vl with adds/adc. The final adc
 * does not update flags and there is no overflow word, so a carry out of vh
 * is not recorded.
 */
920
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
921
    __asm__ __volatile__ (                               \
922
        "mul  x8, %[a], %[b]    \n\t"            \
923
        "umulh  x9, %[a], %[b]    \n\t"            \
924
        "adds %[l], %[l], x8    \n\t"            \
925
        "adc  %[h], %[h], x9    \n\t"            \
926
        : [l] "+r" (vl), [h] "+r" (vh)                   \
927
        : [a] "r" (va), [b] "r" (vb)                     \
928
        : "x8", "x9", "cc"                               \
929
    )
930
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * AArch64: the product is computed once into x9:x8 and the adds/adcs/adc
 * sequence is executed twice, each time propagating the carry into vo.
 */
931
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
932
    __asm__ __volatile__ (                               \
933
        "mul  x8, %[a], %[b]    \n\t"            \
934
        "umulh  x9, %[a], %[b]    \n\t"            \
935
        "adds %[l], %[l], x8    \n\t"            \
936
        "adcs %[h], %[h], x9    \n\t"            \
937
        "adc  %[o], %[o], xzr   \n\t"            \
938
        "adds %[l], %[l], x8    \n\t"            \
939
        "adcs %[h], %[h], x9    \n\t"            \
940
        "adc  %[o], %[o], xzr   \n\t"            \
941
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
942
        : [a] "r" (va), [b] "r" (vb)                     \
943
        : "x8", "x9", "cc"                               \
944
    )
945
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * AArch64: the first add uses adds/adc, so a carry out of vh is dropped.
 * The second add uses adds/adcs/adc and propagates its carry into vo.
 */
948
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
949
    __asm__ __volatile__ (                               \
950
        "mul  x8, %[a], %[b]    \n\t"            \
951
        "umulh  x9, %[a], %[b]    \n\t"            \
952
        "adds %[l], %[l], x8    \n\t"            \
953
        "adc  %[h], %[h], x9    \n\t"            \
954
        "adds %[l], %[l], x8    \n\t"            \
955
        "adcs %[h], %[h], x9    \n\t"            \
956
        "adc  %[o], %[o], xzr   \n\t"            \
957
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
958
        : [a] "r" (va), [b] "r" (vb)                     \
959
        : "x8", "x9", "cc"                               \
960
    )
961
/* Square va and store double size result in: vh | vl
 *
 * AArch64: mul/umulh with va as both operands write the low and high halves
 * of the square directly into vl and vh.
 */
962
#define SP_ASM_SQR(vl, vh, va)                           \
963
    __asm__ __volatile__ (                               \
964
        "mul  %[l], %[a], %[a]  \n\t"            \
965
        "umulh  %[h], %[a], %[a]  \n\t"            \
966
        : [h] "+r" (vh), [l] "+r" (vl)                   \
967
        : [a] "r" (va)                                   \
968
        : "cc"                                           \
969
    )
970
/* Square va and add double size result into: vo | vh | vl
 *
 * AArch64: the square is formed in scratch registers x9:x8, added to vh:vl
 * with adds/adcs, and the final carry is added into vo.
 */
971
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
972
    __asm__ __volatile__ (                               \
973
        "mul  x8, %[a], %[a]    \n\t"            \
974
        "umulh  x9, %[a], %[a]    \n\t"            \
975
        "adds %[l], %[l], x8    \n\t"            \
976
        "adcs %[h], %[h], x9    \n\t"            \
977
        "adc  %[o], %[o], xzr   \n\t"            \
978
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
979
        : [a] "r" (va)                                   \
980
        : "x8", "x9", "cc"                               \
981
    )
982
/* Square va and add double size result into: vh | vl
 *
 * AArch64: the x9:x8 square is added to vh:vl with adds/adc. There is no
 * overflow word, so a carry out of vh is not recorded.
 */
983
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
984
    __asm__ __volatile__ (                               \
985
        "mul  x8, %[a], %[a]    \n\t"            \
986
        "umulh  x9, %[a], %[a]    \n\t"            \
987
        "adds %[l], %[l], x8    \n\t"            \
988
        "adc  %[h], %[h], x9    \n\t"            \
989
        : [l] "+r" (vl), [h] "+r" (vh)                   \
990
        : [a] "r" (va)                                   \
991
        : "x8", "x9", "cc"                               \
992
    )
993
/* Add va into: vh | vl
 *
 * AArch64: adds adds va to vl and adc adds the resulting carry to vh.
 * A carry out of vh is not recorded.
 */
994
#define SP_ASM_ADDC(vl, vh, va)                          \
995
    __asm__ __volatile__ (                               \
996
        "adds %[l], %[l], %[a]  \n\t"            \
997
        "adc  %[h], %[h], xzr   \n\t"            \
998
        : [l] "+r" (vl), [h] "+r" (vh)                   \
999
        : [a] "r" (va)                                   \
1000
        : "cc"                                           \
1001
    )
1002
/* Sub va from: vh | vl
 *
 * AArch64: subs subtracts va from vl and sbc subtracts the resulting borrow
 * from vh. A borrow out of vh is not recorded.
 */
1003
#define SP_ASM_SUBB(vl, vh, va)                          \
1004
    __asm__ __volatile__ (                               \
1005
        "subs %[l], %[l], %[a]  \n\t"            \
1006
        "sbc  %[h], %[h], xzr   \n\t"            \
1007
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1008
        : [a] "r" (va)                                   \
1009
        : "cc"                                           \
1010
    )
1011
/* Add two times vc | vb | va into vo | vh | vl
 *
 * AArch64: the adds/adcs/adc sequence is executed twice; each pass starts a
 * fresh carry chain at vl and propagates through vh into vo. A carry out of
 * vo is not recorded.
 */
1012
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1013
    __asm__ __volatile__ (                               \
1014
        "adds %[l], %[l], %[a]  \n\t"            \
1015
        "adcs %[h], %[h], %[b]  \n\t"            \
1016
        "adc  %[o], %[o], %[c]  \n\t"            \
1017
        "adds %[l], %[l], %[a]  \n\t"            \
1018
        "adcs %[h], %[h], %[b]  \n\t"            \
1019
        "adc  %[o], %[o], %[c]  \n\t"            \
1020
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1021
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1022
        : "cc"                                           \
1023
    )
1024
/* Count leading zeros.
 *
 * AArch64: clz writes the number of leading zero bits of va into vn.
 * No registers or flags are declared clobbered.
 */
1025
#define SP_ASM_LZCNT(va, vn)                             \
1026
    __asm__ __volatile__ (                               \
1027
        "clz  %[n], %[a]  \n\t"                    \
1028
        : [n] "=r" (vn)                                  \
1029
        : [a] "r" (va)                                   \
1030
        :                                                \
1031
    )
1032
1033
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1034
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * AArch64 udiv only divides 64 bits by 64 bits, so the quotient is built up
 * in x6 from several partial divisions by an estimate of the divisor
 * (top 32 bits of d, plus one), subtracting estimate * d from hi | lo after
 * each step. The sequence contains no branches; the initial shift amount is
 * chosen with csel. hi, lo and d are modified inside the asm (all "+r").
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1044
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1045
                                          sp_int_digit d)
1046
{
1047
    __asm__ __volatile__ (
1048
        /* x3 = 16 when the top 16 bits of d are zero, otherwise 0.
         * x4 = 63 - x3. */
        "lsr  x3, %[d], 48\n\t"
1049
        "mov  x5, 16\n\t"
1050
        "cmp  x3, 0\n\t"
1051
        "mov  x4, 63\n\t"
1052
        "csel x3, x5, xzr, eq\n\t"
1053
        "sub  x4, x4, x3\n\t"
1054
        /* Shift d and hi | lo left by x3 bits. The bits moving from lo into
         * hi are obtained as (lo >> (63 - x3)) >> 1 so that a shift count
         * of 64 is never needed when x3 is 0. */
        "lsl  %[d], %[d], x3\n\t"
1055
        "lsl  %[hi], %[hi], x3\n\t"
1056
        "lsr  x5, %[lo], x4\n\t"
1057
        "lsl  %[lo], %[lo], x3\n\t"
1058
        "orr  %[hi], %[hi], x5, lsr 1\n\t"
1059
1060
        /* x5 = (top 32 bits of d) + 1: divisor used for all estimates. */
        "lsr  x5, %[d], 32\n\t"
1061
        "add  x5, x5, 1\n\t"
1062
1063
        /* Step 1: x6 = (hi / x5) << 32; subtract d * x6 from hi | lo. */
        "udiv x3, %[hi], x5\n\t"
1064
        "lsl  x6, x3, 32\n\t"
1065
        "mul  x4, %[d], x6\n\t"
1066
        "umulh  x3, %[d], x6\n\t"
1067
        "subs %[lo], %[lo], x4\n\t"
1068
        "sbc  %[hi], %[hi], x3\n\t"
1069
1070
        /* Step 2: repeat on the reduced hi, adding the estimate to x6. */
        "udiv x3, %[hi], x5\n\t"
1071
        "lsl  x3, x3, 32\n\t"
1072
        "add  x6, x6, x3\n\t"
1073
        "mul  x4, %[d], x3\n\t"
1074
        "umulh  x3, %[d], x3\n\t"
1075
        "subs %[lo], %[lo], x4\n\t"
1076
        "sbc  %[hi], %[hi], x3\n\t"
1077
1078
        /* Step 3: estimate from the middle 64 bits of hi | lo
         * ((hi << 32) | (lo >> 32)); add to x6 and subtract d * estimate. */
        "lsr  x3, %[lo], 32\n\t"
1079
        "orr  x3, x3, %[hi], lsl 32\n\t"
1080
1081
        "udiv x3, x3, x5\n\t"
1082
        "add  x6, x6, x3\n\t"
1083
        "mul  x4, %[d], x3\n\t"
1084
        "umulh  x3, %[d], x3\n\t"
1085
        "subs %[lo], %[lo], x4\n\t"
1086
        "sbc  %[hi], %[hi], x3\n\t"
1087
1088
        /* Step 4: same estimate again; only lo is updated this time. */
        "lsr  x3, %[lo], 32\n\t"
1089
        "orr  x3, x3, %[hi], lsl 32\n\t"
1090
1091
        "udiv x3, x3, x5\n\t"
1092
        "add  x6, x6, x3\n\t"
1093
        "mul  x4, %[d], x3\n\t"
1094
        "sub  %[lo], %[lo], x4\n\t"
1095
1096
        /* Final correction: divide what is left in lo by d and add it to
         * the accumulated quotient; the result is returned through hi. */
        "udiv x3, %[lo], %[d]\n\t"
1097
        "add  %[hi], x6, x3\n\t"
1098
1099
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1100
        :
1101
        : "x3", "x4", "x5", "x6", "cc"
1102
    );
1103
1104
    return hi;
1105
}
1106
#define SP_ASM_DIV_WORD
1107
#endif
1108
1109
#define SP_INT_ASM_AVAILABLE
1110
1111
    #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
1112
1113
    #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
1114
        SP_WORD_SIZE == 32
1115
/*
1116
 * CPU: ARM32 or Cortex-M4 and similar
1117
 */
1118
1119
/* Multiply va by vb and store double size result in: vh | vl
 *
 * ARM32/Cortex-M: a single umull writes the low word of the product to vl
 * and the high word to vh. No clobbers are declared.
 */
1120
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1121
    __asm__ __volatile__ (                               \
1122
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1123
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1124
        : [a] "r" (va), [b] "r" (vb)                     \
1125
    )
1126
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * ARM32/Cortex-M: umull writes the product to vh:vl and vo is then set to
 * zero.
 */
1127
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1128
    __asm__ __volatile__ (                               \
1129
        "umull  %[l], %[h], %[a], %[b]  \n\t"            \
1130
        "mov  %[o], #0    \n\t"            \
1131
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
1132
        : [a] "r" (va), [b] "r" (vb)                     \
1133
    )
1134
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * ARM32/Cortex-M: the product is formed in scratch registers r9:r8, added to
 * vh:vl with adds/adcs, and the final carry is added into vo with adc.
 * r8 and r9 are declared clobbered.
 */
1135
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1136
    __asm__ __volatile__ (                               \
1137
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1138
        "adds %[l], %[l], r8    \n\t"            \
1139
        "adcs %[h], %[h], r9    \n\t"            \
1140
        "adc  %[o], %[o], #0    \n\t"            \
1141
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1142
        : [a] "r" (va), [b] "r" (vb)                     \
1143
        : "r8", "r9", "cc"                               \
1144
    )
1145
/* Multiply va by vb and add double size result into: vh | vl
 *
 * ARM32/Cortex-M: a single umlal multiplies va by vb and accumulates the
 * 64-bit product into vh:vl. No scratch registers or clobbers are needed.
 */
1146
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1147
    __asm__ __volatile__ (                               \
1148
        "umlal  %[l], %[h], %[a], %[b]  \n\t"            \
1149
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1150
        : [a] "r" (va), [b] "r" (vb)                     \
1151
    )
1152
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * ARM32/Cortex-M: the product is computed once into r9:r8 and the
 * adds/adcs/adc sequence is executed twice, each time propagating the carry
 * into vo.
 */
1153
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1154
    __asm__ __volatile__ (                               \
1155
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1156
        "adds %[l], %[l], r8    \n\t"            \
1157
        "adcs %[h], %[h], r9    \n\t"            \
1158
        "adc  %[o], %[o], #0    \n\t"            \
1159
        "adds %[l], %[l], r8    \n\t"            \
1160
        "adcs %[h], %[h], r9    \n\t"            \
1161
        "adc  %[o], %[o], #0    \n\t"            \
1162
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1163
        : [a] "r" (va), [b] "r" (vb)                     \
1164
        : "r8", "r9", "cc"                               \
1165
    )
1166
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * ARM32/Cortex-M: the first add uses adds/adc, so a carry out of vh is
 * dropped. The second add uses adds/adcs/adc and propagates its carry
 * into vo.
 */
1169
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1170
    __asm__ __volatile__ (                               \
1171
        "umull  r8, r9, %[a], %[b]  \n\t"            \
1172
        "adds %[l], %[l], r8    \n\t"            \
1173
        "adc  %[h], %[h], r9    \n\t"            \
1174
        "adds %[l], %[l], r8    \n\t"            \
1175
        "adcs %[h], %[h], r9    \n\t"            \
1176
        "adc  %[o], %[o], #0    \n\t"            \
1177
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1178
        : [a] "r" (va), [b] "r" (vb)                     \
1179
        : "r8", "r9", "cc"                               \
1180
    )
1181
/* Square va and store double size result in: vh | vl
 *
 * ARM32/Cortex-M: a single umull with va as both multiplicands writes the
 * square to vh:vl.
 */
1182
#define SP_ASM_SQR(vl, vh, va)                           \
1183
    __asm__ __volatile__ (                               \
1184
        "umull  %[l], %[h], %[a], %[a]  \n\t"            \
1185
        : [h] "+r" (vh), [l] "+r" (vl)                   \
1186
        : [a] "r" (va)                                   \
1187
    )
1188
/* Square va and add double size result into: vo | vh | vl
 *
 * ARM32/Cortex-M: the square is formed in scratch registers r9:r8, added to
 * vh:vl with adds/adcs, and the final carry is added into vo.
 */
1189
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1190
    __asm__ __volatile__ (                               \
1191
        "umull  r8, r9, %[a], %[a]  \n\t"            \
1192
        "adds %[l], %[l], r8    \n\t"            \
1193
        "adcs %[h], %[h], r9    \n\t"            \
1194
        "adc  %[o], %[o], #0    \n\t"            \
1195
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1196
        : [a] "r" (va)                                   \
1197
        : "r8", "r9", "cc"                               \
1198
    )
1199
/* Square va and add double size result into: vh | vl
 *
 * ARM32/Cortex-M: a single umlal with va as both multiplicands accumulates
 * the square into vh:vl. "cc" is listed as clobbered here, unlike
 * SP_ASM_MUL_ADD_NO which uses the same instruction without it.
 */
1200
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
1201
    __asm__ __volatile__ (                               \
1202
        "umlal  %[l], %[h], %[a], %[a]  \n\t"            \
1203
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1204
        : [a] "r" (va)                                   \
1205
        : "cc"                                           \
1206
    )
1207
/* Add va into: vh | vl
 *
 * ARM32/Cortex-M: adds adds va to vl and adc adds the resulting carry to vh.
 * A carry out of vh is not recorded.
 */
1208
#define SP_ASM_ADDC(vl, vh, va)                          \
1209
    __asm__ __volatile__ (                               \
1210
        "adds %[l], %[l], %[a]  \n\t"            \
1211
        "adc  %[h], %[h], #0    \n\t"            \
1212
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1213
        : [a] "r" (va)                                   \
1214
        : "cc"                                           \
1215
    )
1216
/* Sub va from: vh | vl
 *
 * ARM32/Cortex-M: subs subtracts va from vl and sbc subtracts the resulting
 * borrow from vh. A borrow out of vh is not recorded.
 */
1217
#define SP_ASM_SUBB(vl, vh, va)                          \
1218
    __asm__ __volatile__ (                               \
1219
        "subs %[l], %[l], %[a]  \n\t"            \
1220
        "sbc  %[h], %[h], #0    \n\t"            \
1221
        : [l] "+r" (vl), [h] "+r" (vh)                   \
1222
        : [a] "r" (va)                                   \
1223
        : "cc"                                           \
1224
    )
1225
/* Add two times vc | vb | va into vo | vh | vl
 *
 * ARM32/Cortex-M: the adds/adcs/adc sequence is executed twice; each pass
 * starts a fresh carry chain at vl and propagates through vh into vo.
 * A carry out of vo is not recorded.
 */
1226
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1227
    __asm__ __volatile__ (                               \
1228
        "adds %[l], %[l], %[a]  \n\t"            \
1229
        "adcs %[h], %[h], %[b]  \n\t"            \
1230
        "adc  %[o], %[o], %[c]  \n\t"            \
1231
        "adds %[l], %[l], %[a]  \n\t"            \
1232
        "adcs %[h], %[h], %[b]  \n\t"            \
1233
        "adc  %[o], %[o], %[c]  \n\t"            \
1234
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
1235
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
1236
        : "cc"                                           \
1237
    )
1238
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
1239
/* Count leading zeros - instruction only available on ARMv7 and newer.
 *
 * clz writes the number of leading zero bits of va into vn. The macro is
 * only defined when WOLFSSL_ARM_ARCH is defined and at least 7.
 */
1240
#define SP_ASM_LZCNT(va, vn)                             \
1241
    __asm__ __volatile__ (                               \
1242
        "clz  %[n], %[a]  \n\t"                    \
1243
        : [n] "=r" (vn)                                  \
1244
        : [a] "r" (va)                                   \
1245
    )
1246
#endif
1247
1248
#ifndef WOLFSSL_SP_DIV_WORD_HALF
1249
#ifndef WOLFSSL_SP_ARM32_UDIV
1250
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * Outline of the assembly:
 *   1. Normalize: shift d, and hi | lo with it, left until the top bit of d
 *      is set. The shift count comes from clz, or from a de Bruijn
 *      multiply and table lookup when WOLFSSL_ARM_ARCH is less than 7.
 *   2. Build a quotient estimate one bit at a time (1 + 30 steps) by
 *      comparing the running remainder against (d >> 1) + 1 and subtracting
 *      under a mask instead of branching on the data.
 *   3. Correct the estimate twice using the high word of
 *      (hi | lo) - estimate * d, then add one for each of "remainder > d"
 *      and "remainder == d".
 *
 * Registers r4, r5, r6, r8, r9 and the condition flags are clobbered.
 * NOTE(review): d == 0 and hi >= d are not checked here - presumably the
 * callers guarantee a valid dividend/divisor pair; confirm.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1260
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1261
                                          sp_int_digit d)
1262
{
1263
    /* Quotient accumulator - must start at zero as it is doubled each step. */
    sp_int_digit r = 0;
1264
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1265
    /* Lookup table indexed by the top 5 bits of the de Bruijn product;
     * yields the left shift count that normalizes d. */
    static const char debruijn32[32] = {
1266
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1267
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
1268
    };
1269
    /* Multiplier paired with the table above. */
    static const sp_uint32 debruijn32_mul = 0x076be629;
1270
#endif
1271
1272
    __asm__ __volatile__ (
1273
        /* Shift d so that top bit is set. */
1274
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1275
        /* r5 = d with every bit below its top set bit also set, plus one.
         * Multiply by the constant and use the top 5 bits of the product as
         * the table index; r5 ends up holding the shift count. */
        "ldr  r4, %[m]\n\t"
1276
        "mov  r5, %[d]\n\t"
1277
        "orr  r5, r5, r5, lsr #1\n\t"
1278
        "orr  r5, r5, r5, lsr #2\n\t"
1279
        "orr  r5, r5, r5, lsr #4\n\t"
1280
        "orr  r5, r5, r5, lsr #8\n\t"
1281
        "orr  r5, r5, r5, lsr #16\n\t"
1282
        "add  r5, r5, #1\n\t"
1283
        "mul  r6, r5, r4\n\t"
1284
        "lsr  r5, r6, #27\n\t"
1285
        "ldrb r5, [%[t], r5]\n\t"
1286
#else
1287
        /* r5 = number of leading zero bits in d. */
        "clz  r5, %[d]\n\t"
1288
#endif
1289
        /* r6 = 31 - shift. The bits of lo that move into hi are obtained
         * with a shift by r6 followed by a shift by 1, so that a shift
         * count of zero does not need a shift by 32. */
        "rsb  r6, r5, #31\n\t"
1290
        "lsl  %[d], %[d], r5\n\t"
1291
        "lsl  %[hi], %[hi], r5\n\t"
1292
        "lsr  r9, %[lo], r6\n\t"
1293
        "lsl  %[lo], %[lo], r5\n\t"
1294
        "orr  %[hi], %[hi], r9, lsr #1\n\t"
1295
1296
        /* r5 = (d >> 1) + 1 is the value compared against at every step.
         * r6 and r9 are working copies of lo and hi. */
        "lsr  r5, %[d], #1\n\t"
1297
        "add  r5, r5, #1\n\t"
1298
        "mov  r6, %[lo]\n\t"
1299
        "mov  r9, %[hi]\n\t"
1300
        /* Do top 32 */
        /* r8 becomes all ones when r5 < r9 and zero otherwise. The quotient
         * is doubled and incremented by one when r8 is all ones, and r5 is
         * subtracted from r9 under that mask. */
1301
        "subs r8, r5, r9\n\t"
1302
        "sbc  r8, r8, r8\n\t"
1303
        "add  %[r], %[r], %[r]\n\t"
1304
        "sub  %[r], %[r], r8\n\t"
1305
        "and  r8, r8, r5\n\t"
1306
        "subs r9, r9, r8\n\t"
1307
        /* Next 30 bits */
        /* r4 counts down from 29 to 0: 30 iterations, each shifting the
         * next bit of r6 into r9 before repeating the masked step. */
1308
        "mov  r4, #29\n\t"
1309
        "\n1:\n\t"
1310
        "movs r6, r6, lsl #1\n\t"
1311
        "adc  r9, r9, r9\n\t"
1312
        "subs r8, r5, r9\n\t"
1313
        "sbc  r8, r8, r8\n\t"
1314
        "add  %[r], %[r], %[r]\n\t"
1315
        "sub  %[r], %[r], r8\n\t"
1316
        "and  r8, r8, r5\n\t"
1317
        "subs r9, r9, r8\n\t"
1318
        "subs r4, r4, #1\n\t"
1319
        "bpl  1b\n\t"
1320
1321
        /* Final doubling of the estimate, with the low bit set. */
        "add  %[r], %[r], %[r]\n\t"
1322
        "add  %[r], %[r], #1\n\t"
1323
1324
        /* Handle the case where the difference has a high word > 0:
         * add the high word of (hi | lo) - r * d to r. Done twice. */
1325
        "umull  r4, r5, %[r], %[d]\n\t"
1326
        "subs r4, %[lo], r4\n\t"
1327
        "sbc  r5, %[hi], r5\n\t"
1328
        "add  %[r], %[r], r5\n\t"
1329
        "umull  r4, r5, %[r], %[d]\n\t"
1330
        "subs r4, %[lo], r4\n\t"
1331
        "sbc  r5, %[hi], r5\n\t"
1332
        "add  %[r], %[r], r5\n\t"
1333
1334
        /* Add 1 to result if bottom half of difference is >= d. */
        /* First masked add covers "difference > d", the second covers
         * "difference == d". */
1335
        "mul  r4, %[r], %[d]\n\t"
1336
        "subs r4, %[lo], r4\n\t"
1337
        "subs r9, %[d], r4\n\t"
1338
        "sbc  r8, r8, r8\n\t"
1339
        "sub  %[r], %[r], r8\n\t"
1340
        "subs r9, r9, #1\n\t"
1341
        "sbc  r8, r8, r8\n\t"
1342
        "sub  %[r], %[r], r8\n\t"
1343
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1344
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
1345
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
1346
#else
1347
        :
1348
#endif
1349
        : "r4", "r5", "r6", "r8", "r9", "cc"
1350
    );
1351
1352
    return r;
1353
}
1354
#else
1355
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on arm32
 * Constant time.
 * NOTE(review): the constant time claim holds only if udiv has operand
 * independent latency on the target core - confirm for the CPU in use.
 *
 * Outline of the assembly:
 *   1. Shift d, and hi | lo with it, left by 8 bits when the top byte of d
 *      is zero; otherwise no shift is applied.
 *   2. Use r5 = (d >> 16) + 1 as a 32-bit divisor estimate and run a
 *      sequence of udiv steps, each adding a partial quotient to r6 and
 *      subtracting partial quotient * d from hi | lo.
 *   3. Divide what is left in lo by d and add it to the quotient.
 *
 * Registers r3, r4, r5, r6 and the condition flags are clobbered.
 * The operands hi, lo and d are modified; the quotient is returned in hi.
 *
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
 * @param  [in]  d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
1365
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
1366
                                          sp_int_digit d)
1367
{
1368
    __asm__ __volatile__ (
1369
        /* r3 = 8 when the top byte of d is zero, else 0. */
        "lsrs r3, %[d], #24\n\t"
1370
  "it eq\n\t"
1371
        "moveq  r3, #8\n\t"
1372
  "it ne\n\t"
1373
        "movne  r3, #0\n\t"
1374
        /* Shift d and hi | lo left by r3. The bits crossing from lo into hi
         * are formed with a shift by 31 - r3 followed by a shift by 1. */
        "rsb  r4, r3, #31\n\t"
1375
        "lsl  %[d], %[d], r3\n\t"
1376
        "lsl  %[hi], %[hi], r3\n\t"
1377
        "lsr  r5, %[lo], r4\n\t"
1378
        "lsl  %[lo], %[lo], r3\n\t"
1379
        "orr  %[hi], %[hi], r5, lsr #1\n\t"
1380
1381
        /* r5 = top half of d plus one: the divisor used for estimates. */
        "lsr  r5, %[d], 16\n\t"
1382
        "add  r5, r5, 1\n\t"
1383
1384
        /* First estimate: (hi / r5) << 16 starts the quotient in r6. */
        "udiv r3, %[hi], r5\n\t"
1385
        "lsl  r6, r3, 16\n\t"
1386
        "umull  r4, r3, %[d], r6\n\t"
1387
        "subs %[lo], %[lo], r4\n\t"
1388
        "sbc  %[hi], %[hi], r3\n\t"
1389
1390
        /* Second estimate from the reduced hi, again scaled by 2^16. */
        "udiv r3, %[hi], r5\n\t"
1391
        "lsl  r3, r3, 16\n\t"
1392
        "add  r6, r6, r3\n\t"
1393
        "umull  r4, r3, %[d], r3\n\t"
1394
        "subs %[lo], %[lo], r4\n\t"
1395
        "sbc  %[hi], %[hi], r3\n\t"
1396
1397
        /* r3 = middle 32 bits of the remaining hi | lo. */
        "lsr  r3, %[lo], 16\n\t"
1398
        "orr  r3, r3, %[hi], lsl 16\n\t"
1399
1400
        /* Third estimate, unscaled. */
        "udiv r3, r3, r5\n\t"
1401
        "add  r6, r6, r3\n\t"
1402
        "umull  r4, r3, %[d], r3\n\t"
1403
        "subs %[lo], %[lo], r4\n\t"
1404
        "sbc  %[hi], %[hi], r3\n\t"
1405
1406
        "lsr  r3, %[lo], 16\n\t"
1407
        "orr  r3, r3, %[hi], lsl 16\n\t"
1408
1409
        /* Fourth estimate; only the low word of the product is subtracted
         * from lo here. */
        "udiv r3, r3, r5\n\t"
1410
        "add  r6, r6, r3\n\t"
1411
        "mul  r4, %[d], r3\n\t"
1412
        "sub  %[lo], %[lo], r4\n\t"
1413
1414
        /* Final step: divide what remains in lo by d; result goes in hi. */
        "udiv r3, %[lo], %[d]\n\t"
1415
        "add  %[hi], r6, r3\n\t"
1416
1417
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
1418
        :
1419
        : "r3", "r4", "r5", "r6", "cc"
1420
    );
1421
1422
    return hi;
1423
}
1424
#endif
1425
1426
#define SP_ASM_DIV_WORD
1427
#endif
1428
1429
#define SP_INT_ASM_AVAILABLE
1430
1431
    #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
1432
1433
    #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
1434
/*
1435
 * CPU: ARM Thumb (like Cortex-M0)
1436
 */
1437
1438
/* Compile with -fomit-frame-pointer, or similar, if compiler complains about
1439
 * usage of register 'r7'.
1440
 */
1441
1442
#if defined(__clang__)
1443
1444
/* Multiply va by vb and store double size result in: vh | vl
 *
 * The operands are split into 16-bit halves (a = ah | al, b = bh | bl) and
 * the result is assembled from the four partial products al*bl, al*bh,
 * ah*bh and ah*bl, each computed with "muls". The two cross products are
 * split at bit 16 and added into the low and high words with carry.
 * Registers r4, r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
1445
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1446
    __asm__ __volatile__ (                               \
1447
        /* al * bl */                                    \
1448
        "uxth r6, %[a]    \n\t"            \
1449
        "uxth %[l], %[b]    \n\t"            \
1450
        "muls %[l], r6    \n\t"            \
1451
        /* al * bh */                                    \
1452
        "lsrs r4, %[b], #16   \n\t"            \
1453
        "muls r6, r4      \n\t"            \
1454
        "lsrs %[h], r6, #16   \n\t"            \
1455
        "lsls r6, r6, #16   \n\t"            \
1456
        "adds %[l], %[l], r6    \n\t"            \
1457
        "movs r5, #0      \n\t"            \
1458
        "adcs %[h], r5    \n\t"            \
1459
        /* ah * bh */                                    \
1460
        "lsrs r6, %[a], #16   \n\t"            \
1461
        "muls r4, r6      \n\t"            \
1462
        "adds %[h], %[h], r4    \n\t"            \
1463
        /* ah * bl */                                    \
1464
        "uxth r4, %[b]    \n\t"            \
1465
        "muls r6, r4      \n\t"            \
1466
        "lsrs r4, r6, #16   \n\t"            \
1467
        "lsls r6, r6, #16   \n\t"            \
1468
        "adds %[l], %[l], r6    \n\t"            \
1469
        "adcs %[h], r4    \n\t"            \
1470
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1471
        : [a] "l" (va), [b] "l" (vb)                     \
1472
        : "r4", "r5", "r6", "cc"                         \
1473
    )
1474
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same four partial product scheme as a plain two word multiply, but the
 * overflow word vo is also written: it is set to zero and doubles as the
 * zero addend when the first carry is added into vh.
 * Registers r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * vo  [out]  Overflow word; set to zero.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
1475
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1476
    __asm__ __volatile__ (                               \
1477
        /* al * bl */                                    \
1478
        "uxth r6, %[a]    \n\t"            \
1479
        "uxth %[l], %[b]    \n\t"            \
1480
        "muls %[l], r6    \n\t"            \
1481
        /* al * bh */                                    \
1482
        "lsrs r5, %[b], #16   \n\t"            \
1483
        "muls r6, r5      \n\t"            \
1484
        "lsrs %[h], r6, #16   \n\t"            \
1485
        "lsls r6, r6, #16   \n\t"            \
1486
        "adds %[l], %[l], r6    \n\t"            \
1487
        "movs %[o], #0    \n\t"            \
1488
        "adcs %[h], %[o]    \n\t"            \
1489
        /* ah * bh */                                    \
1490
        "lsrs r6, %[a], #16   \n\t"            \
1491
        "muls r5, r6      \n\t"            \
1492
        "adds %[h], %[h], r5    \n\t"            \
1493
        /* ah * bl */                                    \
1494
        "uxth r5, %[b]    \n\t"            \
1495
        "muls r6, r5      \n\t"            \
1496
        "lsrs r5, r6, #16   \n\t"            \
1497
        "lsls r6, r6, #16   \n\t"            \
1498
        "adds %[l], %[l], r6    \n\t"            \
1499
        "adcs %[h], r5    \n\t"            \
1500
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1501
        : [a] "l" (va), [b] "l" (vb)                     \
1502
        : "r5", "r6", "cc"                               \
1503
    )
1504
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1505
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each of the four 16x16 partial products is added into the three word
 * accumulator as soon as it is computed, with the carry rippled up to vo.
 * r5 is zeroed once and reused as the zero addend for carry propagation;
 * r6 and r7 hold the operand halves and partial products.
 * Registers r5, r6, r7 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1506
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1507
    __asm__ __volatile__ (                               \
1508
        /* al * bl */                                    \
1509
        "uxth r6, %[a]    \n\t"            \
1510
        "uxth r7, %[b]    \n\t"            \
1511
        "muls r7, r6      \n\t"            \
1512
        "adds %[l], %[l], r7    \n\t"            \
1513
        "movs r5, #0      \n\t"            \
1514
        "adcs %[h], r5    \n\t"            \
1515
        "adcs %[o], r5    \n\t"            \
1516
        /* al * bh */                                    \
1517
        "lsrs r7, %[b], #16   \n\t"            \
1518
        "muls r6, r7      \n\t"            \
1519
        "lsrs r7, r6, #16   \n\t"            \
1520
        "lsls r6, r6, #16   \n\t"            \
1521
        "adds %[l], %[l], r6    \n\t"            \
1522
        "adcs %[h], r7    \n\t"            \
1523
        "adcs %[o], r5    \n\t"            \
1524
        /* ah * bh */                                    \
1525
        "lsrs r6, %[a], #16   \n\t"            \
1526
        "lsrs r7, %[b], #16   \n\t"            \
1527
        "muls r7, r6      \n\t"            \
1528
        "adds %[h], %[h], r7    \n\t"            \
1529
        "adcs %[o], r5    \n\t"            \
1530
        /* ah * bl */                                    \
1531
        "uxth r7, %[b]    \n\t"            \
1532
        "muls r6, r7      \n\t"            \
1533
        "lsrs r7, r6, #16   \n\t"            \
1534
        "lsls r6, r6, #16   \n\t"            \
1535
        "adds %[l], %[l], r6    \n\t"            \
1536
        "adcs %[h], r7    \n\t"            \
1537
        "adcs %[o], r5    \n\t"            \
1538
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1539
        : [a] "l" (va), [b] "l" (vb)                     \
1540
        : "r5", "r6", "r7", "cc"                         \
1541
    )
1542
#else
1543
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. It does not use
 * r7: r5 serves both as the partial product temporary and as the zero
 * addend, so it is set back to zero before each carry into vo.
 * Registers r5, r6 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1544
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1545
    __asm__ __volatile__ (                               \
1546
        /* al * bl */                                    \
1547
        "uxth r6, %[a]    \n\t"            \
1548
        "uxth r5, %[b]    \n\t"            \
1549
        "muls r5, r6      \n\t"            \
1550
        "adds %[l], %[l], r5    \n\t"            \
1551
        "movs r5, #0      \n\t"            \
1552
        "adcs %[h], r5    \n\t"            \
1553
        "adcs %[o], r5    \n\t"            \
1554
        /* al * bh */                                    \
1555
        "lsrs r5, %[b], #16   \n\t"            \
1556
        "muls r6, r5      \n\t"            \
1557
        "lsrs r5, r6, #16   \n\t"            \
1558
        "lsls r6, r6, #16   \n\t"            \
1559
        "adds %[l], %[l], r6    \n\t"            \
1560
        "adcs %[h], r5    \n\t"            \
1561
        "movs r5, #0      \n\t"            \
1562
        "adcs %[o], r5    \n\t"            \
1563
        /* ah * bh */                                    \
1564
        "lsrs r6, %[a], #16   \n\t"            \
1565
        "lsrs r5, %[b], #16   \n\t"            \
1566
        "muls r5, r6      \n\t"            \
1567
        "adds %[h], %[h], r5    \n\t"            \
1568
        "movs r5, #0      \n\t"            \
1569
        "adcs %[o], r5    \n\t"            \
1570
        /* ah * bl */                                    \
1571
        "uxth r5, %[b]    \n\t"            \
1572
        "muls r6, r5      \n\t"            \
1573
        "lsrs r5, r6, #16   \n\t"            \
1574
        "lsls r6, r6, #16   \n\t"            \
1575
        "adds %[l], %[l], r6    \n\t"            \
1576
        "adcs %[h], r5    \n\t"            \
1577
        "movs r5, #0      \n\t"            \
1578
        "adcs %[o], r5    \n\t"            \
1579
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1580
        : [a] "l" (va), [b] "l" (vb)                     \
1581
        : "r5", "r6", "cc"                               \
1582
    )
1583
#endif
1584
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two word accumulator form: the four 16x16 partial products are added
 * into vh | vl and any carry out of vh is not recorded.
 * Registers r4, r5, r6 and the condition flags are clobbered.
 *
 * vl, vh  [in,out]  Low and high word of the accumulator.
 * va      [in]      First multiplicand.
 * vb      [in]      Second multiplicand.
 */
1585
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
1586
    __asm__ __volatile__ (                               \
1587
        /* al * bl */                                    \
1588
        "uxth r6, %[a]    \n\t"            \
1589
        "uxth r4, %[b]    \n\t"            \
1590
        "muls r4, r6      \n\t"            \
1591
        "adds %[l], %[l], r4    \n\t"            \
1592
        "movs r5, #0      \n\t"            \
1593
        "adcs %[h], r5    \n\t"            \
1594
        /* al * bh */                                    \
1595
        "lsrs r4, %[b], #16   \n\t"            \
1596
        "muls r6, r4      \n\t"            \
1597
        "lsrs r4, r6, #16   \n\t"            \
1598
        "lsls r6, r6, #16   \n\t"            \
1599
        "adds %[l], %[l], r6    \n\t"            \
1600
        "adcs %[h], r4    \n\t"            \
1601
        /* ah * bh */                                    \
1602
        "lsrs r6, %[a], #16   \n\t"            \
1603
        "lsrs r4, %[b], #16   \n\t"            \
1604
        "muls r4, r6      \n\t"            \
1605
        "adds %[h], %[h], r4    \n\t"            \
1606
        /* ah * bl */                                    \
1607
        "uxth r4, %[b]    \n\t"            \
1608
        "muls r6, r4      \n\t"            \
1609
        "lsrs r4, r6, #16   \n\t"            \
1610
        "lsls r6, r6, #16   \n\t"            \
1611
        "adds %[l], %[l], r6    \n\t"            \
1612
        "adcs %[h], r4    \n\t"            \
1613
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1614
        : [a] "l" (va), [b] "l" (vb)                     \
1615
        : "r4", "r5", "r6", "cc"                         \
1616
    )
1617
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1618
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when neither WOLFSSL_SP_SMALL nor DEBUG is defined.
 * Each 16x16 partial product is computed once and then added into the
 * three word accumulator two times, with the carry rippled up to vo after
 * each addition. r5 holds zero for the carry-only additions.
 * Registers r5, r6, r7 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1619
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1620
    __asm__ __volatile__ (                               \
1621
        /* al * bl */                                    \
1622
        "uxth r6, %[a]    \n\t"            \
1623
        "uxth r7, %[b]    \n\t"            \
1624
        "muls r7, r6      \n\t"            \
1625
        "adds %[l], %[l], r7    \n\t"            \
1626
        "movs r5, #0      \n\t"            \
1627
        "adcs %[h], r5    \n\t"            \
1628
        "adcs %[o], r5    \n\t"            \
1629
        "adds %[l], %[l], r7    \n\t"            \
1630
        "adcs %[h], r5    \n\t"            \
1631
        "adcs %[o], r5    \n\t"            \
1632
        /* al * bh */                                    \
1633
        "lsrs r7, %[b], #16   \n\t"            \
1634
        "muls r6, r7      \n\t"            \
1635
        "lsrs r7, r6, #16   \n\t"            \
1636
        "lsls r6, r6, #16   \n\t"            \
1637
        "adds %[l], %[l], r6    \n\t"            \
1638
        "adcs %[h], r7    \n\t"            \
1639
        "adcs %[o], r5    \n\t"            \
1640
        "adds %[l], %[l], r6    \n\t"            \
1641
        "adcs %[h], r7    \n\t"            \
1642
        "adcs %[o], r5    \n\t"            \
1643
        /* ah * bh */                                    \
1644
        "lsrs r6, %[a], #16   \n\t"            \
1645
        "lsrs r7, %[b], #16   \n\t"            \
1646
        "muls r7, r6      \n\t"            \
1647
        "adds %[h], %[h], r7    \n\t"            \
1648
        "adcs %[o], r5    \n\t"            \
1649
        "adds %[h], %[h], r7    \n\t"            \
1650
        "adcs %[o], r5    \n\t"            \
1651
        /* ah * bl */                                    \
1652
        "uxth r7, %[b]    \n\t"            \
1653
        "muls r6, r7      \n\t"            \
1654
        "lsrs r7, r6, #16   \n\t"            \
1655
        "lsls r6, r6, #16   \n\t"            \
1656
        "adds %[l], %[l], r6    \n\t"            \
1657
        "adcs %[h], r7    \n\t"            \
1658
        "adcs %[o], r5    \n\t"            \
1659
        "adds %[l], %[l], r6    \n\t"            \
1660
        "adcs %[h], r7    \n\t"            \
1661
        "adcs %[o], r5    \n\t"            \
1662
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1663
        : [a] "l" (va), [b] "l" (vb)                     \
1664
        : "r5", "r6", "r7", "cc"                         \
1665
    )
1666
#else
1667
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant used when WOLFSSL_SP_SMALL or DEBUG is defined. It does not use
 * r7. Instead, the register holding va is saved in r8, overwritten with
 * zero to act as the zero addend for carry propagation, reloaded from r8
 * when the high half of va is needed, and restored from r8 at the end.
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the asm (and restored before it ends) - confirm this is acceptable
 * for the compilers in use.
 * Registers r5, r6, r8 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1668
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
1669
    __asm__ __volatile__ (                               \
1670
        "movs r8, %[a]    \n\t"            \
1671
        /* al * bl */                                    \
1672
        "uxth r6, %[a]    \n\t"            \
1673
        "uxth r5, %[b]    \n\t"            \
1674
        "muls r5, r6      \n\t"            \
1675
        "adds %[l], %[l], r5    \n\t"            \
1676
        "movs %[a], #0    \n\t"            \
1677
        "adcs %[h], %[a]    \n\t"            \
1678
        "adcs %[o], %[a]    \n\t"            \
1679
        "adds %[l], %[l], r5    \n\t"            \
1680
        "adcs %[h], %[a]    \n\t"            \
1681
        "adcs %[o], %[a]    \n\t"            \
1682
        /* al * bh */                                    \
1683
        "lsrs r5, %[b], #16   \n\t"            \
1684
        "muls r6, r5      \n\t"            \
1685
        "lsrs r5, r6, #16   \n\t"            \
1686
        "lsls r6, r6, #16   \n\t"            \
1687
        "adds %[l], %[l], r6    \n\t"            \
1688
        "adcs %[h], r5    \n\t"            \
1689
        "adcs %[o], %[a]    \n\t"            \
1690
        "adds %[l], %[l], r6    \n\t"            \
1691
        "adcs %[h], r5    \n\t"            \
1692
        "adcs %[o], %[a]    \n\t"            \
1693
        /* ah * bh */                                    \
1694
        "movs %[a], r8    \n\t"            \
1695
        "lsrs r6, %[a], #16   \n\t"            \
1696
        "lsrs r5, %[b], #16   \n\t"            \
1697
        "muls r5, r6      \n\t"            \
1698
        "adds %[h], %[h], r5    \n\t"            \
1699
        "movs %[a], #0    \n\t"            \
1700
        "adcs %[o], %[a]    \n\t"            \
1701
        "adds %[h], %[h], r5    \n\t"            \
1702
        "adcs %[o], %[a]    \n\t"            \
1703
        /* ah * bl */                                    \
1704
        "uxth r5, %[b]    \n\t"            \
1705
        "muls r6, r5      \n\t"            \
1706
        "lsrs r5, r6, #16   \n\t"            \
1707
        "lsls r6, r6, #16   \n\t"            \
1708
        "adds %[l], %[l], r6    \n\t"            \
1709
        "adcs %[h], r5    \n\t"            \
1710
        "adcs %[o], %[a]    \n\t"            \
1711
        "adds %[l], %[l], r6    \n\t"            \
1712
        "adcs %[h], r5    \n\t"            \
1713
        "adcs %[o], %[a]    \n\t"            \
1714
        "movs %[a], r8    \n\t"            \
1715
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1716
        : [a] "l" (va), [b] "l" (vb)                     \
1717
        : "r5", "r6", "r8", "cc"                         \
1718
    )
1719
#endif
1720
#ifndef DEBUG
1721
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant used when DEBUG is not defined.
 * Because of the assumption above, the two additions of the al * bl
 * partial product and the first addition of al * bh carry only into vh;
 * the remaining additions ripple their carry up to vo.
 * r5 holds zero for the carry-only additions.
 * Registers r5, r6, r7 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1724
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1725
    __asm__ __volatile__ (                               \
1726
        /* al * bl */                                    \
1727
        "uxth r6, %[a]    \n\t"            \
1728
        "uxth r7, %[b]    \n\t"            \
1729
        "muls r7, r6      \n\t"            \
1730
        "adds %[l], %[l], r7    \n\t"            \
1731
        "movs r5, #0      \n\t"            \
1732
        "adcs %[h], r5    \n\t"            \
1733
        "adds %[l], %[l], r7    \n\t"            \
1734
        "adcs %[h], r5    \n\t"            \
1735
        /* al * bh */                                    \
1736
        "lsrs r7, %[b], #16   \n\t"            \
1737
        "muls r6, r7      \n\t"            \
1738
        "lsrs r7, r6, #16   \n\t"            \
1739
        "lsls r6, r6, #16   \n\t"            \
1740
        "adds %[l], %[l], r6    \n\t"            \
1741
        "adcs %[h], r7    \n\t"            \
1742
        "adds %[l], %[l], r6    \n\t"            \
1743
        "adcs %[h], r7    \n\t"            \
1744
        "adcs %[o], r5    \n\t"            \
1745
        /* ah * bh */                                    \
1746
        "lsrs r6, %[a], #16   \n\t"            \
1747
        "lsrs r7, %[b], #16   \n\t"            \
1748
        "muls r7, r6      \n\t"            \
1749
        "adds %[h], %[h], r7    \n\t"            \
1750
        "adcs %[o], r5    \n\t"            \
1751
        "adds %[h], %[h], r7    \n\t"            \
1752
        "adcs %[o], r5    \n\t"            \
1753
        /* ah * bl */                                    \
1754
        "uxth r7, %[b]    \n\t"            \
1755
        "muls r6, r7      \n\t"            \
1756
        "lsrs r7, r6, #16   \n\t"            \
1757
        "lsls r6, r6, #16   \n\t"            \
1758
        "adds %[l], %[l], r6    \n\t"            \
1759
        "adcs %[h], r7    \n\t"            \
1760
        "adcs %[o], r5    \n\t"            \
1761
        "adds %[l], %[l], r6    \n\t"            \
1762
        "adcs %[h], r7    \n\t"            \
1763
        "adcs %[o], r5    \n\t"            \
1764
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1765
        : [a] "l" (va), [b] "l" (vb)                     \
1766
        : "r5", "r6", "r7", "cc"                         \
1767
    )
1768
#else
1769
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * Variant used when DEBUG is defined. It does not use r7. Instead, the
 * register holding va is saved in r8, overwritten with zero to act as the
 * zero addend for carry propagation, reloaded from r8 when the high half of
 * va is needed, and restored from r8 at the end.
 * NOTE(review): %[a] is declared as an input-only operand yet is written
 * inside the asm (and restored before it ends) - confirm this is acceptable
 * for the compilers in use.
 * Registers r5, r6, r8 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      First multiplicand.
 * vb          [in]      Second multiplicand.
 */
1772
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
1773
    __asm__ __volatile__ (                               \
1774
        "movs r8, %[a]    \n\t"            \
1775
        /* al * bl */                                    \
1776
        "uxth r5, %[a]    \n\t"            \
1777
        "uxth r6, %[b]    \n\t"            \
1778
        "muls r6, r5      \n\t"            \
1779
        "adds %[l], %[l], r6    \n\t"            \
1780
        "movs %[a], #0    \n\t"            \
1781
        "adcs %[h], %[a]    \n\t"            \
1782
        "adds %[l], %[l], r6    \n\t"            \
1783
        "adcs %[h], %[a]    \n\t"            \
1784
        /* al * bh */                                    \
1785
        "lsrs r6, %[b], #16   \n\t"            \
1786
        "muls r5, r6      \n\t"            \
1787
        "lsrs r6, r5, #16   \n\t"            \
1788
        "lsls r5, r5, #16   \n\t"            \
1789
        "adds %[l], %[l], r5    \n\t"            \
1790
        "adcs %[h], r6    \n\t"            \
1791
        "adds %[l], %[l], r5    \n\t"            \
1792
        "adcs %[h], r6    \n\t"            \
1793
        "adcs %[o], %[a]    \n\t"            \
1794
        /* ah * bh */                                    \
1795
        "movs %[a], r8    \n\t"            \
1796
        "lsrs r5, %[a], #16   \n\t"            \
1797
        "lsrs r6, %[b], #16   \n\t"            \
1798
        "muls r6, r5      \n\t"            \
1799
        "movs %[a], #0    \n\t"            \
1800
        "adds %[h], %[h], r6    \n\t"            \
1801
        "adcs %[o], %[a]    \n\t"            \
1802
        "adds %[h], %[h], r6    \n\t"            \
1803
        "adcs %[o], %[a]    \n\t"            \
1804
        /* ah * bl */                                    \
1805
        "uxth r6, %[b]    \n\t"            \
1806
        "muls r5, r6      \n\t"            \
1807
        "lsrs r6, r5, #16   \n\t"            \
1808
        "lsls r5, r5, #16   \n\t"            \
1809
        "adds %[l], %[l], r5    \n\t"            \
1810
        "adcs %[h], r6    \n\t"            \
1811
        "adcs %[o], %[a]    \n\t"            \
1812
        "adds %[l], %[l], r5    \n\t"            \
1813
        "adcs %[h], r6    \n\t"            \
1814
        "adcs %[o], %[a]    \n\t"            \
1815
        "movs %[a], r8    \n\t"            \
1816
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1817
        : [a] "l" (va), [b] "l" (vb)                     \
1818
        : "r5", "r6", "r8", "cc"                         \
1819
    )
1820
#endif
1821
/* Square va and store double size result in: vh | vl
 *
 * With va split into 16-bit halves (ah | al):
 *   vl = al * al, vh = ah * ah, then the cross product al * ah is doubled
 *   and aligned by splitting it as (p << 17) into vl and (p >> 15) into vh,
 *   with the carry from the low word added into vh.
 * Registers r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the square.
 * vh  [out]  High word of the square.
 * va  [in]   Value to square.
 */
1822
#define SP_ASM_SQR(vl, vh, va)                           \
1823
    __asm__ __volatile__ (                               \
1824
        "lsrs r5, %[a], #16   \n\t"            \
1825
        "uxth r6, %[a]    \n\t"            \
1826
        "mov  %[l], r6    \n\t"            \
1827
        "mov  %[h], r5    \n\t"            \
1828
        /* al * al */                                    \
1829
        "muls %[l], %[l]    \n\t"            \
1830
        /* ah * ah */                                    \
1831
        "muls %[h], %[h]    \n\t"            \
1832
        /* 2 * al * ah */                                \
1833
        "muls r6, r5      \n\t"            \
1834
        "lsrs r5, r6, #15   \n\t"            \
1835
        "lsls r6, r6, #17   \n\t"            \
1836
        "adds %[l], %[l], r6    \n\t"            \
1837
        "adcs %[h], r5    \n\t"            \
1838
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1839
        : [a] "l" (va)                                   \
1840
        : "r5", "r6", "cc"                               \
1841
    )
1842
/* Square va and add double size result into: vo | vh | vl
 *
 * With va split into 16-bit halves (ah | al): al * al is added to vl and
 * ah * ah to vh, then the doubled cross product al * ah is added as
 * (p << 17) into vl and (p >> 15) into vh. Carries ripple up to vo, with
 * r5 holding zero for the carry-only additions.
 * Registers r4, r5, r6 and the condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va          [in]      Value to square.
 */
1843
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
1844
    __asm__ __volatile__ (                               \
1845
        "lsrs r4, %[a], #16   \n\t"            \
1846
        "uxth r6, %[a]    \n\t"            \
1847
        /* al * al */                                    \
1848
        "muls r6, r6      \n\t"            \
1849
        /* ah * ah */                                    \
1850
        "muls r4, r4      \n\t"            \
1851
        "adds %[l], %[l], r6    \n\t"            \
1852
        "adcs %[h], r4    \n\t"            \
1853
        "movs r5, #0      \n\t"            \
1854
        "adcs %[o], r5    \n\t"            \
1855
        "lsrs r4, %[a], #16   \n\t"            \
1856
        "uxth r6, %[a]    \n\t"            \
1857
        /* 2 * al * ah */                                \
1858
        "muls r6, r4      \n\t"            \
1859
        "lsrs r4, r6, #15   \n\t"            \
1860
        "lsls r6, r6, #17   \n\t"            \
1861
        "adds %[l], %[l], r6    \n\t"            \
1862
        "adcs %[h], r4    \n\t"            \
1863
        "adcs %[o], r5    \n\t"            \
1864
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1865
        : [a] "l" (va)                                   \
1866
        : "r4", "r5", "r6", "cc"                         \
1867
    )
1868
/* Square va and add double size result into: vh | vl
 *
 * With va split into 16-bit halves (ah | al): al * al is added to vl and
 * ah * ah (plus carry) to vh, then the doubled cross product al * ah is
 * added as (p << 17) into vl and (p >> 15) (plus carry) into vh.
 * Any carry out of vh is not recorded.
 *
 * r5 holds the high half (ah) and its derived values, r6 the low half (al)
 * and its derived values. The two halves must live in different registers:
 * using one register for both discards ah before it is ever multiplied.
 * Registers r5, r6 and the condition flags are clobbered.
 *
 * vl, vh  [in,out]  Low and high word of the accumulator.
 * va      [in]      Value to square.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* al * al */                                    \
        "muls r6, r6      \n\t"            \
        /* ah * ah */                                    \
        "muls r5, r5      \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        /* Reload both halves: the squares above overwrote them. */ \
        "lsrs r5, %[a], #16   \n\t"            \
        "uxth r6, %[a]    \n\t"            \
        /* 2 * al * ah */                                \
        "muls r6, r5      \n\t"            \
        "lsrs r5, r6, #15   \n\t"            \
        "lsls r6, r6, #17   \n\t"            \
        "adds %[l], %[l], r6    \n\t"            \
        "adcs %[h], r5    \n\t"            \
        : [l] "+l" (vl), [h] "+l" (vh)                   \
        : [a] "l" (va)                                   \
        : "r5", "r6", "cc"                               \
    )
1891
/* Add va into: vh | vl
 *
 * Adds the single word va to the low word vl and adds the resulting carry
 * into the high word vh, using r5 as a zero addend.
 * Register r5 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the two word accumulator.
 * vh  [in,out]  High word of the two word accumulator.
 * va  [in]      Word to add.
 */
1892
#define SP_ASM_ADDC(vl, vh, va)                          \
1893
    __asm__ __volatile__ (                               \
1894
        "adds %[l], %[l], %[a]  \n\t"            \
1895
        "movs r5, #0      \n\t"            \
1896
        "adcs %[h], r5    \n\t"            \
1897
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1898
        : [a] "l" (va)                                   \
1899
        : "r5", "cc"                                     \
1900
    )
1901
/* Sub va from: vh | vl
 *
 * Subtracts the single word va from the low word vl and applies the
 * resulting borrow to the high word vh, using r5 as a zero operand.
 * Register r5 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the two word value.
 * vh  [in,out]  High word of the two word value.
 * va  [in]      Word to subtract.
 */
1902
#define SP_ASM_SUBB(vl, vh, va)                          \
1903
    __asm__ __volatile__ (                               \
1904
        "subs %[l], %[l], %[a]  \n\t"            \
1905
        "movs r5, #0      \n\t"            \
1906
        "sbcs %[h], r5    \n\t"            \
1907
        : [l] "+l" (vl), [h] "+l" (vh)                   \
1908
        : [a] "l" (va)                                   \
1909
        : "r5", "cc"                                     \
1910
    )
1911
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word value vc | vb | va is added to the three word accumulator
 * vo | vh | vl twice in succession, each time as a carry chain from the low
 * word to the top word. No scratch registers are used.
 * The condition flags are clobbered.
 *
 * vl, vh, vo  [in,out]  Low, middle and top word of the accumulator.
 * va, vb, vc  [in]      Low, middle and top word of the value to add.
 */
1912
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
1913
    __asm__ __volatile__ (                               \
1914
        "adds %[l], %[l], %[a]  \n\t"            \
1915
        "adcs %[h], %[b]    \n\t"            \
1916
        "adcs %[o], %[c]    \n\t"            \
1917
        "adds %[l], %[l], %[a]  \n\t"            \
1918
        "adcs %[h], %[b]    \n\t"            \
1919
        "adcs %[o], %[c]    \n\t"            \
1920
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1921
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
1922
        : "cc"                                           \
1923
    )
1924
1925
#elif defined(WOLFSSL_KEIL)
1926
1927
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Each operand is split into 16-bit halves (al/ah and bl/bh) and the result
 * is assembled from the four partial products al*bl, al*bh, ah*bh and ah*bl.
 * The two cross products are split with 16-bit shifts so that their low half
 * lands in the top of vl and their high half in the bottom of vh.
 *
 * Although vl and vh are declared read-write, both are written before they
 * are read, so any previous contents are discarded.
 * r4, r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
1928
#define SP_ASM_MUL(vl, vh, va, vb)                       \
1929
    __asm__ __volatile__ (                               \
1930
        /* al * bl */                                    \
1931
        "uxth r6, %[a]    \n\t"            \
1932
        "uxth %[l], %[b]    \n\t"            \
1933
        "muls %[l], r6, %[l]    \n\t"            \
1934
        /* al * bh */                                    \
1935
        "lsrs r4, %[b], #16   \n\t"            \
1936
        "muls r6, r4, r6    \n\t"            \
1937
        "lsrs %[h], r6, #16   \n\t"            \
1938
        "lsls r6, r6, #16   \n\t"            \
1939
        "adds %[l], %[l], r6    \n\t"            \
1940
        /* r5 = 0 so that only the carry is added to vh */ \
        "movs r5, #0      \n\t"            \
1941
        "adcs %[h], %[h], r5    \n\t"            \
1942
        /* ah * bh */                                    \
1943
        "lsrs r6, %[a], #16   \n\t"            \
1944
        "muls r4, r6, r4    \n\t"            \
1945
        "adds %[h], %[h], r4    \n\t"            \
1946
        /* ah * bl */                                    \
1947
        "uxth r4, %[b]    \n\t"            \
1948
        "muls r6, r4, r6    \n\t"            \
1949
        "lsrs r4, r6, #16   \n\t"            \
1950
        "lsls r6, r6, #16   \n\t"            \
1951
        "adds %[l], %[l], r6    \n\t"            \
1952
        "adcs %[h], %[h], r4    \n\t"            \
1953
        : [h] "+l" (vh), [l] "+l" (vl)                   \
1954
        : [a] "l" (va), [b] "l" (vb)                     \
1955
        : "r4", "r5", "r6", "cc"                         \
1956
    )
1957
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same four 16-bit partial product scheme (al*bl, al*bh, ah*bh, ah*bl) as a
 * plain double size multiply, but the third word vo is also written: it is
 * set to zero and doubles as the zero operand of the first add-with-carry.
 * Nothing is added to vo afterwards, so it is zero on exit.
 *
 * vl, vh and vo are all written before they are read, so any previous
 * contents are discarded. r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * vo  [out]  Overflow word; set to zero.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
1958
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
1959
    __asm__ __volatile__ (                               \
1960
        /* al * bl */                                    \
1961
        "uxth r6, %[a]    \n\t"            \
1962
        "uxth %[l], %[b]    \n\t"            \
1963
        "muls %[l], r6, %[l]    \n\t"            \
1964
        /* al * bh */                                    \
1965
        "lsrs r5, %[b], #16   \n\t"            \
1966
        "muls r6, r5, r6    \n\t"            \
1967
        "lsrs %[h], r6, #16   \n\t"            \
1968
        "lsls r6, r6, #16   \n\t"            \
1969
        "adds %[l], %[l], r6    \n\t"            \
1970
        /* vo = 0; also used as the zero for the carry add */ \
        "movs %[o], #0    \n\t"            \
1971
        "adcs %[h], %[h], %[o]  \n\t"            \
1972
        /* ah * bh */                                    \
1973
        "lsrs r6, %[a], #16   \n\t"            \
1974
        "muls r5, r6, r5    \n\t"            \
1975
        "adds %[h], %[h], r5    \n\t"            \
1976
        /* ah * bl */                                    \
1977
        "uxth r5, %[b]    \n\t"            \
1978
        "muls r6, r5, r6    \n\t"            \
1979
        "lsrs r5, r6, #16   \n\t"            \
1980
        "lsls r6, r6, #16   \n\t"            \
1981
        "adds %[l], %[l], r6    \n\t"            \
1982
        "adcs %[h], %[h], r5    \n\t"            \
1983
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
1984
        : [a] "l" (va), [b] "l" (vb)                     \
1985
        : "r5", "r6", "cc"                               \
1986
    )
1987
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
1987
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed from the four 16-bit partial products al*bl, al*bh,
 * ah*bh and ah*bl. Each one is added into the accumulator as soon as it is
 * computed and the carry is propagated through vh into vo every time.
 *
 * This variant keeps zero in r5 for the whole sequence and uses r6 and r7 as
 * scratch. r5, r6, r7 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      First multiplicand.
 * vb  [in]      Second multiplicand.
 */
1988
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
1989
    __asm__ __volatile__ (                               \
1990
        /* al * bl */                                    \
1991
        "uxth r6, %[a]    \n\t"            \
1992
        "uxth r7, %[b]    \n\t"            \
1993
        "muls r7, r6, r7    \n\t"            \
1994
        "adds %[l], %[l], r7    \n\t"            \
1995
        "movs r5, #0      \n\t"            \
1996
        "adcs %[h], %[h], r5    \n\t"            \
1997
        "adcs %[o], %[o], r5    \n\t"            \
1998
        /* al * bh */                                    \
1999
        "lsrs r7, %[b], #16   \n\t"            \
2000
        "muls r6, r7, r6    \n\t"            \
2001
        "lsrs r7, r6, #16   \n\t"            \
2002
        "lsls r6, r6, #16   \n\t"            \
2003
        "adds %[l], %[l], r6    \n\t"            \
2004
        "adcs %[h], %[h], r7    \n\t"            \
2005
        "adcs %[o], %[o], r5    \n\t"            \
2006
        /* ah * bh */                                    \
2007
        "lsrs r6, %[a], #16   \n\t"            \
2008
        "lsrs r7, %[b], #16   \n\t"            \
2009
        "muls r7, r6, r7    \n\t"            \
2010
        "adds %[h], %[h], r7    \n\t"            \
2011
        "adcs %[o], %[o], r5    \n\t"            \
2012
        /* ah * bl */                                    \
2013
        "uxth r7, %[b]    \n\t"            \
2014
        "muls r6, r7, r6    \n\t"            \
2015
        "lsrs r7, r6, #16   \n\t"            \
2016
        "lsls r6, r6, #16   \n\t"            \
2017
        "adds %[l], %[l], r6    \n\t"            \
2018
        "adcs %[h], %[h], r7    \n\t"            \
2019
        "adcs %[o], %[o], r5    \n\t"            \
2020
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2021
        : [a] "l" (va), [b] "l" (vb)                     \
2022
        : "r5", "r6", "r7", "cc"                         \
2023
    )
2024
#else
2025
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant selected when WOLFSSL_SP_SMALL or DEBUG is defined. It performs
 * the same four partial product accumulation but uses only r5 and r6 as
 * scratch: r5 holds a partial product half and is reloaded with zero
 * immediately before each add-with-carry into vo.
 * r5, r6 and the condition flags are clobbered.
 *
 * NOTE(review): presumably r7 is avoided here because it is not free in
 * these build configurations - confirm.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2026
    __asm__ __volatile__ (                               \
2027
        /* al * bl */                                    \
2028
        "uxth   r6, %[a]                \n\t"            \
2029
        "uxth   r5, %[b]                \n\t"            \
2030
        "muls   r5, r6, r5              \n\t"            \
2031
        "adds   %[l], %[l], r5          \n\t"            \
2032
        "movs   r5, #0                  \n\t"            \
2033
        "adcs   %[h], %[h], r5          \n\t"            \
2034
        "adcs   %[o], %[o], r5          \n\t"            \
2035
        /* al * bh */                                    \
2036
        "lsrs   r5, %[b], #16           \n\t"            \
2037
        "muls   r6, r5, r6              \n\t"            \
2038
        "lsrs   r5, r6, #16             \n\t"            \
2039
        "lsls   r6, r6, #16             \n\t"            \
2040
        "adds   %[l], %[l], r6          \n\t"            \
2041
        "adcs   %[h], %[h], r5          \n\t"            \
2042
        "movs   r5, #0                  \n\t"            \
2043
        "adcs   %[o], %[o], r5          \n\t"            \
2044
        /* ah * bh */                                    \
2045
        "lsrs   r6, %[a], #16           \n\t"            \
2046
        "lsrs   r5, %[b], #16           \n\t"            \
2047
        "muls   r5, r6, r5              \n\t"            \
2048
        "adds   %[h], %[h], r5          \n\t"            \
2049
        "movs   r5, #0                  \n\t"            \
2050
        "adcs   %[o], %[o], r5          \n\t"            \
2051
        /* ah * bl */                                    \
2052
        "uxth   r5, %[b]                \n\t"            \
2053
        "muls   r6, r5, r6              \n\t"            \
2054
        "lsrs   r5, r6, #16             \n\t"            \
2055
        "lsls   r6, r6, #16             \n\t"            \
2056
        "adds   %[l], %[l], r6          \n\t"            \
2057
        "adcs   %[h], %[h], r5          \n\t"            \
2058
        "movs   r5, #0                  \n\t"            \
2059
        "adcs   %[o], %[o], r5          \n\t"            \
2060
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2061
        : [a] "l" (va), [b] "l" (vb)                     \
2062
        : "r5", "r6", "cc"                               \
2063
    )
2064
#endif
2066
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed from the four 16-bit partial products al*bl, al*bh,
 * ah*bh and ah*bl, each added into the two-word accumulator as it is
 * computed. There is no third accumulator word: any carry out of vh is
 * dropped, so the caller must know the sum fits in two words.
 *
 * r4, r5 (zero for the first carry add), r6 and the condition flags are
 * clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  High word of the accumulator.
 * va  [in]      First multiplicand.
 * vb  [in]      Second multiplicand.
 */
2067
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2068
    __asm__ __volatile__ (                               \
2069
        /* al * bl */                                    \
2070
        "uxth r6, %[a]    \n\t"            \
2071
        "uxth r4, %[b]    \n\t"            \
2072
        "muls r4, r6, r4    \n\t"            \
2073
        "adds %[l], %[l], r4    \n\t"            \
2074
        "movs r5, #0      \n\t"            \
2075
        "adcs %[h], %[h], r5    \n\t"            \
2076
        /* al * bh */                                    \
2077
        "lsrs r4, %[b], #16   \n\t"            \
2078
        "muls r6, r4, r6    \n\t"            \
2079
        "lsrs r4, r6, #16   \n\t"            \
2080
        "lsls r6, r6, #16   \n\t"            \
2081
        "adds %[l], %[l], r6    \n\t"            \
2082
        "adcs %[h], %[h], r4    \n\t"            \
2083
        /* ah * bh */                                    \
2084
        "lsrs r6, %[a], #16   \n\t"            \
2085
        "lsrs r4, %[b], #16   \n\t"            \
2086
        "muls r4, r6, r4    \n\t"            \
2087
        "adds %[h], %[h], r4    \n\t"            \
2088
        /* ah * bl */                                    \
2089
        "uxth r4, %[b]    \n\t"            \
2090
        "muls r6, r4, r6    \n\t"            \
2091
        "lsrs r4, r6, #16   \n\t"            \
2092
        "lsls r6, r6, #16   \n\t"            \
2093
        "adds %[l], %[l], r6    \n\t"            \
2094
        "adcs %[h], %[h], r4    \n\t"            \
2095
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2096
        : [a] "l" (va), [b] "l" (vb)                     \
2097
        : "r4", "r5", "r6", "cc"                         \
2098
    )
2099
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2100
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is formed from the four 16-bit partial products al*bl, al*bh,
 * ah*bh and ah*bl. Rather than doubling the product, each partial product is
 * added into the accumulator two times in a row, with the carry propagated
 * through vh into vo after every add.
 *
 * This variant keeps zero in r5 for the whole sequence and uses r6 and r7 as
 * scratch. r5, r6, r7 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      First multiplicand.
 * vb  [in]      Second multiplicand.
 */
2101
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2102
    __asm__ __volatile__ (                               \
2103
        /* al * bl */                                    \
2104
        "uxth r6, %[a]    \n\t"            \
2105
        "uxth r7, %[b]    \n\t"            \
2106
        "muls r7, r6, r7    \n\t"            \
2107
        "adds %[l], %[l], r7    \n\t"            \
2108
        "movs r5, #0      \n\t"            \
2109
        "adcs %[h], %[h], r5    \n\t"            \
2110
        "adcs %[o], %[o], r5    \n\t"            \
2111
        "adds %[l], %[l], r7    \n\t"            \
2112
        "adcs %[h], %[h], r5    \n\t"            \
2113
        "adcs %[o], %[o], r5    \n\t"            \
2114
        /* al * bh */                                    \
2115
        "lsrs r7, %[b], #16   \n\t"            \
2116
        "muls r6, r7, r6    \n\t"            \
2117
        "lsrs r7, r6, #16   \n\t"            \
2118
        "lsls r6, r6, #16   \n\t"            \
2119
        "adds %[l], %[l], r6    \n\t"            \
2120
        "adcs %[h], %[h], r7    \n\t"            \
2121
        "adcs %[o], %[o], r5    \n\t"            \
2122
        "adds %[l], %[l], r6    \n\t"            \
2123
        "adcs %[h], %[h], r7    \n\t"            \
2124
        "adcs %[o], %[o], r5    \n\t"            \
2125
        /* ah * bh */                                    \
2126
        "lsrs r6, %[a], #16   \n\t"            \
2127
        "lsrs r7, %[b], #16   \n\t"            \
2128
        "muls r7, r6, r7    \n\t"            \
2129
        "adds %[h], %[h], r7    \n\t"            \
2130
        "adcs %[o], %[o], r5    \n\t"            \
2131
        "adds %[h], %[h], r7    \n\t"            \
2132
        "adcs %[o], %[o], r5    \n\t"            \
2133
        /* ah * bl */                                    \
2134
        "uxth r7, %[b]    \n\t"            \
2135
        "muls r6, r7, r6    \n\t"            \
2136
        "lsrs r7, r6, #16   \n\t"            \
2137
        "lsls r6, r6, #16   \n\t"            \
2138
        "adds %[l], %[l], r6    \n\t"            \
2139
        "adcs %[h], %[h], r7    \n\t"            \
2140
        "adcs %[o], %[o], r5    \n\t"            \
2141
        "adds %[l], %[l], r6    \n\t"            \
2142
        "adcs %[h], %[h], r7    \n\t"            \
2143
        "adcs %[o], %[o], r5    \n\t"            \
2144
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2145
        : [a] "l" (va), [b] "l" (vb)                     \
2146
        : "r5", "r6", "r7", "cc"                         \
2147
    )
2148
#else
2149
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant selected when WOLFSSL_SP_SMALL or DEBUG is defined. It performs
 * the same doubled accumulation of the four partial products but uses only
 * r5 and r6 as low scratch registers. To get a zero operand without another
 * low register, va is saved in r8 and the register holding va is itself
 * overwritten with zero; va is reloaded from r8 before the ah terms and
 * again at the end, so the input operand holds its original value on exit.
 * r5, r6, r8 and the condition flags are clobbered.
 */
2150
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2151
    __asm__ __volatile__ (                               \
2152
        /* save va so its register can be reused as zero */ \
        "movs r8, %[a]    \n\t"            \
2153
        /* al * bl */                                    \
2154
        "uxth r6, %[a]    \n\t"            \
2155
        "uxth r5, %[b]    \n\t"            \
2156
        "muls r5, r6, r5    \n\t"            \
2157
        "adds %[l], %[l], r5    \n\t"            \
2158
        "movs %[a], #0    \n\t"            \
2159
        "adcs %[h], %[h], %[a]  \n\t"            \
2160
        "adcs %[o], %[o], %[a]  \n\t"            \
2161
        "adds %[l], %[l], r5    \n\t"            \
2162
        "adcs %[h], %[h], %[a]  \n\t"            \
2163
        "adcs %[o], %[o], %[a]  \n\t"            \
2164
        /* al * bh */                                    \
2165
        "lsrs r5, %[b], #16   \n\t"            \
2166
        "muls r6, r5, r6    \n\t"            \
2167
        "lsrs r5, r6, #16   \n\t"            \
2168
        "lsls r6, r6, #16   \n\t"            \
2169
        "adds %[l], %[l], r6    \n\t"            \
2170
        "adcs %[h], %[h], r5    \n\t"            \
2171
        "adcs %[o], %[o], %[a]  \n\t"            \
2172
        "adds %[l], %[l], r6    \n\t"            \
2173
        "adcs %[h], %[h], r5    \n\t"            \
2174
        "adcs %[o], %[o], %[a]  \n\t"            \
2175
        /* ah * bh */                                    \
2176
        "movs %[a], r8    \n\t"            \
2177
        "lsrs r6, %[a], #16   \n\t"            \
2178
        "lsrs r5, %[b], #16   \n\t"            \
2179
        "muls r5, r6, r5    \n\t"            \
2180
        "adds %[h], %[h], r5    \n\t"            \
2181
        "movs %[a], #0    \n\t"            \
2182
        "adcs %[o], %[o], %[a]  \n\t"            \
2183
        "adds %[h], %[h], r5    \n\t"            \
2184
        "adcs %[o], %[o], %[a]  \n\t"            \
2185
        /* ah * bl */                                    \
2186
        "uxth r5, %[b]    \n\t"            \
2187
        "muls r6, r5, r6    \n\t"            \
2188
        "lsrs r5, r6, #16   \n\t"            \
2189
        "lsls r6, r6, #16   \n\t"            \
2190
        "adds %[l], %[l], r6    \n\t"            \
2191
        "adcs %[h], %[h], r5    \n\t"            \
2192
        "adcs %[o], %[o], %[a]  \n\t"            \
2193
        "adds %[l], %[l], r6    \n\t"            \
2194
        "adcs %[h], %[h], r5    \n\t"            \
2195
        "adcs %[o], %[o], %[a]  \n\t"            \
2196
        /* restore va */                                 \
        "movs %[a], r8    \n\t"            \
2197
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2198
        : [a] "l" (va), [b] "l" (vb)                     \
2199
        : "r5", "r6", "r8", "cc"                         \
2200
    )
2201
#endif
2202
#ifndef DEBUG
2202
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2203
 * Assumes first add will not overflow vh | vl
2204
 *
 * The four 16-bit partial products al*bl, al*bh, ah*bh and ah*bl are each
 * added into the accumulator two times in a row. Carry into vo is only
 * partially tracked:
 *   - al * bl: neither add propagates a carry into vo.
 *   - al * bh: only the second add propagates a carry into vo.
 *   - ah * bh, ah * bl: both adds propagate a carry into vo.
 *
 * r5 holds zero; r6 and r7 are scratch. r5, r6, r7 and the condition flags
 * are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      First multiplicand.
 * vb  [in]      Second multiplicand.
 */
2205
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2206
    __asm__ __volatile__ (                               \
2207
        /* al * bl */                                    \
2208
        "uxth r6, %[a]    \n\t"            \
2209
        "uxth r7, %[b]    \n\t"            \
2210
        "muls r7, r6, r7    \n\t"            \
2211
        "adds %[l], %[l], r7    \n\t"            \
2212
        "movs r5, #0      \n\t"            \
2213
        "adcs %[h], %[h], r5    \n\t"            \
2214
        "adds %[l], %[l], r7    \n\t"            \
2215
        "adcs %[h], %[h], r5    \n\t"            \
2216
        /* al * bh */                                    \
2217
        "lsrs r7, %[b], #16   \n\t"            \
2218
        "muls r6, r7, r6    \n\t"            \
2219
        "lsrs r7, r6, #16   \n\t"            \
2220
        "lsls r6, r6, #16   \n\t"            \
2221
        "adds %[l], %[l], r6    \n\t"            \
2222
        "adcs %[h], %[h], r7    \n\t"            \
2223
        "adds %[l], %[l], r6    \n\t"            \
2224
        "adcs %[h], %[h], r7    \n\t"            \
2225
        "adcs %[o], %[o], r5    \n\t"            \
2226
        /* ah * bh */                                    \
2227
        "lsrs r6, %[a], #16   \n\t"            \
2228
        "lsrs r7, %[b], #16   \n\t"            \
2229
        "muls r7, r6, r7    \n\t"            \
2230
        "adds %[h], %[h], r7    \n\t"            \
2231
        "adcs %[o], %[o], r5    \n\t"            \
2232
        "adds %[h], %[h], r7    \n\t"            \
2233
        "adcs %[o], %[o], r5    \n\t"            \
2234
        /* ah * bl */                                    \
2235
        "uxth r7, %[b]    \n\t"            \
2236
        "muls r6, r7, r6    \n\t"            \
2237
        "lsrs r7, r6, #16   \n\t"            \
2238
        "lsls r6, r6, #16   \n\t"            \
2239
        "adds %[l], %[l], r6    \n\t"            \
2240
        "adcs %[h], %[h], r7    \n\t"            \
2241
        "adcs %[o], %[o], r5    \n\t"            \
2242
        "adds %[l], %[l], r6    \n\t"            \
2243
        "adcs %[h], %[h], r7    \n\t"            \
2244
        "adcs %[o], %[o], r5    \n\t"            \
2245
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2246
        : [a] "l" (va), [b] "l" (vb)                     \
2247
        : "r5", "r6", "r7", "cc"                         \
2248
    )
2249
#else
2250
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2251
 * Assumes first add will not overflow vh | vl
2252
 *
 * Variant selected when DEBUG is defined. Carry into vo is tracked at the
 * same points as in the non-DEBUG variant, but only r5 and r6 are used as
 * low scratch registers. va is saved in r8 and the register holding va is
 * overwritten with zero to act as the zero operand; va is reloaded from r8
 * before the ah terms and again at the end, so the input operand holds its
 * original value on exit.
 * r5, r6, r8 and the condition flags are clobbered.
 */
2253
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2254
    __asm__ __volatile__ (                               \
2255
        /* save va so its register can be reused as zero */ \
        "movs r8, %[a]    \n\t"            \
2256
        /* al * bl */                                    \
2257
        "uxth r5, %[a]    \n\t"            \
2258
        "uxth r6, %[b]    \n\t"            \
2259
        "muls r6, r5, r6    \n\t"            \
2260
        "adds %[l], %[l], r6    \n\t"            \
2261
        "movs %[a], #0    \n\t"            \
2262
        "adcs %[h], %[h], %[a]  \n\t"            \
2263
        "adds %[l], %[l], r6    \n\t"            \
2264
        "adcs %[h], %[h], %[a]  \n\t"            \
2265
        /* al * bh */                                    \
2266
        "lsrs r6, %[b], #16   \n\t"            \
2267
        "muls r5, r6, r5    \n\t"            \
2268
        "lsrs r6, r5, #16   \n\t"            \
2269
        "lsls r5, r5, #16   \n\t"            \
2270
        "adds %[l], %[l], r5    \n\t"            \
2271
        "adcs %[h], %[h], r6    \n\t"            \
2272
        "adds %[l], %[l], r5    \n\t"            \
2273
        "adcs %[h], %[h], r6    \n\t"            \
2274
        "adcs %[o], %[o], %[a]  \n\t"            \
2275
        /* ah * bh */                                    \
2276
        "movs %[a], r8    \n\t"            \
2277
        "lsrs r5, %[a], #16   \n\t"            \
2278
        "lsrs r6, %[b], #16   \n\t"            \
2279
        "muls r6, r5, r6    \n\t"            \
2280
        "movs %[a], #0    \n\t"            \
2281
        "adds %[h], %[h], r6    \n\t"            \
2282
        "adcs %[o], %[o], %[a]  \n\t"            \
2283
        "adds %[h], %[h], r6    \n\t"            \
2284
        "adcs %[o], %[o], %[a]  \n\t"            \
2285
        /* ah * bl */                                    \
2286
        "uxth r6, %[b]    \n\t"            \
2287
        "muls r5, r6, r5    \n\t"            \
2288
        "lsrs r6, r5, #16   \n\t"            \
2289
        "lsls r5, r5, #16   \n\t"            \
2290
        "adds %[l], %[l], r5    \n\t"            \
2291
        "adcs %[h], %[h], r6    \n\t"            \
2292
        "adcs %[o], %[o], %[a]  \n\t"            \
2293
        "adds %[l], %[l], r5    \n\t"            \
2294
        "adcs %[h], %[h], r6    \n\t"            \
2295
        "adcs %[o], %[o], %[a]  \n\t"            \
2296
        /* restore va */                                 \
        "movs %[a], r8    \n\t"            \
2297
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2298
        : [a] "l" (va), [b] "l" (vb)                     \
2299
        : "r5", "r6", "r8", "cc"                         \
2300
    )
2301
#endif
2303
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves al and ah. vl is set to al*al and vh to
 * ah*ah, then the cross term 2*al*ah is added: the single product al*ah is
 * shifted right by 15 for the part that belongs in vh and left by 17 for
 * the part that belongs in vl, which folds the doubling into the alignment.
 *
 * vl and vh are written before they are read, so any previous contents are
 * discarded. r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the square.
 * vh  [out]  High word of the square.
 * va  [in]   Value to square.
 */
2304
#define SP_ASM_SQR(vl, vh, va)                           \
2305
    __asm__ __volatile__ (                               \
2306
        "lsrs r5, %[a], #16   \n\t"            \
2307
        "uxth r6, %[a]    \n\t"            \
2308
        "mov  %[l], r6    \n\t"            \
2309
        "mov  %[h], r5    \n\t"            \
2310
        /* al * al */                                    \
2311
        "muls %[l], %[l], %[l]  \n\t"            \
2312
        /* ah * ah */                                    \
2313
        "muls %[h], %[h], %[h]  \n\t"            \
2314
        /* 2 * al * ah */                                \
2315
        "muls r6, r5, r6    \n\t"            \
2316
        "lsrs r5, r6, #15   \n\t"            \
2317
        "lsls r6, r6, #17   \n\t"            \
2318
        "adds %[l], %[l], r6    \n\t"            \
2319
        "adcs %[h], %[h], r5    \n\t"            \
2320
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2321
        : [a] "l" (va)                                   \
2322
        : "r5", "r6", "cc"                               \
2323
    )
2324
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves al and ah. al*al is added to vl and ah*ah
 * to vh in one carry chain ending in vo. The halves are then extracted again
 * (the squaring overwrote them) and the cross term 2*al*ah is added, using
 * a right shift by 15 and a left shift by 17 of al*ah to double and align
 * it, with the carry again propagated into vo.
 *
 * r4 and r6 are scratch and r5 holds zero. r4, r5, r6 and the condition
 * flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      Value to square.
 */
2325
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2326
    __asm__ __volatile__ (                               \
2327
        "lsrs r4, %[a], #16   \n\t"            \
2328
        "uxth r6, %[a]    \n\t"            \
2329
        /* al * al */                                    \
2330
        "muls r6, r6, r6    \n\t"            \
2331
        /* ah * ah */                                    \
2332
        "muls r4, r4, r4    \n\t"            \
2333
        "adds %[l], %[l], r6    \n\t"            \
2334
        "adcs %[h], %[h], r4    \n\t"            \
2335
        "movs r5, #0      \n\t"            \
2336
        "adcs %[o], %[o], r5    \n\t"            \
2337
        "lsrs r4, %[a], #16   \n\t"            \
2338
        "uxth r6, %[a]    \n\t"            \
2339
        /* 2 * al * ah */                                \
2340
        "muls r6, r4, r6    \n\t"            \
2341
        "lsrs r4, r6, #15   \n\t"            \
2342
        "lsls r6, r6, #17   \n\t"            \
2343
        "adds %[l], %[l], r6    \n\t"            \
2344
        "adcs %[h], %[h], r4    \n\t"            \
2345
        "adcs %[o], %[o], r5    \n\t"            \
2346
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2347
        : [a] "l" (va)                                   \
2348
        : "r4", "r5", "r6", "cc"                         \
2349
    )
2350
/* Square va and add double size result into: vh | vl
 *
 * va is split into 16-bit halves al and ah. al*al is added to vl and ah*ah
 * (plus carry) to vh. The halves are then extracted again (the squaring
 * overwrote them) and the cross term 2*al*ah is added, using a right shift
 * by 15 and a left shift by 17 of al*ah to double and align it.
 * There is no third accumulator word: any carry out of vh is dropped.
 *
 * r5, r6 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  High word of the accumulator.
 * va  [in]      Value to square.
 */
2351
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2352
    __asm__ __volatile__ (                               \
2353
        "lsrs r5, %[a], #16   \n\t"            \
2354
        "uxth r6, %[a]    \n\t"            \
2355
        /* al * al */                                    \
2356
        "muls r6, r6, r6    \n\t"            \
2357
        /* ah * ah */                                    \
2358
        "muls r5, r5, r5    \n\t"            \
2359
        "adds %[l], %[l], r6    \n\t"            \
2360
        "adcs %[h], %[h], r5    \n\t"            \
2361
        "lsrs r5, %[a], #16   \n\t"            \
2362
        "uxth r6, %[a]    \n\t"            \
2363
        /* 2 * al * ah */                                \
2364
        "muls r6, r5, r6    \n\t"            \
2365
        "lsrs r5, r6, #15   \n\t"            \
2366
        "lsls r6, r6, #17   \n\t"            \
2367
        "adds %[l], %[l], r6    \n\t"            \
2368
        "adcs %[h], %[h], r5    \n\t"            \
2369
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2370
        : [a] "l" (va)                                   \
2371
        : "r5", "r6", "cc"                               \
2372
    )
2373
/* Add va into: vh | vl
 *
 * Adds the word va to vl and then adds the resulting carry into vh.
 * The carry out of vh is not captured.
 *
 * r5 is loaded with zero so that the add-with-carry contributes only the
 * carry; the code relies on that load leaving the carry from the first add
 * intact. r5 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  High word of the accumulator.
 * va  [in]      Word to add.
 */
2374
#define SP_ASM_ADDC(vl, vh, va)                          \
2375
    __asm__ __volatile__ (                               \
2376
        "adds %[l], %[l], %[a]  \n\t"            \
2377
        "movs r5, #0      \n\t"            \
2378
        "adcs %[h], %[h], r5    \n\t"            \
2379
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2380
        : [a] "l" (va)                                   \
2381
        : "r5", "cc"                                     \
2382
    )
2383
/* Sub va from: vh | vl
 *
 * Subtracts the word va from vl and then subtracts the resulting borrow
 * from vh. The borrow out of vh is not captured.
 *
 * r5 is loaded with zero so that the subtract-with-carry applies only the
 * borrow; the code relies on that load leaving the flag from the first
 * subtract intact. r5 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  High word of the accumulator.
 * va  [in]      Word to subtract.
 */
2384
#define SP_ASM_SUBB(vl, vh, va)                          \
2385
    __asm__ __volatile__ (                               \
2386
        "subs %[l], %[l], %[a]  \n\t"            \
2387
        "movs r5, #0      \n\t"            \
2388
        "sbcs %[h], %[h], r5    \n\t"            \
2389
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2390
        : [a] "l" (va)                                   \
2391
        : "r5", "cc"                                     \
2392
    )
2393
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word value vc | vb | va is added to the three-word accumulator
 * vo | vh | vl with the carry chained from low to high word, and the same
 * addition is then performed a second time. The carry out of vo is not
 * captured. No scratch registers are used; only the condition flags are
 * clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      Low word of the value to add twice.
 * vb  [in]      Middle word of the value to add twice.
 * vc  [in]      High word of the value to add twice.
 */
2394
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2395
    __asm__ __volatile__ (                               \
2396
        "adds %[l], %[l], %[a]  \n\t"            \
2397
        "adcs %[h], %[h], %[b]  \n\t"            \
2398
        "adcs %[o], %[o], %[c]  \n\t"            \
2399
        "adds %[l], %[l], %[a]  \n\t"            \
2400
        "adcs %[h], %[h], %[b]  \n\t"            \
2401
        "adcs %[o], %[o], %[c]  \n\t"            \
2402
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2403
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2404
        : "cc"                                           \
2405
    )
2406
2407
#elif defined(__GNUC__)
2408
2409
/* Multiply va by vb and store double size result in: vh | vl
 *
 * Each operand is split into 16-bit halves (al/ah and bl/bh) and the result
 * is assembled from the four partial products al*bl, al*bh, ah*bh and ah*bl.
 * The two cross products are split with 16-bit shifts so that their low half
 * lands in the top of vl and their high half in the bottom of vh.
 *
 * This section writes the instructions without the 's' suffix and with
 * two-operand mul/adc; each adc consumes the carry of the add before it, so
 * the sequence relies on add updating the carry flag and on the intervening
 * mov of zero leaving it intact.
 *
 * Although vl and vh are declared read-write, both are written before they
 * are read, so any previous contents are discarded.
 * r4, r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
2410
#define SP_ASM_MUL(vl, vh, va, vb)                       \
2411
    __asm__ __volatile__ (                               \
2412
        /* al * bl */                                    \
2413
        "uxth r6, %[a]    \n\t"            \
2414
        "uxth %[l], %[b]    \n\t"            \
2415
        "mul  %[l], r6    \n\t"            \
2416
        /* al * bh */                                    \
2417
        "lsr  r4, %[b], #16   \n\t"            \
2418
        "mul  r6, r4      \n\t"            \
2419
        "lsr  %[h], r6, #16   \n\t"            \
2420
        "lsl  r6, r6, #16   \n\t"            \
2421
        "add  %[l], %[l], r6    \n\t"            \
2422
        "mov  r5, #0      \n\t"            \
2423
        "adc  %[h], r5    \n\t"            \
2424
        /* ah * bh */                                    \
2425
        "lsr  r6, %[a], #16   \n\t"            \
2426
        "mul  r4, r6      \n\t"            \
2427
        "add  %[h], %[h], r4    \n\t"            \
2428
        /* ah * bl */                                    \
2429
        "uxth r4, %[b]    \n\t"            \
2430
        "mul  r6, r4      \n\t"            \
2431
        "lsr  r4, r6, #16   \n\t"            \
2432
        "lsl  r6, r6, #16   \n\t"            \
2433
        "add  %[l], %[l], r6    \n\t"            \
2434
        "adc  %[h], r4    \n\t"            \
2435
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2436
        : [a] "l" (va), [b] "l" (vb)                     \
2437
        : "r4", "r5", "r6", "cc"                         \
2438
    )
2439
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product is assembled from the four 16-bit partial products al*bl,
 * al*bh, ah*bh and ah*bl. The third word vo is also written: it is set to
 * zero and doubles as the zero operand of the first add-with-carry. Nothing
 * is added to vo afterwards, so it is zero on exit.
 *
 * Each adc consumes the carry of the add before it, so the sequence relies
 * on add updating the carry flag and on the mov of zero leaving it intact.
 *
 * vl, vh and vo are all written before they are read, so any previous
 * contents are discarded. r5, r6 and the condition flags are clobbered.
 *
 * vl  [out]  Low word of the product.
 * vh  [out]  High word of the product.
 * vo  [out]  Overflow word; set to zero.
 * va  [in]   First multiplicand.
 * vb  [in]   Second multiplicand.
 */
2440
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
2441
    __asm__ __volatile__ (                               \
2442
        /* al * bl */                                    \
2443
        "uxth r6, %[a]    \n\t"            \
2444
        "uxth %[l], %[b]    \n\t"            \
2445
        "mul  %[l], r6    \n\t"            \
2446
        /* al * bh */                                    \
2447
        "lsr  r5, %[b], #16   \n\t"            \
2448
        "mul  r6, r5      \n\t"            \
2449
        "lsr  %[h], r6, #16   \n\t"            \
2450
        "lsl  r6, r6, #16   \n\t"            \
2451
        "add  %[l], %[l], r6    \n\t"            \
2452
        "mov  %[o], #0    \n\t"            \
2453
        "adc  %[h], %[o]    \n\t"            \
2454
        /* ah * bh */                                    \
2455
        "lsr  r6, %[a], #16   \n\t"            \
2456
        "mul  r5, r6      \n\t"            \
2457
        "add  %[h], %[h], r5    \n\t"            \
2458
        /* ah * bl */                                    \
2459
        "uxth r5, %[b]    \n\t"            \
2460
        "mul  r6, r5      \n\t"            \
2461
        "lsr  r5, r6, #16   \n\t"            \
2462
        "lsl  r6, r6, #16   \n\t"            \
2463
        "add  %[l], %[l], r6    \n\t"            \
2464
        "adc  %[h], r5    \n\t"            \
2465
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2466
        : [a] "l" (va), [b] "l" (vb)                     \
2467
        : "r5", "r6", "cc"                               \
2468
    )
2469
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2470
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed from the four 16-bit partial products al*bl, al*bh,
 * ah*bh and ah*bl. Each one is added into the accumulator as soon as it is
 * computed and the carry is propagated through vh into vo every time.
 *
 * This variant keeps zero in r5 for the whole sequence and uses r6 and r7 as
 * scratch. Each adc consumes the carry of the add/adc before it, so the
 * sequence relies on add updating the carry flag and on the mov of zero
 * leaving it intact. r5, r6, r7 and the condition flags are clobbered.
 *
 * vl  [in,out]  Low word of the accumulator.
 * vh  [in,out]  Middle word of the accumulator.
 * vo  [in,out]  High word of the accumulator.
 * va  [in]      First multiplicand.
 * vb  [in]      Second multiplicand.
 */
2471
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2472
    __asm__ __volatile__ (                               \
2473
        /* al * bl */                                    \
2474
        "uxth r6, %[a]    \n\t"            \
2475
        "uxth r7, %[b]    \n\t"            \
2476
        "mul  r7, r6      \n\t"            \
2477
        "add  %[l], %[l], r7    \n\t"            \
2478
        "mov  r5, #0      \n\t"            \
2479
        "adc  %[h], r5    \n\t"            \
2480
        "adc  %[o], r5    \n\t"            \
2481
        /* al * bh */                                    \
2482
        "lsr  r7, %[b], #16   \n\t"            \
2483
        "mul  r6, r7      \n\t"            \
2484
        "lsr  r7, r6, #16   \n\t"            \
2485
        "lsl  r6, r6, #16   \n\t"            \
2486
        "add  %[l], %[l], r6    \n\t"            \
2487
        "adc  %[h], r7    \n\t"            \
2488
        "adc  %[o], r5    \n\t"            \
2489
        /* ah * bh */                                    \
2490
        "lsr  r6, %[a], #16   \n\t"            \
2491
        "lsr  r7, %[b], #16   \n\t"            \
2492
        "mul  r7, r6      \n\t"            \
2493
        "add  %[h], %[h], r7    \n\t"            \
2494
        "adc  %[o], r5    \n\t"            \
2495
        /* ah * bl */                                    \
2496
        "uxth r7, %[b]    \n\t"            \
2497
        "mul  r6, r7      \n\t"            \
2498
        "lsr  r7, r6, #16   \n\t"            \
2499
        "lsl  r6, r6, #16   \n\t"            \
2500
        "add  %[l], %[l], r6    \n\t"            \
2501
        "adc  %[h], r7    \n\t"            \
2502
        "adc  %[o], r5    \n\t"            \
2503
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2504
        : [a] "l" (va), [b] "l" (vb)                     \
2505
        : "r5", "r6", "r7", "cc"                         \
2506
    )
2507
#else
2508
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * Variant that uses only two scratch registers (r5, r6). The product is
 * built from four partial products of the 16-bit halves of va and vb
 * (al/ah = low/high half of va, bl/bh = low/high half of vb).
 *
 * r5 doubles as a product scratch and as the zero used to carry into vo, so
 * it is set back to zero before each carry into vo that follows a use of r5
 * as data.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb:     input operands ("l"); not written by this variant.
 * Scratch:    r5, r6; flags clobbered.
 *
 * NOTE(review): several "mov r5, #0" instructions sit between an add/adc and
 * the adc that consumes its carry; this relies on that mov leaving the carry
 * flag untouched - confirm for the assembler syntax in use.
 */
2509
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
2510
    __asm__ __volatile__ (                               \
2511
        /* al * bl: low halves, added at the bottom of vl */ \
2512
        "uxth   r6, %[a]                \n\t"            \
2513
        "uxth   r5, %[b]                \n\t"            \
2514
        "mul    r5, r6                  \n\t"            \
2515
        "add    %[l], %[l], r5          \n\t"            \
2516
        "mov    r5, #0                  \n\t"            \
2517
        "adc    %[h], r5                \n\t"            \
2518
        "adc    %[o], r5                \n\t"            \
2519
        /* al * bh: cross product, split 16/16 across vl and vh */ \
2520
        "lsr    r5, %[b], #16           \n\t"            \
2521
        "mul    r6, r5                  \n\t"            \
2522
        "lsr    r5, r6, #16             \n\t"            \
2523
        "lsl    r6, r6, #16             \n\t"            \
2524
        "add    %[l], %[l], r6          \n\t"            \
2525
        "adc    %[h], r5                \n\t"            \
2526
        "mov    r5, #0                  \n\t"            \
2527
        "adc    %[o], r5                \n\t"            \
2528
        /* ah * bh: high halves, added into vh */        \
2529
        "lsr    r6, %[a], #16           \n\t"            \
2530
        "lsr    r5, %[b], #16           \n\t"            \
2531
        "mul    r5, r6                  \n\t"            \
2532
        "add    %[h], %[h], r5          \n\t"            \
2533
        "mov    r5, #0                  \n\t"            \
2534
        "adc    %[o], r5                \n\t"            \
2535
        /* ah * bl: other cross product, split as for al * bh */ \
2536
        "uxth   r5, %[b]                \n\t"            \
2537
        "mul    r6, r5                  \n\t"            \
2538
        "lsr    r5, r6, #16             \n\t"            \
2539
        "lsl    r6, r6, #16             \n\t"            \
2540
        "add    %[l], %[l], r6          \n\t"            \
2541
        "adc    %[h], r5                \n\t"            \
2542
        "mov    r5, #0                  \n\t"            \
2543
        "adc    %[o], r5                \n\t"            \
2544
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2545
        : [a] "l" (va), [b] "l" (vb)                     \
2546
        : "r5", "r6", "cc"                               \
2547
    )
2548
#endif
2549
/* Multiply va by vb and add double size result into: vh | vl
 *
 * Two-word accumulator version: there is no third word, so a carry out of vh
 * is not recorded anywhere. The product is built from four partial products
 * of the 16-bit halves of va and vb (al/ah = low/high half of va,
 * bl/bh = low/high half of vb).
 *
 * vl, vh:  read-write operands ("+l") holding the accumulator.
 * va, vb:  input operands ("l"); not written.
 * Scratch: r4, r5 (zero for the first carry into vh), r6; flags clobbered.
 */
2550
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
2551
    __asm__ __volatile__ (                               \
2552
        /* al * bl: low halves, added at the bottom of vl */ \
2553
        "uxth r6, %[a]    \n\t"            \
2554
        "uxth r4, %[b]    \n\t"            \
2555
        "mul  r4, r6      \n\t"            \
2556
        "add  %[l], %[l], r4    \n\t"            \
2557
        "mov  r5, #0      \n\t"            \
2558
        "adc  %[h], r5    \n\t"            \
2559
        /* al * bh: cross product, split 16/16 across vl and vh */ \
2560
        "lsr  r4, %[b], #16   \n\t"            \
2561
        "mul  r6, r4      \n\t"            \
2562
        "lsr  r4, r6, #16   \n\t"            \
2563
        "lsl  r6, r6, #16   \n\t"            \
2564
        "add  %[l], %[l], r6    \n\t"            \
2565
        "adc  %[h], r4    \n\t"            \
2566
        /* ah * bh: high halves, added into vh (no carry out kept) */ \
2567
        "lsr  r6, %[a], #16   \n\t"            \
2568
        "lsr  r4, %[b], #16   \n\t"            \
2569
        "mul  r4, r6      \n\t"            \
2570
        "add  %[h], %[h], r4    \n\t"            \
2571
        /* ah * bl: other cross product, split as for al * bh */ \
2572
        "uxth r4, %[b]    \n\t"            \
2573
        "mul  r6, r4      \n\t"            \
2574
        "lsr  r4, r6, #16   \n\t"            \
2575
        "lsl  r6, r6, #16   \n\t"            \
2576
        "add  %[l], %[l], r6    \n\t"            \
2577
        "adc  %[h], r4    \n\t"            \
2578
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2579
        : [a] "l" (va), [b] "l" (vb)                     \
2580
        : "r4", "r5", "r6", "cc"                         \
2581
    )
2582
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
2583
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is built from four partial products of the 16-bit halves of va
 * and vb (al/ah = low/high half of va, bl/bh = low/high half of vb). Each
 * partial product is computed once and then added to the accumulator two
 * times, with the carry propagated into vo after each addition.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb:     input operands ("l"); not written by this variant.
 * Scratch:    r5 (kept at zero to propagate carries), r6, r7; flags clobbered.
 */
2583
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2584
    __asm__ __volatile__ (                               \
2585
        /* al * bl: low halves, added twice at the bottom of vl */ \
2586
        "uxth r6, %[a]    \n\t"            \
2587
        "uxth r7, %[b]    \n\t"            \
2588
        "mul  r7, r6      \n\t"            \
2589
        "add  %[l], %[l], r7    \n\t"            \
2590
        "mov  r5, #0      \n\t"            \
2591
        "adc  %[h], r5    \n\t"            \
2592
        "adc  %[o], r5    \n\t"            \
2593
        "add  %[l], %[l], r7    \n\t"            \
2594
        "adc  %[h], r5    \n\t"            \
2595
        "adc  %[o], r5    \n\t"            \
2596
        /* al * bh: cross product, split 16/16, added twice */ \
2597
        "lsr  r7, %[b], #16   \n\t"            \
2598
        "mul  r6, r7      \n\t"            \
2599
        "lsr  r7, r6, #16   \n\t"            \
2600
        "lsl  r6, r6, #16   \n\t"            \
2601
        "add  %[l], %[l], r6    \n\t"            \
2602
        "adc  %[h], r7    \n\t"            \
2603
        "adc  %[o], r5    \n\t"            \
2604
        "add  %[l], %[l], r6    \n\t"            \
2605
        "adc  %[h], r7    \n\t"            \
2606
        "adc  %[o], r5    \n\t"            \
2607
        /* ah * bh: high halves, added twice into vh */  \
2608
        "lsr  r6, %[a], #16   \n\t"            \
2609
        "lsr  r7, %[b], #16   \n\t"            \
2610
        "mul  r7, r6      \n\t"            \
2611
        "add  %[h], %[h], r7    \n\t"            \
2612
        "adc  %[o], r5    \n\t"            \
2613
        "add  %[h], %[h], r7    \n\t"            \
2614
        "adc  %[o], r5    \n\t"            \
2615
        /* ah * bl: other cross product, split 16/16, added twice */ \
2616
        "uxth r7, %[b]    \n\t"            \
2617
        "mul  r6, r7      \n\t"            \
2618
        "lsr  r7, r6, #16   \n\t"            \
2619
        "lsl  r6, r6, #16   \n\t"            \
2620
        "add  %[l], %[l], r6    \n\t"            \
2621
        "adc  %[h], r7    \n\t"            \
2622
        "adc  %[o], r5    \n\t"            \
2623
        "add  %[l], %[l], r6    \n\t"            \
2624
        "adc  %[h], r7    \n\t"            \
2625
        "adc  %[o], r5    \n\t"            \
2626
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2627
        : [a] "l" (va), [b] "l" (vb)                     \
2628
        : "r5", "r6", "r7", "cc"                         \
2629
    )
2631
#else
2632
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * Variant that avoids r7. Because r5 and r6 must both hold the split partial
 * product across the two additions, the register holding va is reused as the
 * zero for carry propagation: va is first saved in r8, %[a] is set to zero
 * while carries are added, reloaded from r8 when the high half of va is
 * needed, and reloaded again as the last instruction.
 *
 * The product is built from four partial products of the 16-bit halves of va
 * and vb (al/ah = low/high half of va, bl/bh = low/high half of vb), each
 * added to the accumulator two times.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb:     declared as input operands ("l").
 * Scratch:    r5, r6, r8 (saved copy of va); flags clobbered.
 *
 * NOTE(review): %[a] is declared input-only yet is overwritten inside the
 * sequence and only restored by the final mov from r8. This appears to rely
 * on %[a] never sharing a register with %[b] - confirm against the compiler's
 * rules for input operands.
 */
2633
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
2634
    __asm__ __volatile__ (                               \
2635
        "mov    r8, %[a]                \n\t"            \
2636
        /* al * bl: low halves, added twice; %[a] becomes zero */ \
2637
        "uxth   r6, %[a]                \n\t"            \
2638
        "uxth   r5, %[b]                \n\t"            \
2639
        "mul    r5, r6                  \n\t"            \
2640
        "add    %[l], %[l], r5          \n\t"            \
2641
        "mov    %[a], #0                \n\t"            \
2642
        "adc    %[h], %[a]              \n\t"            \
2643
        "adc    %[o], %[a]              \n\t"            \
2644
        "add    %[l], %[l], r5          \n\t"            \
2645
        "adc    %[h], %[a]              \n\t"            \
2646
        "adc    %[o], %[a]              \n\t"            \
2647
        /* al * bh: cross product, split 16/16, added twice */ \
2648
        "lsr    r5, %[b], #16           \n\t"            \
2649
        "mul    r6, r5                  \n\t"            \
2650
        "lsr    r5, r6, #16             \n\t"            \
2651
        "lsl    r6, r6, #16             \n\t"            \
2652
        "add    %[l], %[l], r6          \n\t"            \
2653
        "adc    %[h], r5                \n\t"            \
2654
        "adc    %[o], %[a]              \n\t"            \
2655
        "add    %[l], %[l], r6          \n\t"            \
2656
        "adc    %[h], r5                \n\t"            \
2657
        "adc    %[o], %[a]              \n\t"            \
2658
        /* ah * bh: va reloaded from r8 for ah, then zeroed again */ \
2659
        "mov    %[a], r8                \n\t"            \
2660
        "lsr    r6, %[a], #16           \n\t"            \
2661
        "lsr    r5, %[b], #16           \n\t"            \
2662
        "mul    r5, r6                  \n\t"            \
2663
        "add    %[h], %[h], r5          \n\t"            \
2664
        "mov    %[a], #0                \n\t"            \
2665
        "adc    %[o], %[a]              \n\t"            \
2666
        "add    %[h], %[h], r5          \n\t"            \
2667
        "adc    %[o], %[a]              \n\t"            \
2668
        /* ah * bl: other cross product, split 16/16, added twice */ \
2669
        "uxth   r5, %[b]                \n\t"            \
2670
        "mul    r6, r5                  \n\t"            \
2671
        "lsr    r5, r6, #16             \n\t"            \
2672
        "lsl    r6, r6, #16             \n\t"            \
2673
        "add    %[l], %[l], r6          \n\t"            \
2674
        "adc    %[h], r5                \n\t"            \
2675
        "adc    %[o], %[a]              \n\t"            \
2676
        "add    %[l], %[l], r6          \n\t"            \
2677
        "adc    %[h], r5                \n\t"            \
2678
        "adc    %[o], %[a]              \n\t"            \
2679
        "mov    %[a], r8                \n\t"            \
2680
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2681
        : [a] "l" (va), [b] "l" (vb)                     \
2682
        : "r5", "r6", "r8", "cc"                         \
2683
    )
2684
#endif
2685
#ifndef DEBUG
2686
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2686
 * Assumes first add will not overflow vh | vl
2687
 *
 * Consequence of that assumption, as coded: neither addition of al * bl and
 * the first addition of al * bh carry into vo; every later addition does.
 *
 * The product is built from four partial products of the 16-bit halves of va
 * and vb (al/ah = low/high half of va, bl/bh = low/high half of vb), each
 * added to the accumulator two times.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb:     input operands ("l"); not written by this variant.
 * Scratch:    r5 (kept at zero to propagate carries), r6, r7; flags clobbered.
 */
2688
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2689
    __asm__ __volatile__ (                               \
2690
        /* al * bl: added twice, carry only as far as vh */ \
2691
        "uxth r6, %[a]    \n\t"            \
2692
        "uxth r7, %[b]    \n\t"            \
2693
        "mul  r7, r6      \n\t"            \
2694
        "add  %[l], %[l], r7    \n\t"            \
2695
        "mov  r5, #0      \n\t"            \
2696
        "adc  %[h], r5    \n\t"            \
2697
        "add  %[l], %[l], r7    \n\t"            \
2698
        "adc  %[h], r5    \n\t"            \
2699
        /* al * bh: added twice, only second add carries to vo */ \
2700
        "lsr  r7, %[b], #16   \n\t"            \
2701
        "mul  r6, r7      \n\t"            \
2702
        "lsr  r7, r6, #16   \n\t"            \
2703
        "lsl  r6, r6, #16   \n\t"            \
2704
        "add  %[l], %[l], r6    \n\t"            \
2705
        "adc  %[h], r7    \n\t"            \
2706
        "add  %[l], %[l], r6    \n\t"            \
2707
        "adc  %[h], r7    \n\t"            \
2708
        "adc  %[o], r5    \n\t"            \
2709
        /* ah * bh: high halves, added twice into vh */  \
2710
        "lsr  r6, %[a], #16   \n\t"            \
2711
        "lsr  r7, %[b], #16   \n\t"            \
2712
        "mul  r7, r6      \n\t"            \
2713
        "add  %[h], %[h], r7    \n\t"            \
2714
        "adc  %[o], r5    \n\t"            \
2715
        "add  %[h], %[h], r7    \n\t"            \
2716
        "adc  %[o], r5    \n\t"            \
2717
        /* ah * bl: other cross product, split 16/16, added twice */ \
2718
        "uxth r7, %[b]    \n\t"            \
2719
        "mul  r6, r7      \n\t"            \
2720
        "lsr  r7, r6, #16   \n\t"            \
2721
        "lsl  r6, r6, #16   \n\t"            \
2722
        "add  %[l], %[l], r6    \n\t"            \
2723
        "adc  %[h], r7    \n\t"            \
2724
        "adc  %[o], r5    \n\t"            \
2725
        "add  %[l], %[l], r6    \n\t"            \
2726
        "adc  %[h], r7    \n\t"            \
2727
        "adc  %[o], r5    \n\t"            \
2728
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2729
        : [a] "l" (va), [b] "l" (vb)                     \
2730
        : "r5", "r6", "r7", "cc"                         \
2731
    )
2733
#else
2734
/* Multiply va by vb and add double size result twice into: vo | vh | vl
2734
 * Assumes first add will not overflow vh | vl
2735
 *
 * Variant that avoids r7. va is saved in r8 so that the register holding
 * %[a] can serve as the zero for carry propagation; it is reloaded from r8
 * when the high half of va is needed and again as the last instruction.
 *
 * As coded, neither addition of al * bl and the first addition of al * bh
 * carry into vo; every later addition does.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb:     declared as input operands ("l").
 * Scratch:    r5, r6, r8 (saved copy of va); flags clobbered.
 *
 * NOTE(review): %[a] is declared input-only yet is overwritten inside the
 * sequence and only restored by the final mov from r8. This appears to rely
 * on %[a] never sharing a register with %[b] - confirm against the compiler's
 * rules for input operands.
 */
2736
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
2737
    __asm__ __volatile__ (                               \
2738
        "mov  r8, %[a]    \n\t"            \
2739
        /* al * bl: added twice, carry only as far as vh */ \
2740
        "uxth r5, %[a]    \n\t"            \
2741
        "uxth r6, %[b]    \n\t"            \
2742
        "mul  r6, r5      \n\t"            \
2743
        "add  %[l], %[l], r6    \n\t"            \
2744
        "mov  %[a], #0    \n\t"            \
2745
        "adc  %[h], %[a]    \n\t"            \
2746
        "add  %[l], %[l], r6    \n\t"            \
2747
        "adc  %[h], %[a]    \n\t"            \
2748
        /* al * bh: added twice, only second add carries to vo */ \
2749
        "lsr  r6, %[b], #16   \n\t"            \
2750
        "mul  r5, r6      \n\t"            \
2751
        "lsr  r6, r5, #16   \n\t"            \
2752
        "lsl  r5, r5, #16   \n\t"            \
2753
        "add  %[l], %[l], r5    \n\t"            \
2754
        "adc  %[h], r6    \n\t"            \
2755
        "add  %[l], %[l], r5    \n\t"            \
2756
        "adc  %[h], r6    \n\t"            \
2757
        "adc  %[o], %[a]    \n\t"            \
2758
        /* ah * bh: va reloaded from r8 for ah, then zeroed again */ \
2759
        "mov    %[a], r8                \n\t"            \
2760
        "lsr  r5, %[a], #16   \n\t"            \
2761
        "lsr  r6, %[b], #16   \n\t"            \
2762
        "mul  r6, r5      \n\t"            \
2763
        "mov    %[a], #0                \n\t"            \
2764
        "add  %[h], %[h], r6    \n\t"            \
2765
        "adc  %[o], %[a]    \n\t"            \
2766
        "add  %[h], %[h], r6    \n\t"            \
2767
        "adc  %[o], %[a]    \n\t"            \
2768
        /* ah * bl: other cross product, split 16/16, added twice */ \
2769
        "uxth r6, %[b]    \n\t"            \
2770
        "mul  r5, r6      \n\t"            \
2771
        "lsr  r6, r5, #16   \n\t"            \
2772
        "lsl  r5, r5, #16   \n\t"            \
2773
        "add  %[l], %[l], r5    \n\t"            \
2774
        "adc  %[h], r6    \n\t"            \
2775
        "adc  %[o], %[a]    \n\t"            \
2776
        "add  %[l], %[l], r5    \n\t"            \
2777
        "adc  %[h], r6    \n\t"            \
2778
        "adc  %[o], %[a]    \n\t"            \
2779
        "mov    %[a], r8                \n\t"            \
2780
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2781
        : [a] "l" (va), [b] "l" (vb)                     \
2782
        : "r5", "r6", "r8", "cc"                         \
2783
    )
2785
#endif
2786
/* Square va and store double size result in: vh | vl
 *
 * va is split into 16-bit halves (al = low half, ah = high half). vl is
 * loaded with al and vh with ah, each is multiplied by itself, and then the
 * cross term al * ah is added in doubled form: it is shifted left by 17 into
 * vl and right by 15 into vh, i.e. one bit further than a plain 16/16 split.
 *
 * vl, vh:  declared read-write ("+l"), but their incoming values are
 *          overwritten by the initial mov instructions.
 * va:      input operand ("l"); not written.
 * Scratch: r5, r6; flags clobbered.
 */
2787
#define SP_ASM_SQR(vl, vh, va)                           \
2788
    __asm__ __volatile__ (                               \
2789
        "lsr  r5, %[a], #16   \n\t"            \
2790
        "uxth r6, %[a]    \n\t"            \
2791
        "mov  %[l], r6    \n\t"            \
2792
        "mov  %[h], r5    \n\t"            \
2793
        /* al * al: becomes the low word */              \
2794
        "mul  %[l], %[l]    \n\t"            \
2795
        /* ah * ah: becomes the high word */             \
2796
        "mul  %[h], %[h]    \n\t"            \
2797
        /* 2 * al * ah: doubling folded into the shift amounts */ \
2798
        "mul  r6, r5      \n\t"            \
2799
        "lsr  r5, r6, #15   \n\t"            \
2800
        "lsl  r6, r6, #17   \n\t"            \
2801
        "add  %[l], %[l], r6    \n\t"            \
2802
        "adc  %[h], r5    \n\t"            \
2803
        : [h] "+l" (vh), [l] "+l" (vl)                   \
2804
        : [a] "l" (va)                                   \
2805
        : "r5", "r6", "cc"                               \
2806
    )
2807
/* Square va and add double size result into: vo | vh | vl
 *
 * va is split into 16-bit halves (al = low half, ah = high half). al * al is
 * added to vl and ah * ah to vh in one add/adc pair with the carry taken
 * into vo. The halves are then extracted from va again (the first copies
 * were consumed by the squarings) and the cross term al * ah is added in
 * doubled form: shifted left by 17 into vl and right by 15 into vh.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va:         input operand ("l"); not written.
 * Scratch:    r4, r5 (zero for carries into vo), r6; flags clobbered.
 *
 * NOTE(review): r5 is zeroed between an adc and the adc that consumes its
 * carry, so the sequence relies on that mov leaving the carry flag
 * untouched - confirm for the assembler syntax in use.
 */
2808
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
2809
    __asm__ __volatile__ (                               \
2810
        "lsr  r4, %[a], #16   \n\t"            \
2811
        "uxth r6, %[a]    \n\t"            \
2812
        /* al * al: added to vl below */                 \
2813
        "mul  r6, r6      \n\t"            \
2814
        /* ah * ah: added to vh below, with carry */     \
2815
        "mul  r4, r4      \n\t"            \
2816
        "add  %[l], %[l], r6    \n\t"            \
2817
        "adc  %[h], r4    \n\t"            \
2818
        "mov  r5, #0      \n\t"            \
2819
        "adc  %[o], r5    \n\t"            \
2820
        "lsr  r4, %[a], #16   \n\t"            \
2821
        "uxth r6, %[a]    \n\t"            \
2822
        /* 2 * al * ah: doubling folded into the shift amounts */ \
2823
        "mul  r6, r4      \n\t"            \
2824
        "lsr  r4, r6, #15   \n\t"            \
2825
        "lsl  r6, r6, #17   \n\t"            \
2826
        "add  %[l], %[l], r6    \n\t"            \
2827
        "adc  %[h], r4    \n\t"            \
2828
        "adc  %[o], r5    \n\t"            \
2829
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2830
        : [a] "l" (va)                                   \
2831
        : "r4", "r5", "r6", "cc"                         \
2832
    )
2833
/* Square va and add double size result into: vh | vl
 *
 * Two-word accumulator version: a carry out of vh is not recorded anywhere.
 * va is split into 16-bit halves (al = low half, ah = high half). al * al is
 * added to vl and ah * ah to vh in one add/adc pair. The halves are then
 * extracted from va again and the cross term al * ah is added in doubled
 * form: shifted left by 17 into vl and right by 15 into vh.
 *
 * vl, vh:  read-write operands ("+l") holding the accumulator.
 * va:      input operand ("l"); not written.
 * Scratch: r5, r6; flags clobbered.
 */
2834
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
2835
    __asm__ __volatile__ (                               \
2836
        "lsr  r5, %[a], #16   \n\t"            \
2837
        "uxth r6, %[a]    \n\t"            \
2838
        /* al * al: added to vl below */                 \
2839
        "mul  r6, r6      \n\t"            \
2840
        /* ah * ah: added to vh below, with carry */     \
2841
        "mul  r5, r5      \n\t"            \
2842
        "add  %[l], %[l], r6    \n\t"            \
2843
        "adc  %[h], r5    \n\t"            \
2844
        "lsr  r5, %[a], #16   \n\t"            \
2845
        "uxth r6, %[a]    \n\t"            \
2846
        /* 2 * al * ah: doubling folded into the shift amounts */ \
2847
        "mul  r6, r5      \n\t"            \
2848
        "lsr  r5, r6, #15   \n\t"            \
2849
        "lsl  r6, r6, #17   \n\t"            \
2850
        "add  %[l], %[l], r6    \n\t"            \
2851
        "adc  %[h], r5    \n\t"            \
2852
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2853
        : [a] "l" (va)                                   \
2854
        : "r5", "r6", "cc"                               \
2855
    )
2856
/* Add va into: vh | vl
 *
 * va is added to vl and the resulting carry is added to vh by an adc with a
 * zeroed scratch register.
 *
 * vl, vh:  read-write operands ("+l").
 * va:      input operand ("l"); not written.
 * Scratch: r5 (set to zero); flags clobbered.
 *
 * NOTE(review): r5 is zeroed between the add and the adc, so the sequence
 * relies on that mov leaving the carry flag untouched - confirm for the
 * assembler syntax in use.
 */
2857
#define SP_ASM_ADDC(vl, vh, va)                          \
2858
    __asm__ __volatile__ (                               \
2859
        "add  %[l], %[l], %[a]  \n\t"            \
2860
        "mov  r5, #0      \n\t"            \
2861
        "adc  %[h], r5    \n\t"            \
2862
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2863
        : [a] "l" (va)                                   \
2864
        : "r5", "cc"                                     \
2865
    )
2866
/* Sub va from: vh | vl
 *
 * va is subtracted from vl and the resulting borrow is taken from vh by an
 * sbc with a zeroed scratch register.
 *
 * vl, vh:  read-write operands ("+l").
 * va:      input operand ("l"); not written.
 * Scratch: r5 (set to zero); flags clobbered.
 *
 * NOTE(review): r5 is zeroed between the sub and the sbc, so the sequence
 * relies on that mov leaving the carry flag untouched - confirm for the
 * assembler syntax in use.
 */
2867
#define SP_ASM_SUBB(vl, vh, va)                          \
2868
    __asm__ __volatile__ (                               \
2869
        "sub  %[l], %[l], %[a]  \n\t"            \
2870
        "mov  r5, #0      \n\t"            \
2871
        "sbc  %[h], r5    \n\t"            \
2872
        : [l] "+l" (vl), [h] "+l" (vh)                   \
2873
        : [a] "l" (va)                                   \
2874
        : "r5", "cc"                                     \
2875
    )
2876
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The doubling is done by running the same three-word add/adc/adc chain
 * twice rather than by shifting the source value.
 *
 * vl, vh, vo: read-write operands ("+l") holding the accumulator.
 * va, vb, vc: input operands ("l"), low to high word; not written.
 * Scratch:    none; only the flags are clobbered.
 */
2877
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
2878
    __asm__ __volatile__ (                               \
2879
        "add  %[l], %[l], %[a]  \n\t"            \
2880
        "adc  %[h], %[b]    \n\t"            \
2881
        "adc  %[o], %[c]    \n\t"            \
2882
        "add  %[l], %[l], %[a]  \n\t"            \
2883
        "adc  %[h], %[b]    \n\t"            \
2884
        "adc  %[o], %[c]    \n\t"            \
2885
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo)    \
2886
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc)       \
2887
        : "cc"                                           \
2888
    )
2889
2890
#endif
2891
2892
#ifdef WOLFSSL_SP_DIV_WORD_HALF
2893
/* Divide a two digit number by a digit number and return. (hi | lo) / d
2894
 *
2895
 * No division instruction used - does operation bit by bit.
2896
 * Constant time.
2897
 *
2898
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
2899
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
2900
 * @param  [in]  d   SP integer digit. Number to divide by.
2901
 * @return  The division result.
2902
 */
2903
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
2904
                                          sp_int_digit d)
2905
{
2906
    __asm__ __volatile__ (
2907
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2908
        "lsrs r3, %[d], #24\n\t"
2909
#else
2910
        "lsr  r3, %[d], #24\n\t"
2911
#endif
2912
        "beq  2%=f\n\t"
2913
  "\n1%=:\n\t"
2914
        "movs r3, #0\n\t"
2915
        "b  3%=f\n\t"
2916
  "\n2%=:\n\t"
2917
        "mov  r3, #8\n\t"
2918
  "\n3%=:\n\t"
2919
        "movs r4, #31\n\t"
2920
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2921
        "subs r4, r4, r3\n\t"
2922
#else
2923
        "sub  r4, r4, r3\n\t"
2924
#endif
2925
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2926
        "lsls %[d], %[d], r3\n\t"
2927
#else
2928
        "lsl  %[d], %[d], r3\n\t"
2929
#endif
2930
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2931
        "lsls %[hi], %[hi], r3\n\t"
2932
#else
2933
        "lsl  %[hi], %[hi], r3\n\t"
2934
#endif
2935
        "mov  r5, %[lo]\n\t"
2936
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2937
        "lsrs r5, r5, r4\n\t"
2938
#else
2939
        "lsr  r5, r5, r4\n\t"
2940
#endif
2941
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2942
        "lsls %[lo], %[lo], r3\n\t"
2943
#else
2944
        "lsl  %[lo], %[lo], r3\n\t"
2945
#endif
2946
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2947
        "lsrs r5, r5, #1\n\t"
2948
#else
2949
        "lsr  r5, r5, #1\n\t"
2950
#endif
2951
#if defined(WOLFSSL_KEIL)
2952
        "orrs %[hi], %[hi], r5\n\t"
2953
#elif defined(__clang__)
2954
        "orrs %[hi], r5\n\t"
2955
#else
2956
        "orr  %[hi], r5\n\t"
2957
#endif
2958
2959
        "movs   r3, #0\n\t"
2960
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2961
        "lsrs   r5, %[d], #1\n\t"
2962
#else
2963
        "lsr    r5, %[d], #1\n\t"
2964
#endif
2965
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2966
        "adds   r5, r5, #1\n\t"
2967
#else
2968
        "add    r5, r5, #1\n\t"
2969
#endif
2970
        "mov    r8, %[lo]\n\t"
2971
        "mov    r9, %[hi]\n\t"
2972
        /* Do top 32 */
2973
        "movs   r6, r5\n\t"
2974
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2975
        "subs   r6, r6, %[hi]\n\t"
2976
#else
2977
        "sub    r6, r6, %[hi]\n\t"
2978
#endif
2979
#ifdef WOLFSSL_KEIL
2980
        "sbcs   r6, r6, r6\n\t"
2981
#elif defined(__clang__)
2982
        "sbcs   r6, r6\n\t"
2983
#else
2984
        "sbc    r6, r6\n\t"
2985
#endif
2986
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2987
        "adds   r3, r3, r3\n\t"
2988
#else
2989
        "add    r3, r3, r3\n\t"
2990
#endif
2991
#if defined(__clang__) || defined(WOLFSSL_KEIL)
2992
        "subs   r3, r3, r6\n\t"
2993
#else
2994
        "sub    r3, r3, r6\n\t"
2995
#endif
2996
#ifdef WOLFSSL_KEIL
2997
        "ands   r6, r6, r5\n\t"
2998
#elif defined(__clang__)
2999
        "ands   r6, r5\n\t"
3000
#else
3001
        "and    r6, r5\n\t"
3002
#endif
3003
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3004
        "subs   %[hi], %[hi], r6\n\t"
3005
#else
3006
        "sub    %[hi], %[hi], r6\n\t"
3007
#endif
3008
        "movs   r4, #29\n\t"
3009
        "\n"
3010
    "L_sp_div_word_loop%=:\n\t"
3011
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3012
        "lsls   %[lo], %[lo], #1\n\t"
3013
#else
3014
        "lsl    %[lo], %[lo], #1\n\t"
3015
#endif
3016
#ifdef WOLFSSL_KEIL
3017
        "adcs   %[hi], %[hi], %[hi]\n\t"
3018
#elif defined(__clang__)
3019
        "adcs   %[hi], %[hi]\n\t"
3020
#else
3021
        "adc    %[hi], %[hi]\n\t"
3022
#endif
3023
        "movs   r6, r5\n\t"
3024
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3025
        "subs   r6, r6, %[hi]\n\t"
3026
#else
3027
        "sub    r6, r6, %[hi]\n\t"
3028
#endif
3029
#ifdef WOLFSSL_KEIL
3030
        "sbcs   r6, r6, r6\n\t"
3031
#elif defined(__clang__)
3032
        "sbcs   r6, r6\n\t"
3033
#else
3034
        "sbc    r6, r6\n\t"
3035
#endif
3036
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3037
        "adds   r3, r3, r3\n\t"
3038
#else
3039
        "add    r3, r3, r3\n\t"
3040
#endif
3041
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3042
        "subs   r3, r3, r6\n\t"
3043
#else
3044
        "sub    r3, r3, r6\n\t"
3045
#endif
3046
#ifdef WOLFSSL_KEIL
3047
        "ands   r6, r6, r5\n\t"
3048
#elif defined(__clang__)
3049
        "ands   r6, r5\n\t"
3050
#else
3051
        "and    r6, r5\n\t"
3052
#endif
3053
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3054
        "subs   %[hi], %[hi], r6\n\t"
3055
#else
3056
        "sub    %[hi], %[hi], r6\n\t"
3057
#endif
3058
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3059
        "subs   r4, r4, #1\n\t"
3060
#else
3061
        "sub    r4, r4, #1\n\t"
3062
#endif
3063
        "bpl    L_sp_div_word_loop%=\n\t"
3064
        "movs   r7, #0\n\t"
3065
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3066
        "adds   r3, r3, r3\n\t"
3067
#else
3068
        "add    r3, r3, r3\n\t"
3069
#endif
3070
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3071
        "adds   r3, r3, #1\n\t"
3072
#else
3073
        "add    r3, r3, #1\n\t"
3074
#endif
3075
        /* r * d - Start */
3076
        "uxth   %[hi], r3\n\t"
3077
        "uxth   r4, %[d]\n\t"
3078
#ifdef WOLFSSL_KEIL
3079
        "muls   r4, %[hi], r4\n\t"
3080
#elif defined(__clang__)
3081
        "muls   r4, %[hi]\n\t"
3082
#else
3083
        "mul    r4, %[hi]\n\t"
3084
#endif
3085
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3086
        "lsrs   r6, %[d], #16\n\t"
3087
#else
3088
        "lsr    r6, %[d], #16\n\t"
3089
#endif
3090
#ifdef WOLFSSL_KEIL
3091
        "muls   %[hi], r6, %[hi]\n\t"
3092
#elif defined(__clang__)
3093
        "muls   %[hi], r6\n\t"
3094
#else
3095
        "mul    %[hi], r6\n\t"
3096
#endif
3097
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3098
        "lsrs   r5, %[hi], #16\n\t"
3099
#else
3100
        "lsr    r5, %[hi], #16\n\t"
3101
#endif
3102
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3103
        "lsls   %[hi], %[hi], #16\n\t"
3104
#else
3105
        "lsl    %[hi], %[hi], #16\n\t"
3106
#endif
3107
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3108
        "adds   r4, r4, %[hi]\n\t"
3109
#else
3110
        "add    r4, r4, %[hi]\n\t"
3111
#endif
3112
#ifdef WOLFSSL_KEIL
3113
        "adcs   r5, r5, r7\n\t"
3114
#elif defined(__clang__)
3115
        "adcs   r5, r7\n\t"
3116
#else
3117
        "adc    r5, r7\n\t"
3118
#endif
3119
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3120
        "lsrs   %[hi], r3, #16\n\t"
3121
#else
3122
        "lsr    %[hi], r3, #16\n\t"
3123
#endif
3124
#ifdef WOLFSSL_KEIL
3125
        "muls   r6, %[hi], r6\n\t"
3126
#elif defined(__clang__)
3127
        "muls   r6, %[hi]\n\t"
3128
#else
3129
        "mul    r6, %[hi]\n\t"
3130
#endif
3131
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3132
        "adds   r5, r5, r6\n\t"
3133
#else
3134
        "add    r5, r5, r6\n\t"
3135
#endif
3136
        "uxth   r6, %[d]\n\t"
3137
#ifdef WOLFSSL_KEIL
3138
        "muls   %[hi], r6, %[hi]\n\t"
3139
#elif defined(__clang__)
3140
        "muls   %[hi], r6\n\t"
3141
#else
3142
        "mul    %[hi], r6\n\t"
3143
#endif
3144
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3145
        "lsrs   r6, %[hi], #16\n\t"
3146
#else
3147
        "lsr    r6, %[hi], #16\n\t"
3148
#endif
3149
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3150
        "lsls   %[hi], %[hi], #16\n\t"
3151
#else
3152
        "lsl    %[hi], %[hi], #16\n\t"
3153
#endif
3154
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3155
        "adds   r4, r4, %[hi]\n\t"
3156
#else
3157
        "add    r4, r4, %[hi]\n\t"
3158
#endif
3159
#ifdef WOLFSSL_KEIL
3160
        "adcs   r5, r5, r6\n\t"
3161
#elif defined(__clang__)
3162
        "adcs   r5, r6\n\t"
3163
#else
3164
        "adc    r5, r6\n\t"
3165
#endif
3166
        /* r * d - Done */
3167
        "mov    %[hi], r8\n\t"
3168
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3169
        "subs   %[hi], %[hi], r4\n\t"
3170
#else
3171
        "sub    %[hi], %[hi], r4\n\t"
3172
#endif
3173
        "movs   r4, %[hi]\n\t"
3174
        "mov    %[hi], r9\n\t"
3175
#ifdef WOLFSSL_KEIL
3176
        "sbcs   %[hi], %[hi], r5\n\t"
3177
#elif defined(__clang__)
3178
        "sbcs   %[hi], r5\n\t"
3179
#else
3180
        "sbc    %[hi], r5\n\t"
3181
#endif
3182
        "movs   r5, %[hi]\n\t"
3183
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3184
        "adds   r3, r3, r5\n\t"
3185
#else
3186
        "add    r3, r3, r5\n\t"
3187
#endif
3188
        /* r * d - Start */
3189
        "uxth   %[hi], r3\n\t"
3190
        "uxth   r4, %[d]\n\t"
3191
#ifdef WOLFSSL_KEIL
3192
        "muls   r4, %[hi], r4\n\t"
3193
#elif defined(__clang__)
3194
        "muls   r4, %[hi]\n\t"
3195
#else
3196
        "mul    r4, %[hi]\n\t"
3197
#endif
3198
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3199
        "lsrs   r6, %[d], #16\n\t"
3200
#else
3201
        "lsr    r6, %[d], #16\n\t"
3202
#endif
3203
#ifdef WOLFSSL_KEIL
3204
        "muls   %[hi], r6, %[hi]\n\t"
3205
#elif defined(__clang__)
3206
        "muls   %[hi], r6\n\t"
3207
#else
3208
        "mul    %[hi], r6\n\t"
3209
#endif
3210
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3211
        "lsrs   r5, %[hi], #16\n\t"
3212
#else
3213
        "lsr    r5, %[hi], #16\n\t"
3214
#endif
3215
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3216
        "lsls   %[hi], %[hi], #16\n\t"
3217
#else
3218
        "lsl    %[hi], %[hi], #16\n\t"
3219
#endif
3220
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3221
        "adds   r4, r4, %[hi]\n\t"
3222
#else
3223
        "add    r4, r4, %[hi]\n\t"
3224
#endif
3225
#ifdef WOLFSSL_KEIL
3226
        "adcs   r5, r5, r7\n\t"
3227
#elif defined(__clang__)
3228
        "adcs   r5, r7\n\t"
3229
#else
3230
        "adc    r5, r7\n\t"
3231
#endif
3232
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3233
        "lsrs   %[hi], r3, #16\n\t"
3234
#else
3235
        "lsr    %[hi], r3, #16\n\t"
3236
#endif
3237
#ifdef WOLFSSL_KEIL
3238
        "muls   r6, %[hi], r6\n\t"
3239
#elif defined(__clang__)
3240
        "muls   r6, %[hi]\n\t"
3241
#else
3242
        "mul    r6, %[hi]\n\t"
3243
#endif
3244
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3245
        "adds   r5, r5, r6\n\t"
3246
#else
3247
        "add    r5, r5, r6\n\t"
3248
#endif
3249
        "uxth   r6, %[d]\n\t"
3250
#ifdef WOLFSSL_KEIL
3251
        "muls   %[hi], r6, %[hi]\n\t"
3252
#elif defined(__clang__)
3253
        "muls   %[hi], r6\n\t"
3254
#else
3255
        "mul    %[hi], r6\n\t"
3256
#endif
3257
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3258
        "lsrs   r6, %[hi], #16\n\t"
3259
#else
3260
        "lsr    r6, %[hi], #16\n\t"
3261
#endif
3262
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3263
        "lsls   %[hi], %[hi], #16\n\t"
3264
#else
3265
        "lsl    %[hi], %[hi], #16\n\t"
3266
#endif
3267
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3268
        "adds   r4, r4, %[hi]\n\t"
3269
#else
3270
        "add    r4, r4, %[hi]\n\t"
3271
#endif
3272
#ifdef WOLFSSL_KEIL
3273
        "adcs   r5, r5, r6\n\t"
3274
#elif defined(__clang__)
3275
        "adcs   r5, r6\n\t"
3276
#else
3277
        "adc    r5, r6\n\t"
3278
#endif
3279
        /* r * d - Done */
3280
        "mov    %[hi], r8\n\t"
3281
        "mov    r6, r9\n\t"
3282
#ifdef WOLFSSL_KEIL
3283
        "subs   r4, %[hi], r4\n\t"
3284
#else
3285
#ifdef __clang__
3286
        "subs   r4, %[hi], r4\n\t"
3287
#else
3288
        "sub    r4, %[hi], r4\n\t"
3289
#endif
3290
#endif
3291
#ifdef WOLFSSL_KEIL
3292
        "sbcs   r6, r6, r5\n\t"
3293
#elif defined(__clang__)
3294
        "sbcs   r6, r5\n\t"
3295
#else
3296
        "sbc    r6, r5\n\t"
3297
#endif
3298
        "movs   r5, r6\n\t"
3299
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3300
        "adds   r3, r3, r5\n\t"
3301
#else
3302
        "add    r3, r3, r5\n\t"
3303
#endif
3304
        /* r * d - Start */
3305
        "uxth   %[hi], r3\n\t"
3306
        "uxth   r4, %[d]\n\t"
3307
#ifdef WOLFSSL_KEIL
3308
        "muls   r4, %[hi], r4\n\t"
3309
#elif defined(__clang__)
3310
        "muls   r4, %[hi]\n\t"
3311
#else
3312
        "mul    r4, %[hi]\n\t"
3313
#endif
3314
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3315
        "lsrs   r6, %[d], #16\n\t"
3316
#else
3317
        "lsr    r6, %[d], #16\n\t"
3318
#endif
3319
#ifdef WOLFSSL_KEIL
3320
        "muls   %[hi], r6, %[hi]\n\t"
3321
#elif defined(__clang__)
3322
        "muls   %[hi], r6\n\t"
3323
#else
3324
        "mul    %[hi], r6\n\t"
3325
#endif
3326
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3327
        "lsrs   r5, %[hi], #16\n\t"
3328
#else
3329
        "lsr    r5, %[hi], #16\n\t"
3330
#endif
3331
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3332
        "lsls   %[hi], %[hi], #16\n\t"
3333
#else
3334
        "lsl    %[hi], %[hi], #16\n\t"
3335
#endif
3336
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3337
        "adds   r4, r4, %[hi]\n\t"
3338
#else
3339
        "add    r4, r4, %[hi]\n\t"
3340
#endif
3341
#ifdef WOLFSSL_KEIL
3342
        "adcs   r5, r5, r7\n\t"
3343
#elif defined(__clang__)
3344
        "adcs   r5, r7\n\t"
3345
#else
3346
        "adc    r5, r7\n\t"
3347
#endif
3348
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3349
        "lsrs   %[hi], r3, #16\n\t"
3350
#else
3351
        "lsr    %[hi], r3, #16\n\t"
3352
#endif
3353
#ifdef WOLFSSL_KEIL
3354
        "muls   r6, %[hi], r6\n\t"
3355
#elif defined(__clang__)
3356
        "muls   r6, %[hi]\n\t"
3357
#else
3358
        "mul    r6, %[hi]\n\t"
3359
#endif
3360
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3361
        "adds   r5, r5, r6\n\t"
3362
#else
3363
        "add    r5, r5, r6\n\t"
3364
#endif
3365
        "uxth   r6, %[d]\n\t"
3366
#ifdef WOLFSSL_KEIL
3367
        "muls   %[hi], r6, %[hi]\n\t"
3368
#elif defined(__clang__)
3369
        "muls   %[hi], r6\n\t"
3370
#else
3371
        "mul    %[hi], r6\n\t"
3372
#endif
3373
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3374
        "lsrs   r6, %[hi], #16\n\t"
3375
#else
3376
        "lsr    r6, %[hi], #16\n\t"
3377
#endif
3378
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3379
        "lsls   %[hi], %[hi], #16\n\t"
3380
#else
3381
        "lsl    %[hi], %[hi], #16\n\t"
3382
#endif
3383
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3384
        "adds   r4, r4, %[hi]\n\t"
3385
#else
3386
        "add    r4, r4, %[hi]\n\t"
3387
#endif
3388
#ifdef WOLFSSL_KEIL
3389
        "adcs   r5, r5, r6\n\t"
3390
#elif defined(__clang__)
3391
        "adcs   r5, r6\n\t"
3392
#else
3393
        "adc    r5, r6\n\t"
3394
#endif
3395
        /* r * d - Done */
3396
        "mov    %[hi], r8\n\t"
3397
        "mov    r6, r9\n\t"
3398
#ifdef WOLFSSL_KEIL
3399
        "subs   r4, %[hi], r4\n\t"
3400
#else
3401
#ifdef __clang__
3402
        "subs   r4, %[hi], r4\n\t"
3403
#else
3404
        "sub    r4, %[hi], r4\n\t"
3405
#endif
3406
#endif
3407
#ifdef WOLFSSL_KEIL
3408
        "sbcs   r6, r6, r5\n\t"
3409
#elif defined(__clang__)
3410
        "sbcs   r6, r5\n\t"
3411
#else
3412
        "sbc    r6, r5\n\t"
3413
#endif
3414
        "movs   r5, r6\n\t"
3415
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3416
        "adds   r3, r3, r5\n\t"
3417
#else
3418
        "add    r3, r3, r5\n\t"
3419
#endif
3420
        "movs   r6, %[d]\n\t"
3421
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3422
        "subs   r6, r6, r4\n\t"
3423
#else
3424
        "sub    r6, r6, r4\n\t"
3425
#endif
3426
#ifdef WOLFSSL_KEIL
3427
        "sbcs   r6, r6, r6\n\t"
3428
#elif defined(__clang__)
3429
        "sbcs   r6, r6\n\t"
3430
#else
3431
        "sbc    r6, r6\n\t"
3432
#endif
3433
#if defined(__clang__) || defined(WOLFSSL_KEIL)
3434
        "subs   r3, r3, r6\n\t"
3435
#else
3436
        "sub    r3, r3, r6\n\t"
3437
#endif
3438
        "movs   %[hi], r3\n\t"
3439
        : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
3440
        :
3441
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
3442
    );
3443
    return (sp_uint32)(size_t)hi;
3444
}
3445
3446
#define SP_ASM_DIV_WORD
3447
#endif /* !WOLFSSL_SP_DIV_WORD_HALF */
3448
3449
#define SP_INT_ASM_AVAILABLE
3450
3451
    #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
3452
3453
    #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
3454
/*
3455
 * CPU: PPC64
3456
 */
3457
3458
    #ifdef __APPLE__
3459
3460
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld yields the low 64 bits of the product and mulhdu the high 64 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va/vb are still needed by the
 * second multiply and therefore must not share a register with an input, and
 * the previous (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mulld  %[l], %[a], %[b]  \n\t"            \
        "mulhdu %[h], %[a], %[b]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3469
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl receive the high and low 64 bits of the product; vo is set to 0.
 * The product words are write-only early-clobber outputs ("=&r"): vh is
 * written before the second multiply reads va/vb, so it must not share a
 * register with an input, and the old values of vh/vl are never read.
 * vo is written last, after every input has been consumed.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhdu %[h], %[a], %[b]  \n\t"            \
        "mulld  %[l], %[a], %[b]  \n\t"            \
        "li %[o], 0     \n\t"            \
        : [l] "=&r" (vl), [h] "=&r" (vh), [o] "=r" (vo)  \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3479
/* Multiply-accumulate: (vo | vh | vl) += va * vb.
 *
 * The low and high product words are formed in scratch registers r16 and r17
 * and then added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[b]   \n\t" \
        "mulhdu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3491
/* Two-word multiply-accumulate: (vh | vl) += va * vb.
 *
 * The product halves are formed in scratch registers r16/r17 and added in.
 * No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[b]   \n\t" \
        "mulhdu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3502
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers r16/r17 and added in
 * twice; after each addition the carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[b]   \n\t" \
        "mulhdu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3517
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers r16/r17 and added in
 * twice. No carry is propagated into vo after the first addition, so that
 * addition is assumed not to overflow vh | vl; the carry of the second
 * addition is added into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[b]   \n\t" \
        "mulhdu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3533
/* Square va and store double size result in: vh | vl
 *
 * mulld yields the low 64 bits of va * va and mulhdu the high 64 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va is still needed by the second
 * multiply and therefore must not share a register with it, and the previous
 * (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mulld  %[l], %[a], %[a]  \n\t"            \
        "mulhdu %[h], %[a], %[a]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va)                                   \
        :                                                \
    )
3542
/* Square-accumulate: (vo | vh | vl) += va * va.
 *
 * The low and high words of the square are formed in scratch registers r16
 * and r17 and added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[a]   \n\t" \
        "mulhdu r17, %[a], %[a]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3554
/* Two-word square-accumulate: (vh | vl) += va * va.
 *
 * The halves of the square are formed in scratch registers r16/r17 and added
 * in. No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld  r16, %[a], %[a]   \n\t" \
        "mulhdu r17, %[a], %[a]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3565
/* Add a single word into a two-word value: (vh | vl) += va.
 *
 * va is added to vl and the resulting carry is added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "addze  %[h], %[h]    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc" \
    )
3574
/* Subtract a single word from a two-word value: (vh | vl) -= va.
 *
 * subfc computes vl - va; scratch register r16 is then zeroed so that subfe
 * subtracts only the borrow from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc  %[l], %[a], %[l]  \n\t" \
        "li    r16, 0     \n\t" \
        "subfe %[h], r16, %[h]    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r16" \
    )
3584
/* Add a three-word value in twice: (vo | vh | vl) += 2 * (vc | vb | va).
 *
 * The same carry-chained three-word addition is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [c] "r" (vc), [b] "r" (vb), [a] "r" (va) \
        : "cc" \
    )
3597
/* Count leading zeros: vn = number of leading zero bits in the 64-bit va.
 *
 * Uses the cntlzd (count leading zeros doubleword) instruction.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzd %[n], %[a]  \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
3605
3606
    #else  /* !defined(__APPLE__) */
3607
3608
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mulld yields the low 64 bits of the product and mulhdu the high 64 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va/vb are still needed by the
 * second multiply and therefore must not share a register with an input, and
 * the previous (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mulld  %[l], %[a], %[b]  \n\t"            \
        "mulhdu %[h], %[a], %[b]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3617
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl receive the high and low 64 bits of the product; vo is set to 0.
 * The product words are write-only early-clobber outputs ("=&r"): vh is
 * written before the second multiply reads va/vb, so it must not share a
 * register with an input, and the old values of vh/vl are never read.
 * vo is written last, after every input has been consumed.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhdu %[h], %[a], %[b]  \n\t"            \
        "mulld  %[l], %[a], %[b]  \n\t"            \
        "li %[o], 0     \n\t"            \
        : [l] "=&r" (vl), [h] "=&r" (vh), [o] "=r" (vo)  \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3627
/* Multiply-accumulate: (vo | vh | vl) += va * vb.
 *
 * The low and high product words are formed in scratch registers 16 and 17
 * and then added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[b]    \n\t" \
        "mulhdu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3639
/* Two-word multiply-accumulate: (vh | vl) += va * vb.
 *
 * The product halves are formed in scratch registers 16/17 and added in.
 * No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[b]    \n\t" \
        "mulhdu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3650
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers 16/17 and added in
 * twice; after each addition the carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[b]    \n\t" \
        "mulhdu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3665
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers 16/17 and added in
 * twice. No carry is propagated into vo after the first addition, so that
 * addition is assumed not to overflow vh | vl; the carry of the second
 * addition is added into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[b]    \n\t" \
        "mulhdu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3681
/* Square va and store double size result in: vh | vl
 *
 * mulld yields the low 64 bits of va * va and mulhdu the high 64 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va is still needed by the second
 * multiply and therefore must not share a register with it, and the previous
 * (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mulld  %[l], %[a], %[a]  \n\t"            \
        "mulhdu %[h], %[a], %[a]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va)                                   \
        :                                                \
    )
3690
/* Square-accumulate: (vo | vh | vl) += va * va.
 *
 * The low and high words of the square are formed in scratch registers 16
 * and 17 and added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[a]    \n\t" \
        "mulhdu 17, %[a], %[a]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "17", "16" \
    )
3702
/* Two-word square-accumulate: (vh | vl) += va * va.
 *
 * The halves of the square are formed in scratch registers 16/17 and added
 * in. No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld  16, %[a], %[a]    \n\t" \
        "mulhdu 17, %[a], %[a]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "17", "16" \
    )
3713
/* Add a single word into a two-word value: (vh | vl) += va.
 *
 * va is added to vl and the resulting carry is added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "addze  %[h], %[h]    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc" \
    )
3722
/* Subtract a single word from a two-word value: (vh | vl) -= va.
 *
 * subfc computes vl - va; scratch register 16 is then zeroed so that subfe
 * subtracts only the borrow from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc  %[l], %[a], %[l]  \n\t" \
        "li    16, 0      \n\t" \
        "subfe %[h], 16, %[h]   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "16" \
    )
3732
/* Add a three-word value in twice: (vo | vh | vl) += 2 * (vc | vb | va).
 *
 * The same carry-chained three-word addition is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [c] "r" (vc), [b] "r" (vb), [a] "r" (va) \
        : "cc" \
    )
3745
/* Count leading zeros: vn = number of leading zero bits in the 64-bit va.
 *
 * Uses the cntlzd (count leading zeros doubleword) instruction.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzd %[n], %[a]  \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
3753
3754
    #endif /* !defined(__APPLE__) */
3755
3756
#define SP_INT_ASM_AVAILABLE
3757
3758
    #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
3759
3760
    #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
3761
/*
3762
 * CPU: PPC 32-bit
3763
 */
3764
3765
    #ifdef __APPLE__
3766
3767
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw yields the low 32 bits of the product and mulhwu the high 32 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va/vb are still needed by the
 * second multiply and therefore must not share a register with an input, and
 * the previous (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mullw  %[l], %[a], %[b]  \n\t"            \
        "mulhwu %[h], %[a], %[b]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3776
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl receive the high and low 32 bits of the product; vo is set to 0.
 * The product words are write-only early-clobber outputs ("=&r"): vh is
 * written before the second multiply reads va/vb, so it must not share a
 * register with an input, and the old values of vh/vl are never read.
 * vo is written last, after every input has been consumed.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhwu %[h], %[a], %[b]  \n\t"            \
        "mullw  %[l], %[a], %[b]  \n\t"            \
        "li %[o], 0     \n\t"            \
        : [l] "=&r" (vl), [h] "=&r" (vh), [o] "=r" (vo)  \
        : [a] "r" (va), [b] "r" (vb)                     \
    )
3785
/* Multiply-accumulate: (vo | vh | vl) += va * vb.
 *
 * The low and high product words are formed in scratch registers r16 and r17
 * and then added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[b]   \n\t" \
        "mulhwu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3797
/* Two-word multiply-accumulate: (vh | vl) += va * vb.
 *
 * The product halves are formed in scratch registers r16/r17 and added in.
 * No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[b]   \n\t" \
        "mulhwu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3808
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers r16/r17 and added in
 * twice; after each addition the carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[b]   \n\t" \
        "mulhwu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3823
/* Doubled multiply-accumulate: (vo | vh | vl) += 2 * (va * vb).
 *
 * The product is computed once into scratch registers r16/r17 and added in
 * twice. No carry is propagated into vo after the first addition, so that
 * addition is assumed not to overflow vh | vl; the carry of the second
 * addition is added into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[b]   \n\t" \
        "mulhwu r17, %[a], %[b]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3839
/* Square va and store double size result in: vh | vl
 *
 * mullw yields the low 32 bits of va * va and mulhwu the high 32 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va is still needed by the second
 * multiply and therefore must not share a register with it, and the previous
 * (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mullw  %[l], %[a], %[a]  \n\t"            \
        "mulhwu %[h], %[a], %[a]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va)                                   \
        :                                                \
    )
3848
/* Square-accumulate: (vo | vh | vl) += va * va.
 *
 * The low and high words of the square are formed in scratch registers r16
 * and r17 and added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[a]   \n\t" \
        "mulhwu r17, %[a], %[a]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3860
/* Two-word square-accumulate: (vh | vl) += va * va.
 *
 * The halves of the square are formed in scratch registers r16/r17 and added
 * in. No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw  r16, %[a], %[a]   \n\t" \
        "mulhwu r17, %[a], %[a]   \n\t" \
        "addc %[l], %[l], r16   \n\t" \
        "adde %[h], %[h], r17   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r17", "r16" \
    )
3871
/* Add a single word into a two-word value: (vh | vl) += va.
 *
 * va is added to vl and the resulting carry is added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "addze  %[h], %[h]    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc" \
    )
3880
/* Subtract a single word from a two-word value: (vh | vl) -= va.
 *
 * subfc computes vl - va; scratch register r16 is then zeroed so that subfe
 * subtracts only the borrow from vh.
 */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc  %[l], %[a], %[l]  \n\t" \
        "li r16, 0      \n\t" \
        "subfe  %[h], r16, %[h]   \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "cc", "r16" \
    )
3890
/* Add a three-word value in twice: (vo | vh | vl) += 2 * (vc | vb | va).
 *
 * The same carry-chained three-word addition is simply performed two times.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        "addc %[l], %[l], %[a]  \n\t" \
        "adde %[h], %[h], %[b]  \n\t" \
        "adde %[o], %[o], %[c]  \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [c] "r" (vc), [b] "r" (vb), [a] "r" (va) \
        : "cc" \
    )
3903
/* Count leading zeros: vn = number of leading zero bits in the 32-bit va.
 *
 * Uses the cntlzw (count leading zeros word) instruction.
 */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "cntlzw %[n], %[a]  \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
    )
3910
3911
    #else /* !defined(__APPLE__) */
3912
3913
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mullw yields the low 32 bits of the product and mulhwu the high 32 bits.
 * Both results are pure outputs, so they are declared write-only and
 * early-clobber ("=&r"): vl is written while va/vb are still needed by the
 * second multiply and therefore must not share a register with an input, and
 * the previous (possibly uninitialized) values of vh/vl are never read.
 */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mullw  %[l], %[a], %[b]  \n\t"            \
        "mulhwu %[h], %[a], %[b]  \n\t"            \
        : [h] "=&r" (vh), [l] "=&r" (vl)                 \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
3922
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * vh and vl receive the high and low 32 bits of the product; vo is set to 0.
 * The product words are write-only early-clobber outputs ("=&r"): vh is
 * written before the second multiply reads va/vb, so it must not share a
 * register with an input, and the old values of vh/vl are never read.
 * vo is cleared with a load-immediate, matching the other PPC variants of
 * this macro, so the write-only output register is never read either.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhwu %[h], %[a], %[b]  \n\t"            \
        "mullw  %[l], %[a], %[b]  \n\t"            \
        "li %[o], 0     \n\t"            \
        : [l] "=&r" (vl), [h] "=&r" (vh), [o] "=r" (vo)  \
        : [a] "r" (va), [b] "r" (vb)                     \
    )
3931
/* Multiply-accumulate: (vo | vh | vl) += va * vb.
 *
 * The low and high product words are formed in scratch registers 16 and 17
 * and then added into vl and vh; the final carry is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  16, %[a], %[b]    \n\t" \
        "mulhwu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        "addze  %[o], %[o]    \n\t" \
        : [o] "+r" (vo), [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3943
/* Two-word multiply-accumulate: (vh | vl) += va * vb.
 *
 * The product halves are formed in scratch registers 16/17 and added in.
 * No third word is updated, so a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw  16, %[a], %[b]    \n\t" \
        "mulhwu 17, %[a], %[b]    \n\t" \
        "addc %[l], %[l], 16    \n\t" \
        "adde %[h], %[h], 17    \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [b] "r" (vb), [a] "r" (va) \
        : "cc", "17", "16" \
    )
3954
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers 16 (low) and
 * 17 (high). The addc/adde/addze chain into vl, vh and vo is then issued
 * two times, so the accumulator grows by twice the product.
 * Registers 16 and 17 and "cc" are declared as clobbers.
 */
3955
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
3956
    __asm__ __volatile__ (                               \
3957
        "mullw  16, %[a], %[b]    \n\t"            \
3958
        "mulhwu 17, %[a], %[b]    \n\t"            \
3959
        "addc %[l], %[l], 16    \n\t"            \
3960
        "adde %[h], %[h], 17    \n\t"            \
3961
        "addze  %[o], %[o]    \n\t"            \
3962
        "addc %[l], %[l], 16    \n\t"            \
3963
        "adde %[h], %[h], 17    \n\t"            \
3964
        "addze  %[o], %[o]    \n\t"            \
3965
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3966
        : [a] "r" (va), [b] "r" (vb)                     \
3967
        : "16", "17", "cc"                               \
3968
    )
3969
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers 16 (low) and
 * 17 (high). The first addition uses only addc/adde, so its carry out of
 * vh is not propagated into vo; only the second addition finishes with
 * addze into vo.
 * Registers 16 and 17 and "cc" are declared as clobbers.
 */
3972
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
3973
    __asm__ __volatile__ (                               \
3974
        "mullw  16, %[a], %[b]    \n\t"            \
3975
        "mulhwu 17, %[a], %[b]    \n\t"            \
3976
        "addc %[l], %[l], 16    \n\t"            \
3977
        "adde %[h], %[h], 17    \n\t"            \
3978
        "addc %[l], %[l], 16    \n\t"            \
3979
        "adde %[h], %[h], 17    \n\t"            \
3980
        "addze  %[o], %[o]    \n\t"            \
3981
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
3982
        : [a] "r" (va), [b] "r" (vb)                     \
3983
        : "16", "17", "cc"                               \
3984
    )
3985
/* Square va and store double size result in: vh | vl
 *
 * va is used as both multiplicands: mullw writes the low word of va * va
 * to vl and mulhwu writes the high word to vh. Both outputs are "+r"
 * operands and the clobber list is empty.
 */
3986
#define SP_ASM_SQR(vl, vh, va)                           \
3987
    __asm__ __volatile__ (                               \
3988
        "mullw  %[l], %[a], %[a]  \n\t"            \
3989
        "mulhwu %[h], %[a], %[a]  \n\t"            \
3990
        : [h] "+r" (vh), [l] "+r" (vl)                   \
3991
        : [a] "r" (va)                                   \
3992
        :                                                \
3993
    )
3994
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is formed in scratch registers 16 (low word) and 17 (high word)
 * and added into the accumulator with addc (vl), adde (vh) and addze (vo).
 * Registers 16 and 17 and "cc" are declared as clobbers.
 */
3995
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
3996
    __asm__ __volatile__ (                               \
3997
        "mullw  16, %[a], %[a]    \n\t"            \
3998
        "mulhwu 17, %[a], %[a]    \n\t"            \
3999
        "addc %[l], %[l], 16    \n\t"            \
4000
        "adde %[h], %[h], 17    \n\t"            \
4001
        "addze  %[o], %[o]    \n\t"            \
4002
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4003
        : [a] "r" (va)                                   \
4004
        : "16", "17", "cc"                               \
4005
    )
4006
/* Square va and add double size result into: vh | vl
 *
 * va * va is formed in scratch registers 16 (low word) and 17 (high word)
 * and added with addc/adde only; there is no third accumulator word, so a
 * carry out of vh is not recorded.
 * Registers 16 and 17 and "cc" are declared as clobbers.
 */
4007
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4008
    __asm__ __volatile__ (                               \
4009
        "mullw  16, %[a], %[a]    \n\t"            \
4010
        "mulhwu 17, %[a], %[a]    \n\t"            \
4011
        "addc %[l], %[l], 16    \n\t"            \
4012
        "adde %[h], %[h], 17    \n\t"            \
4013
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4014
        : [a] "r" (va)                                   \
4015
        : "16", "17", "cc"                               \
4016
    )
4017
/* Add va into: vh | vl
 *
 * addc adds va to vl and records the carry; addze then adds that carry
 * to vh. Only "cc" is declared as a clobber.
 */
4018
#define SP_ASM_ADDC(vl, vh, va)                          \
4019
    __asm__ __volatile__ (                               \
4020
        "addc %[l], %[l], %[a]  \n\t"            \
4021
        "addze  %[h], %[h]    \n\t"            \
4022
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4023
        : [a] "r" (va)                                   \
4024
        : "cc"                                           \
4025
    )
4026
/* Sub va from: vh | vl
 *
 * subfc computes vl - va (subtract-from form: the first source operand is
 * the one subtracted). Scratch register 16 is then zeroed with xor and
 * subfe subtracts it from vh, so only the borrow from the low word
 * changes vh.
 * Register 16 and "cc" are declared as clobbers.
 */
4027
#define SP_ASM_SUBB(vl, vh, va)                          \
4028
    __asm__ __volatile__ (                               \
4029
        "subfc  %[l], %[a], %[l]  \n\t"            \
4030
        "xor  16, 16, 16    \n\t"            \
4031
        "subfe  %[h], 16, %[h]    \n\t"            \
4032
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4033
        : [a] "r" (va)                                   \
4034
        : "16", "cc"                                     \
4035
    )
4036
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three-word carry chain (addc va into vl, adde vb into vh, adde vc
 * into vo) is issued twice with the same inputs. A carry out of vo is not
 * captured by any operand. Only "cc" is declared as a clobber.
 */
4037
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4038
    __asm__ __volatile__ (                               \
4039
        "addc %[l], %[l], %[a]  \n\t"            \
4040
        "adde %[h], %[h], %[b]  \n\t"            \
4041
        "adde %[o], %[o], %[c]  \n\t"            \
4042
        "addc %[l], %[l], %[a]  \n\t"            \
4043
        "adde %[h], %[h], %[b]  \n\t"            \
4044
        "adde %[o], %[o], %[c]  \n\t"            \
4045
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4046
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4047
        : "cc"                                           \
4048
    )
4049
/* Count leading zeros.
 *
 * cntlzw of va is written to vn. vn is a write-only ("=r") operand and va
 * is a read-only input; no clobber list is given.
 */
4050
#define SP_ASM_LZCNT(va, vn)                             \
4051
    __asm__ __volatile__ (                               \
4052
        "cntlzw %[n], %[a]  \n\t"                    \
4053
        : [n] "=r" (vn)                                  \
4054
        : [a] "r" (va)                                   \
4055
    )
4056
4057
    #endif /* !defined(__APPLE__) */
4058
4059
#define SP_INT_ASM_AVAILABLE
4060
4061
    #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
4062
4063
    #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
4064
/*
4065
 * CPU: MIPS 64-bit
4066
 */
4067
4068
/* Multiply va by vb and store double size result in: vh | vl
 *
 * dmultu leaves the product in the lo/hi registers; mflo copies the low
 * word to vl and mfhi copies the high word to vh. "$lo" and "$hi" are
 * declared as clobbers. Both outputs are "+r" operands.
 */
4069
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4070
    __asm__ __volatile__ (                               \
4071
        "dmultu %[a], %[b]    \n\t"            \
4072
        "mflo %[l]      \n\t"            \
4073
        "mfhi %[h]      \n\t"            \
4074
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4075
        : [a] "r" (va), [b] "r" (vb)                     \
4076
        : "$lo", "$hi"                                   \
4077
    )
4078
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product occupies vh | vl (dmultu, then mflo/mfhi). vo is loaded
 * from register $0, i.e. it is set to zero, and is therefore a write-only
 * ("=r") operand. "$lo" and "$hi" are declared as clobbers.
 */
4079
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4080
    __asm__ __volatile__ (                               \
4081
        "dmultu %[a], %[b]    \n\t"            \
4082
        "mflo %[l]      \n\t"            \
4083
        "mfhi %[h]      \n\t"            \
4084
        "move %[o], $0    \n\t"            \
4085
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4086
        : [a] "r" (va), [b] "r" (vb)                     \
4087
        : "$lo", "$hi"                                   \
4088
    )
4089
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is moved into fixed scratch registers $10 (low) and
 * $11 (high). Carries are computed explicitly: after each daddu, sltu sets
 * $12 to 1 when the sum is less than the value just added, and $12 is then
 * added to the next word up. The carry from the low word is folded into vh
 * (and its own carry into vo) before the high product word is added.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4090
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4091
    __asm__ __volatile__ (                               \
4092
        "dmultu %[a], %[b]    \n\t"            \
4093
        "mflo $10     \n\t"            \
4094
        "mfhi $11     \n\t"            \
4095
        "daddu  %[l], %[l], $10   \n\t"            \
4096
        "sltu $12, %[l], $10    \n\t"            \
4097
        "daddu  %[h], %[h], $12   \n\t"            \
4098
        "sltu $12, %[h], $12    \n\t"            \
4099
        "daddu  %[o], %[o], $12   \n\t"            \
4100
        "daddu  %[h], %[h], $11   \n\t"            \
4101
        "sltu $12, %[h], $11    \n\t"            \
4102
        "daddu  %[o], %[o], $12   \n\t"            \
4103
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4104
        : [a] "r" (va), [b] "r" (vb)                     \
4105
        : "$10", "$11", "$12", "$lo", "$hi"              \
4106
    )
4107
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is moved into scratch registers $10 (low) and $11 (high).
 * The low word is added to vl and sltu records the carry in $12; vh then
 * receives the high product word plus that carry. A carry out of vh is not
 * computed, as there is no third accumulator word.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4108
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4109
    __asm__ __volatile__ (                               \
4110
        "dmultu %[a], %[b]    \n\t"            \
4111
        "mflo $10     \n\t"            \
4112
        "mfhi $11     \n\t"            \
4113
        "daddu  %[l], %[l], $10   \n\t"            \
4114
        "sltu $12, %[l], $10    \n\t"            \
4115
        "daddu  %[h], %[h], $11   \n\t"            \
4116
        "daddu  %[h], %[h], $12   \n\t"            \
4117
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4118
        : [a] "r" (va), [b] "r" (vb)                     \
4119
        : "$10", "$11", "$12", "$lo", "$hi"              \
4120
    )
4121
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and
 * $11 (high). The full three-word add sequence (daddu followed by sltu to
 * derive the carry into $12, which is added to the next word) is then
 * issued two times with the same product.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4122
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4123
    __asm__ __volatile__ (                               \
4124
        "dmultu %[a], %[b]    \n\t"            \
4125
        "mflo $10     \n\t"            \
4126
        "mfhi $11     \n\t"            \
4127
        "daddu  %[l], %[l], $10   \n\t"            \
4128
        "sltu $12, %[l], $10    \n\t"            \
4129
        "daddu  %[h], %[h], $12   \n\t"            \
4130
        "sltu $12, %[h], $12    \n\t"            \
4131
        "daddu  %[o], %[o], $12   \n\t"            \
4132
        "daddu  %[h], %[h], $11   \n\t"            \
4133
        "sltu $12, %[h], $11    \n\t"            \
4134
        "daddu  %[o], %[o], $12   \n\t"            \
4135
        "daddu  %[l], %[l], $10   \n\t"            \
4136
        "sltu $12, %[l], $10    \n\t"            \
4137
        "daddu  %[h], %[h], $12   \n\t"            \
4138
        "sltu $12, %[h], $12    \n\t"            \
4139
        "daddu  %[o], %[o], $12   \n\t"            \
4140
        "daddu  %[h], %[h], $11   \n\t"            \
4141
        "sltu $12, %[h], $11    \n\t"            \
4142
        "daddu  %[o], %[o], $12   \n\t"            \
4143
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4144
        : [a] "r" (va), [b] "r" (vb)                     \
4145
        : "$10", "$11", "$12", "$lo", "$hi"              \
4146
    )
4147
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and
 * $11 (high). The first addition only carries from vl into vh and does not
 * touch vo. The second addition derives every carry with sltu into $12 and
 * propagates it through vh into vo.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4150
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4151
    __asm__ __volatile__ (                               \
4152
        "dmultu %[a], %[b]    \n\t"            \
4153
        "mflo $10     \n\t"            \
4154
        "mfhi $11     \n\t"            \
4155
        "daddu  %[l], %[l], $10   \n\t"            \
4156
        "sltu $12, %[l], $10    \n\t"            \
4157
        "daddu  %[h], %[h], $11   \n\t"            \
4158
        "daddu  %[h], %[h], $12   \n\t"            \
4159
        "daddu  %[l], %[l], $10   \n\t"            \
4160
        "sltu $12, %[l], $10    \n\t"            \
4161
        "daddu  %[h], %[h], $12   \n\t"            \
4162
        "sltu $12, %[h], $12    \n\t"            \
4163
        "daddu  %[o], %[o], $12   \n\t"            \
4164
        "daddu  %[h], %[h], $11   \n\t"            \
4165
        "sltu $12, %[h], $11    \n\t"            \
4166
        "daddu  %[o], %[o], $12   \n\t"            \
4167
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4168
        : [a] "r" (va), [b] "r" (vb)                     \
4169
        : "$10", "$11", "$12", "$lo", "$hi"              \
4170
    )
4171
/* Square va and store double size result in: vh | vl
 *
 * va is used as both dmultu operands; mflo copies the low word of the
 * product to vl and mfhi copies the high word to vh. "$lo" and "$hi" are
 * declared as clobbers.
 */
4172
#define SP_ASM_SQR(vl, vh, va)                           \
4173
    __asm__ __volatile__ (                               \
4174
        "dmultu %[a], %[a]    \n\t"            \
4175
        "mflo %[l]      \n\t"            \
4176
        "mfhi %[h]      \n\t"            \
4177
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4178
        : [a] "r" (va)                                   \
4179
        : "$lo", "$hi"                                   \
4180
    )
4181
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is moved into scratch registers $10 (low) and $11 (high) and
 * added into the accumulator. After each daddu, sltu sets $12 to 1 when
 * the sum is less than the value just added, and $12 is added to the next
 * word up, ending in vo.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4182
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4183
    __asm__ __volatile__ (                               \
4184
        "dmultu %[a], %[a]    \n\t"            \
4185
        "mflo $10     \n\t"            \
4186
        "mfhi $11     \n\t"            \
4187
        "daddu  %[l], %[l], $10   \n\t"            \
4188
        "sltu $12, %[l], $10    \n\t"            \
4189
        "daddu  %[h], %[h], $12   \n\t"            \
4190
        "sltu $12, %[h], $12    \n\t"            \
4191
        "daddu  %[o], %[o], $12   \n\t"            \
4192
        "daddu  %[h], %[h], $11   \n\t"            \
4193
        "sltu $12, %[h], $11    \n\t"            \
4194
        "daddu  %[o], %[o], $12   \n\t"            \
4195
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4196
        : [a] "r" (va)                                   \
4197
        : "$10", "$11", "$12", "$lo", "$hi"              \
4198
    )
4199
/* Square va and add double size result into: vh | vl
 *
 * va * va is moved into scratch registers $10 (low) and $11 (high). The
 * low word is added to vl and sltu records the carry in $12; vh receives
 * the high word plus that carry. A carry out of vh is not computed.
 * $10, $11, $12, "$lo" and "$hi" are declared as clobbers.
 */
4200
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4201
    __asm__ __volatile__ (                               \
4202
        "dmultu %[a], %[a]    \n\t"            \
4203
        "mflo $10     \n\t"            \
4204
        "mfhi $11     \n\t"            \
4205
        "daddu  %[l], %[l], $10   \n\t"            \
4206
        "sltu $12, %[l], $10    \n\t"            \
4207
        "daddu  %[h], %[h], $11   \n\t"            \
4208
        "daddu  %[h], %[h], $12   \n\t"            \
4209
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4210
        : [a] "r" (va)                                   \
4211
        : "$10", "$11", "$12", "$lo", "$hi"              \
4212
    )
4213
/* Add va into: vh | vl
 *
 * va is added to vl; sltu then sets scratch register $12 to 1 when the new
 * vl is less than va (i.e. the addition wrapped) and $12 is added to vh.
 * Only $12 is declared as a clobber.
 */
4214
#define SP_ASM_ADDC(vl, vh, va)                          \
4215
    __asm__ __volatile__ (                               \
4216
        "daddu  %[l], %[l], %[a]  \n\t"            \
4217
        "sltu $12, %[l], %[a]   \n\t"            \
4218
        "daddu  %[h], %[h], $12   \n\t"            \
4219
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4220
        : [a] "r" (va)                                   \
4221
        : "$12"                                          \
4222
    )
4223
/* Sub va from: vh | vl
 *
 * The original vl is saved in scratch register $12 before va is subtracted
 * from it. The borrow is then derived as (original vl < new vl), which is
 * 1 only when the subtraction wrapped, and subtracted from vh.
 * Only $12 is declared as a clobber.
 */
4224
#define SP_ASM_SUBB(vl, vh, va)                          \
4225
    __asm__ __volatile__ (                               \
4226
        "move $12, %[l]   \n\t"            \
4227
        "dsubu  %[l], $12, %[a]   \n\t"            \
4228
        "sltu $12, $12, %[l]    \n\t"            \
4229
        "dsubu  %[h], %[h], $12   \n\t"            \
4230
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4231
        : [a] "r" (va)                                   \
4232
        : "$12"                                          \
4233
    )
4234
/* Add two times vc | vb | va into vo | vh | vl
 *
 * One pass adds va to vl, folds the resulting carry (sltu into $12) into
 * vh and on into vo, then adds vb to vh and finally adds vc plus the carry
 * from that step to vo. The same pass is issued a second time with
 * unchanged inputs. A carry out of vo is not computed.
 * Only $12 is declared as a clobber.
 */
4235
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4236
    __asm__ __volatile__ (                               \
4237
        "daddu  %[l], %[l], %[a]  \n\t"            \
4238
        "sltu $12, %[l], %[a]   \n\t"            \
4239
        "daddu  %[h], %[h], $12   \n\t"            \
4240
        "sltu $12, %[h], $12    \n\t"            \
4241
        "daddu  %[o], %[o], $12   \n\t"            \
4242
        "daddu  %[h], %[h], %[b]  \n\t"            \
4243
        "sltu $12, %[h], %[b]   \n\t"            \
4244
        "daddu  %[o], %[o], %[c]  \n\t"            \
4245
        "daddu  %[o], %[o], $12   \n\t"            \
4246
        "daddu  %[l], %[l], %[a]  \n\t"            \
4247
        "sltu $12, %[l], %[a]   \n\t"            \
4248
        "daddu  %[h], %[h], $12   \n\t"            \
4249
        "sltu $12, %[h], $12    \n\t"            \
4250
        "daddu  %[o], %[o], $12   \n\t"            \
4251
        "daddu  %[h], %[h], %[b]  \n\t"            \
4252
        "sltu $12, %[h], %[b]   \n\t"            \
4253
        "daddu  %[o], %[o], %[c]  \n\t"            \
4254
        "daddu  %[o], %[o], $12   \n\t"            \
4255
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4256
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4257
        : "$12"                                          \
4258
    )
4259
4260
#define SP_INT_ASM_AVAILABLE
4261
4262
    #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
4263
4264
    #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
4265
/*
4266
 * CPU: MIPS 32-bit
4267
 */
4268
4269
/* Multiply va by vb and store double size result in: vh | vl
 *
 * multu leaves the product in the lo/hi registers; mflo copies the low
 * word to vl and mfhi copies the high word to vh. "%lo" and "%hi" are
 * declared as clobbers. Both outputs are "+r" operands.
 */
4270
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4271
    __asm__ __volatile__ (                               \
4272
        "multu  %[a], %[b]    \n\t"            \
4273
        "mflo %[l]      \n\t"            \
4274
        "mfhi %[h]      \n\t"            \
4275
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4276
        : [a] "r" (va), [b] "r" (vb)                     \
4277
        : "%lo", "%hi"                                   \
4278
    )
4279
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product occupies vh | vl (multu, then mflo/mfhi). vo is loaded from
 * register $0, i.e. it is set to zero, and is therefore a write-only
 * ("=r") operand. "%lo" and "%hi" are declared as clobbers.
 */
4280
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4281
    __asm__ __volatile__ (                               \
4282
        "multu  %[a], %[b]    \n\t"            \
4283
        "mflo %[l]      \n\t"            \
4284
        "mfhi %[h]      \n\t"            \
4285
        "move %[o], $0    \n\t"            \
4286
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4287
        : [a] "r" (va), [b] "r" (vb)                     \
4288
        : "%lo", "%hi"                                   \
4289
    )
4290
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is moved into fixed scratch registers $10 (low) and
 * $11 (high). Carries are computed explicitly: after each addu, sltu sets
 * $12 to 1 when the sum is less than the value just added, and $12 is then
 * added to the next word up. The carry from the low word is folded into vh
 * (and its own carry into vo) before the high product word is added.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4291
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4292
    __asm__ __volatile__ (                               \
4293
        "multu  %[a], %[b]    \n\t"            \
4294
        "mflo $10     \n\t"            \
4295
        "mfhi $11     \n\t"            \
4296
        "addu %[l], %[l], $10   \n\t"            \
4297
        "sltu $12, %[l], $10    \n\t"            \
4298
        "addu %[h], %[h], $12   \n\t"            \
4299
        "sltu $12, %[h], $12    \n\t"            \
4300
        "addu %[o], %[o], $12   \n\t"            \
4301
        "addu %[h], %[h], $11   \n\t"            \
4302
        "sltu $12, %[h], $11    \n\t"            \
4303
        "addu %[o], %[o], $12   \n\t"            \
4304
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4305
        : [a] "r" (va), [b] "r" (vb)                     \
4306
        : "$10", "$11", "$12", "%lo", "%hi"              \
4307
    )
4308
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is moved into scratch registers $10 (low) and $11 (high).
 * The low word is added to vl and sltu records the carry in $12; vh then
 * receives the high product word plus that carry. A carry out of vh is not
 * computed, as there is no third accumulator word.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4309
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4310
    __asm__ __volatile__ (                               \
4311
        "multu  %[a], %[b]    \n\t"            \
4312
        "mflo $10     \n\t"            \
4313
        "mfhi $11     \n\t"            \
4314
        "addu %[l], %[l], $10   \n\t"            \
4315
        "sltu $12, %[l], $10    \n\t"            \
4316
        "addu %[h], %[h], $11   \n\t"            \
4317
        "addu %[h], %[h], $12   \n\t"            \
4318
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4319
        : [a] "r" (va), [b] "r" (vb)                     \
4320
        : "$10", "$11", "$12", "%lo", "%hi"              \
4321
    )
4322
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and
 * $11 (high). The full three-word add sequence (addu followed by sltu to
 * derive the carry into $12, which is added to the next word) is then
 * issued two times with the same product.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4323
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4324
    __asm__ __volatile__ (                               \
4325
        "multu  %[a], %[b]    \n\t"            \
4326
        "mflo $10     \n\t"            \
4327
        "mfhi $11     \n\t"            \
4328
        "addu %[l], %[l], $10   \n\t"            \
4329
        "sltu $12, %[l], $10    \n\t"            \
4330
        "addu %[h], %[h], $12   \n\t"            \
4331
        "sltu $12, %[h], $12    \n\t"            \
4332
        "addu %[o], %[o], $12   \n\t"            \
4333
        "addu %[h], %[h], $11   \n\t"            \
4334
        "sltu $12, %[h], $11    \n\t"            \
4335
        "addu %[o], %[o], $12   \n\t"            \
4336
        "addu %[l], %[l], $10   \n\t"            \
4337
        "sltu $12, %[l], $10    \n\t"            \
4338
        "addu %[h], %[h], $12   \n\t"            \
4339
        "sltu $12, %[h], $12    \n\t"            \
4340
        "addu %[o], %[o], $12   \n\t"            \
4341
        "addu %[h], %[h], $11   \n\t"            \
4342
        "sltu $12, %[h], $11    \n\t"            \
4343
        "addu %[o], %[o], $12   \n\t"            \
4344
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4345
        : [a] "r" (va), [b] "r" (vb)                     \
4346
        : "$10", "$11", "$12", "%lo", "%hi"              \
4347
    )
4348
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers $10 (low) and
 * $11 (high). The first addition only carries from vl into vh and does not
 * touch vo. The second addition derives every carry with sltu into $12 and
 * propagates it through vh into vo.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4351
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4352
    __asm__ __volatile__ (                               \
4353
        "multu  %[a], %[b]    \n\t"            \
4354
        "mflo $10     \n\t"            \
4355
        "mfhi $11     \n\t"            \
4356
        "addu %[l], %[l], $10   \n\t"            \
4357
        "sltu $12, %[l], $10    \n\t"            \
4358
        "addu %[h], %[h], $11   \n\t"            \
4359
        "addu %[h], %[h], $12   \n\t"            \
4360
        "addu %[l], %[l], $10   \n\t"            \
4361
        "sltu $12, %[l], $10    \n\t"            \
4362
        "addu %[h], %[h], $12   \n\t"            \
4363
        "sltu $12, %[h], $12    \n\t"            \
4364
        "addu %[o], %[o], $12   \n\t"            \
4365
        "addu %[h], %[h], $11   \n\t"            \
4366
        "sltu $12, %[h], $11    \n\t"            \
4367
        "addu %[o], %[o], $12   \n\t"            \
4368
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4369
        : [a] "r" (va), [b] "r" (vb)                     \
4370
        : "$10", "$11", "$12", "%lo", "%hi"              \
4371
    )
4372
/* Square va and store double size result in: vh | vl
 *
 * va is used as both multu operands; mflo copies the low word of the
 * product to vl and mfhi copies the high word to vh. "%lo" and "%hi" are
 * declared as clobbers.
 */
4373
#define SP_ASM_SQR(vl, vh, va)                           \
4374
    __asm__ __volatile__ (                               \
4375
        "multu  %[a], %[a]    \n\t"            \
4376
        "mflo %[l]      \n\t"            \
4377
        "mfhi %[h]      \n\t"            \
4378
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4379
        : [a] "r" (va)                                   \
4380
        : "%lo", "%hi"                                   \
4381
    )
4382
/* Square va and add double size result into: vo | vh | vl
 *
 * va * va is moved into scratch registers $10 (low) and $11 (high) and
 * added into the accumulator. After each addu, sltu sets $12 to 1 when the
 * sum is less than the value just added, and $12 is added to the next word
 * up, ending in vo.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4383
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4384
    __asm__ __volatile__ (                               \
4385
        "multu  %[a], %[a]    \n\t"            \
4386
        "mflo $10     \n\t"            \
4387
        "mfhi $11     \n\t"            \
4388
        "addu %[l], %[l], $10   \n\t"            \
4389
        "sltu $12, %[l], $10    \n\t"            \
4390
        "addu %[h], %[h], $12   \n\t"            \
4391
        "sltu $12, %[h], $12    \n\t"            \
4392
        "addu %[o], %[o], $12   \n\t"            \
4393
        "addu %[h], %[h], $11   \n\t"            \
4394
        "sltu $12, %[h], $11    \n\t"            \
4395
        "addu %[o], %[o], $12   \n\t"            \
4396
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4397
        : [a] "r" (va)                                   \
4398
        : "$10", "$11", "$12", "%lo", "%hi"              \
4399
    )
4400
/* Square va and add double size result into: vh | vl
 *
 * va * va is moved into scratch registers $10 (low) and $11 (high). The
 * low word is added to vl and sltu records the carry in $12; vh receives
 * the high word plus that carry. A carry out of vh is not computed.
 * $10, $11, $12, "%lo" and "%hi" are declared as clobbers.
 */
4401
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4402
    __asm__ __volatile__ (                               \
4403
        "multu  %[a], %[a]    \n\t"            \
4404
        "mflo $10     \n\t"            \
4405
        "mfhi $11     \n\t"            \
4406
        "addu %[l], %[l], $10   \n\t"            \
4407
        "sltu $12, %[l], $10    \n\t"            \
4408
        "addu %[h], %[h], $11   \n\t"            \
4409
        "addu %[h], %[h], $12   \n\t"            \
4410
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4411
        : [a] "r" (va)                                   \
4412
        : "$10", "$11", "$12", "%lo", "%hi"              \
4413
    )
4414
/* Add va into: vh | vl
 *
 * va is added to vl; sltu then sets scratch register $12 to 1 when the new
 * vl is less than va (i.e. the addition wrapped) and $12 is added to vh.
 * Only $12 is declared as a clobber.
 */
4415
#define SP_ASM_ADDC(vl, vh, va)                          \
4416
    __asm__ __volatile__ (                               \
4417
        "addu %[l], %[l], %[a]  \n\t"            \
4418
        "sltu $12, %[l], %[a]   \n\t"            \
4419
        "addu %[h], %[h], $12   \n\t"            \
4420
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4421
        : [a] "r" (va)                                   \
4422
        : "$12"                                          \
4423
    )
4424
/* Sub va from: vh | vl
 *
 * The original vl is saved in scratch register $12 before va is subtracted
 * from it. The borrow is then derived as (original vl < new vl), which is
 * 1 only when the subtraction wrapped, and subtracted from vh.
 * Only $12 is declared as a clobber.
 */
4425
#define SP_ASM_SUBB(vl, vh, va)                          \
4426
    __asm__ __volatile__ (                               \
4427
        "move $12, %[l]   \n\t"            \
4428
        "subu %[l], $12, %[a]   \n\t"            \
4429
        "sltu $12, $12, %[l]    \n\t"            \
4430
        "subu %[h], %[h], $12   \n\t"            \
4431
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4432
        : [a] "r" (va)                                   \
4433
        : "$12"                                          \
4434
    )
4435
/* Add two times vc | vb | va into vo | vh | vl
 *
 * One pass adds va to vl, folds the resulting carry (sltu into $12) into
 * vh and on into vo, then adds vb to vh and finally adds vc plus the carry
 * from that step to vo. The same pass is issued a second time with
 * unchanged inputs. A carry out of vo is not computed.
 * Only $12 is declared as a clobber.
 */
4436
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4437
    __asm__ __volatile__ (                               \
4438
        "addu %[l], %[l], %[a]  \n\t"            \
4439
        "sltu $12, %[l], %[a]   \n\t"            \
4440
        "addu %[h], %[h], $12   \n\t"            \
4441
        "sltu $12, %[h], $12    \n\t"            \
4442
        "addu %[o], %[o], $12   \n\t"            \
4443
        "addu %[h], %[h], %[b]  \n\t"            \
4444
        "sltu $12, %[h], %[b]   \n\t"            \
4445
        "addu %[o], %[o], %[c]  \n\t"            \
4446
        "addu %[o], %[o], $12   \n\t"            \
4447
        "addu %[l], %[l], %[a]  \n\t"            \
4448
        "sltu $12, %[l], %[a]   \n\t"            \
4449
        "addu %[h], %[h], $12   \n\t"            \
4450
        "sltu $12, %[h], $12    \n\t"            \
4451
        "addu %[o], %[o], $12   \n\t"            \
4452
        "addu %[h], %[h], %[b]  \n\t"            \
4453
        "sltu $12, %[h], %[b]   \n\t"            \
4454
        "addu %[o], %[o], %[c]  \n\t"            \
4455
        "addu %[o], %[o], $12   \n\t"            \
4456
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4457
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4458
        : "$12"                                          \
4459
    )
4460
4461
#define SP_INT_ASM_AVAILABLE
4462
4463
    #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
4464
4465
    #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
4466
/*
4467
 * CPU: RISCV 64-bit
4468
 */
4469
4470
/* Multiply va by vb and store double size result in: vh | vl
 *
 * mul writes the low word of the product to vl and mulhu writes the high
 * word to vh. Both outputs use the "+r" constraint, so the compiler treats
 * them as read as well as written. The clobber list is empty.
 */
4471
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4472
    __asm__ __volatile__ (                               \
4473
        "mul  %[l], %[a], %[b]  \n\t"            \
4474
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4475
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4476
        : [a] "r" (va), [b] "r" (vb)                     \
4477
        :                                                \
4478
    )
4479
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * The product occupies vh | vl (mulhu then mul). vo is set to the sum of
 * the zero register with itself, i.e. zero, and is therefore a write-only
 * ("=r") operand. The clobber list is empty.
 */
4480
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4481
    __asm__ __volatile__ (                               \
4482
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4483
        "mul  %[l], %[a], %[b]  \n\t"            \
4484
        "add  %[o], zero, zero  \n\t"            \
4485
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4486
        : [a] "r" (va), [b] "r" (vb)                     \
4487
        :                                                \
4488
    )
4489
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in fixed scratch registers a5 (low) and a6 (high).
 * Carries are computed explicitly: after each add, sltu sets a7 to 1 when
 * the sum is less than the value just added, and a7 is then added to the
 * next word up. The carry from the low word is folded into vh (and its own
 * carry into vo) before the high product word is added.
 * a5, a6 and a7 are declared as clobbers.
 */
4490
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4491
    __asm__ __volatile__ (                               \
4492
        "mul  a5, %[a], %[b]    \n\t"            \
4493
        "mulhu  a6, %[a], %[b]    \n\t"            \
4494
        "add  %[l], %[l], a5    \n\t"            \
4495
        "sltu a7, %[l], a5    \n\t"            \
4496
        "add  %[h], %[h], a7    \n\t"            \
4497
        "sltu a7, %[h], a7    \n\t"            \
4498
        "add  %[o], %[o], a7    \n\t"            \
4499
        "add  %[h], %[h], a6    \n\t"            \
4500
        "sltu a7, %[h], a6    \n\t"            \
4501
        "add  %[o], %[o], a7    \n\t"            \
4502
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4503
        : [a] "r" (va), [b] "r" (vb)                     \
4504
        : "a5", "a6", "a7"                               \
4505
    )
4506
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in scratch registers a5 (low) and a6 (high). The
 * low word is added to vl and sltu records the carry in a7; vh then
 * receives the high product word plus that carry. A carry out of vh is not
 * computed, as there is no third accumulator word.
 * a5, a6 and a7 are declared as clobbers.
 */
4507
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4508
    __asm__ __volatile__ (                               \
4509
        "mul  a5, %[a], %[b]    \n\t"            \
4510
        "mulhu  a6, %[a], %[b]    \n\t"            \
4511
        "add  %[l], %[l], a5    \n\t"            \
4512
        "sltu a7, %[l], a5    \n\t"            \
4513
        "add  %[h], %[h], a6    \n\t"            \
4514
        "add  %[h], %[h], a7    \n\t"            \
4515
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4516
        : [a] "r" (va), [b] "r" (vb)                     \
4517
        : "a5", "a6", "a7"                               \
4518
    )
4519
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is computed once into scratch registers a5 (low) and
 * a6 (high). The full three-word add sequence (add followed by sltu to
 * derive the carry into a7, which is added to the next word) is then
 * issued two times with the same product.
 * a5, a6 and a7 are declared as clobbers.
 */
4520
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4521
    __asm__ __volatile__ (                               \
4522
        "mul  a5, %[a], %[b]    \n\t"            \
4523
        "mulhu  a6, %[a], %[b]    \n\t"            \
4524
        "add  %[l], %[l], a5    \n\t"            \
4525
        "sltu a7, %[l], a5    \n\t"            \
4526
        "add  %[h], %[h], a7    \n\t"            \
4527
        "sltu a7, %[h], a7    \n\t"            \
4528
        "add  %[o], %[o], a7    \n\t"            \
4529
        "add  %[h], %[h], a6    \n\t"            \
4530
        "sltu a7, %[h], a6    \n\t"            \
4531
        "add  %[o], %[o], a7    \n\t"            \
4532
        "add  %[l], %[l], a5    \n\t"            \
4533
        "sltu a7, %[l], a5    \n\t"            \
4534
        "add  %[h], %[h], a7    \n\t"            \
4535
        "sltu a7, %[h], a7    \n\t"            \
4536
        "add  %[o], %[o], a7    \n\t"            \
4537
        "add  %[h], %[h], a6    \n\t"            \
4538
        "sltu a7, %[h], a6    \n\t"            \
4539
        "add  %[o], %[o], a7    \n\t"            \
4540
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4541
        : [a] "r" (va), [b] "r" (vb)                     \
4542
        : "a5", "a6", "a7"                               \
4543
    )
4544
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is computed once into scratch registers a5 (low) and
 * a6 (high). The first addition only carries from vl into vh and does not
 * touch vo. The second addition derives every carry with sltu into a7 and
 * propagates it through vh into vo.
 * a5, a6 and a7 are declared as clobbers.
 */
4547
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4548
    __asm__ __volatile__ (                               \
4549
        "mul  a5, %[a], %[b]    \n\t"            \
4550
        "mulhu  a6, %[a], %[b]    \n\t"            \
4551
        "add  %[l], %[l], a5    \n\t"            \
4552
        "sltu a7, %[l], a5    \n\t"            \
4553
        "add  %[h], %[h], a6    \n\t"            \
4554
        "add  %[h], %[h], a7    \n\t"            \
4555
        "add  %[l], %[l], a5    \n\t"            \
4556
        "sltu a7, %[l], a5    \n\t"            \
4557
        "add  %[h], %[h], a7    \n\t"            \
4558
        "sltu a7, %[h], a7    \n\t"            \
4559
        "add  %[o], %[o], a7    \n\t"            \
4560
        "add  %[h], %[h], a6    \n\t"            \
4561
        "sltu a7, %[h], a6    \n\t"            \
4562
        "add  %[o], %[o], a7    \n\t"            \
4563
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4564
        : [a] "r" (va), [b] "r" (vb)                     \
4565
        : "a5", "a6", "a7"                               \
4566
    )
4567
/* Square va and store double size result in: vh | vl */
4568
#define SP_ASM_SQR(vl, vh, va)                           \
4569
    __asm__ __volatile__ (                               \
4570
        "mul  %[l], %[a], %[a]  \n\t"            \
4571
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4572
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4573
        : [a] "r" (va)                                   \
4574
        :                                                \
4575
    )
4576
/* Square va and add double size result into: vo | vh | vl */
4577
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4578
    __asm__ __volatile__ (                               \
4579
        "mul  a5, %[a], %[a]    \n\t"            \
4580
        "mulhu  a6, %[a], %[a]    \n\t"            \
4581
        "add  %[l], %[l], a5    \n\t"            \
4582
        "sltu a7, %[l], a5    \n\t"            \
4583
        "add  %[h], %[h], a7    \n\t"            \
4584
        "sltu a7, %[h], a7    \n\t"            \
4585
        "add  %[o], %[o], a7    \n\t"            \
4586
        "add  %[h], %[h], a6    \n\t"            \
4587
        "sltu a7, %[h], a6    \n\t"            \
4588
        "add  %[o], %[o], a7    \n\t"            \
4589
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4590
        : [a] "r" (va)                                   \
4591
        : "a5", "a6", "a7"                               \
4592
    )
4593
/* Square va and add double size result into: vh | vl */
4594
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4595
    __asm__ __volatile__ (                               \
4596
        "mul  a5, %[a], %[a]    \n\t"            \
4597
        "mulhu  a6, %[a], %[a]    \n\t"            \
4598
        "add  %[l], %[l], a5    \n\t"            \
4599
        "sltu a7, %[l], a5    \n\t"            \
4600
        "add  %[h], %[h], a6    \n\t"            \
4601
        "add  %[h], %[h], a7    \n\t"            \
4602
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4603
        : [a] "r" (va)                                   \
4604
        : "a5", "a6", "a7"                               \
4605
    )
4606
/* Add va into: vh | vl */
4607
#define SP_ASM_ADDC(vl, vh, va)                          \
4608
    __asm__ __volatile__ (                               \
4609
        "add  %[l], %[l], %[a]  \n\t"            \
4610
        "sltu a7, %[l], %[a]    \n\t"            \
4611
        "add  %[h], %[h], a7    \n\t"            \
4612
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4613
        : [a] "r" (va)                                   \
4614
        : "a7"                                           \
4615
    )
4616
/* Sub va from: vh | vl */
4617
#define SP_ASM_SUBB(vl, vh, va)                          \
4618
    __asm__ __volatile__ (                               \
4619
        "add  a7, %[l], zero    \n\t"            \
4620
        "sub  %[l], a7, %[a]    \n\t"            \
4621
        "sltu a7, a7, %[l]    \n\t"            \
4622
        "sub  %[h], %[h], a7    \n\t"            \
4623
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4624
        : [a] "r" (va)                                   \
4625
        : "a7"                                           \
4626
    )
4627
/* Add two times vc | vb | va into vo | vh | vl */
4628
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4629
    __asm__ __volatile__ (                               \
4630
        "add  %[l], %[l], %[a]  \n\t"            \
4631
        "sltu a7, %[l], %[a]    \n\t"            \
4632
        "add  %[h], %[h], a7    \n\t"            \
4633
        "sltu a7, %[h], a7    \n\t"            \
4634
        "add  %[o], %[o], a7    \n\t"            \
4635
        "add  %[h], %[h], %[b]  \n\t"            \
4636
        "sltu a7, %[h], %[b]    \n\t"            \
4637
        "add  %[o], %[o], %[c]  \n\t"            \
4638
        "add  %[o], %[o], a7    \n\t"            \
4639
        "add  %[l], %[l], %[a]  \n\t"            \
4640
        "sltu a7, %[l], %[a]    \n\t"            \
4641
        "add  %[h], %[h], a7    \n\t"            \
4642
        "sltu a7, %[h], a7    \n\t"            \
4643
        "add  %[o], %[o], a7    \n\t"            \
4644
        "add  %[h], %[h], %[b]  \n\t"            \
4645
        "sltu a7, %[h], %[b]    \n\t"            \
4646
        "add  %[o], %[o], %[c]  \n\t"            \
4647
        "add  %[o], %[o], a7    \n\t"            \
4648
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4649
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4650
        : "a7"                                           \
4651
    )
4652
4653
#define SP_INT_ASM_AVAILABLE
4654
4655
    #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
4656
4657
    #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
4658
/*
4659
 * CPU: RISCV 32-bit
4660
 */
4661
4662
/* Multiply va by vb and store double size result in: vh | vl */
4663
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4664
    __asm__ __volatile__ (                               \
4665
        "mul  %[l], %[a], %[b]  \n\t"            \
4666
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4667
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4668
        : [a] "r" (va), [b] "r" (vb)                     \
4669
        :                                                \
4670
    )
4671
/* Multiply va by vb and store double size result in: vo | vh | vl */
4672
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4673
    __asm__ __volatile__ (                               \
4674
        "mulhu  %[h], %[a], %[b]  \n\t"            \
4675
        "mul  %[l], %[a], %[b]  \n\t"            \
4676
        "add  %[o], zero, zero  \n\t"            \
4677
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4678
        : [a] "r" (va), [b] "r" (vb)                     \
4679
        :                                                \
4680
    )
4681
/* Multiply va by vb and add double size result into: vo | vh | vl */
4682
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4683
    __asm__ __volatile__ (                               \
4684
        "mul  a5, %[a], %[b]    \n\t"            \
4685
        "mulhu  a6, %[a], %[b]    \n\t"            \
4686
        "add  %[l], %[l], a5    \n\t"            \
4687
        "sltu a7, %[l], a5    \n\t"            \
4688
        "add  %[h], %[h], a7    \n\t"            \
4689
        "sltu a7, %[h], a7    \n\t"            \
4690
        "add  %[o], %[o], a7    \n\t"            \
4691
        "add  %[h], %[h], a6    \n\t"            \
4692
        "sltu a7, %[h], a6    \n\t"            \
4693
        "add  %[o], %[o], a7    \n\t"            \
4694
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4695
        : [a] "r" (va), [b] "r" (vb)                     \
4696
        : "a5", "a6", "a7"                               \
4697
    )
4698
/* Multiply va by vb and add double size result into: vh | vl */
4699
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4700
    __asm__ __volatile__ (                               \
4701
        "mul  a5, %[a], %[b]    \n\t"            \
4702
        "mulhu  a6, %[a], %[b]    \n\t"            \
4703
        "add  %[l], %[l], a5    \n\t"            \
4704
        "sltu a7, %[l], a5    \n\t"            \
4705
        "add  %[h], %[h], a6    \n\t"            \
4706
        "add  %[h], %[h], a7    \n\t"            \
4707
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4708
        : [a] "r" (va), [b] "r" (vb)                     \
4709
        : "a5", "a6", "a7"                               \
4710
    )
4711
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
4712
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4713
    __asm__ __volatile__ (                               \
4714
        "mul  a5, %[a], %[b]    \n\t"            \
4715
        "mulhu  a6, %[a], %[b]    \n\t"            \
4716
        "add  %[l], %[l], a5    \n\t"            \
4717
        "sltu a7, %[l], a5    \n\t"            \
4718
        "add  %[h], %[h], a7    \n\t"            \
4719
        "sltu a7, %[h], a7    \n\t"            \
4720
        "add  %[o], %[o], a7    \n\t"            \
4721
        "add  %[h], %[h], a6    \n\t"            \
4722
        "sltu a7, %[h], a6    \n\t"            \
4723
        "add  %[o], %[o], a7    \n\t"            \
4724
        "add  %[l], %[l], a5    \n\t"            \
4725
        "sltu a7, %[l], a5    \n\t"            \
4726
        "add  %[h], %[h], a7    \n\t"            \
4727
        "sltu a7, %[h], a7    \n\t"            \
4728
        "add  %[o], %[o], a7    \n\t"            \
4729
        "add  %[h], %[h], a6    \n\t"            \
4730
        "sltu a7, %[h], a6    \n\t"            \
4731
        "add  %[o], %[o], a7    \n\t"            \
4732
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4733
        : [a] "r" (va), [b] "r" (vb)                     \
4734
        : "a5", "a6", "a7"                               \
4735
    )
4736
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4737
 * Assumes first add will not overflow vh | vl
4738
 */
4739
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4740
    __asm__ __volatile__ (                               \
4741
        "mul  a5, %[a], %[b]    \n\t"            \
4742
        "mulhu  a6, %[a], %[b]    \n\t"            \
4743
        "add  %[l], %[l], a5    \n\t"            \
4744
        "sltu a7, %[l], a5    \n\t"            \
4745
        "add  %[h], %[h], a6    \n\t"            \
4746
        "add  %[h], %[h], a7    \n\t"            \
4747
        "add  %[l], %[l], a5    \n\t"            \
4748
        "sltu a7, %[l], a5    \n\t"            \
4749
        "add  %[h], %[h], a7    \n\t"            \
4750
        "sltu a7, %[h], a7    \n\t"            \
4751
        "add  %[o], %[o], a7    \n\t"            \
4752
        "add  %[h], %[h], a6    \n\t"            \
4753
        "sltu a7, %[h], a6    \n\t"            \
4754
        "add  %[o], %[o], a7    \n\t"            \
4755
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4756
        : [a] "r" (va), [b] "r" (vb)                     \
4757
        : "a5", "a6", "a7"                               \
4758
    )
4759
/* Square va and store double size result in: vh | vl */
4760
#define SP_ASM_SQR(vl, vh, va)                           \
4761
    __asm__ __volatile__ (                               \
4762
        "mul  %[l], %[a], %[a]  \n\t"            \
4763
        "mulhu  %[h], %[a], %[a]  \n\t"            \
4764
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4765
        : [a] "r" (va)                                   \
4766
        :                                                \
4767
    )
4768
/* Square va and add double size result into: vo | vh | vl */
4769
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4770
    __asm__ __volatile__ (                               \
4771
        "mul  a5, %[a], %[a]    \n\t"            \
4772
        "mulhu  a6, %[a], %[a]    \n\t"            \
4773
        "add  %[l], %[l], a5    \n\t"            \
4774
        "sltu a7, %[l], a5    \n\t"            \
4775
        "add  %[h], %[h], a7    \n\t"            \
4776
        "sltu a7, %[h], a7    \n\t"            \
4777
        "add  %[o], %[o], a7    \n\t"            \
4778
        "add  %[h], %[h], a6    \n\t"            \
4779
        "sltu a7, %[h], a6    \n\t"            \
4780
        "add  %[o], %[o], a7    \n\t"            \
4781
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4782
        : [a] "r" (va)                                   \
4783
        : "a5", "a6", "a7"                               \
4784
    )
4785
/* Square va and add double size result into: vh | vl */
4786
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4787
    __asm__ __volatile__ (                               \
4788
        "mul  a5, %[a], %[a]    \n\t"            \
4789
        "mulhu  a6, %[a], %[a]    \n\t"            \
4790
        "add  %[l], %[l], a5    \n\t"            \
4791
        "sltu a7, %[l], a5    \n\t"            \
4792
        "add  %[h], %[h], a6    \n\t"            \
4793
        "add  %[h], %[h], a7    \n\t"            \
4794
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4795
        : [a] "r" (va)                                   \
4796
        : "a5", "a6", "a7"                               \
4797
    )
4798
/* Add va into: vh | vl */
4799
#define SP_ASM_ADDC(vl, vh, va)                          \
4800
    __asm__ __volatile__ (                               \
4801
        "add  %[l], %[l], %[a]  \n\t"            \
4802
        "sltu a7, %[l], %[a]    \n\t"            \
4803
        "add  %[h], %[h], a7    \n\t"            \
4804
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4805
        : [a] "r" (va)                                   \
4806
        : "a7"                                           \
4807
    )
4808
/* Sub va from: vh | vl */
4809
#define SP_ASM_SUBB(vl, vh, va)                          \
4810
    __asm__ __volatile__ (                               \
4811
        "add  a7, %[l], zero    \n\t"            \
4812
        "sub  %[l], a7, %[a]    \n\t"            \
4813
        "sltu a7, a7, %[l]    \n\t"            \
4814
        "sub  %[h], %[h], a7    \n\t"            \
4815
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4816
        : [a] "r" (va)                                   \
4817
        : "a7"                                           \
4818
    )
4819
/* Add two times vc | vb | va into vo | vh | vl */
4820
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4821
    __asm__ __volatile__ (                               \
4822
        "add  %[l], %[l], %[a]  \n\t"            \
4823
        "sltu a7, %[l], %[a]    \n\t"            \
4824
        "add  %[h], %[h], a7    \n\t"            \
4825
        "sltu a7, %[h], a7    \n\t"            \
4826
        "add  %[o], %[o], a7    \n\t"            \
4827
        "add  %[h], %[h], %[b]  \n\t"            \
4828
        "sltu a7, %[h], %[b]    \n\t"            \
4829
        "add  %[o], %[o], %[c]  \n\t"            \
4830
        "add  %[o], %[o], a7    \n\t"            \
4831
        "add  %[l], %[l], %[a]  \n\t"            \
4832
        "sltu a7, %[l], %[a]    \n\t"            \
4833
        "add  %[h], %[h], a7    \n\t"            \
4834
        "sltu a7, %[h], a7    \n\t"            \
4835
        "add  %[o], %[o], a7    \n\t"            \
4836
        "add  %[h], %[h], %[b]  \n\t"            \
4837
        "sltu a7, %[h], %[b]    \n\t"            \
4838
        "add  %[o], %[o], %[c]  \n\t"            \
4839
        "add  %[o], %[o], a7    \n\t"            \
4840
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4841
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
4842
        : "a7"                                           \
4843
    )
4844
4845
#define SP_INT_ASM_AVAILABLE
4846
4847
    #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
4848
4849
    #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
4850
/*
4851
 * CPU: IBM s390x
4852
 */
4853
4854
/* Multiply va by vb and store double size result in: vh | vl */
4855
#define SP_ASM_MUL(vl, vh, va, vb)                       \
4856
    __asm__ __volatile__ (                               \
4857
        "lgr  %%r1, %[a]    \n\t"            \
4858
        "mlgr %%r0, %[b]    \n\t"            \
4859
        "lgr  %[l], %%r1    \n\t"            \
4860
        "lgr  %[h], %%r0    \n\t"            \
4861
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4862
        : [a] "r" (va), [b] "r" (vb)                     \
4863
        : "r0", "r1"                                     \
4864
    )
4865
/* Multiply va by vb and store double size result in: vo | vh | vl */
4866
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
4867
    __asm__ __volatile__ (                               \
4868
        "lgr  %%r1, %[a]    \n\t"            \
4869
        "mlgr %%r0, %[b]    \n\t"            \
4870
        "lghi %[o], 0     \n\t"            \
4871
        "lgr  %[l], %%r1    \n\t"            \
4872
        "lgr  %[h], %%r0    \n\t"            \
4873
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
4874
        : [a] "r" (va), [b] "r" (vb)                     \
4875
        : "r0", "r1"                                     \
4876
    )
4877
/* Multiply va by vb and add double size result into: vo | vh | vl */
4878
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
4879
    __asm__ __volatile__ (                               \
4880
        "lghi %%r10, 0  \n\t"                    \
4881
        "lgr  %%r1, %[a]    \n\t"            \
4882
        "mlgr %%r0, %[b]    \n\t"            \
4883
        "algr %[l], %%r1  \n\t"                    \
4884
        "alcgr  %[h], %%r0  \n\t"                    \
4885
        "alcgr  %[o], %%r10 \n\t"                    \
4886
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4887
        : [a] "r" (va), [b] "r" (vb)                     \
4888
        : "r0", "r1", "r10", "cc"                        \
4889
    )
4890
/* Multiply va by vb and add double size result into: vh | vl */
4891
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
4892
    __asm__ __volatile__ (                               \
4893
        "lgr  %%r1, %[a]    \n\t"            \
4894
        "mlgr %%r0, %[b]    \n\t"            \
4895
        "algr %[l], %%r1  \n\t"                    \
4896
        "alcgr  %[h], %%r0  \n\t"                    \
4897
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4898
        : [a] "r" (va), [b] "r" (vb)                     \
4899
        : "r0", "r1", "cc"                               \
4900
    )
4901
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
4902
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
4903
    __asm__ __volatile__ (                               \
4904
        "lghi %%r10, 0  \n\t"                    \
4905
        "lgr  %%r1, %[a]    \n\t"            \
4906
        "mlgr %%r0, %[b]    \n\t"            \
4907
        "algr %[l], %%r1  \n\t"                    \
4908
        "alcgr  %[h], %%r0  \n\t"                    \
4909
        "alcgr  %[o], %%r10 \n\t"                    \
4910
        "algr %[l], %%r1  \n\t"                    \
4911
        "alcgr  %[h], %%r0  \n\t"                    \
4912
        "alcgr  %[o], %%r10 \n\t"                    \
4913
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4914
        : [a] "r" (va), [b] "r" (vb)                     \
4915
        : "r0", "r1", "r10", "cc"                        \
4916
    )
4917
/* Multiply va by vb and add double size result twice into: vo | vh | vl
4918
 * Assumes first add will not overflow vh | vl
4919
 */
4920
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
4921
    __asm__ __volatile__ (                               \
4922
        "lghi %%r10, 0  \n\t"                    \
4923
        "lgr  %%r1, %[a]    \n\t"            \
4924
        "mlgr %%r0, %[b]    \n\t"            \
4925
        "algr %[l], %%r1  \n\t"                    \
4926
        "alcgr  %[h], %%r0  \n\t"                    \
4927
        "algr %[l], %%r1  \n\t"                    \
4928
        "alcgr  %[h], %%r0  \n\t"                    \
4929
        "alcgr  %[o], %%r10 \n\t"                    \
4930
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4931
        : [a] "r" (va), [b] "r" (vb)                     \
4932
        : "r0", "r1", "r10", "cc"                        \
4933
    )
4934
/* Square va and store double size result in: vh | vl */
4935
#define SP_ASM_SQR(vl, vh, va)                           \
4936
    __asm__ __volatile__ (                               \
4937
        "lgr  %%r1, %[a]    \n\t"            \
4938
        "mlgr %%r0, %%r1    \n\t"            \
4939
        "lgr  %[l], %%r1    \n\t"            \
4940
        "lgr  %[h], %%r0    \n\t"            \
4941
        : [h] "+r" (vh), [l] "+r" (vl)                   \
4942
        : [a] "r" (va)                                   \
4943
        : "r0", "r1"                                     \
4944
    )
4945
/* Square va and add double size result into: vo | vh | vl */
4946
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
4947
    __asm__ __volatile__ (                               \
4948
        "lghi %%r10, 0  \n\t"                    \
4949
        "lgr  %%r1, %[a]    \n\t"            \
4950
        "mlgr %%r0, %%r1    \n\t"            \
4951
        "algr %[l], %%r1  \n\t"                    \
4952
        "alcgr  %[h], %%r0  \n\t"                    \
4953
        "alcgr  %[o], %%r10 \n\t"                    \
4954
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4955
        : [a] "r" (va)                                   \
4956
        : "r0", "r1", "r10", "cc"                        \
4957
    )
4958
/* Square va and add double size result into: vh | vl */
4959
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
4960
    __asm__ __volatile__ (                               \
4961
        "lgr  %%r1, %[a]    \n\t"            \
4962
        "mlgr %%r0, %%r1    \n\t"            \
4963
        "algr %[l], %%r1  \n\t"                    \
4964
        "alcgr  %[h], %%r0  \n\t"                    \
4965
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4966
        : [a] "r" (va)                                   \
4967
        : "r0", "r1", "cc"                               \
4968
    )
4969
/* Add va into: vh | vl */
4970
#define SP_ASM_ADDC(vl, vh, va)                          \
4971
    __asm__ __volatile__ (                               \
4972
        "lghi %%r10, 0  \n\t"                    \
4973
        "algr %[l], %[a]  \n\t"                    \
4974
        "alcgr  %[h], %%r10 \n\t"                    \
4975
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4976
        : [a] "r" (va)                                   \
4977
        : "r10", "cc"                                    \
4978
    )
4979
/* Sub va from: vh | vl */
4980
#define SP_ASM_SUBB(vl, vh, va)                          \
4981
    __asm__ __volatile__ (                               \
4982
        "lghi %%r10, 0  \n\t"                    \
4983
        "slgr %[l], %[a]  \n\t"                    \
4984
        "slbgr  %[h], %%r10 \n\t"                    \
4985
        : [l] "+r" (vl), [h] "+r" (vh)                   \
4986
        : [a] "r" (va)                                   \
4987
        : "r10", "cc"                                    \
4988
    )
4989
/* Add two times vc | vb | va into vo | vh | vl */
4990
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
4991
    __asm__ __volatile__ (                               \
4992
        "algr %[l], %[a]  \n\t"                    \
4993
        "alcgr  %[h], %[b]  \n\t"                    \
4994
        "alcgr  %[o], %[c]  \n\t"                    \
4995
        "algr %[l], %[a]  \n\t"                    \
4996
        "alcgr  %[h], %[b]  \n\t"                    \
4997
        "alcgr  %[o], %[c]  \n\t"                    \
4998
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
4999
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
5000
        : "cc"                                           \
5001
    )
5002
5003
#define SP_INT_ASM_AVAILABLE
5004
5005
    #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
5006
5007
#ifdef SP_INT_ASM_AVAILABLE
5008
    #ifndef SP_INT_NO_ASM
5009
        #define SQR_MUL_ASM
5010
    #endif
5011
    #ifndef SP_ASM_ADDC_REG
5012
        #define SP_ASM_ADDC_REG  SP_ASM_ADDC
5013
    #endif /* SP_ASM_ADDC_REG */
5014
    #ifndef SP_ASM_SUBB_REG
5015
        #define SP_ASM_SUBB_REG  SP_ASM_SUBB
5016
    #endif /* SP_ASM_SUBB_REG */
5017
#endif /* SP_INT_ASM_AVAILABLE */
5018
5019
#endif /* !WOLFSSL_NO_ASM */
5020
5021
5022
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
5023
    !defined(NO_DSA) || !defined(NO_DH) || \
5024
    (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
5025
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
5026
#ifndef WC_NO_CACHE_RESISTANT
5027
    /* Mask of address for constant time operations.
     *
     * Entry 0 has all bits clear and entry 1 has all bits set.
     */
    const size_t sp_off_on_addr[2] =
    {
        (size_t) 0,
        (size_t)-1
    };
5033
#endif
5034
#endif
5035
5036
5037
#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
5038
5039
#ifdef __cplusplus
5040
extern "C" {
5041
#endif
5042
5043
/* Modular exponentiation implementations using Single Precision. */
5044
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
5045
    sp_int* res);
5046
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
5047
    sp_int* res);
5048
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
5049
    sp_int* res);
5050
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
5051
    sp_int* res);
5052
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
5053
    sp_int* res);
5054
5055
#ifdef __cplusplus
5056
} /* extern "C" */
5057
#endif
5058
5059
#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
5060
5061
5062
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5063
    defined(OPENSSL_ALL)
5064
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct);
5065
#endif
5066
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
5067
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
5068
    defined(OPENSSL_ALL)
5069
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
5070
#endif
5071
5072
5073
/* Set the multi-precision number to zero.
5074
 *
5075
 * Assumes a is not NULL.
5076
 *
5077
 * @param  [out]  a  SP integer to set to zero.
5078
 */
5079
static void _sp_zero(sp_int* a)
5080
0
{
5081
0
    sp_int_minimal* am = (sp_int_minimal *)a;
5082
5083
0
    am->used = 0;
5084
0
    am->dp[0] = 0;
5085
#ifdef WOLFSSL_SP_INT_NEGATIVE
5086
    am->sign = MP_ZPOS;
5087
#endif
5088
0
}
5089
5090
5091
/* Initialize the multi-precision number to be zero with a given max size.
5092
 *
5093
 * @param  [out]  a     SP integer.
5094
 * @param  [in]   size  Number of words to say are available.
5095
 */
5096
static void _sp_init_size(sp_int* a, unsigned int size)
5097
0
{
5098
0
    volatile sp_int_minimal* am = (sp_int_minimal *)a;
5099
5100
#ifdef HAVE_WOLF_BIGINT
5101
    wc_bigint_init((struct WC_BIGINT*)&am->raw);
5102
#endif
5103
0
    _sp_zero((sp_int*)am);
5104
5105
0
    am->size = (sp_size_t)size;
5106
0
}
5107
5108
/* Initialize the multi-precision number to be zero with a given max size.
5109
 *
5110
 * @param  [out]  a     SP integer.
5111
 * @param  [in]   size  Number of words to say are available.
5112
 *
5113
 * @return  MP_OKAY on success.
5114
 * @return  MP_VAL when a is NULL.
5115
 */
5116
int sp_init_size(sp_int* a, unsigned int size)
5117
0
{
5118
0
    int err = MP_OKAY;
5119
5120
    /* Validate parameters. Don't use size more than max compiled. */
5121
0
    if ((a == NULL) || ((size == 0) || (size > SP_INT_DIGITS))) {
5122
0
        err = MP_VAL;
5123
0
    }
5124
5125
0
    if (err == MP_OKAY) {
5126
0
        _sp_init_size(a, size);
5127
0
    }
5128
5129
0
    return err;
5130
0
}
5131
5132
/* Initialize the multi-precision number to be zero.
5133
 *
5134
 * @param  [out]  a  SP integer.
5135
 *
5136
 * @return  MP_OKAY on success.
5137
 * @return  MP_VAL when a is NULL.
5138
 */
5139
int sp_init(sp_int* a)
5140
0
{
5141
0
    int err = MP_OKAY;
5142
5143
    /* Validate parameter. */
5144
0
    if (a == NULL) {
5145
0
        err = MP_VAL;
5146
0
    }
5147
0
    else {
5148
        /* Assume complete sp_int with SP_INT_DIGITS digits. */
5149
0
        _sp_init_size(a, SP_INT_DIGITS);
5150
0
    }
5151
5152
0
    return err;
5153
0
}
5154
5155
#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
5156
/* Initialize up to six multi-precision numbers to be zero.
5157
 *
5158
 * @param  [out]  n1  SP integer.
5159
 * @param  [out]  n2  SP integer.
5160
 * @param  [out]  n3  SP integer.
5161
 * @param  [out]  n4  SP integer.
5162
 * @param  [out]  n5  SP integer.
5163
 * @param  [out]  n6  SP integer.
5164
 *
5165
 * @return  MP_OKAY on success.
5166
 */
5167
int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
5168
    sp_int* n6)
5169
0
{
5170
    /* Initialize only those pointers that are valid. */
5171
0
    if (n1 != NULL) {
5172
0
        _sp_init_size(n1, SP_INT_DIGITS);
5173
0
    }
5174
0
    if (n2 != NULL) {
5175
0
        _sp_init_size(n2, SP_INT_DIGITS);
5176
0
    }
5177
0
    if (n3 != NULL) {
5178
0
        _sp_init_size(n3, SP_INT_DIGITS);
5179
0
    }
5180
0
    if (n4 != NULL) {
5181
0
        _sp_init_size(n4, SP_INT_DIGITS);
5182
0
    }
5183
0
    if (n5 != NULL) {
5184
0
        _sp_init_size(n5, SP_INT_DIGITS);
5185
0
    }
5186
0
    if (n6 != NULL) {
5187
0
        _sp_init_size(n6, SP_INT_DIGITS);
5188
0
    }
5189
5190
0
    return MP_OKAY;
5191
0
}
5192
#endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
5193
5194
/* Free the memory allocated in the multi-precision number.
5195
 *
5196
 * @param  [in]  a  SP integer.
5197
 */
5198
void sp_free(sp_int* a)
5199
0
{
5200
0
    if (a != NULL) {
5201
    #ifdef HAVE_WOLF_BIGINT
5202
        wc_bigint_free(&a->raw);
5203
    #endif
5204
0
    }
5205
0
}
5206
5207
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5208
    !defined(NO_DH) || defined(HAVE_ECC)
5209
/* Grow multi-precision number to be able to hold l digits.
5210
 * This function does nothing as the number of digits is fixed.
5211
 *
5212
 * @param  [in,out]  a  SP integer.
5213
 * @param  [in]      l  Number of digits to grow to.
5214
 *
5215
 * @return  MP_OKAY on success
5216
 * @return  MP_MEM if the number of digits requested is more than available.
5217
 */
5218
int sp_grow(sp_int* a, int l)
5219
0
{
5220
0
    int err = MP_OKAY;
5221
5222
    /* Validate parameter. */
5223
0
    if ((a == NULL) || (l < 0)) {
5224
0
        err = MP_VAL;
5225
0
    }
5226
    /* Ensure enough words allocated for grow. */
5227
0
    if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
5228
0
        err = MP_MEM;
5229
0
    }
5230
0
    if (err == MP_OKAY) {
5231
0
        unsigned int i;
5232
5233
        /* Put in zeros up to the new length. */
5234
0
        for (i = a->used; i < (unsigned int)l; i++) {
5235
0
            a->dp[i] = 0;
5236
0
        }
5237
0
    }
5238
5239
0
    return err;
5240
0
}
5241
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
5242
5243
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5244
    defined(HAVE_ECC) || defined(WOLFSSL_PUBLIC_MP)
5245
/* Set the multi-precision number to zero.
5246
 *
5247
 * @param  [out]  a  SP integer to set to zero.
5248
 */
5249
void sp_zero(sp_int* a)
5250
0
{
5251
    /* Make an sp_int with valid pointer zero. */
5252
0
    if (a != NULL) {
5253
0
        _sp_zero(a);
5254
0
    }
5255
0
}
5256
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC ||
        * WOLFSSL_PUBLIC_MP */
5257
5258
/* Clear the data from the multi-precision number, set to zero and free.
5259
 *
5260
 * @param  [out]  a  SP integer.
5261
 */
5262
void sp_clear(sp_int* a)
5263
0
{
5264
#ifdef HAVE_FIPS
5265
    sp_forcezero(a);
5266
#else
5267
    /* Clear when valid pointer passed in. */
5268
0
    if (a != NULL) {
5269
0
        unsigned int i;
5270
5271
        /* Only clear the digits being used. */
5272
0
        for (i = 0; i < a->used; i++) {
5273
0
            a->dp[i] = 0;
5274
0
        }
5275
        /* Set back to zero and free. */
5276
0
        _sp_zero(a);
5277
0
        sp_free(a);
5278
0
    }
5279
0
#endif
5280
0
}
5281
5282
#if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
5283
    !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
5284
/* Ensure the data in the multi-precision number is zeroed.
5285
 *
5286
 * Use when security sensitive data needs to be wiped.
5287
 *
5288
 * @param  [in]  a  SP integer.
5289
 */
5290
void sp_forcezero(sp_int* a)
5291
0
{
5292
    /* Zeroize when a vald pointer passed in. */
5293
0
    if (a != NULL) {
5294
        /* Ensure all data zeroized - data not zeroed when used decreases. */
5295
0
        ForceZero(a->dp, a->size * (word32)SP_WORD_SIZEOF);
5296
        /* Set back to zero. */
5297
    #ifdef HAVE_WOLF_BIGINT
5298
        /* Zeroize the raw data as well. */
5299
        wc_bigint_zero(&a->raw);
5300
    #endif
5301
        /* Make value zero and free. */
5302
0
        _sp_zero(a);
5303
0
        sp_free(a);
5304
0
    }
5305
0
}
5306
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA || WOLFSSL_SP_PRIME_GEN */
5307
5308
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5309
    !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
5310
/* Copy value of multi-precision number a into r.
5311
 *
5312
 * @param  [in]   a  SP integer - source.
5313
 * @param  [out]  r  SP integer - destination.
5314
 */
5315
static void _sp_copy(const sp_int* a, sp_int* r)
5316
0
{
5317
    /* Copy words across. */
5318
0
    if (a->used == 0) {
5319
0
        r->dp[0] = 0;
5320
0
    }
5321
0
    else {
5322
0
        XMEMCPY(r->dp, a->dp, a->used * (word32)SP_WORD_SIZEOF);
5323
0
    }
5324
    /* Set number of used words in result. */
5325
0
    r->used = a->used;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5326
#ifdef WOLFSSL_SP_INT_NEGATIVE
5327
    /* Set sign of result. */
5328
    r->sign = a->sign;/* // NOLINT(clang-analyzer-core.uninitialized.Assign) */
5329
#endif
5330
0
}
5331
5332
/* Copy value of multi-precision number a into r.
5333
 *
5334
 * @param  [in]   a  SP integer - source.
5335
 * @param  [out]  r  SP integer - destination.
5336
 *
5337
 * @return  MP_OKAY on success.
5338
 */
5339
int sp_copy(const sp_int* a, sp_int* r)
5340
0
{
5341
0
    int err = MP_OKAY;
5342
5343
    /* Validate parameters. */
5344
0
    if ((a == NULL) || (r == NULL)) {
5345
0
        err = MP_VAL;
5346
0
    }
5347
    /* Only copy if different pointers. */
5348
0
    if (a != r) {
5349
        /* Validated space in result. */
5350
0
        if ((err == MP_OKAY) && (a->used > r->size)) {
5351
0
            err = MP_VAL;
5352
0
        }
5353
0
        if (err == MP_OKAY) {
5354
0
            _sp_copy(a, r);
5355
0
        }
5356
0
    }
5357
5358
0
    return err;
5359
0
}
5360
#endif
5361
5362
#if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5363
      !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
5364
     defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
5365
5366
/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
5367
 *
5368
 * Constant time implementation.
5369
 * When y is 0, r1 = a2 and r2 = a1.
5370
 * When y is 1, r1 = a1 and r2 = a2.
5371
 *
 * Digits 0..used-1 of both sources are read and the same digits of both
 * results are written, whatever the used counts of the sources are.
 *
 * NOTE(review): wc_off_on_addr is defined outside this view - presumably
 * entry 0 is an all-zero mask and entry 1 an all-ones mask; confirm.
 *
5372
 * @param [in]  a1    First number to copy.
5373
 * @param [in]  a2    Second number to copy.
5374
 * @param [out] r1    First result number to copy into.
5375
 * @param [out] r2    Second result number to copy into.
5376
 * @param [in]  y     Indicates which number goes into which result number.
 *                    Used as a table index along with y^1.
5377
 * @param [in]  used  Number of digits to copy.
5378
 */
5379
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
5380
    sp_int* r2, int y, unsigned int used)
5381
{
5382
    unsigned int i;
5383
5384
    /* Copy data - constant time. */
    /* Each result digit is the sum of both source digits after masking - the
     * masks are selected by y and y^1 so no branch depends on y. */
5385
    for (i = 0; i < used; i++) {
5386
        r1->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ])) +
5387
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1]));
5388
        r2->dp[i] = (a1->dp[i] & ((sp_int_digit)wc_off_on_addr[y^1])) +
5389
                    (a2->dp[i] & ((sp_int_digit)wc_off_on_addr[y  ]));
5390
    }
5391
    /* Copy used. */
    /* Same masked selection applied to the count of digits in use. */
5392
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
5393
               (a2->used & ((int)wc_off_on_addr[y^1]));
5394
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
5395
               (a2->used & ((int)wc_off_on_addr[y  ]));
5396
#ifdef WOLFSSL_SP_INT_NEGATIVE
5397
    /* Copy sign. */
    /* Same masked selection applied to the sign. */
5398
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
5399
               (a2->sign & ((int)wc_off_on_addr[y^1]));
5400
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
5401
               (a2->sign & ((int)wc_off_on_addr[y  ]));
5402
#endif
5403
}
5404
5405
#endif
5406
5407
#if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
5408
/* Initializes r and copies in value from a.
5409
 *
5410
 * @param  [out]  r  SP integer - destination.
5411
 * @param  [in]   a  SP integer - source.
5412
 *
5413
 * @return  MP_OKAY on success.
5414
 * @return  MP_VAL when a or r is NULL.
5415
 */
5416
int sp_init_copy(sp_int* r, const sp_int* a)
5417
0
{
5418
0
    int err;
5419
5420
    /* Initialize r and copy value in a into it. */
5421
0
    err = sp_init(r);
5422
0
    if (err == MP_OKAY) {
5423
0
        err = sp_copy(a, r);
5424
0
    }
5425
5426
0
    return err;
5427
0
}
5428
#endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
5429
5430
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5431
    !defined(NO_DH) || !defined(NO_DSA)
5432
/* Exchange the values in a and b.
5433
 *
5434
 * Avoid using this API as three copy operations are performed.
5435
 *
 * A temporary SP integer with room for the used digits of a is created for
 * the exchange. The allocated size field of each number is kept as it was.
 *
5436
 * @param  [in,out]  a  SP integer to swap.
5437
 * @param  [in,out]  b  SP integer to swap.
5438
 *
5439
 * @return  MP_OKAY on success.
5440
 * @return  MP_VAL when a or b is NULL, or when either number does not have
 *          space for the used digits of the other.
5441
 * @return  MP_MEM when dynamic memory allocation fails.
5442
 */
5443
int sp_exch(sp_int* a, sp_int* b)
5444
0
{
5445
0
    int err = MP_OKAY;
5446
5447
    /* Validate parameters. */
5448
0
    if ((a == NULL) || (b == NULL)) {
5449
0
        err = MP_VAL;
5450
0
    }
5451
    /* Check space for a in b and b in a. */
5452
0
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
5453
0
        err = MP_VAL;
5454
0
    }
5455
5456
0
    if (err == MP_OKAY) {
5457
        /* Declare temporary for swapping. */
5458
0
        DECL_SP_INT(t, a->used);
5459
5460
        /* Create temporary for swapping. */
        /* NOTE(review): ALLOC_SP_INT is defined outside this view - it
         * presumably sets err on failure; confirm. */
5461
0
        ALLOC_SP_INT(t, a->used, err, NULL);
5462
0
        if (err == MP_OKAY) {
5463
            /* Cache allocated size of a and b. */
            /* The copies below start at the beginning of each object and so
             * may overwrite the size field - it is restored afterwards. */
5464
0
            sp_size_t asize = a->size;
5465
0
            sp_size_t bsize = b->size;
5466
            /* Copy all of SP int: t <- a, a <- b, b <- t. */
            /* NOTE(review): MP_INT_SIZEOF(n) presumably is the number of
             * bytes in an SP integer holding n digits; confirm. */
5467
0
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
5468
0
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
5469
0
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
5470
            /* Put back size of a and b. */
5471
0
            a->size = asize;
5472
0
            b->size = bsize;
5473
0
        }
5474
5475
        /* Temporary is disposed of whether or not the allocation worked. */
0
        FREE_SP_INT(t, NULL);
5476
0
    }
5477
5478
0
    return err;
5479
0
}
5480
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
5481
        * !NO_DSA */
5482
5483
#if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
5484
    !defined(WC_NO_CACHE_RESISTANT)
5485
/* Conditional swap of SP int values in constant time.
5486
 *
5487
 * @param [in]  a     First SP int to conditionally swap.
5488
 * @param [in]  b     Second SP int to conditionally swap.
5489
 * @param [in]  cnt   Count of words to copy.
5490
 * @param [in]  swap  When value is 1 then swap.
5491
 * @param [in]  t     Temporary SP int to use in swap.
5492
 * @return  MP_OKAY on success.
5493
 * @return  MP_MEM when dynamic memory allocation fails.
5494
 */
5495
int sp_cond_swap_ct_ex(sp_int* a, sp_int* b, int cnt, int swap, sp_int* t)
5496
0
{
5497
0
    unsigned int i;
5498
0
    sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
5499
5500
    /* XOR other fields in sp_int into temp - mask set when swapping. */
5501
0
    t->used = (a->used ^ b->used) & (sp_size_t)mask;
5502
#ifdef WOLFSSL_SP_INT_NEGATIVE
5503
    t->sign = (a->sign ^ b->sign) & (sp_uint8)mask;
5504
#endif
5505
5506
    /* XOR requested words into temp - mask set when swapping. */
5507
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5508
0
        t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
5509
0
    }
5510
5511
    /* XOR temporary - when mask set then result will be b. */
5512
0
    a->used ^= t->used;
5513
#ifdef WOLFSSL_SP_INT_NEGATIVE
5514
    a->sign ^= t->sign;
5515
#endif
5516
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5517
0
        a->dp[i] ^= t->dp[i];
5518
0
    }
5519
5520
    /* XOR temporary - when mask set then result will be a. */
5521
0
    b->used ^= t->used;
5522
#ifdef WOLFSSL_SP_INT_NEGATIVE
5523
    b->sign ^= b->sign;
5524
#endif
5525
0
    for (i = 0; i < (unsigned int)cnt; i++) {
5526
0
        b->dp[i] ^= t->dp[i];
5527
0
    }
5528
5529
0
    return MP_OKAY;
5530
0
}
5531
5532
/* Conditional swap of SP int values in constant time.
5533
 *
 * Creates a temporary SP int with cnt digits and passes it, with the other
 * parameters, to sp_cond_swap_ct_ex() which does the work.
 *
5534
 * @param [in,out]  a     First SP int to conditionally swap.
5535
 * @param [in,out]  b     Second SP int to conditionally swap.
5536
 * @param [in]  cnt   Count of words to copy.
5537
 * @param [in]  swap  When value is 1 then swap.
5538
 * @return  MP_OKAY on success.
5539
 * @return  MP_MEM when dynamic memory allocation fails.
5540
 */
5541
int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
5542
0
{
5543
0
    int err = MP_OKAY;
5544
0
    DECL_SP_INT(t, (size_t)cnt);
5545
5546
    /* Allocate temporary to hold masked xor of a and b. */
    /* NOTE(review): ALLOC_SP_INT is defined outside this view - it presumably
     * sets err on failure; confirm. */
5547
0
    ALLOC_SP_INT(t, cnt, err, NULL);
5548
5549
0
    if (err == MP_OKAY) {
5550
0
        err = sp_cond_swap_ct_ex(a, b, cnt, swap, t);
        /* Temporary is only disposed of when it was created. */
5551
0
        FREE_SP_INT(t, NULL);
5552
0
    }
5553
5554
0
    return err;
5555
0
}
5556
#endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
5557
5558
#ifdef WOLFSSL_SP_INT_NEGATIVE
5559
/* Calculate the absolute value of the multi-precision number.
5560
 *
5561
 * @param  [in]   a  SP integer to calculate absolute value of.
5562
 * @param  [out]  r  SP integer to hold result.
5563
 *
5564
 * @return  MP_OKAY on success.
5565
 * @return  MP_VAL when a or r is NULL.
5566
 */
5567
int sp_abs(const sp_int* a, sp_int* r)
5568
{
5569
    int err;
5570
5571
    /* Copy a into r - copy fails when r is NULL. */
5572
    err = sp_copy(a, r);
5573
    if (err == MP_OKAY) {
5574
        r->sign = MP_ZPOS;
5575
    }
5576
5577
    return err;
5578
}
5579
#endif /* WOLFSSL_SP_INT_NEGATIVE */
5580
5581
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
5582
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
5583
/* Compare absolute value of two multi-precision numbers.
5584
 *
5585
 * @param [in] a  SP integer.
5586
 * @param [in] b  SP integer.
5587
 *
5588
 * @return  MP_GT when a is greater than b.
5589
 * @return  MP_LT when a is less than b.
5590
 * @return  MP_EQ when a is equals b.
5591
 */
5592
static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
5593
0
{
5594
0
    int ret = MP_EQ;
5595
5596
    /* Check number of words first. */
5597
0
    if (a->used > b->used) {
5598
0
        ret = MP_GT;
5599
0
    }
5600
0
    else if (a->used < b->used) {
5601
0
        ret = MP_LT;
5602
0
    }
5603
0
    else {
5604
0
        int i;
5605
5606
        /* Starting from most significant word, compare words.
5607
         * Stop when different and set comparison return.
5608
         */
5609
0
        for (i = (int)(a->used - 1); i >= 0; i--) {
5610
0
            if (a->dp[i] > b->dp[i]) {
5611
0
                ret = MP_GT;
5612
0
                break;
5613
0
            }
5614
0
            else if (a->dp[i] < b->dp[i]) {
5615
0
                ret = MP_LT;
5616
0
                break;
5617
0
            }
5618
0
        }
5619
        /* If we made to the end then ret is MP_EQ from initialization. */
5620
0
    }
5621
5622
0
    return ret;
5623
0
}
5624
#endif
5625
5626
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
5627
/* Compare absolute value of two multi-precision numbers.
5628
 *
5629
 * Pointers are compared such that NULL is less than not NULL.
5630
 *
5631
 * @param [in] a  SP integer.
5632
 * @param [in] b  SP integer.
5633
 *
5634
 * @return  MP_GT when a is greater than b.
5635
 * @return  MP_LT when a is less than b.
5636
 * @return  MP_EQ when a equals b.
5637
 */
5638
int sp_cmp_mag(const sp_int* a, const sp_int* b)
5639
0
{
5640
0
    int ret;
5641
5642
    /* Do pointer checks first. Both NULL returns equal. */
5643
0
    if (a == b) {
5644
0
        ret = MP_EQ;
5645
0
    }
5646
    /* Nothing is smaller than something. */
5647
0
    else if (a == NULL) {
5648
0
        ret = MP_LT;
5649
0
    }
5650
    /* Something is larger than nothing. */
5651
0
    else if (b == NULL) {
5652
0
        ret = MP_GT;
5653
0
    }
5654
0
    else
5655
0
    {
5656
        /* Compare values - a and b are not NULL. */
5657
0
        ret = _sp_cmp_abs(a, b);
5658
0
    }
5659
5660
0
    return ret;
5661
0
}
5662
#endif
5663
5664
#if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
5665
    defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
5666
    (!defined(NO_RSA) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
5667
     defined(WOLFSSL_KEY_GEN)))
5668
/* Compare two multi-precision numbers.
5669
 *
5670
 * Assumes a and b are not NULL.
5671
 *
5672
 * @param [in] a  SP integer.
5673
 * @param [in] b  SP integer.
5674
 *
5675
 * @return  MP_GT when a is greater than b.
5676
 * @return  MP_LT when a is less than b.
5677
 * @return  MP_EQ when a is equals b.
5678
 */
5679
static int _sp_cmp(const sp_int* a, const sp_int* b)
5680
0
{
5681
0
    int ret;
5682
5683
#ifdef WOLFSSL_SP_INT_NEGATIVE
5684
    /* Check sign first. */
5685
    if (a->sign > b->sign) {
5686
        ret = MP_LT;
5687
    }
5688
    else if (a->sign < b->sign) {
5689
        ret = MP_GT;
5690
    }
5691
    else /* (a->sign == b->sign) */ {
5692
#endif
5693
        /* Compare values. */
5694
0
        ret = _sp_cmp_abs(a, b);
5695
#ifdef WOLFSSL_SP_INT_NEGATIVE
5696
        if (a->sign == MP_NEG) {
5697
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
5698
             * Swapping MP_GT and MP_LT results.
5699
             */
5700
            ret = -ret;
5701
        }
5702
    }
5703
#endif
5704
5705
0
    return ret;
5706
0
}
5707
#endif
5708
5709
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5710
    !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
5711
    defined(WOLFSSL_SP_MATH_ALL)
5712
/* Compare two multi-precision numbers.
5713
 *
5714
 * Pointers are compared such that NULL is less than not NULL.
5715
 *
5716
 * @param [in] a  SP integer.
5717
 * @param [in] b  SP integer.
5718
 *
5719
 * @return  MP_GT when a is greater than b.
5720
 * @return  MP_LT when a is less than b.
5721
 * @return  MP_EQ when a is equals b.
5722
 */
5723
int sp_cmp(const sp_int* a, const sp_int* b)
5724
0
{
5725
0
    int ret;
5726
5727
    /* Check pointers first. Both NULL returns equal. */
5728
0
    if (a == b) {
5729
0
        ret = MP_EQ;
5730
0
    }
5731
    /* Nothing is smaller than something. */
5732
0
    else if (a == NULL) {
5733
0
        ret = MP_LT;
5734
0
    }
5735
    /* Something is larger than nothing. */
5736
0
    else if (b == NULL) {
5737
0
        ret = MP_GT;
5738
0
    }
5739
0
    else
5740
0
    {
5741
        /* Compare values - a and b are not NULL. */
5742
0
        ret = _sp_cmp(a, b);
5743
0
    }
5744
5745
0
    return ret;
5746
0
}
5747
#endif
5748
5749
#if defined(HAVE_ECC) && !defined(WC_NO_RNG) && \
5750
    defined(WOLFSSL_ECC_GEN_REJECT_SAMPLING)
5751
/* Compare two multi-precision numbers in constant time.
5752
 *
5753
 * Assumes a and b are not NULL.
5754
 * Assumes a and b are positive.
5755
 *
 * All n digits of both numbers are read, from most significant to least.
 * Digits at or above a number's used count are treated as zero.
 *
5756
 * @param [in] a  SP integer.
5757
 * @param [in] b  SP integer.
5758
 * @param [in] n  Number of digits to compare.
5759
 *
5760
 * @return  MP_GT when a is greater than b.
5761
 * @return  MP_LT when a is less than b.
5762
 * @return  MP_EQ when a equals b.
5763
 */
5764
static int _sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5765
{
5766
    int ret = MP_EQ;
5767
    int i;
    /* All bits set until a difference is found. */
5768
    int mask = -1;
5769
5770
    for (i = n - 1; i >= 0; i--) {
        /* Digit is masked to zero when the index is not below used. */
5771
        sp_int_digit ad = a->dp[i] & ((sp_int_digit)0 - (i < (int)a->used));
5772
        sp_int_digit bd = b->dp[i] & ((sp_int_digit)0 - (i < (int)b->used));
5773
5774
        /* OR in MP_LT when a's digit is smaller and nothing decided yet. */
        ret |= mask & ((0 - (ad < bd)) & MP_LT);
5775
        /* Mask becomes zero once ret is no longer MP_EQ. */
        mask &= 0 - (ret == MP_EQ);
5776
        /* OR in MP_GT when a's digit is larger and nothing decided yet. */
        ret |= mask & ((0 - (ad > bd)) & MP_GT);
5777
        mask &= 0 - (ret == MP_EQ);
5778
    }
5779
5780
    return ret;
5781
}
5782
5783
/* Compare two multi-precision numbers in constant time.
5784
 *
5785
 * Pointers are compared such that NULL is less than not NULL.
5786
 * Assumes a and b are positive.
5787
 * Assumes a and b have n digits set at sometime.
5788
 *
5789
 * @param [in] a  SP integer.
5790
 * @param [in] b  SP integer.
5791
 * @param [in] n  Number of digits to compare.
5792
 *
5793
 * @return  MP_GT when a is greater than b.
5794
 * @return  MP_LT when a is less than b.
5795
 * @return  MP_EQ when a is equals b.
5796
 */
5797
int sp_cmp_ct(const sp_int* a, const sp_int* b, unsigned int n)
5798
{
5799
    int ret;
5800
5801
    /* Check pointers first. Both NULL returns equal. */
5802
    if (a == b) {
5803
        ret = MP_EQ;
5804
    }
5805
    /* Nothing is smaller than something. */
5806
    else if (a == NULL) {
5807
        ret = MP_LT;
5808
    }
5809
    /* Something is larger than nothing. */
5810
    else if (b == NULL) {
5811
        ret = MP_GT;
5812
    }
5813
    else
5814
    {
5815
        /* Compare values - a and b are not NULL. */
5816
        ret = _sp_cmp_ct(a, b, n);
5817
    }
5818
5819
    return ret;
5820
}
5821
#endif /* HAVE_ECC && !WC_NO_RNG && WOLFSSL_ECC_GEN_REJECT_SAMPLING */
5822
5823
/*************************
5824
 * Bit check/set functions
5825
 *************************/
5826
5827
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
5828
    ((defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_SM2)) && \
5829
     defined(HAVE_ECC)) || defined(OPENSSL_EXTRA) || defined(WOLFSSL_PUBLIC_MP)
5830
/* Check if a bit is set
5831
 *
5832
 * When a is NULL, result is 0.
5833
 *
5834
 * @param  [in]  a  SP integer.
5835
 * @param  [in]  b  Bit position to check.
5836
 *
5837
 * @return  0 when bit is not set.
5838
 * @return  1 when bit is set.
5839
 */
5840
int sp_is_bit_set(const sp_int* a, unsigned int b)
5841
0
{
5842
0
    int ret = 0;
5843
    /* Index of word. */
5844
0
    unsigned int i = b >> SP_WORD_SHIFT;
5845
5846
    /* Check parameters. */
5847
0
    if ((a != NULL) && (i < a->used)) {
5848
        /* Shift amount to get bit down to index 0. */
5849
0
        unsigned int s = b & SP_WORD_MASK;
5850
5851
        /* Get and mask bit. */
5852
0
        ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
5853
0
    }
5854
5855
0
    return ret;
5856
0
}
5857
#endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
5858
        * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
5859
5860
/* Count the number of bits in the multi-precision number.
5861
 *
5862
 * When a is NULL, result is 0.
5863
 *
5864
 * @param  [in]  a  SP integer.
5865
 *
5866
 * @return  Number of bits in the SP integer value.
5867
 */
5868
int sp_count_bits(const sp_int* a)
5869
0
{
5870
0
    int n = -1;
5871
5872
    /* Check parameter. */
5873
0
    if ((a != NULL) && (a->used > 0)) {
5874
        /* Get index of last word. */
5875
0
        n = (int)(a->used - 1);
5876
        /* Don't count leading zeros. */
5877
0
        while ((n >= 0) && (a->dp[n] == 0)) {
5878
0
            n--;
5879
0
        }
5880
0
    }
5881
5882
    /* -1 indicates SP integer value was zero. */
5883
0
    if (n < 0) {
5884
0
        n = 0;
5885
0
    }
5886
0
    else {
5887
        /* Get the most significant word. */
5888
0
        sp_int_digit d = a->dp[n];
5889
        /* Count of bits up to last word. */
5890
0
        n *= SP_WORD_SIZE;
5891
5892
0
    #ifdef SP_ASM_HI_BIT_SET_IDX
5893
0
        {
5894
0
            sp_int_digit hi;
5895
            /* Get index of highest set bit. */
5896
0
            SP_ASM_HI_BIT_SET_IDX(d, hi);
5897
            /* Add bits up to and including index. */
5898
0
            n += (int)hi + 1;
5899
0
        }
5900
    #elif defined(SP_ASM_LZCNT)
5901
        {
5902
            sp_int_digit lz;
5903
            /* Count number of leading zeros in highest non-zero digit. */
5904
            SP_ASM_LZCNT(d, lz);
5905
            /* Add non-leading zero bits count. */
5906
            n += SP_WORD_SIZE - (int)lz;
5907
        }
5908
    #else
5909
        /* Check if top word has more than half the bits set. */
5910
        if (d > SP_HALF_MAX) {
5911
            /* Set count to a full last word. */
5912
            n += SP_WORD_SIZE;
5913
            /* Don't count leading zero bits. */
5914
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
5915
                n--;
5916
                d <<= 1;
5917
            }
5918
        }
5919
        else {
5920
            /* Add to count until highest set bit is shifted out. */
5921
            while (d != 0) {
5922
                n++;
5923
                d >>= 1;
5924
            }
5925
        }
5926
    #endif
5927
0
    }
5928
5929
0
    return n;
5930
0
}
5931
5932
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
5933
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
5934
    (defined(HAVE_ECC) && defined(FP_ECC)) || \
5935
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
5936
5937
/* Number of entries in array of number of least significant zero bits. */
5938
#define SP_LNZ_CNT      16
5939
/* Number of bits the array checks. */
5940
0
#define SP_LNZ_BITS     4
5941
/* Mask to apply to check with array. */
5942
0
#define SP_LNZ_MASK     0xf
5943
/* Number of least significant zero bits in first SP_LNZ_CNT numbers. */
5944
static const int sp_lnz[SP_LNZ_CNT] = {
5945
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
5946
};
5947
5948
/* Count the number of least significant zero bits.
5949
 *
5950
 * When a is not NULL, result is 0.
5951
 *
5952
 * @param  [in]   a  SP integer to use.
5953
 *
5954
 * @return  Number of least significant zero bits.
5955
 */
5956
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
5957
static
5958
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
5959
int sp_cnt_lsb(const sp_int* a)
5960
0
{
5961
0
    unsigned int bc = 0;
5962
5963
    /* Check for number with a value. */
5964
0
    if ((a != NULL) && (!sp_iszero(a))) {
5965
0
        unsigned int i;
5966
0
        unsigned int j;
5967
5968
        /* Count least significant words that are zero. */
5969
0
        for (i = 0; (i < a->used) && (a->dp[i] == 0); i++, bc += SP_WORD_SIZE) {
5970
0
        }
5971
5972
        /* Use 4-bit table to get count. */
5973
0
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
5974
            /* Get number of lesat significant 0 bits in nibble. */
5975
0
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
5976
            /* Done if not all 4 bits are zero. */
5977
0
            if (cnt != 4) {
5978
                /* Add checked bits and count in last 4 bits checked. */
5979
0
                bc += j + (unsigned int)cnt;
5980
0
                break;
5981
0
            }
5982
0
        }
5983
0
    }
5984
5985
0
    return (int)bc;
5986
0
}
5987
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH || (HAVE_ECC && FP_ECC) ||
        * (!NO_RSA && WOLFSSL_KEY_GEN) */
5988
5989
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
5990
    (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
5991
/* Determine if the most significant byte of the encoded multi-precision number
5992
 * has the top bit set.
5993
 *
5994
 * When a is NULL, result is 0.
5995
 *
5996
 * @param  [in]  a  SP integer.
5997
 *
5998
 * @return  1 when the top bit of top byte is set.
5999
 * @return  0 when the top bit of top byte is not set.
6000
 */
6001
int sp_leading_bit(const sp_int* a)
6002
0
{
6003
0
    int bit = 0;
6004
6005
    /* Check if we have a number and value to use. */
6006
0
    if ((a != NULL) && (a->used > 0)) {
6007
        /* Get top word. */
6008
0
        sp_int_digit d = a->dp[a->used - 1];
6009
6010
0
    #if SP_WORD_SIZE > 8
6011
        /* Remove bottom 8 bits until highest 8 bits left. */
6012
0
        while (d > (sp_int_digit)0xff) {
6013
0
            d >>= 8;
6014
0
        }
6015
0
    #endif
6016
        /* Get the highest bit of the 8-bit value. */
6017
0
        bit = (int)(d >> 7);
6018
0
    }
6019
6020
0
    return bit;
6021
0
}
6022
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_ASN_TEMPLATE ||
        * (WOLFSSL_SP_MATH_ALL && !NO_ASN) */
6023
6024
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
6025
    defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
6026
    !defined(NO_RSA)
6027
/* Set one bit of a: a |= 1 << i
6028
 * The field 'used' is updated in a.
6029
 *
6030
 * @param  [in,out]  a  SP integer to set bit into.
6031
 * @param  [in]      i  Index of bit to set.
6032
 *
6033
 * @return  MP_OKAY on success.
6034
 * @return  MP_VAL when a is NULL, index is negative or index is too large.
6035
 */
6036
int sp_set_bit(sp_int* a, int i)
6037
0
{
6038
0
    int err = MP_OKAY;
6039
    /* Get index of word to set. */
6040
0
    sp_size_t w = (sp_size_t)(i >> SP_WORD_SHIFT);
6041
6042
    /* Check for valid number and and space for bit. */
6043
0
    if ((a == NULL) || (i < 0) || (w >= a->size)) {
6044
0
        err = MP_VAL;
6045
0
    }
6046
0
    if (err == MP_OKAY) {
6047
        /* Amount to shift up to set bit in word. */
6048
0
        unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
6049
0
        unsigned int j;
6050
6051
        /* Set to zero all unused words up to and including word to have bit
6052
         * set.
6053
         */
6054
0
        for (j = a->used; j <= w; j++) {
6055
0
            a->dp[j] = 0;
6056
0
        }
6057
        /* Set bit in word. */
6058
0
        a->dp[w] |= (sp_int_digit)1 << s;
6059
        /* Update used if necessary */
6060
0
        if (a->used <= w) {
6061
0
            a->used = (sp_size_t)(w + 1U);
6062
0
        }
6063
0
    }
6064
6065
0
    return err;
6066
0
}
6067
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
6068
        * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
6069
6070
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6071
    defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
6072
/* Exponentiate 2 to the power of e: a = 2^e
6073
 * This is done by setting the 'e'th bit.
6074
 *
6075
 * @param  [out]  a  SP integer to hold result.
6076
 * @param  [in]   e  Exponent.
6077
 *
6078
 * @return  MP_OKAY on success.
6079
 * @return  MP_VAL when a is NULL, e is negative or 2^exponent is too large.
6080
 */
6081
int sp_2expt(sp_int* a, int e)
6082
0
{
6083
0
    int err = MP_OKAY;
6084
6085
    /* Validate parameters. */
6086
0
    if ((a == NULL) || (e < 0)) {
6087
0
        err = MP_VAL;
6088
0
    }
6089
0
    if (err == MP_OKAY) {
6090
        /* Set number to zero and then set bit. */
6091
0
        _sp_zero(a);
6092
0
        err = sp_set_bit(a, e);
6093
0
    }
6094
6095
0
    return err;
6096
0
}
6097
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6098
        * WOLFSSL_KEY_GEN || !NO_DH */
6099
6100
/**********************
6101
 * Digit/Long functions
6102
 **********************/
6103
6104
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
6105
    defined(HAVE_ECC)
6106
/* Set the multi-precision number to be the value of the digit.
6107
 *
6108
 * @param  [out]  a  SP integer to become number.
6109
 * @param  [in]   d  Digit to be set.
6110
 */
6111
static void _sp_set(sp_int* a, sp_int_digit d)
6112
0
{
6113
    /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
6114
0
    sp_int_minimal* am = (sp_int_minimal*)a;
6115
6116
0
    am->dp[0] = d;
6117
    /* d == 0 => used = 0, d > 0 => used = 1 */
6118
0
    am->used = (d > 0);
6119
#ifdef WOLFSSL_SP_INT_NEGATIVE
6120
    am->sign = MP_ZPOS;
6121
#endif
6122
0
}
6123
6124
/* Set the multi-precision number to be the value of the digit.
6125
 *
6126
 * @param  [out]  a  SP integer to become number.
6127
 * @param  [in]   d  Digit to be set.
6128
 *
6129
 * @return  MP_OKAY on success.
6130
 * @return  MP_VAL when a is NULL.
6131
 */
6132
int sp_set(sp_int* a, sp_int_digit d)
6133
0
{
6134
0
    int err = MP_OKAY;
6135
6136
    /* Validate parameters. */
6137
0
    if (a == NULL) {
6138
0
        err = MP_VAL;
6139
0
    }
6140
0
    if (err == MP_OKAY) {
6141
0
        _sp_set(a, d);
6142
0
    }
6143
6144
0
    return err;
6145
0
}
6146
#endif
6147
6148
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
6149
/* Set a number into the multi-precision number.
6150
 *
6151
 * Number may be larger than the size of a digit.
6152
 *
6153
 * @param  [out]  a  SP integer to set.
6154
 * @param  [in]   n  Long value to set.
6155
 *
6156
 * @return  MP_OKAY on success.
6157
 * @return  MP_VAL when a is NULL.
6158
 */
6159
int sp_set_int(sp_int* a, unsigned long n)
6160
0
{
6161
0
    int err = MP_OKAY;
6162
6163
0
    if (a == NULL) {
6164
0
        err = MP_VAL;
6165
0
    }
6166
6167
0
    if (err == MP_OKAY) {
6168
    #if SP_WORD_SIZE < SP_ULONG_BITS
6169
        /* Assign if value first in one word. */
6170
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
6171
    #endif
6172
0
            a->dp[0] = (sp_int_digit)n;
6173
0
            a->used = (n != 0);
6174
    #if SP_WORD_SIZE < SP_ULONG_BITS
6175
        }
6176
        else {
6177
            unsigned int i;
6178
6179
            /* Assign value word by word. */
6180
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
6181
                a->dp[i] = (sp_int_digit)n;
6182
            }
6183
            /* Update number of words used. */
6184
            a->used = i;
6185
            /* Check for overflow. */
6186
            if ((i == a->size) && (n != 0)) {
6187
                err = MP_VAL;
6188
            }
6189
        }
6190
    #endif
6191
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6192
        a->sign = MP_ZPOS;
6193
    #endif
6194
0
    }
6195
6196
0
    return err;
6197
0
}
6198
#endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA  */
6199
6200
#if defined(WOLFSSL_SP_MATH_ALL) || \
6201
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6202
    !defined(NO_DH) || defined(HAVE_ECC)
6203
/* Compare a one digit number with a multi-precision number.
6204
 *
6205
 * When a is NULL, MP_LT is returned.
6206
 *
6207
 * @param  [in]  a  SP integer to compare.
6208
 * @param  [in]  d  Digit to compare with.
6209
 *
6210
 * @return  MP_GT when a is greater than d.
6211
 * @return  MP_LT when a is less than d.
6212
 * @return  MP_EQ when a is equals d.
6213
 */
6214
int sp_cmp_d(const sp_int* a, sp_int_digit d)
6215
0
{
6216
0
    int ret = MP_EQ;
6217
6218
    /* No SP integer is always less - even when d is zero. */
6219
0
    if (a == NULL) {
6220
0
        ret = MP_LT;
6221
0
    }
6222
0
    else
6223
#ifdef WOLFSSL_SP_INT_NEGATIVE
6224
    /* Check sign first. */
6225
    if (a->sign == MP_NEG) {
6226
        ret = MP_LT;
6227
    }
6228
    else
6229
#endif
6230
0
    {
6231
        /* Check if SP integer as more than one word. */
6232
0
        if (a->used > 1) {
6233
0
            ret = MP_GT;
6234
0
        }
6235
        /* Special case for zero. */
6236
0
        else if (a->used == 0) {
6237
0
            if (d != 0) {
6238
0
                ret = MP_LT;
6239
0
            }
6240
            /* ret initialized to equal. */
6241
0
        }
6242
0
        else {
6243
            /* The single word in the SP integer can now be compared with d. */
6244
0
            if (a->dp[0] > d) {
6245
0
                ret = MP_GT;
6246
0
            }
6247
0
            else if (a->dp[0] < d) {
6248
0
                ret = MP_LT;
6249
0
            }
6250
            /* ret initialized to equal. */
6251
0
        }
6252
0
    }
6253
6254
0
    return ret;
6255
0
}
6256
#endif
6257
6258
#if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
6259
    defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
6260
/* Add a one digit number to the multi-precision number.
6261
 *
6262
 * @param  [in]   a  SP integer be added to.
6263
 * @param  [in]   d  Digit to add.
6264
 * @param  [out]  r  SP integer to store result in.
6265
 *
6266
 * @return  MP_OKAY on success.
6267
 * @return  MP_VAL when result is too large for fixed size dp array.
6268
 */
6269
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6270
0
{
6271
0
    int err = MP_OKAY;
6272
6273
    /* Special case of zero means we want result to have a digit when not adding
6274
     * zero. */
6275
0
    if (a->used == 0) {
6276
0
        r->dp[0] = d;
6277
0
        r->used = (d > 0);
6278
0
    }
6279
0
    else {
6280
0
        unsigned int i = 0;
6281
0
        sp_int_digit a0 = a->dp[0];
6282
6283
        /* Set used of result - updated if overflow seen. */
6284
0
        r->used = a->used;
6285
6286
0
        r->dp[0] = a0 + d;
6287
        /* Check for carry. */
6288
0
        if (r->dp[0] < a0) {
6289
            /* Do carry through all words. */
6290
0
            for (++i; i < a->used; i++) {
6291
0
                r->dp[i] = a->dp[i] + 1;
6292
0
                if (r->dp[i] != 0) {
6293
0
                   break;
6294
0
                }
6295
0
            }
6296
            /* Add another word if required. */
6297
0
            if (i == a->used) {
6298
                /* Check result has enough space for another word. */
6299
0
                if (i < r->size) {
6300
0
                    r->used++;
6301
0
                    r->dp[i] = 1;
6302
0
                }
6303
0
                else {
6304
0
                    err = MP_VAL;
6305
0
                }
6306
0
            }
6307
0
        }
6308
        /* When result is not the same as input, copy rest of digits. */
6309
0
        if ((err == MP_OKAY) && (r != a)) {
6310
            /* Copy any words that didn't update with carry. */
6311
0
            for (++i; i < a->used; i++) {
6312
0
                r->dp[i] = a->dp[i];
6313
0
            }
6314
0
        }
6315
0
    }
6316
6317
0
    return err;
6318
0
}
6319
#endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
6320
        * defined(WOLFSSL_SP_READ_RADIX_10) */
6321
6322
#if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
6323
    defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
6324
    defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
6325
    !defined(WC_NO_RNG))
6326
/* Sub a one digit number from the multi-precision number.
6327
 *
6328
 * @param  [in]   a  SP integer be subtracted from.
6329
 * @param  [in]   d  Digit to subtract.
6330
 * @param  [out]  r  SP integer to store result in.
6331
 */
6332
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6333
0
{
6334
    /* Set result used to be same as input. Updated with clamp. */
6335
0
    r->used = a->used;
6336
    /* Only possible when not handling negatives. */
6337
0
    if (a->used == 0) {
6338
        /* Set result to zero as no negative support. */
6339
0
        r->dp[0] = 0;
6340
0
    }
6341
0
    else {
6342
0
        unsigned int i = 0;
6343
0
        sp_int_digit a0 = a->dp[0];
6344
6345
0
        r->dp[0] = a0 - d;
6346
        /* Check for borrow. */
6347
0
        if (r->dp[0] > a0) {
6348
            /* Do borrow through all words. */
6349
0
            for (++i; i < a->used; i++) {
6350
0
                r->dp[i] = a->dp[i] - 1;
6351
0
                if (r->dp[i] != SP_DIGIT_MAX) {
6352
0
                   break;
6353
0
                }
6354
0
            }
6355
0
        }
6356
        /* When result is not the same as input, copy rest of digits. */
6357
0
        if (r != a) {
6358
            /* Copy any words that didn't update with borrow. */
6359
0
            for (++i; i < a->used; i++) {
6360
0
                r->dp[i] = a->dp[i];
6361
0
            }
6362
0
        }
6363
        /* Remove leading zero words. */
6364
0
        sp_clamp(r);
6365
0
    }
6366
0
}
6367
#endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D ||
6368
        * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
6369
        * WOLFSSL_SP_PRIME_GEN */
6370
6371
#ifdef WOLFSSL_SP_ADD_D
6372
/* Add a one digit number to the multi-precision number.
6373
 *
6374
 * @param  [in]   a  SP integer be added to.
6375
 * @param  [in]   d  Digit to add.
6376
 * @param  [out]  r  SP integer to store result in.
6377
 *
6378
 * @return  MP_OKAY on success.
6379
 * @return  MP_VAL when result is too large for fixed size dp array.
6380
 */
6381
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
6382
0
{
6383
0
    int err = MP_OKAY;
6384
6385
    /* Check validity of parameters. */
6386
0
    if ((a == NULL) || (r == NULL)) {
6387
0
        err = MP_VAL;
6388
0
    }
6389
6390
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
6391
    /* Check for space in result especially when carry adds a new word. */
6392
0
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
6393
0
         err = MP_VAL;
6394
0
    }
6395
0
    if (err == MP_OKAY) {
6396
        /* Positive only so just use internal function. */
6397
0
        err = _sp_add_d(a, d, r);
6398
0
    }
6399
#else
6400
    /* Check for space in result especially when carry adds a new word. */
6401
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
6402
         err = MP_VAL;
6403
    }
6404
    /* Check for space in result - no carry but borrow possible. */
6405
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
6406
         err = MP_VAL;
6407
    }
6408
    if (err == MP_OKAY) {
6409
        if (a->sign == MP_ZPOS) {
6410
            /* Positive, so use internal function. */
6411
            r->sign = MP_ZPOS;
6412
            err = _sp_add_d(a, d, r);
6413
        }
6414
        else if ((a->used > 1) || (a->dp[0] > d)) {
6415
            /* Negative value bigger than digit so subtract digit. */
6416
            r->sign = MP_NEG;
6417
            _sp_sub_d(a, d, r);
6418
        }
6419
        else {
6420
            /* Negative value smaller or equal to digit. */
6421
            r->sign = MP_ZPOS;
6422
            /* Subtract negative value from digit. */
6423
            r->dp[0] = d - a->dp[0];
6424
            /* Result is a digit equal to or greater than zero. */
6425
            r->used = (r->dp[0] > 0);
6426
        }
6427
    }
6428
#endif
6429
6430
0
    return err;
6431
0
}
6432
#endif /* WOLFSSL_SP_ADD_D */
6433
6434
#ifdef WOLFSSL_SP_SUB_D
6435
/* Sub a one digit number from the multi-precision number.
6436
 *
6437
 * @param  [in]   a  SP integer be subtracted from.
6438
 * @param  [in]   d  Digit to subtract.
6439
 * @param  [out]  r  SP integer to store result in.
6440
 *
6441
 * @return  MP_OKAY on success.
6442
 * @return  MP_VAL when a or r is NULL.
6443
 */
6444
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
6445
0
{
6446
0
    int err = MP_OKAY;
6447
6448
    /* Check validity of parameters. */
6449
0
    if ((a == NULL) || (r == NULL)) {
6450
0
        err = MP_VAL;
6451
0
    }
6452
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
6453
    /* Check for space in result. */
6454
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
6455
0
         err = MP_VAL;
6456
0
    }
6457
0
    if (err == MP_OKAY) {
6458
        /* Positive only so just use internal function. */
6459
0
        _sp_sub_d(a, d, r);
6460
0
    }
6461
#else
6462
    /* Check for space in result especially when borrow adds a new word. */
6463
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
6464
         err = MP_VAL;
6465
    }
6466
    /* Check for space in result - no carry but borrow possible. */
6467
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
6468
         err = MP_VAL;
6469
    }
6470
    if (err == MP_OKAY) {
6471
        if (a->sign == MP_NEG) {
6472
            /* Subtracting from negative use internal add. */
6473
            r->sign = MP_NEG;
6474
            err = _sp_add_d(a, d, r);
6475
        }
6476
        else if ((a->used > 1) || (a->dp[0] >= d)) {
6477
            /* Positive number greater than or equal to digit - subtract digit.
6478
             */
6479
            r->sign = MP_ZPOS;
6480
            _sp_sub_d(a, d, r);
6481
        }
6482
        else {
6483
            /* Positive value smaller than digit. */
6484
            r->sign = MP_NEG;
6485
            /* Subtract positive value from digit. */
6486
            r->dp[0] = d - a->dp[0];
6487
            /* Result is a digit equal to or greater than zero. */
6488
            r->used = 1;
6489
        }
6490
    }
6491
#endif
6492
6493
0
    return err;
6494
0
}
6495
#endif /* WOLFSSL_SP_SUB_D */
6496
6497
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6498
    defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
6499
    !defined(NO_DH) || defined(HAVE_ECC) || \
6500
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6501
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6502
    (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)) || \
6503
    defined(WOLFSSL_SP_MUL_D)
6504
/* Multiply a by digit n and put result into r shifting up o digits.
6505
 *   r = (a * n) << (o * SP_WORD_SIZE)
6506
 *
6507
 * @param  [in]   a  SP integer to be multiplied.
6508
 * @param  [in]   d  SP digit to multiply by.
6509
 * @param  [out]  r  SP integer result.
6510
 * @param  [in]   o  Number of digits to move result up by.
6511
 * @return  MP_OKAY on success.
6512
 * @return  MP_VAL when result is too large for sp_int.
6513
 */
6514
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
6515
0
{
6516
0
    int err = MP_OKAY;
6517
0
    unsigned int i;
6518
#ifndef SQR_MUL_ASM
6519
    sp_int_word t = 0;
6520
#else
6521
0
    sp_int_digit l = 0;
6522
0
    sp_int_digit h = 0;
6523
0
#endif
6524
6525
#ifdef WOLFSSL_SP_SMALL
6526
    /* Zero out offset words. */
6527
    for (i = 0; i < o; i++) {
6528
        r->dp[i] = 0;
6529
    }
6530
#else
6531
    /* Don't use the offset. Only when doing small code size div. */
6532
0
    (void)o;
6533
0
#endif
6534
6535
    /* Multiply each word of a by n. */
6536
0
    for (i = 0; i < a->used; i++, o++) {
6537
    #ifndef SQR_MUL_ASM
6538
        /* Add product to top word of previous result. */
6539
        t += (sp_int_word)a->dp[i] * d;
6540
        /* Store low word. */
6541
        r->dp[o] = (sp_int_digit)t;
6542
        /* Move top word down. */
6543
        t >>= SP_WORD_SIZE;
6544
    #else
6545
        /* Multiply and add into low and high from previous result.
6546
         * No overflow of possible with add. */
6547
0
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
6548
        /* Store low word. */
6549
0
        r->dp[o] = l;
6550
        /* Move high word into low word and set high word to 0. */
6551
0
        l = h;
6552
0
        h = 0;
6553
0
    #endif
6554
0
    }
6555
6556
    /* Check whether new word to be appended to result. */
6557
#ifndef SQR_MUL_ASM
6558
    if (t > 0)
6559
#else
6560
0
    if (l > 0)
6561
0
#endif
6562
0
    {
6563
        /* Validate space available in result. */
6564
0
        if (o == r->size) {
6565
0
            err = MP_VAL;
6566
0
        }
6567
0
        else {
6568
            /* Store new top word. */
6569
        #ifndef SQR_MUL_ASM
6570
            r->dp[o++] = (sp_int_digit)t;
6571
        #else
6572
0
            r->dp[o++] = l;
6573
0
        #endif
6574
0
        }
6575
0
    }
6576
    /* Update number of words in result. */
6577
0
    r->used = (sp_size_t)o;
6578
    /* In case n is zero. */
6579
0
    sp_clamp(r);
6580
6581
0
    return err;
6582
0
}
6583
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
6584
        *  WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
6585
6586
#ifdef WOLFSSL_SP_MUL_D
6587
/* Multiply a by digit n and put result into r. r = a * n
6588
 *
6589
 * @param  [in]   a  SP integer to multiply.
6590
 * @param  [in]   n  Digit to multiply by.
6591
 * @param  [out]  r  SP integer to hold result.
6592
 *
6593
 * @return  MP_OKAY on success.
6594
 * @return  MP_VAL when a or b is NULL, or a has maximum number of digits used.
6595
 */
6596
int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
6597
0
{
6598
0
    int err = MP_OKAY;
6599
6600
    /* Validate parameters. */
6601
0
    if ((a == NULL) || (r == NULL)) {
6602
0
        err = MP_VAL;
6603
0
    }
6604
    /* Check space for product result - _sp_mul_d checks when new word added. */
6605
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
6606
0
        err = MP_VAL;
6607
0
    }
6608
6609
0
    if (err == MP_OKAY) {
6610
0
        err = _sp_mul_d(a, d, r, 0);
6611
    #ifdef WOLFSSL_SP_INT_NEGATIVE
6612
        /* Update sign. */
6613
        if (d == 0) {
6614
            r->sign = MP_ZPOS;
6615
        }
6616
        else {
6617
            r->sign = a->sign;
6618
        }
6619
    #endif
6620
0
    }
6621
6622
0
    return err;
6623
0
}
6624
#endif /* WOLFSSL_SP_MUL_D */
6625
6626
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
6627
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6628
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
6629
    defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
6630
#define WOLFSSL_SP_DIV_D
6631
#endif
6632
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
6633
    !defined(NO_DH) || \
6634
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
6635
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
6636
#define WOLFSSL_SP_MOD_D
6637
#endif
6638
6639
#if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
6640
     (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
6641
      !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
6642
    defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6643
#ifndef SP_ASM_DIV_WORD
6644
/* Divide a two digit number by a digit number and return. (hi | lo) / d
6645
 *
6646
 * @param  [in]  hi  SP integer digit. High digit of the dividend.
6647
 * @param  [in]  lo  SP integer digit. Lower digit of the dividend.
6648
 * @param  [in]  d   SP integer digit. Number to divide by.
6649
 * @return  The division result.
6650
 */
6651
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
6652
    sp_int_digit d)
6653
{
6654
#ifdef WOLFSSL_SP_DIV_WORD_HALF
6655
    sp_int_digit r;
6656
6657
    /* Trial division using half of the bits in d. */
6658
6659
    /* Check for shortcut when no high word set. */
6660
    if (hi == 0) {
6661
        r = lo / d;
6662
    }
6663
    else {
6664
        /* Half the bits of d. */
6665
        sp_int_digit divh = d >> SP_HALF_SIZE;
6666
        /* Number to divide in one value. */
6667
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6668
        sp_int_word trial;
6669
        sp_int_digit r2;
6670
6671
        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
6672
        /* Divide high word by top half of divisor. */
6673
        r = hi / divh;
6674
        /* When result too big then assume only max value. */
6675
        if (r > SP_HALF_MAX) {
6676
            r = SP_HALF_MAX;
6677
        }
6678
        /* Shift up result for trial division calculation. */
6679
        r <<= SP_HALF_SIZE;
6680
        /* Calculate trial value. */
6681
        trial = r * (sp_int_word)d;
6682
        /* Decrease r while trial is too big. */
6683
        while (trial > w) {
6684
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
6685
            trial -= (sp_int_word)d << SP_HALF_SIZE;
6686
        }
6687
        /* Subtract trial. */
6688
        w -= trial;
6689
6690
        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
6691
        /* Divide top SP_WORD_SIZE of remainder by top half of divisor. */
6692
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
6693
        /* Calculate trial value. */
6694
        trial = r2 * (sp_int_word)d;
6695
        /* Decrease r while trial is too big. */
6696
        while (trial > w) {
6697
            r2--;
6698
            trial -= d;
6699
        }
6700
        /* Subtract trial. */
6701
        w -= trial;
6702
        /* Update result. */
6703
        r += r2;
6704
6705
        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
6706
        r2 = ((sp_int_digit)w) / d;
6707
        /* Update result. */
6708
        r += r2;
6709
    }
6710
6711
    return r;
6712
#else
6713
    sp_int_word w;
6714
    sp_int_digit r;
6715
6716
    /* Use built-in divide. */
6717
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
6718
    w /= d;
6719
    r = (sp_int_digit)w;
6720
6721
    return r;
6722
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
6723
}
6724
#endif /* !SP_ASM_DIV_WORD */
6725
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
6726
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
6727
6728
#if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
6729
    !defined(WOLFSSL_SP_SMALL)
6730
6731
#if SP_WORD_SIZE == 64
6732
    /* 2^64 / 3 */
6733
    #define SP_DIV_3_CONST      0x5555555555555555L
6734
    /* 2^64 / 10 */
6735
0
    #define SP_DIV_10_CONST     0x1999999999999999L
6736
#elif SP_WORD_SIZE == 32
6737
    /* 2^32 / 3 */
6738
    #define SP_DIV_3_CONST      0x55555555
6739
    /* 2^32 / 10 */
6740
    #define SP_DIV_10_CONST     0x19999999
6741
#elif SP_WORD_SIZE == 16
6742
    /* 2^16 / 3 */
6743
    #define SP_DIV_3_CONST      0x5555
6744
    /* 2^16 / 10 */
6745
    #define SP_DIV_10_CONST     0x1999
6746
#elif SP_WORD_SIZE == 8
6747
    /* 2^8 / 3 */
6748
    #define SP_DIV_3_CONST      0x55
6749
    /* 2^8 / 10 */
6750
    #define SP_DIV_10_CONST     0x19
6751
#endif
6752
6753
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
6754
/* Divide by 3: r = a / 3 and rem = a % 3
6755
 *
6756
 * Used in checking prime: (a % 3) == 0?.
6757
 *
6758
 * @param  [in]   a    SP integer to be divided.
6759
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6760
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
6761
 */
6762
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
6763
{
6764
#ifndef SQR_MUL_ASM
6765
    sp_int_word t;
6766
    sp_int_digit tt;
6767
#else
6768
    sp_int_digit l = 0;
6769
    sp_int_digit tt = 0;
6770
    sp_int_digit t = SP_DIV_3_CONST;
6771
    sp_int_digit lm = 0;
6772
    sp_int_digit hm = 0;
6773
#endif
6774
    sp_int_digit tr = 0;
6775
    /* Quotient fixup. */
6776
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
6777
    /* Remainder fixup. */
6778
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
6779
6780
    /* Check whether only mod value needed. */
6781
    if (r == NULL) {
6782
        unsigned int i;
6783
6784
        /*    2^2 mod 3 = 4 mod 3 = 1.
6785
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
6786
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
6787
         *
6788
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple of 2.
6789
         */
6790
    #ifndef SQR_MUL_ASM
6791
        t = 0;
6792
        /* Sum the digits. */
6793
        for (i = 0; i < a->used; i++) {
6794
            t += a->dp[i];
6795
        }
6796
        /* Sum digits of sum. */
6797
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
6798
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6799
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6800
        /* Subtract trial division. */
6801
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6802
    #else
6803
        /* Sum the digits. */
6804
        for (i = 0; i < a->used; i++) {
6805
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
6806
        }
6807
        /* Sum digits of sum - can get carry. */
6808
        SP_ASM_ADDC_REG(l, tt, tr);
6809
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6810
        SP_ASM_MUL(lm, hm, l, t);
6811
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6812
        hm += tt * SP_DIV_3_CONST;
6813
        /* Subtract trial division from digit. */
6814
        tr = l - (hm * 3);
6815
    #endif
6816
        /* tr is 0..5 but need 0..2 */
6817
        /* Fix up remainder. */
6818
        tr = sp_rem6[tr];
6819
        *rem = tr;
6820
    }
6821
    /* At least result needed - remainder is calculated anyway. */
6822
    else {
6823
        int i;
6824
6825
        /* Divide starting at most significant word down to least. */
6826
        for (i = (int)(a->used - 1); i >= 0; i--) {
6827
    #ifndef SQR_MUL_ASM
6828
            /* Combine remainder from last operation with this word. */
6829
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6830
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
6831
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
6832
            /* Subtract trial division. */
6833
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
6834
    #else
6835
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
6836
            SP_ASM_MUL(l, tt, a->dp[i], t);
6837
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
6838
            tt += tr * SP_DIV_3_CONST;
6839
            /* Subtract trial division from digit. */
6840
            tr = a->dp[i] - (tt * 3);
6841
    #endif
6842
            /* tr is 0..5 but need 0..2 */
6843
            /* Fix up result. */
6844
            tt += sp_r6[tr];
6845
            /* Fix up remainder. */
6846
            tr = sp_rem6[tr];
6847
            /* Store result of digit divided by 3. */
6848
            r->dp[i] = tt;
6849
        }
6850
6851
        /* Set the used amount to maximal amount. */
6852
        r->used = a->used;
6853
        /* Remove leading zeros. */
6854
        sp_clamp(r);
6855
        /* Return remainder if required. */
6856
        if (rem != NULL) {
6857
            *rem = tr;
6858
        }
6859
    }
6860
}
6861
#endif /* !WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
6862
6863
/* Divide by 10: r = a / 10 and rem = a % 10
6864
 *
6865
 * Used when writing with a radix of 10 - decimal number.
6866
 *
6867
 * @param  [in]   a    SP integer to be divided.
6868
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6869
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
6870
 */
6871
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
6872
0
{
6873
0
    int i;
6874
#ifndef SQR_MUL_ASM
6875
    sp_int_word t;
6876
    sp_int_digit tt;
6877
#else
6878
0
    sp_int_digit l = 0;
6879
0
    sp_int_digit tt = 0;
6880
0
    sp_int_digit t = SP_DIV_10_CONST;
6881
0
#endif
6882
0
    sp_int_digit tr = 0;
6883
6884
    /* Check whether only mod value needed. */
6885
0
    if (r == NULL) {
6886
        /* Divide starting at most significant word down to least. */
6887
0
        for (i = (int)(a->used - 1); i >= 0; i--) {
6888
    #ifndef SQR_MUL_ASM
6889
            /* Combine remainder from last operation with this word. */
6890
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6891
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
6892
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
6893
            /* Subtract trial division. */
6894
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
6895
    #else
6896
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
6897
0
            SP_ASM_MUL(l, tt, a->dp[i], t);
6898
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
6899
             */
6900
0
            tt += tr * SP_DIV_10_CONST;
6901
            /* Subtract trial division from digit. */
6902
0
            tr = a->dp[i] - (tt * 10);
6903
0
    #endif
6904
            /* tr is 0..99 but need 0..9 */
6905
            /* Fix up remainder. */
6906
0
            tr = tr % 10;
6907
0
        }
6908
0
        *rem = tr;
6909
0
    }
6910
    /* At least result needed - remainder is calculated anyway. */
6911
0
    else {
6912
        /* Divide starting at most significant word down to least. */
6913
0
        for (i = (int)(a->used - 1); i >= 0; i--) {
6914
    #ifndef SQR_MUL_ASM
6915
            /* Combine remainder from last operation with this word. */
6916
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6917
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
6918
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
6919
            /* Subtract trial division. */
6920
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
6921
    #else
6922
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
6923
0
            SP_ASM_MUL(l, tt, a->dp[i], t);
6924
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top digit.
6925
             */
6926
0
            tt += tr * SP_DIV_10_CONST;
6927
            /* Subtract trial division from digit. */
6928
0
            tr = a->dp[i] - (tt * 10);
6929
0
    #endif
6930
            /* tr is 0..99 but need 0..9 */
6931
            /* Fix up result. */
6932
0
            tt += tr / 10;
6933
            /* Fix up remainder. */
6934
0
            tr %= 10;
6935
            /* Store result of digit divided by 10. */
6936
0
            r->dp[i] = tt;
6937
0
        }
6938
6939
        /* Set the used amount to maximal amount. */
6940
0
        r->used = a->used;
6941
        /* Remove leading zeros. */
6942
0
        sp_clamp(r);
6943
        /* Return remainder if required. */
6944
0
        if (rem != NULL) {
6945
0
            *rem = tr;
6946
0
        }
6947
0
    }
6948
0
}
6949
#endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
6950
6951
#if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
6952
/* Divide by small number: r = a / d and rem = a % d
6953
 *
6954
 * @param  [in]   a    SP integer to be divided.
6955
 * @param  [in]   d    Digit to divide by.
6956
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
6957
 * @param  [out]  rem  SP integer that is the remainder. May be NULL.
6958
 */
6959
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
6960
    sp_int_digit* rem)
6961
0
{
6962
0
    int i;
6963
#ifndef SQR_MUL_ASM
6964
    sp_int_word t;
6965
    sp_int_digit tt;
6966
#else
6967
0
    sp_int_digit l = 0;
6968
0
    sp_int_digit tt = 0;
6969
0
#endif
6970
0
    sp_int_digit tr = 0;
6971
0
    sp_int_digit m = SP_DIGIT_MAX / d;
6972
6973
0
#ifndef WOLFSSL_SP_SMALL
6974
    /* Check whether only mod value needed. */
6975
0
    if (r == NULL) {
6976
        /* Divide starting at most significant word down to least. */
6977
0
        for (i = (int)(a->used - 1); i >= 0; i--) {
6978
        #ifndef SQR_MUL_ASM
6979
            /* Combine remainder from last operation with this word. */
6980
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
6981
            /* Get top digit after multiplying. */
6982
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
6983
            /* Subtract trial division. */
6984
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
6985
        #else
6986
            /* Multiply digit. */
6987
0
            SP_ASM_MUL(l, tt, a->dp[i], m);
6988
            /* Add multiplied remainder to top digit. */
6989
0
            tt += tr * m;
6990
            /* Subtract trial division from digit. */
6991
0
            tr = a->dp[i] - (tt * d);
6992
0
        #endif
6993
            /* tr < d * d */
6994
            /* Fix up remainder. */
6995
0
            tr = tr % d;
6996
0
        }
6997
0
        *rem = tr;
6998
0
    }
6999
    /* At least result needed - remainder is calculated anyway. */
7000
0
    else
7001
0
#endif /* !WOLFSSL_SP_SMALL */
7002
0
    {
7003
        /* Divide starting at most significant word down to least. */
7004
0
        for (i = (int)(a->used - 1); i >= 0; i--) {
7005
        #ifndef SQR_MUL_ASM
7006
            /* Combine remainder from last operation with this word. */
7007
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
7008
            /* Get top digit after multiplying. */
7009
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
7010
            /* Subtract trial division. */
7011
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
7012
        #else
7013
            /* Multiply digit. */
7014
0
            SP_ASM_MUL(l, tt, a->dp[i], m);
7015
            /* Add multiplied remainder to top digit. */
7016
0
            tt += tr * m;
7017
            /* Subtract trial division from digit. */
7018
0
            tr = a->dp[i] - (tt * d);
7019
0
        #endif
7020
            /* tr < d * d */
7021
            /* Fix up result. */
7022
0
            tt += tr / d;
7023
            /* Fix up remainder. */
7024
0
            tr %= d;
7025
            /* Store result of dividing the digit. */
7026
        #ifdef WOLFSSL_SP_SMALL
7027
            if (r != NULL)
7028
        #endif
7029
0
            {
7030
0
                r->dp[i] = tt;
7031
0
            }
7032
0
        }
7033
7034
    #ifdef WOLFSSL_SP_SMALL
7035
        if (r != NULL)
7036
    #endif
7037
0
        {
7038
            /* Set the used amount to maximal amount. */
7039
0
            r->used = a->used;
7040
            /* Remove leading zeros. */
7041
0
            sp_clamp(r);
7042
0
        }
7043
        /* Return remainder if required. */
7044
0
        if (rem != NULL) {
7045
0
            *rem = tr;
7046
0
        }
7047
0
    }
7048
0
}
7049
#endif
7050
7051
#ifdef WOLFSSL_SP_DIV_D
7052
/* Divide a multi-precision number by a digit size number and calculate
7053
 * remainder.
7054
 *   r = a / d; rem = a % d
7055
 *
7056
 * Use trial division algorithm.
7057
 *
7058
 * @param  [in]   a    SP integer to be divided.
7059
 * @param  [in]   d    Digit to divide by.
7060
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
7061
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
7062
 */
7063
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
7064
    sp_int_digit* rem)
7065
0
{
7066
0
    int i;
7067
#ifndef SQR_MUL_ASM
7068
    sp_int_word w = 0;
7069
#else
7070
0
    sp_int_digit l;
7071
0
    sp_int_digit h = 0;
7072
0
#endif
7073
0
    sp_int_digit t;
7074
7075
    /* Divide starting at most significant word down to least. */
7076
0
    for (i = (int)(a->used - 1); i >= 0; i--) {
7077
    #ifndef SQR_MUL_ASM
7078
        /* Combine remainder from last operation with this word and divide. */
7079
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7080
        /* Combine remainder from last operation with this word. */
7081
        w = (w << SP_WORD_SIZE) | a->dp[i];
7082
        /* Subtract to get modulo result. */
7083
        w -= (sp_int_word)t * d;
7084
    #else
7085
        /* Get current word. */
7086
0
        l = a->dp[i];
7087
        /* Combine remainder from last operation with this word and divide. */
7088
0
        t = sp_div_word(h, l, d);
7089
        /* Subtract to get modulo result. */
7090
0
        h = l - t * d;
7091
0
    #endif
7092
        /* Store result of dividing the digit. */
7093
0
        if (r != NULL) {
7094
0
            r->dp[i] = t;
7095
0
        }
7096
0
    }
7097
0
    if (r != NULL) {
7098
        /* Set the used amount to maximal amount. */
7099
0
        r->used = a->used;
7100
        /* Remove leading zeros. */
7101
0
        sp_clamp(r);
7102
0
    }
7103
7104
    /* Return remainder if required. */
7105
0
    if (rem != NULL) {
7106
    #ifndef SQR_MUL_ASM
7107
        *rem = (sp_int_digit)w;
7108
    #else
7109
0
        *rem = h;
7110
0
    #endif
7111
0
    }
7112
0
}
7113
7114
/* Divide a multi-precision number by a digit size number and calculate
7115
 * remainder.
7116
 *   r = a / d; rem = a % d
7117
 *
7118
 * @param  [in]   a    SP integer to be divided.
7119
 * @param  [in]   d    Digit to divide by.
7120
 * @param  [out]  r    SP integer that is the quotient. May be NULL.
7121
 * @param  [out]  rem  Digit that is the remainder. May be NULL.
7122
 *
7123
 * @return  MP_OKAY on success.
7124
 * @return  MP_VAL when a is NULL or d is 0.
7125
 */
7126
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
7127
0
{
7128
0
    int err = MP_OKAY;
7129
7130
    /* Validate parameters. */
7131
0
    if ((a == NULL) || (d == 0)) {
7132
0
        err = MP_VAL;
7133
0
    }
7134
    /* Check space for maximal sized result. */
7135
0
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
7136
0
        err = MP_VAL;
7137
0
    }
7138
7139
0
    if (err == MP_OKAY) {
7140
0
#if !defined(WOLFSSL_SP_SMALL)
7141
    #if SP_WORD_SIZE < 64
7142
        if (d == 3) {
7143
            /* Fast implementation for divisor of 3. */
7144
            _sp_div_3(a, r, rem);
7145
        }
7146
        else
7147
    #endif
7148
0
        if (d == 10) {
7149
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
7150
0
            _sp_div_10(a, r, rem);
7151
0
        }
7152
0
        else
7153
0
#endif
7154
0
        if (d <= SP_HALF_MAX) {
7155
            /* For small divisors. */
7156
0
            _sp_div_small(a, d, r, rem);
7157
0
        }
7158
0
        else
7159
0
        {
7160
0
            _sp_div_d(a, d, r, rem);
7161
0
        }
7162
7163
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7164
        if (r != NULL) {
7165
            r->sign = a->sign;
7166
        }
7167
    #endif
7168
0
    }
7169
7170
0
    return err;
7171
0
}
7172
#endif /* WOLFSSL_SP_DIV_D */
7173
7174
#ifdef WOLFSSL_SP_MOD_D
7175
/* Calculate a modulo the digit d into r: r = a mod d
7176
 *
7177
 * @param  [in]   a  SP integer to reduce.
7178
 * @param  [in]   d  Digit to that is the modulus.
7179
 * @param  [out]  r  Digit that is the result.
7180
 */
7181
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
7182
0
{
7183
0
    int i;
7184
#ifndef SQR_MUL_ASM
7185
    sp_int_word w = 0;
7186
#else
7187
0
    sp_int_digit h = 0;
7188
0
#endif
7189
7190
    /* Divide starting at most significant word down to least. */
7191
0
    for (i = (int)(a->used - 1); i >= 0; i--) {
7192
    #ifndef SQR_MUL_ASM
7193
        /* Combine remainder from last operation with this word and divide. */
7194
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
7195
        /* Combine remainder from last operation with this word. */
7196
        w = (w << SP_WORD_SIZE) | a->dp[i];
7197
        /* Subtract to get modulo result. */
7198
        w -= (sp_int_word)t * d;
7199
    #else
7200
        /* Combine remainder from last operation with this word and divide. */
7201
0
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
7202
        /* Subtract to get modulo result. */
7203
0
        h = a->dp[i] - t * d;
7204
0
    #endif
7205
0
    }
7206
7207
    /* Return remainder. */
7208
#ifndef SQR_MUL_ASM
7209
    *r = (sp_int_digit)w;
7210
#else
7211
0
    *r = h;
7212
0
#endif
7213
0
}
7214
7215
/* Calculate a modulo the digit d into r: r = a mod d
7216
 *
7217
 * @param  [in]   a  SP integer to reduce.
7218
 * @param  [in]   d  Digit to that is the modulus.
7219
 * @param  [out]  r  Digit that is the result.
7220
 *
7221
 * @return  MP_OKAY on success.
7222
 * @return  MP_VAL when a is NULL or d is 0.
7223
 */
7224
#if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
7225
    !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
7226
static
7227
#endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
7228
int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
7229
0
{
7230
0
    int err = MP_OKAY;
7231
7232
    /* Validate parameters. */
7233
0
    if ((a == NULL) || (r == NULL) || (d == 0)) {
7234
0
        err = MP_VAL;
7235
0
    }
7236
7237
#if 0
7238
    sp_print(a, "a");
7239
    sp_print_digit(d, "m");
7240
#endif
7241
7242
0
    if (err == MP_OKAY) {
7243
        /* Check whether d is a power of 2. */
7244
0
        if ((d & (d - 1)) == 0) {
7245
0
            if (a->used == 0) {
7246
0
                *r = 0;
7247
0
            }
7248
0
            else {
7249
0
                *r = a->dp[0] & (d - 1);
7250
0
            }
7251
0
        }
7252
0
#if !defined(WOLFSSL_SP_SMALL)
7253
    #if SP_WORD_SIZE < 64
7254
        else if (d == 3) {
7255
            /* Fast implementation for divisor of 3. */
7256
            _sp_div_3(a, NULL, r);
7257
        }
7258
    #endif
7259
0
        else if (d == 10) {
7260
            /* Fast implementation for divisor of 10. */
7261
0
            _sp_div_10(a, NULL, r);
7262
0
        }
7263
0
#endif
7264
0
        else if (d <= SP_HALF_MAX) {
7265
            /* For small divisors. */
7266
0
            _sp_div_small(a, d, NULL, r);
7267
0
        }
7268
0
        else {
7269
0
            _sp_mod_d(a, d, r);
7270
0
        }
7271
7272
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7273
        if (a->sign == MP_NEG) {
7274
            *r = d - *r;
7275
        }
7276
    #endif
7277
0
    }
7278
7279
#if 0
7280
    sp_print_digit(*r, "rmod");
7281
#endif
7282
7283
0
    return err;
7284
0
}
7285
#endif /* WOLFSSL_SP_MOD_D */
7286
7287
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
7288
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
7289
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_SP_INVMOD)
7290
/* Divides a by 2 and stores in r: r = a >> 1
7291
 *
7292
 * @param  [in]   a  SP integer to divide.
7293
 * @param  [out]  r  SP integer to hold result.
7294
 */
7295
static void _sp_div_2(const sp_int* a, sp_int* r)
7296
0
{
7297
0
    int i;
7298
7299
    /* Shift down each word by 1 and include bottom bit of next at top. */
7300
0
    for (i = 0; i < (int)a->used - 1; i++) {
7301
0
        r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
7302
0
    }
7303
    /* Last word only needs to be shifted down. */
7304
0
    r->dp[i] = a->dp[i] >> 1;
7305
    /* Set used to be all words seen. */
7306
0
    r->used = (sp_size_t)(i + 1);
7307
    /* Remove leading zeros. */
7308
0
    sp_clamp(r);
7309
#ifdef WOLFSSL_SP_INT_NEGATIVE
7310
    /* Same sign in result. */
7311
    r->sign = a->sign;
7312
#endif
7313
0
}
7314
7315
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7316
/* Divides a by 2 and stores in r: r = a >> 1
7317
 *
7318
 * @param  [in]   a  SP integer to divide.
7319
 * @param  [out]  r  SP integer to hold result.
7320
 *
7321
 * @return  MP_OKAY on success.
7322
 * @return  MP_VAL when a or r is NULL.
7323
 */
7324
int sp_div_2(const sp_int* a, sp_int* r)
7325
0
{
7326
0
    int err = MP_OKAY;
7327
7328
    /* Only when a public API. */
7329
0
    if ((a == NULL) || (r == NULL)) {
7330
0
        err = MP_VAL;
7331
0
    }
7332
    /* Ensure maximal size is supported by result. */
7333
0
    if ((err == MP_OKAY) && (a->used > r->size)) {
7334
0
        err = MP_VAL;
7335
0
    }
7336
7337
0
    if (err == MP_OKAY) {
7338
0
        _sp_div_2(a, r);
7339
0
    }
7340
7341
0
    return err;
7342
0
}
7343
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7344
#endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
7345
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7346
7347
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
7348
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
7349
 *
7350
 * r = a / 2 (mod m) - constant time (a < m and positive)
7351
 *
7352
 * @param  [in]   a  SP integer to divide.
7353
 * @param  [in]   m  SP integer that is modulus.
7354
 * @param  [out]  r  SP integer to hold result.
7355
 *
7356
 * @return  MP_OKAY on success.
7357
 * @return  MP_VAL when a, m or r is NULL.
7358
 */
7359
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
7360
0
{
7361
0
    int err = MP_OKAY;
7362
7363
    /* Validate parameters. */
7364
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
7365
0
        err = MP_VAL;
7366
0
    }
7367
    /* Check result has enough space for a + m. */
7368
0
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
7369
0
        err = MP_VAL;
7370
0
    }
7371
7372
0
    if (err == MP_OKAY) {
7373
    #ifndef SQR_MUL_ASM
7374
        sp_int_word  w = 0;
7375
    #else
7376
0
        sp_int_digit l = 0;
7377
0
        sp_int_digit h;
7378
0
        sp_int_digit t;
7379
0
    #endif
7380
        /* Mask to apply to modulus. */
7381
0
        sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
7382
0
        sp_size_t i;
7383
7384
    #if 0
7385
        sp_print(a, "a");
7386
        sp_print(m, "m");
7387
    #endif
7388
7389
        /* Add a to m, if a is odd, into r in constant time. */
7390
0
        for (i = 0; i < m->used; i++) {
7391
            /* Mask to apply to a - set when used value at index. */
7392
0
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
7393
7394
        #ifndef SQR_MUL_ASM
7395
            /* Conditionally add modulus. */
7396
            w         += m->dp[i] & mask;
7397
            /* Conditionally add a. */
7398
            w         += a->dp[i] & mask_a;
7399
            /* Store low digit in result. */
7400
            r->dp[i]   = (sp_int_digit)w;
7401
            /* Move high digit down. */
7402
            w        >>= DIGIT_BIT;
7403
        #else
7404
            /* No high digit. */
7405
0
            h        = 0;
7406
            /* Conditionally use modulus. */
7407
0
            t        = m->dp[i] & mask;
7408
            /* Add with carry modulus. */
7409
0
            SP_ASM_ADDC_REG(l, h, t);
7410
            /* Conditionally use a. */
7411
0
            t        = a->dp[i] & mask_a;
7412
            /* Add with carry a. */
7413
0
            SP_ASM_ADDC_REG(l, h, t);
7414
            /* Store low digit in result. */
7415
0
            r->dp[i] = l;
7416
            /* Move high digit down. */
7417
0
            l        = h;
7418
0
        #endif
7419
0
        }
7420
        /* Store carry. */
7421
    #ifndef SQR_MUL_ASM
7422
        r->dp[i] = (sp_int_digit)w;
7423
    #else
7424
0
        r->dp[i] = l;
7425
0
    #endif
7426
        /* Used includes carry - set or not. */
7427
0
        r->used = (sp_size_t)(i + 1);
7428
    #ifdef WOLFSSL_SP_INT_NEGATIVE
7429
        r->sign = MP_ZPOS;
7430
    #endif
7431
        /* Divide conditional sum by 2. */
7432
0
        _sp_div_2(r, r);
7433
7434
    #if 0
7435
        sp_print(r, "rd2");
7436
    #endif
7437
0
    }
7438
7439
0
    return err;
7440
0
}
7441
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
7442
7443
/************************
7444
 * Add/Subtract Functions
7445
 ************************/
7446
7447
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7448
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
7449
 *
7450
 * @param  [in]   a  SP integer to add to.
7451
 * @param  [in]   b  SP integer to add.
7452
 * @param  [out]  r  SP integer to store result in.
7453
 * @param  [in]   o  Number of digits to offset b.
7454
 */
7455
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
7456
0
{
7457
0
    sp_size_t i = 0;
7458
#ifndef SQR_MUL_ASM
7459
    sp_int_word t = 0;
7460
#else
7461
0
    sp_int_digit l = 0;
7462
0
    sp_int_digit h = 0;
7463
0
    sp_int_digit t = 0;
7464
0
#endif
7465
7466
#ifdef SP_MATH_NEED_ADD_OFF
7467
    unsigned int j;
7468
7469
    /* Copy a into result up to offset. */
7470
    for (; (i < o) && (i < a->used); i++) {
7471
        r->dp[i] = a->dp[i];
7472
    }
7473
    /* Set result to 0 for digits beyonf those in a. */
7474
    for (; i < o; i++) {
7475
        r->dp[i] = 0;
7476
    }
7477
7478
    /* Add each digit from a and b where both have values. */
7479
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7480
    #ifndef SQR_MUL_ASM
7481
        t += a->dp[i];
7482
        t += b->dp[j];
7483
        r->dp[i] = (sp_int_digit)t;
7484
        t >>= SP_WORD_SIZE;
7485
    #else
7486
        t = a->dp[i];
7487
        SP_ASM_ADDC(l, h, t);
7488
        t = b->dp[j];
7489
        SP_ASM_ADDC(l, h, t);
7490
        r->dp[i] = l;
7491
        l = h;
7492
        h = 0;
7493
    #endif
7494
    }
7495
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7496
    for (; i < a->used; i++) {
7497
    #ifndef SQR_MUL_ASM
7498
        t += a->dp[i];
7499
        r->dp[i] = (sp_int_digit)t;
7500
        t >>= SP_WORD_SIZE;
7501
    #else
7502
        t = a->dp[i];
7503
        SP_ASM_ADDC(l, h, t);
7504
        r->dp[i] = l;
7505
        l = h;
7506
        h = 0;
7507
    #endif
7508
    }
7509
    /* a is out of digits. Add carry and remaining b digits. */
7510
    for (; j < b->used; i++, j++) {
7511
    #ifndef SQR_MUL_ASM
7512
        t += b->dp[j];
7513
        r->dp[i] = (sp_int_digit)t;
7514
        t >>= SP_WORD_SIZE;
7515
    #else
7516
        t = b->dp[j];
7517
        SP_ASM_ADDC(l, h, t);
7518
        r->dp[i] = l;
7519
        l = h;
7520
        h = 0;
7521
    #endif
7522
    }
7523
#else
7524
0
    (void)o;
7525
7526
    /* Add each digit from a and b where both have values. */
7527
0
    for (; (i < a->used) && (i < b->used); i++) {
7528
    #ifndef SQR_MUL_ASM
7529
        t += a->dp[i];
7530
        t += b->dp[i];
7531
        r->dp[i] = (sp_int_digit)t;
7532
        t >>= SP_WORD_SIZE;
7533
    #else
7534
0
        t = a->dp[i];
7535
0
        SP_ASM_ADDC(l, h, t);
7536
0
        t = b->dp[i];
7537
0
        SP_ASM_ADDC(l, h, t);
7538
0
        r->dp[i] = l;
7539
0
        l = h;
7540
0
        h = 0;
7541
0
    #endif
7542
0
    }
7543
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
7544
0
    for (; i < a->used; i++) {
7545
    #ifndef SQR_MUL_ASM
7546
        t += a->dp[i];
7547
        r->dp[i] = (sp_int_digit)t;
7548
        t >>= SP_WORD_SIZE;
7549
    #else
7550
0
        t = a->dp[i];
7551
0
        SP_ASM_ADDC(l, h, t);
7552
0
        r->dp[i] = l;
7553
0
        l = h;
7554
0
        h = 0;
7555
0
    #endif
7556
0
    }
7557
    /* a is out of digits. Add carry and remaining b digits. */
7558
0
    for (; i < b->used; i++) {
7559
    #ifndef SQR_MUL_ASM
7560
        t += b->dp[i];
7561
        r->dp[i] = (sp_int_digit)t;
7562
        t >>= SP_WORD_SIZE;
7563
    #else
7564
0
        t = b->dp[i];
7565
0
        SP_ASM_ADDC(l, h, t);
7566
0
        r->dp[i] = l;
7567
0
        l = h;
7568
0
        h = 0;
7569
0
    #endif
7570
0
    }
7571
0
#endif
7572
7573
    /* Set used based on last digit put in. */
7574
0
    r->used = i;
7575
    /* Put in carry. */
7576
#ifndef SQR_MUL_ASM
7577
    r->dp[i] = (sp_int_digit)t;
7578
    r->used = (sp_size_t)(r->used + (sp_size_t)(t != 0));
7579
#else
7580
0
    r->dp[i] = l;
7581
0
    r->used = (sp_size_t)(r->used + (sp_size_t)(l != 0));
7582
0
#endif
7583
7584
    /* Remove leading zeros. */
7585
0
    sp_clamp(r);
7586
0
}
7587
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7588
7589
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
7590
    !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
7591
    !defined(WOLFSSL_RSA_VERIFY_ONLY))
7592
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
7593
 * a must be greater than b.
7594
 *
7595
 * When using offset, r == a is faster.
7596
 *
7597
 * @param  [in]   a  SP integer to subtract from.
7598
 * @param  [in]   b  SP integer to subtract.
7599
 * @param  [out]  r  SP integer to store result in.
7600
 * @param  [in]   o  Number of digits to offset b.
7601
 */
7602
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
7603
    sp_size_t o)
7604
0
{
7605
0
    sp_size_t i = 0;
7606
0
    sp_size_t j;
7607
#ifndef SQR_MUL_ASM
7608
    sp_int_sword t = 0;
7609
#else
7610
0
    sp_int_digit l = 0;
7611
0
    sp_int_digit h = 0;
7612
0
#endif
7613
7614
    /* Need to copy digits up to offset into result. */
7615
0
    if (r != a) {
7616
0
        for (; (i < o) && (i < a->used); i++) {
7617
0
            r->dp[i] = a->dp[i];
7618
0
        }
7619
0
    }
7620
0
    else {
7621
0
        i = o;
7622
0
    }
7623
    /* Index to add at is the offset now. */
7624
7625
0
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
7626
    #ifndef SQR_MUL_ASM
7627
        /* Add a into and subtract b from current value. */
7628
        t += a->dp[i];
7629
        t -= b->dp[j];
7630
        /* Store low digit in result. */
7631
        r->dp[i] = (sp_int_digit)t;
7632
        /* Move high digit down. */
7633
        t >>= SP_WORD_SIZE;
7634
    #else
7635
        /* Add a into and subtract b from current value. */
7636
0
        SP_ASM_ADDC(l, h, a->dp[i]);
7637
0
        SP_ASM_SUBB(l, h, b->dp[j]);
7638
        /* Store low digit in result. */
7639
0
        r->dp[i] = l;
7640
        /* Move high digit down. */
7641
0
        l = h;
7642
        /* High digit is 0 when positive or -1 on negative. */
7643
0
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7644
0
    #endif
7645
0
    }
7646
0
    for (; i < a->used; i++) {
7647
    #ifndef SQR_MUL_ASM
7648
        /* Add a into current value. */
7649
        t += a->dp[i];
7650
        /* Store low digit in result. */
7651
        r->dp[i] = (sp_int_digit)t;
7652
        /* Move high digit down. */
7653
        t >>= SP_WORD_SIZE;
7654
    #else
7655
        /* Add a into current value. */
7656
0
        SP_ASM_ADDC(l, h, a->dp[i]);
7657
        /* Store low digit in result. */
7658
0
        r->dp[i] = l;
7659
        /* Move high digit down. */
7660
0
        l = h;
7661
        /* High digit is 0 when positive or -1 on negative. */
7662
0
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
7663
0
    #endif
7664
0
    }
7665
7666
    /* Set used based on last digit put in. */
7667
0
    r->used = i;
7668
    /* Remove leading zeros. */
7669
0
    sp_clamp(r);
7670
0
}
7671
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
7672
        * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
7673
7674
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
7675
/* Add b to a into r: r = a + b
7676
 *
7677
 * @param  [in]   a  SP integer to add to.
7678
 * @param  [in]   b  SP integer to add.
7679
 * @param  [out]  r  SP integer to store result in.
7680
 *
7681
 * @return  MP_OKAY on success.
7682
 * @return  MP_VAL when a, b, or r is NULL.
7683
 */
7684
int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
7685
0
{
7686
0
    int err = MP_OKAY;
7687
7688
    /* Validate parameters. */
7689
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7690
0
        err = MP_VAL;
7691
0
    }
7692
    /* Check that r as big as a and b plus one word. */
7693
0
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7694
0
        err = MP_VAL;
7695
0
    }
7696
7697
0
    if (err == MP_OKAY) {
7698
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7699
        /* Add two positive numbers. */
7700
0
        _sp_add_off(a, b, r, 0);
7701
    #else
7702
        /* Same sign then add absolute values and use sign. */
7703
        if (a->sign == b->sign) {
7704
            _sp_add_off(a, b, r, 0);
7705
            r->sign = a->sign;
7706
        }
7707
        /* Different sign and abs(a) >= abs(b). */
7708
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7709
            /* Subtract absolute values and use sign of a unless result 0. */
7710
            _sp_sub_off(a, b, r, 0);
7711
            if (sp_iszero(r)) {
7712
                r->sign = MP_ZPOS;
7713
            }
7714
            else {
7715
                r->sign = a->sign;
7716
            }
7717
        }
7718
        /* Different sign and abs(a) < abs(b). */
7719
        else {
7720
            /* Reverse subtract absolute values and use sign of b. */
7721
            _sp_sub_off(b, a, r, 0);
7722
            r->sign = b->sign;
7723
        }
7724
    #endif
7725
0
    }
7726
7727
0
    return err;
7728
0
}
7729
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
7730
7731
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
7732
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
7733
/* Subtract b from a into r: r = a - b
7734
 *
7735
 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
7736
 *
7737
 * @param  [in]   a  SP integer to subtract from.
7738
 * @param  [in]   b  SP integer to subtract.
7739
 * @param  [out]  r  SP integer to store result in.
7740
 *
7741
 * @return  MP_OKAY on success.
7742
 * @return  MP_VAL when a, b, or r is NULL.
7743
 */
7744
int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
7745
0
{
7746
0
    int err = MP_OKAY;
7747
7748
    /* Validate parameters. */
7749
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
7750
0
        err = MP_VAL;
7751
0
    }
7752
    /* Check that r as big as a and b plus one word. */
7753
0
    if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
7754
0
        err = MP_VAL;
7755
0
    }
7756
7757
0
    if (err == MP_OKAY) {
7758
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
7759
        /* Subtract positive numbers b from a. */
7760
0
        _sp_sub_off(a, b, r, 0);
7761
    #else
7762
        /* Different sign. */
7763
        if (a->sign != b->sign) {
7764
            /* Add absolute values and use sign of a. */
7765
            _sp_add_off(a, b, r, 0);
7766
            r->sign = a->sign;
7767
        }
7768
        /* Same sign and abs(a) >= abs(b). */
7769
        else if (_sp_cmp_abs(a, b) != MP_LT) {
7770
            /* Subtract absolute values and use sign of a unless result 0. */
7771
            _sp_sub_off(a, b, r, 0);
7772
            if (sp_iszero(r)) {
7773
                r->sign = MP_ZPOS;
7774
            }
7775
            else {
7776
                r->sign = a->sign;
7777
            }
7778
        }
7779
        /* Same sign and abs(a) < abs(b). */
7780
        else {
7781
            /* Reverse subtract absolute values and use opposite sign of a */
7782
            _sp_sub_off(b, a, r, 0);
7783
            r->sign = 1 - a->sign;
7784
        }
7785
    #endif
7786
0
    }
7787
7788
0
    return err;
7789
0
}
7790
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
7791
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
7792
7793
/****************************
7794
 * Add/Subtract mod functions
7795
 ****************************/
7796
7797
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
7798
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
7799
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
7800
/* Add two value and reduce: r = (a + b) % m
7801
 *
7802
 * @param  [in]   a  SP integer to add.
7803
 * @param  [in]   b  SP integer to add with.
7804
 * @param  [in]   m  SP integer that is the modulus.
7805
 * @param  [out]  r  SP integer to hold result.
7806
 *
7807
 * @return  MP_OKAY on success.
7808
 * @return  MP_MEM when dynamic memory allocation fails.
7809
 */
7810
static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
7811
    sp_int* r)
7812
0
{
7813
0
    int err = MP_OKAY;
7814
    /* Calculate used based on digits used in a and b. */
7815
0
    sp_size_t used = (sp_size_t)(((a->used >= b->used) ? a->used + 1U : b->used + 1U));
7816
0
    DECL_SP_INT(t, used);
7817
7818
    /* Allocate a temporary SP int to hold sum. */
7819
0
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
7820
7821
0
    if (err == MP_OKAY) {
7822
        /* Do sum. */
7823
0
        err = sp_add(a, b, t);
7824
0
    }
7825
0
    if (err == MP_OKAY) {
7826
        /* Mod result. */
7827
0
        err = sp_mod(t, m, r);
7828
0
    }
7829
7830
0
    FREE_SP_INT(t, NULL);
7831
0
    return err;
7832
0
}
7833
7834
/* Add two value and reduce: r = (a + b) % m
7835
 *
7836
 * @param  [in]   a  SP integer to add.
7837
 * @param  [in]   b  SP integer to add with.
7838
 * @param  [in]   m  SP integer that is the modulus.
7839
 * @param  [out]  r  SP integer to hold result.
7840
 *
7841
 * @return  MP_OKAY on success.
7842
 * @return  MP_VAL when a, b, m or r is NULL.
7843
 * @return  MP_MEM when dynamic memory allocation fails.
7844
 */
7845
int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7846
0
{
7847
0
    int err = MP_OKAY;
7848
7849
    /* Validate parameters. */
7850
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7851
0
        err = MP_VAL;
7852
0
    }
7853
    /* Ensure a and b aren't too big a number to operate on. */
7854
0
    else if (a->used >= SP_INT_DIGITS) {
7855
0
        err = MP_VAL;
7856
0
    }
7857
0
    else if (b->used >= SP_INT_DIGITS) {
7858
0
        err = MP_VAL;
7859
0
    }
7860
7861
7862
#if 0
7863
    if (err == MP_OKAY) {
7864
        sp_print(a, "a");
7865
        sp_print(b, "b");
7866
        sp_print(m, "m");
7867
    }
7868
#endif
7869
0
    if (err == MP_OKAY) {
7870
        /* Do add and modular reduction. */
7871
0
        err = _sp_addmod(a, b, m, r);
7872
0
    }
7873
#if 0
7874
    if (err == MP_OKAY) {
7875
        sp_print(r, "rma");
7876
    }
7877
#endif
7878
7879
0
    return err;
7880
0
}
7881
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
7882
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
7883
7884
#if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
7885
    defined(HAVE_ECC))
7886
/* Sub b from a and reduce: r = (a - b) % m
7887
 * Result is always positive.
7888
 *
7889
 * @param  [in]   a  SP integer to subtract from
7890
 * @param  [in]   b  SP integer to subtract.
7891
 * @param  [in]   m  SP integer that is the modulus.
7892
 * @param  [out]  r  SP integer to hold result.
7893
 *
7894
 * @return  MP_OKAY on success.
7895
 * @return  MP_MEM when dynamic memory allocation fails.
7896
 */
7897
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
7898
    sp_int* r)
7899
0
{
7900
0
    int err = MP_OKAY;
7901
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
7902
0
    unsigned int used = ((a->used >= m->used) ?
7903
0
        ((a->used >= b->used) ? (a->used + 1U) : (b->used + 1U)) :
7904
0
        ((b->used >= m->used)) ? (b->used + 1U) : (m->used + 1U));
7905
0
    DECL_SP_INT(t0, used);
7906
0
    DECL_SP_INT(t1, used);
7907
7908
0
    ALLOC_SP_INT_SIZE(t0, used, err, NULL);
7909
0
    ALLOC_SP_INT_SIZE(t1, used, err, NULL);
7910
0
    if (err == MP_OKAY) {
7911
        /* Reduce a to less than m. */
7912
0
        if (_sp_cmp(a, m) != MP_LT) {
7913
0
            err = sp_mod(a, m, t0);
7914
0
            a = t0;
7915
0
        }
7916
0
    }
7917
0
    if (err == MP_OKAY) {
7918
        /* Reduce b to less than m. */
7919
0
        if (_sp_cmp(b, m) != MP_LT) {
7920
0
            err = sp_mod(b, m, t1);
7921
0
            b = t1;
7922
0
        }
7923
0
    }
7924
0
    if (err == MP_OKAY) {
7925
        /* Add m to a if a smaller than b. */
7926
0
        if (_sp_cmp(a, b) == MP_LT) {
7927
0
            err = sp_add(a, m, t0);
7928
0
            a = t0;
7929
0
        }
7930
0
    }
7931
0
    if (err == MP_OKAY) {
7932
        /* Subtract b from a. */
7933
0
        err = sp_sub(a, b, r);
7934
0
    }
7935
7936
0
    FREE_SP_INT(t0, NULL);
7937
0
    FREE_SP_INT(t1, NULL);
7938
#else /* WOLFSSL_SP_INT_NEGATIVE */
7939
    sp_size_t used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
7940
    DECL_SP_INT(t, used);
7941
7942
    ALLOC_SP_INT_SIZE(t, used, err, NULL);
7943
    /* Subtract b from a into temporary. */
7944
    if (err == MP_OKAY) {
7945
        err = sp_sub(a, b, t);
7946
    }
7947
    if (err == MP_OKAY) {
7948
        /* Reduce result mod m into result. */
7949
        err = sp_mod(t, m, r);
7950
    }
7951
    FREE_SP_INT(t, NULL);
7952
#endif /* WOLFSSL_SP_INT_NEGATIVE */
7953
7954
0
    return err;
7955
0
}
7956
7957
/* Sub b from a and reduce: r = (a - b) % m
7958
 * Result is always positive.
7959
 *
7960
 * @param  [in]   a  SP integer to subtract from
7961
 * @param  [in]   b  SP integer to subtract.
7962
 * @param  [in]   m  SP integer that is the modulus.
7963
 * @param  [out]  r  SP integer to hold result.
7964
 *
7965
 * @return  MP_OKAY on success.
7966
 * @return  MP_VAL when a, b, m or r is NULL.
7967
 * @return  MP_MEM when dynamic memory allocation fails.
7968
 */
7969
int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
7970
0
{
7971
0
    int err = MP_OKAY;
7972
    /* Validate parameters. */
7973
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
7974
0
        err = MP_VAL;
7975
0
    }
7976
    /* Ensure a, b and m aren't too big a number to operate on. */
7977
0
    else if (a->used >= SP_INT_DIGITS) {
7978
0
        err = MP_VAL;
7979
0
    }
7980
0
    else if (b->used >= SP_INT_DIGITS) {
7981
0
        err = MP_VAL;
7982
0
    }
7983
0
    else if (m->used >= SP_INT_DIGITS) {
7984
0
        err = MP_VAL;
7985
0
    }
7986
7987
#if 0
7988
    if (err == MP_OKAY) {
7989
        sp_print(a, "a");
7990
        sp_print(b, "b");
7991
        sp_print(m, "m");
7992
    }
7993
#endif
7994
0
    if (err == MP_OKAY) {
7995
        /* Do submod. */
7996
0
        err = _sp_submod(a, b, m, r);
7997
0
    }
7998
#if 0
7999
    if (err == MP_OKAY) {
8000
        sp_print(r, "rms");
8001
    }
8002
#endif
8003
8004
0
    return err;
8005
0
}
8006
#endif /* WOLFSSL_SP_MATH_ALL */
8007
8008
/* Constant time clamping.
8009
 *
 * Reduces a->used by the number of leading (most significant) zero digits.
 * The loop always visits all a->used digits and uses masks, rather than
 * branches on the digit values, to decide whether to lower the count.
 *
8010
 * @param [in, out] a  SP integer to clamp.
8011
 */
8012
static void sp_clamp_ct(sp_int* a)
8013
0
{
8014
0
    int i;
8015
0
    sp_size_t used = a->used;
8016
0
    sp_size_t mask = (sp_size_t)-1;
8017
8018
0
    for (i = (int)a->used - 1; i >= 0; i--) {
8019
#if ((SP_WORD_SIZE == 64) && \
8020
     (defined(_WIN64) || !defined(WOLFSSL_UINT128_T_DEFINED))) || \
8021
    ((SP_WORD_SIZE == 32) && defined(NO_64BIT))
8022
        /* No double-width type is used here. The top bit of (~d & (d - 1))
         * is set only when digit d is zero; it is shifted down as a signed
         * value to form the mask. */
        sp_int_digit negVal = ~a->dp[i];
8023
        sp_int_digit minusOne = a->dp[i] - 1;
8024
        sp_int_digit zeroMask =
8025
            (sp_int_digit)((sp_int_sdigit)(negVal & minusOne) >>
8026
                           (SP_WORD_SIZE - 1));
8027
#else
8028
0
        /* (d - 1) in the signed double-width type is negative only when
         * digit d is zero; shifting down by a word leaves the mask. */
        sp_size_t zeroMask =
8029
0
            (sp_size_t)((((sp_int_sword)a->dp[i]) - 1) >> SP_WORD_SIZE);
8030
0
#endif
8031
0
        /* mask stays all ones only while every digit seen so far is zero. */
        mask &= (sp_size_t)zeroMask;
8032
0
        /* Adding an all-ones mask lowers used by one (modulo the width of
         * sp_size_t); adding zero leaves it unchanged. */
        used = (sp_size_t)(used + mask);
8033
0
    }
8034
0
    a->used = used;
8035
0
}
8036
8037
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8038
/* Add two values and reduce: r = (a + b) % m
8039
 *
8040
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8041
 *
8042
 * Assumes a, b, m and r are not NULL.
8043
 * m and r must not be the same pointer.
8044
 *
8045
 * @param  [in]   a  SP integer to add.
8046
 * @param  [in]   b  SP integer to add with.
8047
 * @param  [in]   m  SP integer that is the modulus.
8048
 * @param  [out]  r  SP integer to hold result.
8049
 *
8050
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r is too small to hold m->used digits or r is m.
8051
 */
8052
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8053
0
{
8054
0
    int err = MP_OKAY;
8055
#ifndef SQR_MUL_ASM
8056
    sp_int_sword w;
8057
    sp_int_sword s;
8058
#else
8059
0
    sp_int_digit wl;
8060
0
    sp_int_digit wh;
8061
0
    sp_int_digit sl;
8062
0
    sp_int_digit sh;
8063
0
    sp_int_digit t;
8064
0
#endif
8065
0
    sp_int_digit mask;
8066
0
    sp_int_digit mask_a = (sp_int_digit)-1;
8067
0
    sp_int_digit mask_b = (sp_int_digit)-1;
8068
0
    sp_size_t i;
8069
8070
    /* Check result is as big as modulus. */
8071
0
    if (m->used > r->size) {
8072
0
        err = MP_VAL;
8073
0
    }
8074
    /* Validate parameters. */
8075
0
    if ((err == MP_OKAY) && (r == m)) {
8076
0
        err = MP_VAL;
8077
0
    }
8078
8079
0
    if (err == MP_OKAY) {
8080
#if 0
8081
        sp_print(a, "a");
8082
        sp_print(b, "b");
8083
        sp_print(m, "m");
8084
#endif
8085
8086
        /* Add a to b into r. Do the subtract of modulus but don't store result.
8087
         * When subtract result is negative, the overflow will be negative.
8088
         * Only need to subtract mod when result is positive - overflow is
8089
         * positive.
8090
         */
8091
    #ifndef SQR_MUL_ASM
8092
        w = 0;
8093
        s = 0;
8094
    #else
8095
0
        wl = 0;
8096
0
        sl = 0;
8097
0
        sh = 0;
8098
0
    #endif
8099
        /* Constant time - add modulus digits worth from a and b. */
8100
0
        for (i = 0; i < m->used; i++) {
8101
            /* Values past 'used' are not initialized. */
8102
0
            mask_a += (i == a->used);
8103
0
            mask_b += (i == b->used);
8104
8105
        #ifndef SQR_MUL_ASM
8106
            /* Add next digits from a and b to current value. */
8107
            w         += a->dp[i] & mask_a;
8108
            w         += b->dp[i] & mask_b;
8109
            /* Store low digit in result. */
8110
            r->dp[i]   = (sp_int_digit)w;
8111
            /* Add result to reducing value. */
8112
            s         += (sp_int_digit)w;
8113
            /* Subtract next digit of modulus. */
8114
            s         -= m->dp[i];
8115
            /* Move high digit of reduced result down. */
8116
            s        >>= DIGIT_BIT;
8117
            /* Move high digit of sum result down. */
8118
            w        >>= DIGIT_BIT;
8119
        #else
8120
0
            wh = 0;
8121
            /* Add next digits from a and b to current value. */
8122
0
            t = a->dp[i] & mask_a;
8123
0
            SP_ASM_ADDC_REG(wl, wh, t);
8124
0
            t = b->dp[i] & mask_b;
8125
0
            SP_ASM_ADDC_REG(wl, wh, t);
8126
            /* Store low digit in result. */
8127
0
            r->dp[i] = wl;
8128
            /* Add result to reducing value. */
8129
0
            SP_ASM_ADDC_REG(sl, sh, wl);
8130
            /* Subtract next digit of modulus. */
8131
0
            SP_ASM_SUBB(sl, sh, m->dp[i]);
8132
            /* Move high digit of reduced result down. */
8133
0
            sl = sh;
8134
            /* High digit is 0 when positive or -1 on negative. */
8135
0
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
8136
            /* Move high digit of sum result down. */
8137
0
            wl = wh;
8138
0
        #endif
8139
0
        }
8140
    #ifndef SQR_MUL_ASM
8141
        /* Add carry into reduced result. */
8142
        s += (sp_int_digit)w;
8143
        /* s will be positive when subtracting modulus is needed. */
8144
        mask = (sp_int_digit)0 - (s >= 0);
8145
    #else
8146
        /* Add carry into reduced result. */
8147
0
        SP_ASM_ADDC_REG(sl, sh, wl);
8148
        /* s will be positive when subtracting modulus is needed. */
8149
0
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
8150
0
    #endif
8151
8152
        /* Constant time, conditionally, subtract modulus from sum. */
8153
    #ifndef SQR_MUL_ASM
8154
        w = 0;
8155
    #else
8156
0
        wl = 0;
8157
0
        wh = 0;
8158
0
    #endif
8159
0
        for (i = 0; i < m->used; i++) {
8160
        #ifndef SQR_MUL_ASM
8161
            /* Add result to current value and conditionally subtract modulus.
8162
             */
8163
            w         += r->dp[i];
8164
            w         -= m->dp[i] & mask;
8165
            /* Store low digit in result. */
8166
            r->dp[i]   = (sp_int_digit)w;
8167
            /* Move high digit of sum result down. */
8168
            w        >>= DIGIT_BIT;
8169
        #else
8170
            /* Add result to current value and conditionally subtract modulus.
8171
             */
8172
0
            SP_ASM_ADDC(wl, wh, r->dp[i]);
8173
0
            t = m->dp[i] & mask;
8174
0
            SP_ASM_SUBB_REG(wl, wh, t);
8175
            /* Store low digit in result. */
8176
0
            r->dp[i] = wl;
8177
            /* Move high digit of sum result down. */
8178
0
            wl = wh;
8179
            /* High digit is 0 when positive or -1 on negative. */
8180
0
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
8181
0
        #endif
8182
0
        }
8183
        /* Result will always have digits equal to or less than those in
8184
         * modulus. */
8185
0
        r->used = i;
8186
    #ifdef WOLFSSL_SP_INT_NEGATIVE
8187
        r->sign = MP_ZPOS;
8188
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
8189
        /* Remove leading zeros. */
8190
0
        sp_clamp_ct(r);
8191
8192
#if 0
8193
        sp_print(r, "rma");
8194
#endif
8195
0
    }
8196
8197
0
    return err;
8198
0
}
8199
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8200
8201
#if (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
8202
    (defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
8203
     defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
8204
     defined(OPENSSL_ALL))
8205
/* Sub b from a modulo m: r = (a - b) % m
8206
 *
8207
 * Result is always positive.
8208
 *
8209
 * Assumes a, b, m and r are not NULL.
8210
 * m and r must not be the same pointer.
8211
 *
8212
 * @param  [in]   a  SP integer to subtract from
8213
 * @param  [in]   b  SP integer to subtract.
8214
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [in]   max_size  Number of digits of a and b to subtract.
8215
 * @param  [out]  r  SP integer to hold result.
8216
 *
8217
 * No value is returned; the result is written to r.
8218
 */
8219
static void _sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m,
8220
    unsigned int max_size, sp_int* r)
8221
0
{
8222
#ifndef SQR_MUL_ASM
8223
    sp_int_sword w;
8224
#else
8225
0
    sp_int_digit l;
8226
0
    sp_int_digit h;
8227
0
    sp_int_digit t;
8228
0
#endif
8229
0
    sp_int_digit mask;
8230
0
    sp_int_digit mask_a = (sp_int_digit)-1;
8231
0
    sp_int_digit mask_b = (sp_int_digit)-1;
8232
0
    unsigned int i;
8233
8234
    /* In constant time, subtract b from a putting result in r. */
8235
#ifndef SQR_MUL_ASM
8236
    w = 0;
8237
#else
8238
0
    l = 0;
8239
0
    h = 0;
8240
0
#endif
8241
0
    for (i = 0; i < max_size; i++) {
8242
        /* Values past 'used' are not initialized. */
8243
0
        mask_a += (i == a->used);
8244
0
        mask_b += (i == b->used);
8245
8246
    #ifndef SQR_MUL_ASM
8247
        /* Add a to and subtract b from current value. */
8248
        w         += a->dp[i] & mask_a;
8249
        w         -= b->dp[i] & mask_b;
8250
        /* Store low digit in result. */
8251
        r->dp[i]   = (sp_int_digit)w;
8252
        /* Move high digit down. */
8253
        w        >>= DIGIT_BIT;
8254
    #else
8255
        /* Add a and subtract b from current value. */
8256
0
        t = a->dp[i] & mask_a;
8257
0
        SP_ASM_ADDC_REG(l, h, t);
8258
0
        t = b->dp[i] & mask_b;
8259
0
        SP_ASM_SUBB_REG(l, h, t);
8260
        /* Store low digit in result. */
8261
0
        r->dp[i] = l;
8262
        /* Move high digit down. */
8263
0
        l = h;
8264
        /* High digit is 0 when positive or -1 on negative. */
8265
0
        h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
8266
0
    #endif
8267
0
    }
8268
    /* When w is negative then we need to add modulus to make result
8269
     * positive. */
8270
#ifndef SQR_MUL_ASM
8271
    mask = (sp_int_digit)0 - (w < 0);
8272
#else
8273
0
    mask = h;
8274
0
#endif
8275
8276
    /* Constant time, conditionally, add modulus to difference. */
8277
#ifndef SQR_MUL_ASM
8278
    w = 0;
8279
#else
8280
0
    l = 0;
8281
0
#endif
8282
0
    for (i = 0; i < m->used; i++) {
8283
    #ifndef SQR_MUL_ASM
8284
        /* Add result and conditionally modulus to current value. */
8285
        w         += r->dp[i];
8286
        w         += m->dp[i] & mask;
8287
        /* Store low digit in result. */
8288
        r->dp[i]   = (sp_int_digit)w;
8289
        /* Move high digit down. */
8290
        w        >>= DIGIT_BIT;
8291
    #else
8292
0
        h = 0;
8293
        /* Add result and conditionally modulus to current value. */
8294
0
        SP_ASM_ADDC(l, h, r->dp[i]);
8295
0
        t = m->dp[i] & mask;
8296
0
        SP_ASM_ADDC_REG(l, h, t);
8297
        /* Store low digit in result. */
8298
0
        r->dp[i] = l;
8299
        /* Move high digit down. */
8300
0
        l = h;
8301
0
    #endif
8302
0
    }
8303
    /* Result will always have digits equal to or less than those in
8304
     * modulus. */
8305
0
    r->used = (sp_size_t)i;
8306
#ifdef WOLFSSL_SP_INT_NEGATIVE
8307
    r->sign = MP_ZPOS;
8308
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8309
    /* Remove leading zeros. */
8310
0
    sp_clamp_ct(r);
8311
0
}
8312
#endif
8313
8314
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
8315
/* Sub b from a modulo m: r = (a - b) % m
8316
 * Result is always positive.
8317
 *
8318
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
8319
 *
8320
 * Assumes a, b, m and r are not NULL.
8321
 * m and r must not be the same pointer.
8322
 *
8323
 * @param  [in]   a  SP integer to subtract from.
8324
 * @param  [in]   b  SP integer to subtract.
8325
 * @param  [in]   m  SP integer that is the modulus.
8326
 * @param  [out]  r  SP integer to hold result.
8327
 *
8328
 * @return  MP_OKAY on success.
 * @return  MP_VAL when r cannot hold m->used digits or r is m.
8329
 */
8330
int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
8331
0
{
8332
0
    int err = MP_OKAY;
8333
8334
    /* Check result can hold as many digits as the modulus uses. */
8335
0
    if (m->used > r->size) {
8336
0
        err = MP_VAL;
8337
0
    }
8338
    /* Validate parameters - result must not be the modulus. */
8339
0
    if ((err == MP_OKAY) && (r == m)) {
8340
0
        err = MP_VAL;
8341
0
    }
8342
8343
0
    if (err == MP_OKAY) {
8344
#if 0
8345
        sp_print(a, "a");
8346
        sp_print(b, "b");
8347
        sp_print(m, "m");
8348
#endif
8349
8350
0
        /* Subtract over m->used digits of a and b. */
        _sp_submod_ct(a, b, m, m->used, r);
8351
8352
#if 0
8353
        sp_print(r, "rms");
8354
#endif
8355
0
    }
8356
8357
0
    return err;
8358
0
}
8359
#endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
8360
8361
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC) && \
8362
    defined(WOLFSSL_ECC_BLIND_K)
8363
/* XOR a and b into r, keeping only the low len bytes of the result.
 *
 * Sets r->used to the number of digits needed for len * 8 bits, XORs that
 * many digits of a and b, clears any bits above len * 8 in the top digit and
 * then clamps r in constant time.
 * Does nothing when a, b or r is NULL.
 *
 * NOTE(review): there is no check here that r has room for, or that a and b
 * hold valid data in, that many digits - presumably callers guarantee this;
 * confirm.
 *
 * @param  [in]   a    SP integer to XOR.
 * @param  [in]   b    SP integer to XOR with.
 * @param  [in]   len  Length of the result in bytes.
 * @param  [out]  r    SP integer to hold result.
 */
void sp_xor_ct(const sp_int* a, const sp_int* b, int len, sp_int* r)
8364
{
8365
    if ((a != NULL) && (b != NULL) && (r != NULL)) {
8366
        unsigned int i;
8367
8368
        /* Number of digits needed to hold len bytes, rounded up. */
        r->used = (len * 8 + SP_WORD_SIZE - 1) / SP_WORD_SIZE;
8369
        for (i = 0; i < r->used; i++) {
8370
            r->dp[i] = a->dp[i] ^ b->dp[i];
8371
        }
8372
        /* Count of bits used in a partially filled top digit. */
        i = (len * 8) % SP_WORD_SIZE;
8373
        if (i > 0) {
8374
            /* Keep only the low i bits of the top digit. */
            r->dp[r->used - 1] &= ((sp_int_digit)1 << i) - 1;
8375
        }
8376
        /* Remove leading zeros. */
8377
        sp_clamp_ct(r);
8378
    }
8379
}
8380
#endif
8381
8382
/********************
8383
 * Shifting functions
8384
 ********************/
8385
8386
#if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
8387
    defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
8388
/* Left shift the multi-precision number by a number of digits:
 *   a = a << (s * SP_WORD_SIZE)
8389
 *
8390
 * @param  [in,out]  a  SP integer to shift.
8391
 * @param  [in]      s  Number of digits to shift.
8392
 *
8393
 * @return  MP_OKAY on success.
8394
 * @return  MP_VAL when a is NULL, s is negative or the result is too big.
8395
 */
8396
int sp_lshd(sp_int* a, int s)
8397
0
{
8398
0
    int err = MP_OKAY;
8399
8400
    /* Validate parameters. */
8401
0
    if ((a == NULL) || (s < 0)) {
8402
0
        err = MP_VAL;
8403
0
    }
8404
    /* Ensure a has room for its current digits plus s more. */
8405
0
    if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
8406
0
        err = MP_VAL;
8407
0
    }
8408
0
    if (err == MP_OKAY) {
8409
        /* Move up digits - source and destination overlap. */
8410
0
        XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8411
        /* Back fill the vacated low digits with zeros. */
8412
0
        XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
8413
        /* Update used. */
8414
0
        a->used = (sp_size_t)(a->used + s);
8415
        /* Remove leading zeros. */
8416
0
        sp_clamp(a);
8417
0
    }
8418
8419
0
    return err;
8420
0
}
8421
#endif
8422
8423
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8424
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8425
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8426
/* Left shift the multi-precision number by n bits.
8427
 * n may be larger than the word size in bits.
8428
 *
8429
 * Used by sp_mul_2d() and other internal functions.
 * Nothing is done when a has no used digits.
8430
 *
8431
 * @param  [in,out]  a  SP integer to shift.
8432
 * @param  [in]      n  Number of bits to shift left.
8433
 *
8434
 * @return  MP_OKAY on success.
8435
 * @return  MP_VAL when the result is too big.
8436
 */
8437
static int sp_lshb(sp_int* a, int n)
8438
0
{
8439
0
    int err = MP_OKAY;
8440
8441
0
    if (a->used != 0) {
8442
        /* Calculate number of whole digits to shift. */
8443
0
        sp_size_t s = (sp_size_t)n >> SP_WORD_SHIFT;
8444
8445
        /* Ensure number has enough digits for result. The check is >= so
         * that there is a spare digit for bits shifted out of the top digit.
         */
8446
0
        if (a->used + s >= a->size) {
8447
0
            err = MP_VAL;
8448
0
        }
8449
0
        if (err == MP_OKAY) {
8450
            /* Get count of bits to move in digit. */
8451
0
            n &= (int)SP_WORD_MASK;
8452
            /* Check whether bits must be moved between digits. */
8453
0
            if (n != 0) {
8454
0
                unsigned int i;
8455
8456
                /* Shift up starting at most significant digit. */
8457
                /* Get new most significant digit. */
8458
0
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
8459
                /* Shift up each digit, pulling in top bits of digit below. */
8460
0
                for (i = a->used - 1U; i >= 1U; i--) {
8461
0
                    a->dp[i + s] = (a->dp[i] << n) |
8462
0
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
8463
0
                }
8464
                /* Shift up least significant digit. */
8465
0
                a->dp[s] = a->dp[0] << n;
8466
                /* Add new high digit unless zero. */
8467
0
                if (v != 0) {
8468
0
                    a->dp[a->used + s] = v;
8469
0
                    a->used++;
8470
0
                }
8471
0
            }
8472
            /* Only whole digits to move - skip when there are none. */
8473
0
            else if (s > 0) {
8474
                /* Move up digits. */
8475
0
                XMEMMOVE(a->dp + s, a->dp, a->used * (word32)SP_WORD_SIZEOF);
8476
0
            }
8477
8478
            /* Update used digit count. */
8479
0
            a->used = (sp_size_t)(a->used + s);
8480
            /* Back fill the low s digits with zeros. */
8481
0
            XMEMSET(a->dp, 0, (word32)SP_WORD_SIZEOF * s);
8482
0
        }
8483
0
    }
8484
8485
0
    return err;
8486
0
}
8487
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8488
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
8489
8490
#ifdef WOLFSSL_SP_MATH_ALL
8491
/* Shift a right by c digits: a = a >> (c * SP_WORD_SIZE)
8492
 *
 * Nothing is done when a is NULL or c is not positive.
 *
8493
 * @param  [in, out] a  SP integer to shift.
8494
 * @param  [in]      c  Number of digits to shift.
8495
 */
8496
void sp_rshd(sp_int* a, int c)
8497
0
{
8498
    /* Do shift if we have an SP int and a positive digit count. */
8499
0
    if ((a != NULL) && (c > 0)) {
8500
        /* Make zero if shift removes all digits. */
8501
0
        if ((sp_size_t)c >= a->used) {
8502
0
            _sp_zero(a);
8503
0
        }
8504
0
        else {
8505
0
            sp_size_t i;
8506
8507
            /* Update used digits count. */
8508
0
            a->used = (sp_size_t)(a->used - c);
8509
            /* Move digits down - c is reused as the source index. */
8510
0
            for (i = 0; i < a->used; i++, c++) {
8511
0
                a->dp[i] = a->dp[c];
8512
0
            }
8513
0
        }
8514
0
    }
8515
0
}
8516
#endif /* WOLFSSL_SP_MATH_ALL */
8517
8518
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8519
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8520
    defined(WOLFSSL_HAVE_SP_DH)
8521
/* Shift a right by n bits into r: r = a >> n
8522
 *
 * r may be the same object as a.
 * NOTE(review): r is not checked for NULL here - presumably callers always
 * pass a valid result; confirm.
 *
8523
 * @param  [in]   a  SP integer to shift.
8524
 * @param  [in]   n  Number of bits to shift.
8525
 * @param  [out]  r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL, n is negative or r is too small to hold
 *          the digits that remain after the shift.
8526
 */
8527
int sp_rshb(const sp_int* a, int n, sp_int* r)
8528
0
{
8529
0
    int err = MP_OKAY;
8530
    /* Number of digits to shift down. */
8531
0
    sp_size_t i;
8532
8533
0
    if ((a == NULL) || (n < 0)) {
8534
0
        err = MP_VAL;
8535
0
    }
8536
    /* Handle case where shifting out all digits - result is zero. */
8537
0
    else if ((i = (sp_size_t)(n >> SP_WORD_SHIFT)) >= a->used) {
8538
0
        _sp_zero(r);
8539
0
    }
8540
    /* Change callers when more error cases returned. */
8541
0
    else if ((err == MP_OKAY) && (a->used - i > r->size)) {
8542
0
        err = MP_VAL;
8543
0
    }
8544
0
    else if (err == MP_OKAY) {
8545
0
        sp_size_t j;
8546
8547
        /* Number of bits to shift in digits. */
8548
0
        n &= SP_WORD_SIZE - 1;
8549
        /* Handle simple case - whole digits only. */
8550
0
        if (n == 0) {
8551
            /* Set the count of used digits. */
8552
0
            r->used = (sp_size_t)(a->used - i);
8553
            /* Move digits down - regions overlap when shifting in place. */
8554
0
            if (r == a) {
8555
0
                XMEMMOVE(r->dp, r->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8556
0
            }
8557
0
            else {
8558
0
                XMEMCPY(r->dp, a->dp + i, (word32)SP_WORD_SIZEOF * r->used);
8559
0
            }
8560
0
        }
8561
0
        else {
8562
            /* Move the bits down starting at least significant digit. */
8563
0
            for (j = 0; i < a->used - 1; i++, j++)
8564
0
                r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
8565
            /* Most significant digit has no higher digit to pull from. */
8566
0
            r->dp[j] = a->dp[i] >> n;
8567
            /* Set the count of used digits - top digit counts when not zero. */
8568
0
            r->used = (sp_size_t)(j + (r->dp[j] > 0));
8569
0
        }
8570
#ifdef WOLFSSL_SP_INT_NEGATIVE
8571
        if (sp_iszero(r)) {
8572
            /* Set zero sign. */
8573
            r->sign = MP_ZPOS;
8574
        }
8575
        else {
8576
            /* Retain sign. */
8577
            r->sign = a->sign;
8578
        }
8579
#endif
8580
0
    }
8581
8582
0
    return err;
8583
0
}
8584
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
8585
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
8586
8587
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
8588
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8589
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
8590
/* Compare the top d->used digits of a with d and, when that top part is
 * greater than or equal to d, subtract d from it once and add one to the
 * quotient digit at index (a->used - d->used).
 *
 * a->used is the same on exit as on entry.
 *
 * @param  [in, out] a  SP integer that is the dividend/remainder.
 * @param  [in]      d  SP integer that is the divisor.
 * @param  [in, out] r  SP integer that is the quotient being built up.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
8591
0
{
8592
0
    sp_size_t i;
8593
8594
    /* Compare top digits of dividend with those of divisor up to last. */
8595
0
    for (i = (sp_size_t)(d->used - 1U); i > 0; i--) {
8596
        /* Break at the first digit where divisor and dividend differ. */
8597
0
        if (a->dp[a->used - d->used + i] != d->dp[i]) {
8598
0
            break;
8599
0
        }
8600
0
    }
8601
    /* Check if top dividend is greater than or equal to divisor. */
8602
0
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
8603
        /* Update quotient result. */
8604
0
        r->dp[a->used - d->used] += 1;
8605
        /* Get 'used' to restore - ensure zeros put into quotient. */
8606
0
        i = a->used;
8607
        /* Subtract d from top of a. */
8608
0
        _sp_sub_off(a, d, a, (sp_size_t)(a->used - d->used));
8609
        /* Restore 'used' on remainder. */
8610
0
        a->used = i;
8611
0
    }
8612
0
}
8613
8614
/* Divide a by d and return the quotient in r and the remainder in a.
8615
 *   r = a / d; a = a % d
8616
 *
8617
 * Note: a is constantly having multiples of d subtracted.
8618
 *
8619
 * @param  [in, out] a      SP integer to be divided and remainder on out.
8620
 * @param  [in]      d      SP integer to divide by.
8621
 * @param  [out]     r      SP integer that is the quotient.
8622
 * @param  [out]     trial  SP integer that is product in trial division.
8623
 *
8624
 * @return  MP_OKAY on success.
8625
 * @return  MP_VAL when operation fails - only when compiling small code.
8626
 */
8627
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
8628
0
{
8629
0
    int err = MP_OKAY;
8630
0
    sp_size_t i;
8631
#ifdef WOLFSSL_SP_SMALL
8632
    int c;
8633
#else
8634
0
    sp_size_t j;
8635
0
    sp_size_t o;
8636
    #ifndef SQR_MUL_ASM
8637
    sp_int_sword sw;
8638
    #else
8639
0
    sp_int_digit sl;
8640
0
    sp_int_digit sh;
8641
0
    sp_int_digit st;
8642
0
    #endif
8643
0
#endif /* WOLFSSL_SP_SMALL */
8644
0
    sp_int_digit t;
8645
0
    sp_int_digit dt;
8646
8647
    /* Set result size to clear. */
8648
0
    r->used = (sp_size_t)(a->used - d->used + 1);
8649
    /* Set all potentially used digits to zero. */
8650
0
    for (i = 0; i < r->used; i++) {
8651
0
        r->dp[i] = 0;
8652
0
    }
8653
#ifdef WOLFSSL_SP_INT_NEGATIVE
8654
    r->sign = MP_ZPOS;
8655
#endif
8656
    /* Get the most significant digit (will have top bit set). */
8657
0
    dt = d->dp[d->used-1];
8658
8659
    /* Handle when a >= d * (2 ^ (SP_WORD_SIZE * x)). */
8660
0
    _sp_div_same_size(a, d, r);
8661
8662
    /* Keep subtracting multiples of d as long as the digit count of a is
8663
     * greater than or equal to d.
8664
     */
8665
0
    for (i = (sp_size_t)(a->used - 1U); i >= d->used; i--) {
8666
        /* When top digits equal, guestimate maximum multiplier.
8667
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
8668
         * That is, for w (word size in bits) > 1, n > 1, let:
8669
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
8670
         * Then,
8671
         *     d * t
8672
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
8673
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
8674
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
8675
         *   = a - 2^((n-1)*w+1) - 2^w + 2
8676
         * d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
8677
         */
8678
0
        if (a->dp[i] == dt) {
8679
0
            t = SP_DIGIT_MAX;
8680
0
        }
8681
0
        else {
8682
            /* Calculate trial quotient by dividing top word of dividend by top
8683
             * digit of divisor.
8684
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
8685
             * Implementations in assembly, using builtins or using
8686
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
8687
             */
8688
0
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
8689
0
        }
8690
#ifdef WOLFSSL_SP_SMALL
8691
        do {
8692
            /* Calculate trial from trial quotient. */
8693
            err = _sp_mul_d(d, t, trial, i - d->used);
8694
            if (err != MP_OKAY) {
8695
                break;
8696
            }
8697
            /* Check if trial is bigger. */
8698
            c = _sp_cmp_abs(trial, a);
8699
            if (c == MP_GT) {
8700
                /* Decrement trial quotient and try again. */
8701
                t--;
8702
            }
8703
        }
8704
        while (c == MP_GT);
8705
8706
        if (err != MP_OKAY) {
8707
            break;
8708
        }
8709
8710
        /* Subtract the trial and add quotient to result. */
8711
        _sp_sub_off(a, trial, a, 0);
8712
        r->dp[i - d->used] += t;
8713
        /* Handle overflow of digit. */
8714
        if (r->dp[i - d->used] < t) {
8715
            r->dp[i + 1 - d->used]++;
8716
        }
8717
#else
8718
        /* Index of lowest digit trial is subtracted from. */
8719
0
        o = (sp_size_t)(i - d->used);
8720
0
        do {
8721
        #ifndef SQR_MUL_ASM
8722
            sp_int_word tw = 0;
8723
        #else
8724
0
            sp_int_digit tl = 0;
8725
0
            sp_int_digit th = 0;
8726
0
        #endif
8727
8728
            /* Multiply divisor by trial quotient. */
8729
0
            for (j = 0; j < d->used; j++) {
8730
            #ifndef SQR_MUL_ASM
8731
                tw += (sp_int_word)d->dp[j] * t;
8732
                trial->dp[j] = (sp_int_digit)tw;
8733
                tw >>= SP_WORD_SIZE;
8734
            #else
8735
0
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
8736
0
                trial->dp[j] = tl;
8737
0
                tl = th;
8738
0
                th = 0;
8739
0
            #endif
8740
0
            }
8741
          #ifndef SQR_MUL_ASM
8742
            trial->dp[j] = (sp_int_digit)tw;
8743
          #else
8744
0
            trial->dp[j] = tl;
8745
0
          #endif
8746
8747
            /* Check trial quotient isn't larger than dividend. */
8748
0
            for (j = d->used; j > 0; j--) {
8749
0
                if (trial->dp[j] != a->dp[j + o]) {
8750
0
                    break;
8751
0
                }
8752
0
            }
8753
            /* Decrement trial quotient if larger and try again. */
8754
0
            if (trial->dp[j] > a->dp[j + o]) {
8755
0
                t--;
8756
0
            }
8757
0
        }
8758
0
        while (trial->dp[j] > a->dp[j + o]);
8759
8760
    #ifndef SQR_MUL_ASM
8761
        sw = 0;
8762
    #else
8763
0
        sl = 0;
8764
0
        sh = 0;
8765
0
    #endif
8766
        /* Subtract trial - don't need to update used. */
8767
0
        for (j = 0; j <= d->used; j++) {
8768
        #ifndef SQR_MUL_ASM
8769
            sw += a->dp[j + o];
8770
            sw -= trial->dp[j];
8771
            a->dp[j + o] = (sp_int_digit)sw;
8772
            sw >>= SP_WORD_SIZE;
8773
        #else
8774
0
            st = a->dp[j + o];
8775
0
            SP_ASM_ADDC(sl, sh, st);
8776
0
            st = trial->dp[j];
8777
0
            SP_ASM_SUBB(sl, sh, st);
8778
0
            a->dp[j + o] = sl;
8779
0
            sl = sh;
8780
0
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
8781
0
        #endif
8782
0
        }
8783
8784
0
        r->dp[o] = t;
8785
0
#endif /* WOLFSSL_SP_SMALL */
8786
0
    }
8787
    /* Update used. */
8788
0
    a->used = (sp_size_t)(i + 1U);
8789
0
    if (a->used == d->used) {
8790
        /* Finish div now that length of dividend is same as divisor. */
8791
0
        _sp_div_same_size(a, d, r);
8792
0
    }
8793
8794
0
    return err;
8795
0
}
8796
8797
/* Divide a by d and return the quotient in r and the remainder in rem.
8798
 *   r = a / d; rem = a % d
8799
 *
8800
 * @param  [in]   a     SP integer to be divided.
8801
 * @param  [in]   d     SP integer to divide by.
8802
 * @param  [out]  r     SP integer that is the quotient.
8803
 * @param  [out]  rem   SP integer that is the remainder.
8804
 * @param  [in]   used  Number of digits in temporaries to use.
8805
 *
8806
 * @return  MP_OKAY on success.
8807
 * @return  MP_MEM when dynamic memory allocation fails.
8808
 */
8809
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
8810
    unsigned int used)
8811
0
{
8812
0
    int err = MP_OKAY;
8813
0
    int ret;
8814
0
    int done = 0;
8815
0
    int s = 0;
8816
0
    sp_int* sa = NULL;
8817
0
    sp_int* sd = NULL;
8818
0
    sp_int* tr = NULL;
8819
0
    sp_int* trial = NULL;
8820
#ifdef WOLFSSL_SP_INT_NEGATIVE
8821
    sp_uint8 signA = MP_ZPOS;
8822
    sp_uint8 signD = MP_ZPOS;
8823
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8824
    /* Intermediates will always be less than or equal to dividend. */
8825
0
    DECL_SP_INT_ARRAY(td, used, 4);
8826
8827
#ifdef WOLFSSL_SP_INT_NEGATIVE
8828
    /* Cache sign for results. */
8829
    signA = a->sign;
8830
    signD = d->sign;
8831
#endif /* WOLFSSL_SP_INT_NEGATIVE */
8832
8833
    /* Handle simple case of: dividend < divisor. */
8834
0
    ret = _sp_cmp_abs(a, d);
8835
0
    if (ret == MP_LT) {
8836
        /* a = 0 * d + a */
8837
0
        if ((rem != NULL) && (a != rem)) {
8838
0
            _sp_copy(a, rem);
8839
0
        }
8840
0
        if (r != NULL) {
8841
0
            _sp_set(r, 0);
8842
0
        }
8843
0
        done = 1;
8844
0
    }
8845
    /* Handle simple case of: dividend == divisor. */
8846
0
    else if (ret == MP_EQ) {
8847
        /* a = 1 * d + 0 */
8848
0
        if (rem != NULL) {
8849
0
            _sp_set(rem, 0);
8850
0
        }
8851
0
        if (r != NULL) {
8852
0
            _sp_set(r, 1);
8853
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8854
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8855
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8856
0
        }
8857
0
        done = 1;
8858
0
    }
8859
0
    else if (sp_count_bits(a) == sp_count_bits(d)) {
8860
        /* a is greater than d but same bit length - subtract. */
8861
0
        if (rem != NULL) {
8862
0
            _sp_sub_off(a, d, rem, 0);
8863
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8864
            rem->sign = signA;
8865
        #endif
8866
0
        }
8867
0
        if (r != NULL) {
8868
0
            _sp_set(r, 1);
8869
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8870
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
8871
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8872
0
        }
8873
0
        done = 1;
8874
0
    }
8875
8876
    /* Allocate temporary 'sp_int's and assign. */
8877
0
    if ((!done) && (err == MP_OKAY)) {
8878
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
8879
        !defined(WOLFSSL_SP_NO_MALLOC)
8880
        unsigned int cnt = 4;
8881
        /* Reuse remainder sp_int where possible. */
8882
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
8883
            sa = rem;
8884
            cnt--;
8885
        }
8886
        /* Reuse result sp_int where possible. */
8887
        if ((r != NULL) && (r != d)) {
8888
            tr = r;
8889
            cnt--;
8890
        }
8891
        /* Macro always has code associated with it and checks err first. */
8892
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
8893
    #else
8894
0
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
8895
0
    #endif
8896
0
    }
8897
0
    if ((!done) && (err == MP_OKAY)) {
8898
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
8899
        !defined(WOLFSSL_SP_NO_MALLOC)
8900
        int i = 2;
8901
8902
        /* Set to temporary when not reusing. */
8903
        if (sa == NULL) {
8904
            sa = td[i++];
8905
            _sp_init_size(sa, used);
8906
        }
8907
        if (tr == NULL) {
8908
            tr = td[i];
8909
            _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
8910
        }
8911
    #else
8912
0
        sa    = td[2];
8913
0
        tr    = td[3];
8914
8915
0
        _sp_init_size(sa, used);
8916
0
        _sp_init_size(tr, (unsigned int)(a->used - d->used + 2));
8917
0
    #endif
8918
0
        sd    = td[0];
8919
0
        trial = td[1];
8920
8921
        /* Initialize sizes to minimal values. */
8922
0
        _sp_init_size(sd, (sp_size_t)(d->used + 1U));
8923
0
        _sp_init_size(trial, used);
8924
8925
        /* Move divisor to top of word. Adjust dividend as well. */
8926
0
        s = sp_count_bits(d);
8927
0
        s = SP_WORD_SIZE - (s & (int)SP_WORD_MASK);
8928
0
        _sp_copy(a, sa);
8929
        /* Only shift if top bit of divisor not set. */
8930
0
        if (s != SP_WORD_SIZE) {
8931
0
            err = sp_lshb(sa, s);
8932
0
            if (err == MP_OKAY) {
8933
0
                _sp_copy(d, sd);
8934
0
                d = sd;
8935
0
                err = sp_lshb(sd, s);
8936
0
            }
8937
0
        }
8938
0
    }
8939
0
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
8940
        /* Do division: tr = sa / d, sa = sa % d. */
8941
0
        err = _sp_div_impl(sa, d, tr, trial);
8942
        /* Return the remainder if required. */
8943
0
        if ((err == MP_OKAY) && (rem != NULL)) {
8944
            /* Move result back down if moved up for divisor value. */
8945
0
            if (s != SP_WORD_SIZE) {
8946
0
                (void)sp_rshb(sa, s, sa);
8947
0
            }
8948
0
            _sp_copy(sa, rem);
8949
0
            sp_clamp(rem);
8950
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8951
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
8952
        #endif
8953
0
        }
8954
        /* Return the quotient if required. */
8955
0
        if ((err == MP_OKAY) && (r != NULL)) {
8956
0
            _sp_copy(tr, r);
8957
0
            sp_clamp(r);
8958
        #ifdef WOLFSSL_SP_INT_NEGATIVE
8959
            if ((r->used == 0) || (signA == signD)) {
8960
                r->sign = MP_ZPOS;
8961
            }
8962
            else {
8963
                r->sign = MP_NEG;
8964
            }
8965
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
8966
0
        }
8967
0
    }
8968
8969
0
    FREE_SP_INT_ARRAY(td, NULL);
8970
0
    return err;
8971
0
}
8972
8973
/* Divide a by d and return the quotient in r and the remainder in rem.
8974
 *   r = a / d; rem = a % d
8975
 *
8976
 * @param  [in]   a    SP integer to be divided.
8977
 * @param  [in]   d    SP integer to divide by.
8978
 * @param  [out]  r    SP integer that is the quotient.
8979
 * @param  [out]  rem  SP integer that is the remainder.
8980
 *
8981
 * @return  MP_OKAY on success.
8982
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
8983
 * @return  MP_MEM when dynamic memory allocation fails.
8984
 */
8985
int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
8986
0
{
8987
0
    int err = MP_OKAY;
8988
0
    unsigned int used = 1;
8989
8990
    /* Validate parameters. */
8991
0
    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
8992
0
        err = MP_VAL;
8993
0
    }
8994
    /* a / 0 = infinity. */
8995
0
    if ((err == MP_OKAY) && sp_iszero(d)) {
8996
0
        err = MP_VAL;
8997
0
    }
8998
    /* Ensure quotient result has enough memory. */
8999
0
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
9000
0
        err = MP_VAL;
9001
0
    }
9002
0
    if ((err == MP_OKAY) && (rem != NULL)) {
9003
        /* Ensure remainder has enough memory. */
9004
0
        if ((a->used <= d->used) && (rem->size < a->used + 1)) {
9005
0
            err = MP_VAL;
9006
0
        }
9007
0
        else if ((a->used > d->used) && (rem->size < d->used + 1)) {
9008
0
            err = MP_VAL;
9009
0
        }
9010
0
    }
9011
0
    if (err == MP_OKAY) {
9012
0
        if (a->used == SP_INT_DIGITS) {
9013
            /* May need to shift number being divided left into a new word. */
9014
0
            int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
9015
0
            if ((bits != SP_WORD_SIZE) &&
9016
0
                    (sp_count_bits(a) + bits > (int)(SP_INT_DIGITS * SP_WORD_SIZE))) {
9017
0
                err = MP_VAL;
9018
0
            }
9019
0
            else {
9020
0
                used = SP_INT_DIGITS;
9021
0
            }
9022
0
        }
9023
0
        else {
9024
0
            used = (sp_size_t)(a->used + 1U);
9025
0
        }
9026
0
    }
9027
9028
0
    if (err == MP_OKAY) {
9029
    #if 0
9030
        sp_print(a, "a");
9031
        sp_print(d, "b");
9032
    #endif
9033
        /* Do operation. */
9034
0
        err = _sp_div(a, d, r, rem, used);
9035
    #if 0
9036
        if (err == MP_OKAY) {
9037
            if (rem != NULL) {
9038
                sp_print(rem, "rdr");
9039
            }
9040
            if (r != NULL) {
9041
                sp_print(r, "rdw");
9042
            }
9043
        }
9044
    #endif
9045
0
    }
9046
9047
0
    return err;
9048
0
}
9049
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9050
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9051
9052
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
9053
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
9054
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
9055
#ifndef FREESCALE_LTC_TFM
9056
#ifdef WOLFSSL_SP_INT_NEGATIVE
9057
/* Calculate the remainder of dividing a by m: r = a mod m. r is m.
9058
 *
9059
 * @param  [in]   a  SP integer to reduce.
9060
 * @param  [in]   m  SP integer that is the modulus.
9061
 * @param  [out]  r  SP integer to store result in.
9062
 *
9063
 * @return  MP_OKAY on success.
9064
 * @return  MP_MEM when dynamic memory allocation fails.
9065
 */
9066
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9067
{
9068
    int err = MP_OKAY;
9069
    /* Remainder will start as a. */
9070
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
9071
9072
    /* In case remainder is modulus - allocate temporary. */
9073
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
9074
    if (err == MP_OKAY) {
9075
        _sp_init_size(t, a->used + 1);
9076
        /* Use divide to calculate remainder and don't get quotient. */
9077
        err = sp_div(a, m, NULL, t);
9078
    }
9079
    if (err == MP_OKAY) {
9080
        /* Make remainder positive and copy into result. */
9081
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
9082
            err = sp_add(t, m, r);
9083
        }
9084
        else {
9085
            _sp_copy(t, r);
9086
        }
9087
    }
9088
    FREE_SP_INT(t, NULL);
9089
9090
    return err;
9091
}
9092
#endif
9093
9094
/* Calculate the remainder of dividing a by m: r = a mod m.
9095
 *
9096
 * @param  [in]   a  SP integer to reduce.
9097
 * @param  [in]   m  SP integer that is the modulus.
9098
 * @param  [out]  r  SP integer to store result in.
9099
 *
9100
 * @return  MP_OKAY on success.
9101
 * @return  MP_VAL when a, m or r is NULL or m is 0.
9102
 * @return  MP_MEM when dynamic memory allocation fails.
9103
 */
9104
int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
9105
0
{
9106
0
    int err = MP_OKAY;
9107
9108
    /* Validate parameters. */
9109
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
9110
0
        err = MP_VAL;
9111
0
    }
9112
    /* Ensure a isn't too big a number to operate on. */
9113
0
    else if (a->used >= SP_INT_DIGITS) {
9114
0
        err = MP_VAL;
9115
0
    }
9116
9117
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
9118
0
    if (err == MP_OKAY) {
9119
        /* Use divide to calculate remainder and don't get quotient. */
9120
0
        err = sp_div(a, m, NULL, r);
9121
0
    }
9122
#else
9123
    if ((err == MP_OKAY) && (r != m)) {
9124
        err = sp_div(a, m, NULL, r);
9125
        if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
9126
            err = sp_add(r, m, r);
9127
        }
9128
    }
9129
    else if (err == MP_OKAY) {
9130
        err = _sp_mod(a, m, r);
9131
    }
9132
#endif /* WOLFSSL_SP_INT_NEGATIVE */
9133
9134
0
    return err;
9135
0
}
9136
#endif /* !FREESCALE_LTC_TFM */
9137
#endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
9138
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
9139
9140
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
9141
    defined(HAVE_ECC) || !defined(NO_RSA)
9142
9143
/* START SP_MUL implementations. */
9144
/* This code is generated.
9145
 * To generate:
9146
 *   cd scripts/sp/sp_int
9147
 *   ./gen.sh
9148
 * File sp_mul.c contains code.
9149
 */
9150
9151
#ifdef SQR_MUL_ASM
9152
/* Multiply a by b into r where a and b have same no. digits. r = a * b
9153
 *
9154
 * Optimised code for when number of digits in a and b are the same.
9155
 *
9156
 * @param  [in]   a    SP integer to multiply.
9157
 * @param  [in]   b    SP integer to multiply by.
9158
 * @param  [out]  r    SP integer to hold result.
9159
 *
9160
 * @return  MP_OKAY otherwise.
9161
 * @return  MP_MEM when dynamic memory allocation fails.
9162
 */
9163
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
9164
0
{
9165
0
    int err = MP_OKAY;
9166
0
    unsigned int i;
9167
0
    int j;
9168
0
    unsigned int k;
9169
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9170
    sp_int_digit* t = NULL;
9171
#elif defined(WOLFSSL_SP_DYN_STACK)
9172
    sp_int_digit t[a->used];
9173
#else
9174
    sp_int_digit t[SP_INT_DIGITS / 2];
9175
#endif
9176
9177
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9178
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
9179
        DYNAMIC_TYPE_BIGINT);
9180
    if (t == NULL) {
9181
        err = MP_MEM;
9182
    }
9183
#endif
9184
0
    if (err == MP_OKAY) {
9185
0
        sp_int_digit l;
9186
0
        sp_int_digit h;
9187
0
        sp_int_digit o;
9188
0
        const sp_int_digit* dp;
9189
9190
0
        h = 0;
9191
0
        l = 0;
9192
0
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9193
0
        t[0] = h;
9194
0
        h = 0;
9195
0
        o = 0;
9196
0
        for (k = 1; k <= (unsigned int)a->used - 1; k++) {
9197
0
            j = (int)k;
9198
0
            dp = a->dp;
9199
0
            for (; j >= 0; dp++, j--) {
9200
0
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
9201
0
            }
9202
0
            t[k] = l;
9203
0
            l = h;
9204
0
            h = o;
9205
0
            o = 0;
9206
0
        }
9207
0
        for (; k <= ((unsigned int)a->used - 1) * 2; k++) {
9208
0
            i = k - (sp_size_t)(b->used - 1);
9209
0
            dp = &b->dp[b->used - 1];
9210
0
            for (; i < a->used; i++, dp--) {
9211
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
9212
0
            }
9213
0
            r->dp[k] = l;
9214
0
            l = h;
9215
0
            h = o;
9216
0
            o = 0;
9217
0
        }
9218
0
        r->dp[k] = l;
9219
0
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
9220
0
        r->used = (sp_size_t)(k + 1);
9221
0
        sp_clamp(r);
9222
0
    }
9223
9224
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9225
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9226
#endif
9227
0
    return err;
9228
0
}
9229
9230
/* Multiply a by b into r. r = a * b
9231
 *
9232
 * @param  [in]   a    SP integer to multiply.
9233
 * @param  [in]   b    SP integer to multiply by.
9234
 * @param  [out]  r    SP integer to hold result.
9235
 *
9236
 * @return  MP_OKAY otherwise.
9237
 * @return  MP_MEM when dynamic memory allocation fails.
9238
 */
9239
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9240
0
{
9241
0
    int err = MP_OKAY;
9242
0
    sp_size_t i;
9243
0
    int j;
9244
0
    sp_size_t k;
9245
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9246
    sp_int_digit* t = NULL;
9247
#elif defined(WOLFSSL_SP_DYN_STACK)
9248
    sp_int_digit t[a->used + b->used];
9249
#else
9250
    sp_int_digit t[SP_INT_DIGITS];
9251
#endif
9252
9253
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9254
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9255
                               (size_t)(a->used + b->used), NULL,
9256
                               DYNAMIC_TYPE_BIGINT);
9257
    if (t == NULL) {
9258
        err = MP_MEM;
9259
    }
9260
#endif
9261
0
    if (err == MP_OKAY) {
9262
0
        sp_int_digit l;
9263
0
        sp_int_digit h;
9264
0
        sp_int_digit o;
9265
9266
0
        h = 0;
9267
0
        l = 0;
9268
0
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9269
0
        t[0] = h;
9270
0
        h = 0;
9271
0
        o = 0;
9272
0
        for (k = 1; k <= (sp_size_t)(b->used - 1); k++) {
9273
0
            i = 0;
9274
0
            j = (int)k;
9275
0
            for (; (i < a->used) && (j >= 0); i++, j--) {
9276
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9277
0
            }
9278
0
            t[k] = l;
9279
0
            l = h;
9280
0
            h = o;
9281
0
            o = 0;
9282
0
        }
9283
0
        for (; k <= (sp_size_t)((a->used - 1) + (b->used - 1)); k++) {
9284
0
            j = (int)(b->used - 1);
9285
0
            i = (sp_size_t)(k - (sp_size_t)j);
9286
0
            for (; (i < a->used) && (j >= 0); i++, j--) {
9287
0
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
9288
0
            }
9289
0
            t[k] = l;
9290
0
            l = h;
9291
0
            h = o;
9292
0
            o = 0;
9293
0
        }
9294
0
        t[k] = l;
9295
0
        r->used = (sp_size_t)(k + 1);
9296
0
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9297
0
        sp_clamp(r);
9298
0
    }
9299
9300
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9301
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9302
#endif
9303
0
    return err;
9304
0
}
9305
#else
9306
/* Multiply a by b into r. r = a * b
9307
 *
9308
 * @param  [in]   a    SP integer to multiply.
9309
 * @param  [in]   b    SP integer to multiply by.
9310
 * @param  [out]  r    SP integer to hold result.
9311
 *
9312
 * @return  MP_OKAY otherwise.
9313
 * @return  MP_MEM when dynamic memory allocation fails.
9314
 */
9315
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
9316
{
9317
    int err = MP_OKAY;
9318
    sp_size_t i;
9319
    int j;
9320
    sp_size_t k;
9321
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9322
    sp_int_digit* t = NULL;
9323
#elif defined(WOLFSSL_SP_DYN_STACK)
9324
    sp_int_digit t[a->used + b->used];
9325
#else
9326
    sp_int_digit t[SP_INT_DIGITS];
9327
#endif
9328
9329
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9330
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
9331
                               (size_t)(a->used + b->used), NULL,
9332
                               DYNAMIC_TYPE_BIGINT);
9333
    if (t == NULL) {
9334
        err = MP_MEM;
9335
    }
9336
#endif
9337
    if (err == MP_OKAY) {
9338
        sp_int_word w;
9339
        sp_int_word l;
9340
        sp_int_word h;
9341
    #ifdef SP_WORD_OVERFLOW
9342
        sp_int_word o;
9343
    #endif
9344
9345
        w = (sp_int_word)a->dp[0] * b->dp[0];
9346
        t[0] = (sp_int_digit)w;
9347
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
9348
        h = 0;
9349
    #ifdef SP_WORD_OVERFLOW
9350
        o = 0;
9351
    #endif
9352
        for (k = 1; (int)k <= ((int)a->used - 1) + ((int)b->used - 1); k++) {
9353
            i = (sp_size_t)(k - (b->used - 1));
9354
            i &= (sp_size_t)(((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
9355
            j = (int)(k - i);
9356
            for (; (i < a->used) && (j >= 0); i++, j--) {
9357
                w = (sp_int_word)a->dp[i] * b->dp[j];
9358
                l += (sp_int_digit)w;
9359
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
9360
            #ifdef SP_WORD_OVERFLOW
9361
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
9362
                l &= SP_MASK;
9363
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
9364
                h &= SP_MASK;
9365
            #endif
9366
            }
9367
            t[k] = (sp_int_digit)l;
9368
            l >>= SP_WORD_SIZE;
9369
            l += (sp_int_digit)h;
9370
            h >>= SP_WORD_SIZE;
9371
        #ifdef SP_WORD_OVERFLOW
9372
            h += o & SP_MASK;
9373
            o >>= SP_WORD_SIZE;
9374
        #endif
9375
        }
9376
        t[k] = (sp_int_digit)l;
9377
        r->used = (sp_size_t)(k + 1);
9378
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9379
        sp_clamp(r);
9380
    }
9381
9382
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9383
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9384
#endif
9385
    return err;
9386
}
9387
#endif
9388
9389
#ifndef WOLFSSL_SP_SMALL
9390
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
9391
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
9392
#ifndef SQR_MUL_ASM
9393
/* Multiply a by b and store in r: r = a * b
9394
 *
9395
 * Long-hand implementation.
9396
 *
9397
 * @param  [in]   a  SP integer to multiply.
9398
 * @param  [in]   b  SP integer to multiply.
9399
 * @param  [out]  r  SP integer result.
9400
 *
9401
 * @return  MP_OKAY on success.
9402
 * @return  MP_MEM when dynamic memory allocation fails.
9403
 */
9404
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9405
{
9406
    int err = MP_OKAY;
9407
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9408
    sp_int_word* w = NULL;
9409
#else
9410
    sp_int_word w[16];
9411
#endif
9412
    const sp_int_digit* da = a->dp;
9413
    const sp_int_digit* db = b->dp;
9414
9415
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9416
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
9417
        DYNAMIC_TYPE_BIGINT);
9418
    if (w == NULL) {
9419
        err = MP_MEM;
9420
    }
9421
#endif
9422
9423
    if (err == MP_OKAY) {
9424
        w[0] = (sp_int_word)da[0] * db[0];
9425
        w[1] = (sp_int_word)da[0] * db[1];
9426
        w[2] = (sp_int_word)da[1] * db[0];
9427
        w[3] = (sp_int_word)da[0] * db[2];
9428
        w[4] = (sp_int_word)da[1] * db[1];
9429
        w[5] = (sp_int_word)da[2] * db[0];
9430
        w[6] = (sp_int_word)da[0] * db[3];
9431
        w[7] = (sp_int_word)da[1] * db[2];
9432
        w[8] = (sp_int_word)da[2] * db[1];
9433
        w[9] = (sp_int_word)da[3] * db[0];
9434
        w[10] = (sp_int_word)da[1] * db[3];
9435
        w[11] = (sp_int_word)da[2] * db[2];
9436
        w[12] = (sp_int_word)da[3] * db[1];
9437
        w[13] = (sp_int_word)da[2] * db[3];
9438
        w[14] = (sp_int_word)da[3] * db[2];
9439
        w[15] = (sp_int_word)da[3] * db[3];
9440
9441
        r->dp[0] = (sp_int_digit)w[0];
9442
        w[0] >>= SP_WORD_SIZE;
9443
        w[0] += (sp_int_digit)w[1];
9444
        w[0] += (sp_int_digit)w[2];
9445
        r->dp[1] = (sp_int_digit)w[0];
9446
        w[0] >>= SP_WORD_SIZE;
9447
        w[1] >>= SP_WORD_SIZE;
9448
        w[0] += (sp_int_digit)w[1];
9449
        w[2] >>= SP_WORD_SIZE;
9450
        w[0] += (sp_int_digit)w[2];
9451
        w[0] += (sp_int_digit)w[3];
9452
        w[0] += (sp_int_digit)w[4];
9453
        w[0] += (sp_int_digit)w[5];
9454
        r->dp[2] = (sp_int_digit)w[0];
9455
        w[0] >>= SP_WORD_SIZE;
9456
        w[3] >>= SP_WORD_SIZE;
9457
        w[0] += (sp_int_digit)w[3];
9458
        w[4] >>= SP_WORD_SIZE;
9459
        w[0] += (sp_int_digit)w[4];
9460
        w[5] >>= SP_WORD_SIZE;
9461
        w[0] += (sp_int_digit)w[5];
9462
        w[0] += (sp_int_digit)w[6];
9463
        w[0] += (sp_int_digit)w[7];
9464
        w[0] += (sp_int_digit)w[8];
9465
        w[0] += (sp_int_digit)w[9];
9466
        r->dp[3] = (sp_int_digit)w[0];
9467
        w[0] >>= SP_WORD_SIZE;
9468
        w[6] >>= SP_WORD_SIZE;
9469
        w[0] += (sp_int_digit)w[6];
9470
        w[7] >>= SP_WORD_SIZE;
9471
        w[0] += (sp_int_digit)w[7];
9472
        w[8] >>= SP_WORD_SIZE;
9473
        w[0] += (sp_int_digit)w[8];
9474
        w[9] >>= SP_WORD_SIZE;
9475
        w[0] += (sp_int_digit)w[9];
9476
        w[0] += (sp_int_digit)w[10];
9477
        w[0] += (sp_int_digit)w[11];
9478
        w[0] += (sp_int_digit)w[12];
9479
        r->dp[4] = (sp_int_digit)w[0];
9480
        w[0] >>= SP_WORD_SIZE;
9481
        w[10] >>= SP_WORD_SIZE;
9482
        w[0] += (sp_int_digit)w[10];
9483
        w[11] >>= SP_WORD_SIZE;
9484
        w[0] += (sp_int_digit)w[11];
9485
        w[12] >>= SP_WORD_SIZE;
9486
        w[0] += (sp_int_digit)w[12];
9487
        w[0] += (sp_int_digit)w[13];
9488
        w[0] += (sp_int_digit)w[14];
9489
        r->dp[5] = (sp_int_digit)w[0];
9490
        w[0] >>= SP_WORD_SIZE;
9491
        w[13] >>= SP_WORD_SIZE;
9492
        w[0] += (sp_int_digit)w[13];
9493
        w[14] >>= SP_WORD_SIZE;
9494
        w[0] += (sp_int_digit)w[14];
9495
        w[0] += (sp_int_digit)w[15];
9496
        r->dp[6] = (sp_int_digit)w[0];
9497
        w[0] >>= SP_WORD_SIZE;
9498
        w[15] >>= SP_WORD_SIZE;
9499
        w[0] += (sp_int_digit)w[15];
9500
        r->dp[7] = (sp_int_digit)w[0];
9501
9502
        r->used = 8;
9503
        sp_clamp(r);
9504
    }
9505
9506
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9507
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
9508
#endif
9509
    return err;
9510
}
9511
#else /* SQR_MUL_ASM */
9512
/* Multiply a by b and store in r: r = a * b
9513
 *
9514
 * Comba implementation.
9515
 *
9516
 * @param  [in]   a  SP integer to multiply.
9517
 * @param  [in]   b  SP integer to multiply.
9518
 * @param  [out]  r  SP integer result.
9519
 *
9520
 * @return  MP_OKAY on success.
9521
 * @return  MP_MEM when dynamic memory allocation fails.
9522
 */
9523
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
9524
0
{
9525
0
    sp_int_digit l = 0;
9526
0
    sp_int_digit h = 0;
9527
0
    sp_int_digit o = 0;
9528
0
    sp_int_digit t[4];
9529
9530
0
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9531
0
    t[0] = h;
9532
0
    h = 0;
9533
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9534
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9535
0
    t[1] = l;
9536
0
    l = h;
9537
0
    h = o;
9538
0
    o = 0;
9539
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9540
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9541
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9542
0
    t[2] = l;
9543
0
    l = h;
9544
0
    h = o;
9545
0
    o = 0;
9546
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9547
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9548
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9549
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9550
0
    t[3] = l;
9551
0
    l = h;
9552
0
    h = o;
9553
0
    o = 0;
9554
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9555
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9556
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9557
0
    r->dp[4] = l;
9558
0
    l = h;
9559
0
    h = o;
9560
0
    o = 0;
9561
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9562
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9563
0
    r->dp[5] = l;
9564
0
    l = h;
9565
0
    h = o;
9566
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
9567
0
    r->dp[6] = l;
9568
0
    r->dp[7] = h;
9569
0
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
9570
0
    r->used = 8;
9571
0
    sp_clamp(r);
9572
9573
0
    return MP_OKAY;
9574
0
}
9575
#endif /* SQR_MUL_ASM */
9576
#endif /* SP_WORD_SIZE == 64 */
9577
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
9578
#ifdef SQR_MUL_ASM
9579
/* Multiply a by b and store in r: r = a * b
9580
 *
9581
 * Comba implementation.
9582
 *
9583
 * @param  [in]   a  SP integer to multiply.
9584
 * @param  [in]   b  SP integer to multiply.
9585
 * @param  [out]  r  SP integer result.
9586
 *
9587
 * @return  MP_OKAY on success.
9588
 * @return  MP_MEM when dynamic memory allocation fails.
9589
 */
9590
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
9591
0
{
9592
0
    sp_int_digit l = 0;
9593
0
    sp_int_digit h = 0;
9594
0
    sp_int_digit o = 0;
9595
0
    sp_int_digit t[6];
9596
9597
0
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9598
0
    t[0] = h;
9599
0
    h = 0;
9600
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9601
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9602
0
    t[1] = l;
9603
0
    l = h;
9604
0
    h = o;
9605
0
    o = 0;
9606
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9607
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9608
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9609
0
    t[2] = l;
9610
0
    l = h;
9611
0
    h = o;
9612
0
    o = 0;
9613
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9614
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9615
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9616
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9617
0
    t[3] = l;
9618
0
    l = h;
9619
0
    h = o;
9620
0
    o = 0;
9621
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9622
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9623
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9624
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9625
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9626
0
    t[4] = l;
9627
0
    l = h;
9628
0
    h = o;
9629
0
    o = 0;
9630
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9631
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9632
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9633
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9634
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9635
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9636
0
    t[5] = l;
9637
0
    l = h;
9638
0
    h = o;
9639
0
    o = 0;
9640
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9641
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9642
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9643
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9644
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9645
0
    r->dp[6] = l;
9646
0
    l = h;
9647
0
    h = o;
9648
0
    o = 0;
9649
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9650
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9651
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9652
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9653
0
    r->dp[7] = l;
9654
0
    l = h;
9655
0
    h = o;
9656
0
    o = 0;
9657
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9658
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9659
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9660
0
    r->dp[8] = l;
9661
0
    l = h;
9662
0
    h = o;
9663
0
    o = 0;
9664
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9665
0
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9666
0
    r->dp[9] = l;
9667
0
    l = h;
9668
0
    h = o;
9669
0
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
9670
0
    r->dp[10] = l;
9671
0
    r->dp[11] = h;
9672
0
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
9673
0
    r->used = 12;
9674
0
    sp_clamp(r);
9675
9676
0
    return MP_OKAY;
9677
0
}
9678
#endif /* SQR_MUL_ASM */
9679
#endif /* SP_WORD_SIZE == 64 */
9680
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
9681
#ifdef SQR_MUL_ASM
9682
/* Multiply a by b and store in r: r = a * b
9683
 *
9684
 * Comba implementation.
9685
 *
9686
 * @param  [in]   a  SP integer to multiply.
9687
 * @param  [in]   b  SP integer to multiply.
9688
 * @param  [out]  r  SP integer result.
9689
 *
9690
 * @return  MP_OKAY on success.
9691
 * @return  MP_MEM when dynamic memory allocation fails.
9692
 */
9693
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
9694
{
9695
    sp_int_digit l = 0;
9696
    sp_int_digit h = 0;
9697
    sp_int_digit o = 0;
9698
    sp_int_digit t[8];
9699
9700
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9701
    t[0] = h;
9702
    h = 0;
9703
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9704
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9705
    t[1] = l;
9706
    l = h;
9707
    h = o;
9708
    o = 0;
9709
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9710
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9711
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9712
    t[2] = l;
9713
    l = h;
9714
    h = o;
9715
    o = 0;
9716
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9717
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9718
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9719
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9720
    t[3] = l;
9721
    l = h;
9722
    h = o;
9723
    o = 0;
9724
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9725
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9726
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9727
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9728
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9729
    t[4] = l;
9730
    l = h;
9731
    h = o;
9732
    o = 0;
9733
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9734
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9735
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9736
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9737
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9738
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9739
    t[5] = l;
9740
    l = h;
9741
    h = o;
9742
    o = 0;
9743
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9744
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9745
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9746
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9747
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9748
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9749
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9750
    t[6] = l;
9751
    l = h;
9752
    h = o;
9753
    o = 0;
9754
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9755
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9756
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9757
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9758
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9759
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9760
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9761
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9762
    t[7] = l;
9763
    l = h;
9764
    h = o;
9765
    o = 0;
9766
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9767
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9768
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9769
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9770
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9771
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9772
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9773
    r->dp[8] = l;
9774
    l = h;
9775
    h = o;
9776
    o = 0;
9777
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9778
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9779
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9780
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9781
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9782
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9783
    r->dp[9] = l;
9784
    l = h;
9785
    h = o;
9786
    o = 0;
9787
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9788
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9789
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9790
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9791
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9792
    r->dp[10] = l;
9793
    l = h;
9794
    h = o;
9795
    o = 0;
9796
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9797
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9798
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9799
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9800
    r->dp[11] = l;
9801
    l = h;
9802
    h = o;
9803
    o = 0;
9804
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9805
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9806
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9807
    r->dp[12] = l;
9808
    l = h;
9809
    h = o;
9810
    o = 0;
9811
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9812
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9813
    r->dp[13] = l;
9814
    l = h;
9815
    h = o;
9816
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
9817
    r->dp[14] = l;
9818
    r->dp[15] = h;
9819
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
9820
    r->used = 16;
9821
    sp_clamp(r);
9822
9823
    return MP_OKAY;
9824
}
9825
#endif /* SQR_MUL_ASM */
9826
#endif /* SP_WORD_SIZE == 32 */
9827
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
9828
#ifdef SQR_MUL_ASM
9829
/* Multiply a by b and store in r: r = a * b
9830
 *
9831
 * Comba implementation.
9832
 *
9833
 * @param  [in]   a  SP integer to multiply.
9834
 * @param  [in]   b  SP integer to multiply.
9835
 * @param  [out]  r  SP integer result.
9836
 *
9837
 * @return  MP_OKAY on success.
9838
 * @return  MP_MEM when dynamic memory allocation fails.
9839
 */
9840
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
9841
{
9842
    sp_int_digit l = 0;
9843
    sp_int_digit h = 0;
9844
    sp_int_digit o = 0;
9845
    sp_int_digit t[12];
9846
9847
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
9848
    t[0] = h;
9849
    h = 0;
9850
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
9851
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
9852
    t[1] = l;
9853
    l = h;
9854
    h = o;
9855
    o = 0;
9856
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
9857
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
9858
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
9859
    t[2] = l;
9860
    l = h;
9861
    h = o;
9862
    o = 0;
9863
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
9864
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
9865
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
9866
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
9867
    t[3] = l;
9868
    l = h;
9869
    h = o;
9870
    o = 0;
9871
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
9872
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
9873
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
9874
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
9875
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
9876
    t[4] = l;
9877
    l = h;
9878
    h = o;
9879
    o = 0;
9880
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
9881
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
9882
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
9883
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
9884
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
9885
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
9886
    t[5] = l;
9887
    l = h;
9888
    h = o;
9889
    o = 0;
9890
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
9891
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
9892
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
9893
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
9894
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
9895
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
9896
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
9897
    t[6] = l;
9898
    l = h;
9899
    h = o;
9900
    o = 0;
9901
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
9902
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
9903
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
9904
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
9905
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
9906
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
9907
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
9908
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
9909
    t[7] = l;
9910
    l = h;
9911
    h = o;
9912
    o = 0;
9913
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
9914
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
9915
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
9916
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
9917
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
9918
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
9919
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
9920
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
9921
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
9922
    t[8] = l;
9923
    l = h;
9924
    h = o;
9925
    o = 0;
9926
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
9927
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
9928
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
9929
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
9930
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
9931
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
9932
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
9933
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
9934
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
9935
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
9936
    t[9] = l;
9937
    l = h;
9938
    h = o;
9939
    o = 0;
9940
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
9941
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
9942
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
9943
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
9944
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
9945
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
9946
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
9947
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
9948
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
9949
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
9950
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
9951
    t[10] = l;
9952
    l = h;
9953
    h = o;
9954
    o = 0;
9955
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
9956
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
9957
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
9958
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
9959
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
9960
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
9961
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
9962
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
9963
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
9964
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
9965
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
9966
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
9967
    t[11] = l;
9968
    l = h;
9969
    h = o;
9970
    o = 0;
9971
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
9972
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
9973
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
9974
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
9975
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
9976
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
9977
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
9978
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
9979
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
9980
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
9981
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
9982
    r->dp[12] = l;
9983
    l = h;
9984
    h = o;
9985
    o = 0;
9986
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
9987
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
9988
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
9989
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
9990
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
9991
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
9992
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
9993
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
9994
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
9995
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
9996
    r->dp[13] = l;
9997
    l = h;
9998
    h = o;
9999
    o = 0;
10000
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10001
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10002
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10003
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10004
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10005
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10006
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10007
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10008
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10009
    r->dp[14] = l;
10010
    l = h;
10011
    h = o;
10012
    o = 0;
10013
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10014
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10015
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10016
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10017
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10018
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10019
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10020
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10021
    r->dp[15] = l;
10022
    l = h;
10023
    h = o;
10024
    o = 0;
10025
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10026
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10027
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10028
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10029
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10030
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10031
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10032
    r->dp[16] = l;
10033
    l = h;
10034
    h = o;
10035
    o = 0;
10036
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10037
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10038
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10039
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10040
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10041
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10042
    r->dp[17] = l;
10043
    l = h;
10044
    h = o;
10045
    o = 0;
10046
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10047
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10048
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10049
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10050
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10051
    r->dp[18] = l;
10052
    l = h;
10053
    h = o;
10054
    o = 0;
10055
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10056
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10057
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10058
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10059
    r->dp[19] = l;
10060
    l = h;
10061
    h = o;
10062
    o = 0;
10063
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10064
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10065
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10066
    r->dp[20] = l;
10067
    l = h;
10068
    h = o;
10069
    o = 0;
10070
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10071
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10072
    r->dp[21] = l;
10073
    l = h;
10074
    h = o;
10075
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
10076
    r->dp[22] = l;
10077
    r->dp[23] = h;
10078
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
10079
    r->used = 24;
10080
    sp_clamp(r);
10081
10082
    return MP_OKAY;
10083
}
10084
#endif /* SQR_MUL_ASM */
10085
#endif /* SP_WORD_SIZE == 32 */
10086
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10087
10088
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
10089
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
10090
    (SP_WORD_SIZE == 64)))
10091
    #if SP_INT_DIGITS >= 32
10092
/* Multiply a by b and store in r: r = a * b
10093
 *
10094
 * Comba implementation.
10095
 *
10096
 * @param  [in]   a  SP integer to multiply.
10097
 * @param  [in]   b  SP integer to multiply.
10098
 * @param  [out]  r  SP integer result.
10099
 *
10100
 * @return  MP_OKAY on success.
10101
 * @return  MP_MEM when dynamic memory allocation fails.
10102
 */
10103
static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
10104
{
10105
    int err = MP_OKAY;
10106
    sp_int_digit l = 0;
10107
    sp_int_digit h = 0;
10108
    sp_int_digit o = 0;
10109
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10110
    sp_int_digit* t = NULL;
10111
#else
10112
    sp_int_digit t[16];
10113
#endif
10114
10115
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10116
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
10117
         DYNAMIC_TYPE_BIGINT);
10118
     if (t == NULL) {
10119
         err = MP_MEM;
10120
     }
10121
#endif
10122
    if (err == MP_OKAY) {
10123
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10124
        t[0] = h;
10125
        h = 0;
10126
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10127
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10128
        t[1] = l;
10129
        l = h;
10130
        h = o;
10131
        o = 0;
10132
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10133
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10134
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10135
        t[2] = l;
10136
        l = h;
10137
        h = o;
10138
        o = 0;
10139
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10140
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10141
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10142
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10143
        t[3] = l;
10144
        l = h;
10145
        h = o;
10146
        o = 0;
10147
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10148
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10149
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10150
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10151
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10152
        t[4] = l;
10153
        l = h;
10154
        h = o;
10155
        o = 0;
10156
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10157
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10158
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10159
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10160
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10161
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10162
        t[5] = l;
10163
        l = h;
10164
        h = o;
10165
        o = 0;
10166
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10167
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10168
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10169
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10170
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10171
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10172
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10173
        t[6] = l;
10174
        l = h;
10175
        h = o;
10176
        o = 0;
10177
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10178
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10179
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10180
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10181
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10182
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10183
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10184
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10185
        t[7] = l;
10186
        l = h;
10187
        h = o;
10188
        o = 0;
10189
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10190
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10191
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10192
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10193
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10194
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10195
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10196
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10197
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10198
        t[8] = l;
10199
        l = h;
10200
        h = o;
10201
        o = 0;
10202
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10203
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10204
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10205
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10206
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10207
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10208
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10209
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10210
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10211
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10212
        t[9] = l;
10213
        l = h;
10214
        h = o;
10215
        o = 0;
10216
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10217
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10218
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10219
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10220
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10221
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10222
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10223
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10224
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10225
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10226
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10227
        t[10] = l;
10228
        l = h;
10229
        h = o;
10230
        o = 0;
10231
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10232
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10233
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10234
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10235
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10236
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10237
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10238
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10239
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10240
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10241
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10242
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10243
        t[11] = l;
10244
        l = h;
10245
        h = o;
10246
        o = 0;
10247
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10248
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10249
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10250
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10251
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10252
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10253
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10254
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10255
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10256
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10257
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10258
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10259
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10260
        t[12] = l;
10261
        l = h;
10262
        h = o;
10263
        o = 0;
10264
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10265
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10266
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10267
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10268
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10269
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10270
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10271
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10272
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10273
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10274
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10275
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10276
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10277
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10278
        t[13] = l;
10279
        l = h;
10280
        h = o;
10281
        o = 0;
10282
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10283
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10284
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10285
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10286
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10287
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10288
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10289
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10290
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10291
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10292
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10293
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10294
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10295
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10296
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10297
        t[14] = l;
10298
        l = h;
10299
        h = o;
10300
        o = 0;
10301
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10302
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10303
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10304
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10305
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10306
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10307
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10308
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10309
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10310
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10311
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10312
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10313
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10314
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10315
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10316
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10317
        t[15] = l;
10318
        l = h;
10319
        h = o;
10320
        o = 0;
10321
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10322
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10323
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10324
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10325
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10326
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10327
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10328
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10329
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10330
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10331
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10332
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10333
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10334
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10335
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10336
        r->dp[16] = l;
10337
        l = h;
10338
        h = o;
10339
        o = 0;
10340
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10341
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10342
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10343
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10344
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10345
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10346
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10347
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10348
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10349
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10350
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10351
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10352
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10353
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10354
        r->dp[17] = l;
10355
        l = h;
10356
        h = o;
10357
        o = 0;
10358
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10359
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10360
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10361
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10362
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10363
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10364
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10365
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10366
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10367
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10368
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10369
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10370
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10371
        r->dp[18] = l;
10372
        l = h;
10373
        h = o;
10374
        o = 0;
10375
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10376
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10377
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10378
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10379
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10380
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10381
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10382
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10383
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10384
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10385
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10386
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10387
        r->dp[19] = l;
10388
        l = h;
10389
        h = o;
10390
        o = 0;
10391
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10392
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10393
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10394
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10395
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10396
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10397
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10398
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10399
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10400
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10401
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10402
        r->dp[20] = l;
10403
        l = h;
10404
        h = o;
10405
        o = 0;
10406
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10407
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10408
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10409
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10410
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10411
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10412
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10413
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10414
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10415
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10416
        r->dp[21] = l;
10417
        l = h;
10418
        h = o;
10419
        o = 0;
10420
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10421
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10422
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10423
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10424
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10425
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10426
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10427
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10428
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10429
        r->dp[22] = l;
10430
        l = h;
10431
        h = o;
10432
        o = 0;
10433
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10434
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10435
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10436
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10437
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10438
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10439
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10440
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10441
        r->dp[23] = l;
10442
        l = h;
10443
        h = o;
10444
        o = 0;
10445
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10446
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10447
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10448
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10449
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10450
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10451
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10452
        r->dp[24] = l;
10453
        l = h;
10454
        h = o;
10455
        o = 0;
10456
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10457
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10458
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10459
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10460
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10461
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10462
        r->dp[25] = l;
10463
        l = h;
10464
        h = o;
10465
        o = 0;
10466
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
10467
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
10468
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
10469
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
10470
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
10471
        r->dp[26] = l;
10472
        l = h;
10473
        h = o;
10474
        o = 0;
10475
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
10476
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
10477
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
10478
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
10479
        r->dp[27] = l;
10480
        l = h;
10481
        h = o;
10482
        o = 0;
10483
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
10484
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
10485
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
10486
        r->dp[28] = l;
10487
        l = h;
10488
        h = o;
10489
        o = 0;
10490
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
10491
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
10492
        r->dp[29] = l;
10493
        l = h;
10494
        h = o;
10495
        SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
10496
        r->dp[30] = l;
10497
        r->dp[31] = h;
10498
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
10499
        r->used = 32;
10500
        sp_clamp(r);
10501
    }
10502
10503
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10504
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
10505
#endif
10506
    return err;
10507
}
10508
    #endif /* SP_INT_DIGITS >= 32 */
10509
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
10510
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
10511
10512
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10513
    #if SP_INT_DIGITS >= 48
10514
/* Multiply a by b and store in r: r = a * b
10515
 *
10516
 * Comba implementation.
10517
 *
10518
 * @param  [in]   a  SP integer to multiply.
10519
 * @param  [in]   b  SP integer to multiply.
10520
 * @param  [out]  r  SP integer result.
10521
 *
10522
 * @return  MP_OKAY on success.
10523
 * @return  MP_MEM when dynamic memory allocation fails.
10524
 */
10525
static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
10526
{
10527
    int err = MP_OKAY;
10528
    sp_int_digit l = 0;
10529
    sp_int_digit h = 0;
10530
    sp_int_digit o = 0;
10531
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10532
    sp_int_digit* t = NULL;
10533
#else
10534
    sp_int_digit t[24];
10535
#endif
10536
10537
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10538
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
10539
         DYNAMIC_TYPE_BIGINT);
10540
     if (t == NULL) {
10541
         err = MP_MEM;
10542
     }
10543
#endif
10544
    if (err == MP_OKAY) {
10545
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
10546
        t[0] = h;
10547
        h = 0;
10548
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
10549
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
10550
        t[1] = l;
10551
        l = h;
10552
        h = o;
10553
        o = 0;
10554
        SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
10555
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
10556
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
10557
        t[2] = l;
10558
        l = h;
10559
        h = o;
10560
        o = 0;
10561
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
10562
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
10563
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
10564
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
10565
        t[3] = l;
10566
        l = h;
10567
        h = o;
10568
        o = 0;
10569
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
10570
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
10571
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
10572
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
10573
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
10574
        t[4] = l;
10575
        l = h;
10576
        h = o;
10577
        o = 0;
10578
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
10579
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
10580
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
10581
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
10582
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
10583
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
10584
        t[5] = l;
10585
        l = h;
10586
        h = o;
10587
        o = 0;
10588
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
10589
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
10590
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
10591
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
10592
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
10593
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
10594
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
10595
        t[6] = l;
10596
        l = h;
10597
        h = o;
10598
        o = 0;
10599
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
10600
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
10601
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
10602
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
10603
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
10604
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
10605
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
10606
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
10607
        t[7] = l;
10608
        l = h;
10609
        h = o;
10610
        o = 0;
10611
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
10612
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
10613
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
10614
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
10615
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
10616
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
10617
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
10618
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
10619
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
10620
        t[8] = l;
10621
        l = h;
10622
        h = o;
10623
        o = 0;
10624
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
10625
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
10626
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
10627
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
10628
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
10629
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
10630
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
10631
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
10632
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
10633
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
10634
        t[9] = l;
10635
        l = h;
10636
        h = o;
10637
        o = 0;
10638
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
10639
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
10640
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
10641
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
10642
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
10643
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
10644
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
10645
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
10646
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
10647
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
10648
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
10649
        t[10] = l;
10650
        l = h;
10651
        h = o;
10652
        o = 0;
10653
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
10654
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
10655
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
10656
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
10657
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
10658
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
10659
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
10660
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
10661
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
10662
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
10663
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
10664
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
10665
        t[11] = l;
10666
        l = h;
10667
        h = o;
10668
        o = 0;
10669
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
10670
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
10671
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
10672
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
10673
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
10674
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
10675
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
10676
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
10677
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
10678
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
10679
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
10680
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
10681
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
10682
        t[12] = l;
10683
        l = h;
10684
        h = o;
10685
        o = 0;
10686
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
10687
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
10688
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
10689
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
10690
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
10691
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
10692
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
10693
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
10694
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
10695
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
10696
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
10697
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
10698
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
10699
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
10700
        t[13] = l;
10701
        l = h;
10702
        h = o;
10703
        o = 0;
10704
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
10705
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
10706
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
10707
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
10708
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
10709
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
10710
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
10711
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
10712
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
10713
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
10714
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
10715
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
10716
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
10717
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
10718
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
10719
        t[14] = l;
10720
        l = h;
10721
        h = o;
10722
        o = 0;
10723
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
10724
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
10725
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
10726
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
10727
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
10728
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
10729
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
10730
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
10731
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
10732
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
10733
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
10734
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
10735
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
10736
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
10737
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
10738
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
10739
        t[15] = l;
10740
        l = h;
10741
        h = o;
10742
        o = 0;
10743
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
10744
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
10745
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
10746
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
10747
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
10748
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
10749
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
10750
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
10751
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
10752
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
10753
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
10754
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
10755
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
10756
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
10757
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
10758
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
10759
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
10760
        t[16] = l;
10761
        l = h;
10762
        h = o;
10763
        o = 0;
10764
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
10765
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
10766
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
10767
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
10768
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
10769
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
10770
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
10771
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
10772
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
10773
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
10774
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
10775
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
10776
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
10777
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
10778
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
10779
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
10780
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
10781
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
10782
        t[17] = l;
10783
        l = h;
10784
        h = o;
10785
        o = 0;
10786
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
10787
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
10788
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
10789
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
10790
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
10791
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
10792
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
10793
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
10794
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
10795
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
10796
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
10797
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
10798
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
10799
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
10800
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
10801
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
10802
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
10803
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
10804
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
10805
        t[18] = l;
10806
        l = h;
10807
        h = o;
10808
        o = 0;
10809
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
10810
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
10811
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
10812
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
10813
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
10814
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
10815
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
10816
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
10817
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
10818
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
10819
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
10820
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
10821
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
10822
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
10823
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
10824
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
10825
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
10826
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
10827
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
10828
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
10829
        t[19] = l;
10830
        l = h;
10831
        h = o;
10832
        o = 0;
10833
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
10834
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
10835
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
10836
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
10837
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
10838
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
10839
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
10840
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
10841
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
10842
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
10843
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
10844
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
10845
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
10846
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
10847
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
10848
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
10849
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
10850
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
10851
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
10852
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
10853
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
10854
        t[20] = l;
10855
        l = h;
10856
        h = o;
10857
        o = 0;
10858
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
10859
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
10860
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
10861
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
10862
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
10863
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
10864
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
10865
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
10866
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
10867
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
10868
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
10869
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
10870
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
10871
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
10872
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
10873
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
10874
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
10875
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
10876
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
10877
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
10878
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
10879
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
10880
        t[21] = l;
10881
        l = h;
10882
        h = o;
10883
        o = 0;
10884
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
10885
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
10886
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
10887
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
10888
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
10889
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
10890
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
10891
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
10892
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
10893
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
10894
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
10895
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
10896
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
10897
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
10898
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
10899
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
10900
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
10901
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
10902
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
10903
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
10904
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
10905
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
10906
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
10907
        t[22] = l;
10908
        l = h;
10909
        h = o;
10910
        o = 0;
10911
        SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
10912
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
10913
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
10914
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
10915
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
10916
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
10917
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
10918
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
10919
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
10920
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
10921
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
10922
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
10923
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
10924
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
10925
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
10926
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
10927
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
10928
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
10929
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
10930
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
10931
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
10932
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
10933
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
10934
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
10935
        t[23] = l;
10936
        l = h;
10937
        h = o;
10938
        o = 0;
10939
        SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
10940
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
10941
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
10942
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
10943
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
10944
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
10945
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
10946
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
10947
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
10948
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
10949
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
10950
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
10951
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
10952
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
10953
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
10954
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
10955
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
10956
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
10957
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
10958
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
10959
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
10960
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
10961
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
10962
        r->dp[24] = l;
10963
        l = h;
10964
        h = o;
10965
        o = 0;
10966
        SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
10967
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
10968
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
10969
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
10970
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
10971
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
10972
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
10973
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
10974
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
10975
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
10976
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
10977
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
10978
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
10979
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
10980
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
10981
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
10982
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
10983
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
10984
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
10985
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
10986
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
10987
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
10988
        r->dp[25] = l;
10989
        l = h;
10990
        h = o;
10991
        o = 0;
10992
        SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
10993
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
10994
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
10995
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
10996
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
10997
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
10998
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
10999
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
11000
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
11001
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
11002
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
11003
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
11004
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
11005
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
11006
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
11007
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
11008
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
11009
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
11010
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
11011
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
11012
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
11013
        r->dp[26] = l;
11014
        l = h;
11015
        h = o;
11016
        o = 0;
11017
        SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
11018
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
11019
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
11020
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
11021
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
11022
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
11023
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
11024
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
11025
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
11026
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
11027
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
11028
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
11029
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
11030
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
11031
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
11032
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
11033
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
11034
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
11035
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
11036
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
11037
        r->dp[27] = l;
11038
        l = h;
11039
        h = o;
11040
        o = 0;
11041
        SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
11042
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
11043
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
11044
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
11045
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
11046
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
11047
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
11048
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
11049
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
11050
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
11051
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
11052
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
11053
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
11054
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
11055
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
11056
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
11057
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
11058
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
11059
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
11060
        r->dp[28] = l;
11061
        l = h;
11062
        h = o;
11063
        o = 0;
11064
        SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
11065
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
11066
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
11067
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
11068
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
11069
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
11070
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
11071
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
11072
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
11073
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
11074
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
11075
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
11076
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
11077
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
11078
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
11079
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
11080
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
11081
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
11082
        r->dp[29] = l;
11083
        l = h;
11084
        h = o;
11085
        o = 0;
11086
        SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
11087
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
11088
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
11089
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
11090
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
11091
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
11092
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
11093
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
11094
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
11095
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
11096
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
11097
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
11098
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
11099
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
11100
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
11101
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
11102
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
11103
        r->dp[30] = l;
11104
        l = h;
11105
        h = o;
11106
        o = 0;
11107
        SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
11108
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
11109
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
11110
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
11111
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
11112
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
11113
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
11114
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
11115
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
11116
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
11117
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
11118
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
11119
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
11120
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
11121
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
11122
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
11123
        r->dp[31] = l;
11124
        l = h;
11125
        h = o;
11126
        o = 0;
11127
        SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
11128
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
11129
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
11130
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
11131
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
11132
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
11133
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
11134
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
11135
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
11136
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
11137
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
11138
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
11139
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
11140
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
11141
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
11142
        r->dp[32] = l;
11143
        l = h;
11144
        h = o;
11145
        o = 0;
11146
        SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
11147
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
11148
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
11149
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
11150
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
11151
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
11152
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
11153
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
11154
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
11155
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
11156
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
11157
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
11158
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
11159
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
11160
        r->dp[33] = l;
11161
        l = h;
11162
        h = o;
11163
        o = 0;
11164
        SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
11165
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
11166
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
11167
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
11168
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
11169
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
11170
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
11171
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
11172
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
11173
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
11174
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
11175
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
11176
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
11177
        r->dp[34] = l;
11178
        l = h;
11179
        h = o;
11180
        o = 0;
11181
        SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
11182
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
11183
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
11184
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
11185
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
11186
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
11187
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
11188
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
11189
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
11190
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
11191
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
11192
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
11193
        r->dp[35] = l;
11194
        l = h;
11195
        h = o;
11196
        o = 0;
11197
        SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
11198
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
11199
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
11200
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
11201
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
11202
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
11203
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
11204
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
11205
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
11206
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
11207
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
11208
        r->dp[36] = l;
11209
        l = h;
11210
        h = o;
11211
        o = 0;
11212
        SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
11213
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
11214
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
11215
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
11216
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
11217
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
11218
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
11219
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
11220
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
11221
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
11222
        r->dp[37] = l;
11223
        l = h;
11224
        h = o;
11225
        o = 0;
11226
        SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
11227
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
11228
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
11229
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
11230
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
11231
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
11232
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
11233
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
11234
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
11235
        r->dp[38] = l;
11236
        l = h;
11237
        h = o;
11238
        o = 0;
11239
        SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
11240
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
11241
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
11242
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
11243
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
11244
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
11245
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
11246
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
11247
        r->dp[39] = l;
11248
        l = h;
11249
        h = o;
11250
        o = 0;
11251
        SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
11252
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
11253
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
11254
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
11255
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
11256
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
11257
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
11258
        r->dp[40] = l;
11259
        l = h;
11260
        h = o;
11261
        o = 0;
11262
        SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
11263
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
11264
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
11265
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
11266
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
11267
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
11268
        r->dp[41] = l;
11269
        l = h;
11270
        h = o;
11271
        o = 0;
11272
        SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
11273
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
11274
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
11275
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
11276
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
11277
        r->dp[42] = l;
11278
        l = h;
11279
        h = o;
11280
        o = 0;
11281
        SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
11282
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
11283
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
11284
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
11285
        r->dp[43] = l;
11286
        l = h;
11287
        h = o;
11288
        o = 0;
11289
        SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
11290
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
11291
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
11292
        r->dp[44] = l;
11293
        l = h;
11294
        h = o;
11295
        o = 0;
11296
        SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
11297
        SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
11298
        r->dp[45] = l;
11299
        l = h;
11300
        h = o;
11301
        SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
11302
        r->dp[46] = l;
11303
        r->dp[47] = h;
11304
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
11305
        r->used = 48;
11306
        sp_clamp(r);
11307
    }
11308
11309
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11310
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
11311
#endif
11312
    return err;
11313
}
11314
    #endif /* SP_INT_DIGITS >= 48 */
11315
11316
    #if SP_INT_DIGITS >= 64
11317
/* Multiply a by b and store in r: r = a * b
11318
 *
11319
 * Karatsuba implementation.
11320
 *
11321
 * @param  [in]   a  SP integer to multiply.
11322
 * @param  [in]   b  SP integer to multiply.
11323
 * @param  [out]  r  SP integer result.
11324
 *
11325
 * @return  MP_OKAY on success.
11326
 * @return  MP_MEM when dynamic memory allocation fails.
11327
 */
11328
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
11329
{
11330
    int err = MP_OKAY;
11331
    unsigned int i;
11332
    sp_int_digit l;
11333
    sp_int_digit h;
11334
    sp_int* a1;
11335
    sp_int* b1;
11336
    sp_int* z0;
11337
    sp_int* z1;
11338
    sp_int* z2;
11339
    sp_int_digit ca;
11340
    sp_int_digit cb;
11341
    DECL_SP_INT_ARRAY(t, 16, 2);
11342
    DECL_SP_INT_ARRAY(z, 33, 2);
11343
11344
    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
11345
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
11346
    if (err == MP_OKAY) {
11347
        a1 = t[0];
11348
        b1 = t[1];
11349
        z1 = z[0];
11350
        z2 = z[1];
11351
        z0 = r;
11352
11353
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
11354
        a1->used = 16;
11355
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
11356
        b1->used = 16;
11357
11358
        /* z2 = a1 * b1 */
11359
        err = _sp_mul_16(a1, b1, z2);
11360
    }
11361
    if (err == MP_OKAY) {
11362
        l = a1->dp[0];
11363
        h = 0;
11364
        SP_ASM_ADDC(l, h, a->dp[0]);
11365
        a1->dp[0] = l;
11366
        l = h;
11367
        h = 0;
11368
        for (i = 1; i < 16; i++) {
11369
            SP_ASM_ADDC(l, h, a1->dp[i]);
11370
            SP_ASM_ADDC(l, h, a->dp[i]);
11371
            a1->dp[i] = l;
11372
            l = h;
11373
            h = 0;
11374
        }
11375
        ca = l;
11376
        /* b01 = b0 + b1 */
11377
        l = b1->dp[0];
11378
        h = 0;
11379
        SP_ASM_ADDC(l, h, b->dp[0]);
11380
        b1->dp[0] = l;
11381
        l = h;
11382
        h = 0;
11383
        for (i = 1; i < 16; i++) {
11384
            SP_ASM_ADDC(l, h, b1->dp[i]);
11385
            SP_ASM_ADDC(l, h, b->dp[i]);
11386
            b1->dp[i] = l;
11387
            l = h;
11388
            h = 0;
11389
        }
11390
        cb = l;
11391
11392
        /* z0 = a0 * b0 */
11393
        err = _sp_mul_16(a, b, z0);
11394
    }
11395
    if (err == MP_OKAY) {
11396
        /* z1 = (a0 + a1) * (b0 + b1) */
11397
        err = _sp_mul_16(a1, b1, z1);
11398
    }
11399
    if (err == MP_OKAY) {
11400
        /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
11401
        /* r = z0 */
11402
        /* r += (z1 - z0 - z2) << 16 */
11403
        z1->dp[32] = ca & cb;
11404
        l = 0;
11405
        if (ca) {
11406
            h = 0;
11407
            for (i = 0; i < 16; i++) {
11408
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11409
                SP_ASM_ADDC(l, h, b1->dp[i]);
11410
                z1->dp[i + 16] = l;
11411
                l = h;
11412
                h = 0;
11413
            }
11414
        }
11415
        z1->dp[32] += l;
11416
        l = 0;
11417
        if (cb) {
11418
            h = 0;
11419
            for (i = 0; i < 16; i++) {
11420
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11421
                SP_ASM_ADDC(l, h, a1->dp[i]);
11422
                z1->dp[i + 16] = l;
11423
                l = h;
11424
                h = 0;
11425
            }
11426
        }
11427
        z1->dp[32] += l;
11428
        /* z1 = z1 - z0 - z1 */
11429
        l = 0;
11430
        h = 0;
11431
        for (i = 0; i < 32; i++) {
11432
            l += z1->dp[i];
11433
            SP_ASM_SUBB(l, h, z0->dp[i]);
11434
            SP_ASM_SUBB(l, h, z2->dp[i]);
11435
            z1->dp[i] = l;
11436
            l = h;
11437
            h = 0;
11438
        }
11439
        z1->dp[i] += l;
11440
        /* r += z1 << 16 */
11441
        l = 0;
11442
        h = 0;
11443
        for (i = 0; i < 16; i++) {
11444
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
11445
            SP_ASM_ADDC(l, h, z1->dp[i]);
11446
            r->dp[i + 16] = l;
11447
            l = h;
11448
            h = 0;
11449
        }
11450
        for (; i < 33; i++) {
11451
            SP_ASM_ADDC(l, h, z1->dp[i]);
11452
            r->dp[i + 16] = l;
11453
            l = h;
11454
            h = 0;
11455
        }
11456
        /* r += z2 << 32  */
11457
        l = 0;
11458
        h = 0;
11459
        for (i = 0; i < 17; i++) {
11460
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11461
            SP_ASM_ADDC(l, h, z2->dp[i]);
11462
            r->dp[i + 32] = l;
11463
            l = h;
11464
            h = 0;
11465
        }
11466
        for (; i < 32; i++) {
11467
            SP_ASM_ADDC(l, h, z2->dp[i]);
11468
            r->dp[i + 32] = l;
11469
            l = h;
11470
            h = 0;
11471
        }
11472
        r->used = 64;
11473
        sp_clamp(r);
11474
    }
11475
11476
    FREE_SP_INT_ARRAY(z, NULL);
11477
    FREE_SP_INT_ARRAY(t, NULL);
11478
    return err;
11479
}
11480
    #endif /* SP_INT_DIGITS >= 64 */
11481
11482
    #if SP_INT_DIGITS >= 96
11483
/* Multiply a by b and store in r: r = a * b
11484
 *
11485
 * Karatsuba implementation.
11486
 *
11487
 * @param  [in]   a  SP integer to multiply.
11488
 * @param  [in]   b  SP integer to multiply.
11489
 * @param  [out]  r  SP integer result.
11490
 *
11491
 * @return  MP_OKAY on success.
11492
 * @return  MP_MEM when dynamic memory allocation fails.
11493
 */
11494
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
11495
{
11496
    int err = MP_OKAY;
11497
    unsigned int i;
11498
    sp_int_digit l;
11499
    sp_int_digit h;
11500
    sp_int* a1;
11501
    sp_int* b1;
11502
    sp_int* z0;
11503
    sp_int* z1;
11504
    sp_int* z2;
11505
    sp_int_digit ca;
11506
    sp_int_digit cb;
11507
    DECL_SP_INT_ARRAY(t, 24, 2);
11508
    DECL_SP_INT_ARRAY(z, 49, 2);
11509
11510
    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
11511
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
11512
    if (err == MP_OKAY) {
11513
        a1 = t[0];
11514
        b1 = t[1];
11515
        z1 = z[0];
11516
        z2 = z[1];
11517
        z0 = r;
11518
11519
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
11520
        a1->used = 24;
11521
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
11522
        b1->used = 24;
11523
11524
        /* z2 = a1 * b1 */
11525
        err = _sp_mul_24(a1, b1, z2);
11526
    }
11527
    if (err == MP_OKAY) {
11528
        l = a1->dp[0];
11529
        h = 0;
11530
        SP_ASM_ADDC(l, h, a->dp[0]);
11531
        a1->dp[0] = l;
11532
        l = h;
11533
        h = 0;
11534
        for (i = 1; i < 24; i++) {
11535
            SP_ASM_ADDC(l, h, a1->dp[i]);
11536
            SP_ASM_ADDC(l, h, a->dp[i]);
11537
            a1->dp[i] = l;
11538
            l = h;
11539
            h = 0;
11540
        }
11541
        ca = l;
11542
        /* b01 = b0 + b1 */
11543
        l = b1->dp[0];
11544
        h = 0;
11545
        SP_ASM_ADDC(l, h, b->dp[0]);
11546
        b1->dp[0] = l;
11547
        l = h;
11548
        h = 0;
11549
        for (i = 1; i < 24; i++) {
11550
            SP_ASM_ADDC(l, h, b1->dp[i]);
11551
            SP_ASM_ADDC(l, h, b->dp[i]);
11552
            b1->dp[i] = l;
11553
            l = h;
11554
            h = 0;
11555
        }
11556
        cb = l;
11557
11558
        /* z0 = a0 * b0 */
11559
        err = _sp_mul_24(a, b, z0);
11560
    }
11561
    if (err == MP_OKAY) {
11562
        /* z1 = (a0 + a1) * (b0 + b1) */
11563
        err = _sp_mul_24(a1, b1, z1);
11564
    }
11565
    if (err == MP_OKAY) {
11566
        /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
11567
        /* r = z0 */
11568
        /* r += (z1 - z0 - z2) << 24 */
11569
        z1->dp[48] = ca & cb;
11570
        l = 0;
11571
        if (ca) {
11572
            h = 0;
11573
            for (i = 0; i < 24; i++) {
11574
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11575
                SP_ASM_ADDC(l, h, b1->dp[i]);
11576
                z1->dp[i + 24] = l;
11577
                l = h;
11578
                h = 0;
11579
            }
11580
        }
11581
        z1->dp[48] += l;
11582
        l = 0;
11583
        if (cb) {
11584
            h = 0;
11585
            for (i = 0; i < 24; i++) {
11586
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11587
                SP_ASM_ADDC(l, h, a1->dp[i]);
11588
                z1->dp[i + 24] = l;
11589
                l = h;
11590
                h = 0;
11591
            }
11592
        }
11593
        z1->dp[48] += l;
11594
        /* z1 = z1 - z0 - z1 */
11595
        l = 0;
11596
        h = 0;
11597
        for (i = 0; i < 48; i++) {
11598
            l += z1->dp[i];
11599
            SP_ASM_SUBB(l, h, z0->dp[i]);
11600
            SP_ASM_SUBB(l, h, z2->dp[i]);
11601
            z1->dp[i] = l;
11602
            l = h;
11603
            h = 0;
11604
        }
11605
        z1->dp[i] += l;
11606
        /* r += z1 << 16 */
11607
        l = 0;
11608
        h = 0;
11609
        for (i = 0; i < 24; i++) {
11610
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
11611
            SP_ASM_ADDC(l, h, z1->dp[i]);
11612
            r->dp[i + 24] = l;
11613
            l = h;
11614
            h = 0;
11615
        }
11616
        for (; i < 49; i++) {
11617
            SP_ASM_ADDC(l, h, z1->dp[i]);
11618
            r->dp[i + 24] = l;
11619
            l = h;
11620
            h = 0;
11621
        }
11622
        /* r += z2 << 48  */
11623
        l = 0;
11624
        h = 0;
11625
        for (i = 0; i < 25; i++) {
11626
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
11627
            SP_ASM_ADDC(l, h, z2->dp[i]);
11628
            r->dp[i + 48] = l;
11629
            l = h;
11630
            h = 0;
11631
        }
11632
        for (; i < 48; i++) {
11633
            SP_ASM_ADDC(l, h, z2->dp[i]);
11634
            r->dp[i + 48] = l;
11635
            l = h;
11636
            h = 0;
11637
        }
11638
        r->used = 96;
11639
        sp_clamp(r);
11640
    }
11641
11642
    FREE_SP_INT_ARRAY(z, NULL);
11643
    FREE_SP_INT_ARRAY(t, NULL);
11644
    return err;
11645
}
11646
    #endif /* SP_INT_DIGITS >= 96 */
11647
11648
    #if SP_INT_DIGITS >= 128
11649
/* Multiply a by b and store in r: r = a * b
11650
 *
11651
 * Karatsuba implementation.
11652
 *
11653
 * @param  [in]   a  SP integer to multiply.
11654
 * @param  [in]   b  SP integer to multiply.
11655
 * @param  [out]  r  SP integer result.
11656
 *
11657
 * @return  MP_OKAY on success.
11658
 * @return  MP_MEM when dynamic memory allocation fails.
11659
 */
11660
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
11661
{
11662
    int err = MP_OKAY;
11663
    unsigned int i;
11664
    sp_int_digit l;
11665
    sp_int_digit h;
11666
    sp_int* a1;
11667
    sp_int* b1;
11668
    sp_int* z0;
11669
    sp_int* z1;
11670
    sp_int* z2;
11671
    sp_int_digit ca;
11672
    sp_int_digit cb;
11673
    DECL_SP_INT_ARRAY(t, 32, 2);
11674
    DECL_SP_INT_ARRAY(z, 65, 2);
11675
11676
    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
11677
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
11678
    if (err == MP_OKAY) {
11679
        a1 = t[0];
11680
        b1 = t[1];
11681
        z1 = z[0];
11682
        z2 = z[1];
11683
        z0 = r;
11684
11685
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
11686
        a1->used = 32;
11687
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
11688
        b1->used = 32;
11689
11690
        /* z2 = a1 * b1 */
11691
        err = _sp_mul_32(a1, b1, z2);
11692
    }
11693
    if (err == MP_OKAY) {
11694
        l = a1->dp[0];
11695
        h = 0;
11696
        SP_ASM_ADDC(l, h, a->dp[0]);
11697
        a1->dp[0] = l;
11698
        l = h;
11699
        h = 0;
11700
        for (i = 1; i < 32; i++) {
11701
            SP_ASM_ADDC(l, h, a1->dp[i]);
11702
            SP_ASM_ADDC(l, h, a->dp[i]);
11703
            a1->dp[i] = l;
11704
            l = h;
11705
            h = 0;
11706
        }
11707
        ca = l;
11708
        /* b01 = b0 + b1 */
11709
        l = b1->dp[0];
11710
        h = 0;
11711
        SP_ASM_ADDC(l, h, b->dp[0]);
11712
        b1->dp[0] = l;
11713
        l = h;
11714
        h = 0;
11715
        for (i = 1; i < 32; i++) {
11716
            SP_ASM_ADDC(l, h, b1->dp[i]);
11717
            SP_ASM_ADDC(l, h, b->dp[i]);
11718
            b1->dp[i] = l;
11719
            l = h;
11720
            h = 0;
11721
        }
11722
        cb = l;
11723
11724
        /* z0 = a0 * b0 */
11725
        err = _sp_mul_32(a, b, z0);
11726
    }
11727
    if (err == MP_OKAY) {
11728
        /* z1 = (a0 + a1) * (b0 + b1) */
11729
        err = _sp_mul_32(a1, b1, z1);
11730
    }
11731
    if (err == MP_OKAY) {
11732
        /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
11733
        /* r = z0 */
11734
        /* r += (z1 - z0 - z2) << 32 */
11735
        z1->dp[64] = ca & cb;
11736
        l = 0;
11737
        if (ca) {
11738
            h = 0;
11739
            for (i = 0; i < 32; i++) {
11740
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11741
                SP_ASM_ADDC(l, h, b1->dp[i]);
11742
                z1->dp[i + 32] = l;
11743
                l = h;
11744
                h = 0;
11745
            }
11746
        }
11747
        z1->dp[64] += l;
11748
        l = 0;
11749
        if (cb) {
11750
            h = 0;
11751
            for (i = 0; i < 32; i++) {
11752
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11753
                SP_ASM_ADDC(l, h, a1->dp[i]);
11754
                z1->dp[i + 32] = l;
11755
                l = h;
11756
                h = 0;
11757
            }
11758
        }
11759
        z1->dp[64] += l;
11760
        /* z1 = z1 - z0 - z1 */
11761
        l = 0;
11762
        h = 0;
11763
        for (i = 0; i < 64; i++) {
11764
            l += z1->dp[i];
11765
            SP_ASM_SUBB(l, h, z0->dp[i]);
11766
            SP_ASM_SUBB(l, h, z2->dp[i]);
11767
            z1->dp[i] = l;
11768
            l = h;
11769
            h = 0;
11770
        }
11771
        z1->dp[i] += l;
11772
        /* r += z1 << 16 */
11773
        l = 0;
11774
        h = 0;
11775
        for (i = 0; i < 32; i++) {
11776
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
11777
            SP_ASM_ADDC(l, h, z1->dp[i]);
11778
            r->dp[i + 32] = l;
11779
            l = h;
11780
            h = 0;
11781
        }
11782
        for (; i < 65; i++) {
11783
            SP_ASM_ADDC(l, h, z1->dp[i]);
11784
            r->dp[i + 32] = l;
11785
            l = h;
11786
            h = 0;
11787
        }
11788
        /* r += z2 << 64  */
11789
        l = 0;
11790
        h = 0;
11791
        for (i = 0; i < 33; i++) {
11792
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
11793
            SP_ASM_ADDC(l, h, z2->dp[i]);
11794
            r->dp[i + 64] = l;
11795
            l = h;
11796
            h = 0;
11797
        }
11798
        for (; i < 64; i++) {
11799
            SP_ASM_ADDC(l, h, z2->dp[i]);
11800
            r->dp[i + 64] = l;
11801
            l = h;
11802
            h = 0;
11803
        }
11804
        r->used = 128;
11805
        sp_clamp(r);
11806
    }
11807
11808
    FREE_SP_INT_ARRAY(z, NULL);
11809
    FREE_SP_INT_ARRAY(t, NULL);
11810
    return err;
11811
}
11812
    #endif /* SP_INT_DIGITS >= 128 */
11813
11814
    #if SP_INT_DIGITS >= 192
11815
/* Multiply a by b and store in r: r = a * b
11816
 *
11817
 * Karatsuba implementation.
11818
 *
11819
 * @param  [in]   a  SP integer to multiply.
11820
 * @param  [in]   b  SP integer to multiply.
11821
 * @param  [out]  r  SP integer result.
11822
 *
11823
 * @return  MP_OKAY on success.
11824
 * @return  MP_MEM when dynamic memory allocation fails.
11825
 */
11826
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
11827
{
11828
    int err = MP_OKAY;
11829
    unsigned int i;
11830
    sp_int_digit l;
11831
    sp_int_digit h;
11832
    sp_int* a1;
11833
    sp_int* b1;
11834
    sp_int* z0;
11835
    sp_int* z1;
11836
    sp_int* z2;
11837
    sp_int_digit ca;
11838
    sp_int_digit cb;
11839
    DECL_SP_INT_ARRAY(t, 48, 2);
11840
    DECL_SP_INT_ARRAY(z, 97, 2);
11841
11842
    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
11843
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
11844
    if (err == MP_OKAY) {
11845
        a1 = t[0];
11846
        b1 = t[1];
11847
        z1 = z[0];
11848
        z2 = z[1];
11849
        z0 = r;
11850
11851
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
11852
        a1->used = 48;
11853
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
11854
        b1->used = 48;
11855
11856
        /* z2 = a1 * b1 */
11857
        err = _sp_mul_48(a1, b1, z2);
11858
    }
11859
    if (err == MP_OKAY) {
11860
        l = a1->dp[0];
11861
        h = 0;
11862
        SP_ASM_ADDC(l, h, a->dp[0]);
11863
        a1->dp[0] = l;
11864
        l = h;
11865
        h = 0;
11866
        for (i = 1; i < 48; i++) {
11867
            SP_ASM_ADDC(l, h, a1->dp[i]);
11868
            SP_ASM_ADDC(l, h, a->dp[i]);
11869
            a1->dp[i] = l;
11870
            l = h;
11871
            h = 0;
11872
        }
11873
        ca = l;
11874
        /* b01 = b0 + b1 */
11875
        l = b1->dp[0];
11876
        h = 0;
11877
        SP_ASM_ADDC(l, h, b->dp[0]);
11878
        b1->dp[0] = l;
11879
        l = h;
11880
        h = 0;
11881
        for (i = 1; i < 48; i++) {
11882
            SP_ASM_ADDC(l, h, b1->dp[i]);
11883
            SP_ASM_ADDC(l, h, b->dp[i]);
11884
            b1->dp[i] = l;
11885
            l = h;
11886
            h = 0;
11887
        }
11888
        cb = l;
11889
11890
        /* z0 = a0 * b0 */
11891
        err = _sp_mul_48(a, b, z0);
11892
    }
11893
    if (err == MP_OKAY) {
11894
        /* z1 = (a0 + a1) * (b0 + b1) */
11895
        err = _sp_mul_48(a1, b1, z1);
11896
    }
11897
    if (err == MP_OKAY) {
11898
        /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
11899
        /* r = z0 */
11900
        /* r += (z1 - z0 - z2) << 48 */
11901
        z1->dp[96] = ca & cb;
11902
        l = 0;
11903
        if (ca) {
11904
            h = 0;
11905
            for (i = 0; i < 48; i++) {
11906
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
11907
                SP_ASM_ADDC(l, h, b1->dp[i]);
11908
                z1->dp[i + 48] = l;
11909
                l = h;
11910
                h = 0;
11911
            }
11912
        }
11913
        z1->dp[96] += l;
11914
        l = 0;
11915
        if (cb) {
11916
            h = 0;
11917
            for (i = 0; i < 48; i++) {
11918
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
11919
                SP_ASM_ADDC(l, h, a1->dp[i]);
11920
                z1->dp[i + 48] = l;
11921
                l = h;
11922
                h = 0;
11923
            }
11924
        }
11925
        z1->dp[96] += l;
11926
        /* z1 = z1 - z0 - z1 */
11927
        l = 0;
11928
        h = 0;
11929
        for (i = 0; i < 96; i++) {
11930
            l += z1->dp[i];
11931
            SP_ASM_SUBB(l, h, z0->dp[i]);
11932
            SP_ASM_SUBB(l, h, z2->dp[i]);
11933
            z1->dp[i] = l;
11934
            l = h;
11935
            h = 0;
11936
        }
11937
        z1->dp[i] += l;
11938
        /* r += z1 << 16 */
11939
        l = 0;
11940
        h = 0;
11941
        for (i = 0; i < 48; i++) {
11942
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
11943
            SP_ASM_ADDC(l, h, z1->dp[i]);
11944
            r->dp[i + 48] = l;
11945
            l = h;
11946
            h = 0;
11947
        }
11948
        for (; i < 97; i++) {
11949
            SP_ASM_ADDC(l, h, z1->dp[i]);
11950
            r->dp[i + 48] = l;
11951
            l = h;
11952
            h = 0;
11953
        }
11954
        /* r += z2 << 96  */
11955
        l = 0;
11956
        h = 0;
11957
        for (i = 0; i < 49; i++) {
11958
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
11959
            SP_ASM_ADDC(l, h, z2->dp[i]);
11960
            r->dp[i + 96] = l;
11961
            l = h;
11962
            h = 0;
11963
        }
11964
        for (; i < 96; i++) {
11965
            SP_ASM_ADDC(l, h, z2->dp[i]);
11966
            r->dp[i + 96] = l;
11967
            l = h;
11968
            h = 0;
11969
        }
11970
        r->used = 192;
11971
        sp_clamp(r);
11972
    }
11973
11974
    FREE_SP_INT_ARRAY(z, NULL);
11975
    FREE_SP_INT_ARRAY(t, NULL);
11976
    return err;
11977
}
11978
    #endif /* SP_INT_DIGITS >= 192 */
11979
11980
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
11981
#endif /* !WOLFSSL_SP_SMALL */
11982
11983
/* Multiply a by b and store in r: r = a * b
11984
 *
11985
 * @param  [in]   a  SP integer to multiply.
11986
 * @param  [in]   b  SP integer to multiply.
11987
 * @param  [out]  r  SP integer result.
11988
 *
11989
 * @return  MP_OKAY on success.
11990
 * @return  MP_VAL when a, b or is NULL; or the result will be too big for fixed
11991
 *          data length.
11992
 * @return  MP_MEM when dynamic memory allocation fails.
11993
 */
11994
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
11995
0
{
11996
0
    int err = MP_OKAY;
11997
#ifdef WOLFSSL_SP_INT_NEGATIVE
11998
    sp_uint8 sign = MP_ZPOS;
11999
#endif
12000
12001
0
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
12002
0
        err = MP_VAL;
12003
0
    }
12004
12005
    /* Need extra digit during calculation. */
12006
    /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12007
    /* clang-tidy falsely believes that r->size was corrupted by the _sp_copy()
12008
     * to "Copy base into working variable" in _sp_exptmod_ex().
12009
     */
12010
0
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
12011
0
        err = MP_VAL;
12012
0
    }
12013
    /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
12014
12015
#if 0
12016
    if (err == MP_OKAY) {
12017
        sp_print(a, "a");
12018
        sp_print(b, "b");
12019
    }
12020
#endif
12021
12022
0
    if (err == MP_OKAY) {
12023
    #ifdef WOLFSSL_SP_INT_NEGATIVE
12024
        sign = a->sign ^ b->sign;
12025
    #endif
12026
12027
0
        if ((a->used == 0) || (b->used == 0)) {
12028
0
            _sp_zero(r);
12029
0
        }
12030
0
        else
12031
0
#ifndef WOLFSSL_SP_SMALL
12032
0
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
12033
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
12034
0
        if ((a->used == 4) && (b->used == 4)) {
12035
0
            err = _sp_mul_4(a, b, r);
12036
0
        }
12037
0
        else
12038
0
#endif /* SP_WORD_SIZE == 64 */
12039
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
12040
0
#ifdef SQR_MUL_ASM
12041
0
        if ((a->used == 6) && (b->used == 6)) {
12042
0
            err = _sp_mul_6(a, b, r);
12043
0
        }
12044
0
        else
12045
0
#endif /* SQR_MUL_ASM */
12046
0
#endif /* SP_WORD_SIZE == 64 */
12047
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
12048
#ifdef SQR_MUL_ASM
12049
        if ((a->used == 8) && (b->used == 8)) {
12050
            err = _sp_mul_8(a, b, r);
12051
        }
12052
        else
12053
#endif /* SQR_MUL_ASM */
12054
#endif /* SP_WORD_SIZE == 32 */
12055
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
12056
#ifdef SQR_MUL_ASM
12057
        if ((a->used == 12) && (b->used == 12)) {
12058
            err = _sp_mul_12(a, b, r);
12059
        }
12060
        else
12061
#endif /* SQR_MUL_ASM */
12062
#endif /* SP_WORD_SIZE == 32 */
12063
0
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
12064
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
12065
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
12066
    (SP_WORD_SIZE == 64)))
12067
    #if SP_INT_DIGITS >= 32
12068
        if ((a->used == 16) && (b->used == 16)) {
12069
            err = _sp_mul_16(a, b, r);
12070
        }
12071
        else
12072
    #endif /* SP_INT_DIGITS >= 32 */
12073
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
12074
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
12075
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
12076
    #if SP_INT_DIGITS >= 48
12077
        if ((a->used == 24) && (b->used == 24)) {
12078
            err = _sp_mul_24(a, b, r);
12079
        }
12080
        else
12081
    #endif /* SP_INT_DIGITS >= 48 */
12082
    #if SP_INT_DIGITS >= 64
12083
        if ((a->used == 32) && (b->used == 32)) {
12084
            err = _sp_mul_32(a, b, r);
12085
        }
12086
        else
12087
    #endif /* SP_INT_DIGITS >= 64 */
12088
    #if SP_INT_DIGITS >= 96
12089
        if ((a->used == 48) && (b->used == 48)) {
12090
            err = _sp_mul_48(a, b, r);
12091
        }
12092
        else
12093
    #endif /* SP_INT_DIGITS >= 96 */
12094
    #if SP_INT_DIGITS >= 128
12095
        if ((a->used == 64) && (b->used == 64)) {
12096
            err = _sp_mul_64(a, b, r);
12097
        }
12098
        else
12099
    #endif /* SP_INT_DIGITS >= 128 */
12100
    #if SP_INT_DIGITS >= 192
12101
        if ((a->used == 96) && (b->used == 96)) {
12102
            err = _sp_mul_96(a, b, r);
12103
        }
12104
        else
12105
    #endif /* SP_INT_DIGITS >= 192 */
12106
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
12107
0
#endif /* !WOLFSSL_SP_SMALL */
12108
12109
0
#ifdef SQR_MUL_ASM
12110
0
        if (a->used == b->used) {
12111
0
            err = _sp_mul_nxn(a, b, r);
12112
0
        }
12113
0
        else
12114
0
#endif
12115
0
        {
12116
0
            err = _sp_mul(a, b, r);
12117
0
        }
12118
0
    }
12119
12120
#ifdef WOLFSSL_SP_INT_NEGATIVE
12121
    if (err == MP_OKAY) {
12122
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
12123
    }
12124
#endif
12125
12126
#if 0
12127
    if (err == MP_OKAY) {
12128
        sp_print(r, "rmul");
12129
    }
12130
#endif
12131
12132
0
    return err;
12133
0
}
12134
/* END SP_MUL implementations. */
12135
12136
#endif
12137
12138
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
12139
    defined(WOLFCRYPT_HAVE_ECCSI) || \
12140
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
12141
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12142
 *
12143
 * @param  [in]   a  SP integer to multiply.
12144
 * @param  [in]   b  SP integer to multiply.
12145
 * @param  [in]   m  SP integer that is the modulus.
12146
 * @param  [out]  r  SP integer result.
12147
 *
12148
 * @return  MP_OKAY on success.
12149
 * @return  MP_MEM when dynamic memory allocation fails.
12150
 */
12151
static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
12152
    sp_int* r)
12153
0
{
12154
0
    int err = MP_OKAY;
12155
    /* Create temporary for multiplication result. */
12156
0
    DECL_SP_INT(t, a->used + b->used);
12157
12158
0
    ALLOC_SP_INT(t, a->used + b->used, err, NULL);
12159
0
    if (err == MP_OKAY) {
12160
0
        err = sp_init_size(t, (sp_size_t)(a->used + b->used));
12161
0
    }
12162
12163
    /* Multiply and reduce. */
12164
0
    if (err == MP_OKAY) {
12165
0
        err = sp_mul(a, b, t);
12166
0
    }
12167
0
    if (err == MP_OKAY) {
12168
0
        err = sp_mod(t, m, r);
12169
0
    }
12170
12171
    /* Dispose of an allocated SP int. */
12172
0
    FREE_SP_INT(t, NULL);
12173
12174
0
    return err;
12175
0
}
12176
12177
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12178
 *
12179
 * @param  [in]   a  SP integer to multiply.
12180
 * @param  [in]   b  SP integer to multiply.
12181
 * @param  [in]   m  SP integer that is the modulus.
12182
 * @param  [out]  r  SP integer result.
12183
 *
12184
 * @return  MP_OKAY on success.
12185
 * @return  MP_MEM when dynamic memory allocation fails.
12186
 */
12187
static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
12188
    sp_int* r)
12189
0
{
12190
0
    int err = MP_OKAY;
12191
12192
    /* Use r as intermediate result if not same as pointer m which is needed
12193
     * after first intermediate result.
12194
     */
12195
0
    if (r != m) {
12196
        /* Multiply and reduce. */
12197
0
        err = sp_mul(a, b, r);
12198
0
        if (err == MP_OKAY) {
12199
0
            err = sp_mod(r, m, r);
12200
0
        }
12201
0
    }
12202
0
    else {
12203
        /* Do operation using temporary. */
12204
0
        err = _sp_mulmod_tmp(a, b, m, r);
12205
0
    }
12206
12207
0
    return err;
12208
0
}
12209
12210
/* Multiply a by b mod m and store in r: r = (a * b) mod m
12211
 *
12212
 * @param  [in]   a  SP integer to multiply.
12213
 * @param  [in]   b  SP integer to multiply.
12214
 * @param  [in]   m  SP integer that is the modulus.
12215
 * @param  [out]  r  SP integer result.
12216
 *
12217
 * @return  MP_OKAY on success.
12218
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
12219
 *          fixed data length.
12220
 * @return  MP_MEM when dynamic memory allocation fails.
12221
 */
12222
int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
12223
0
{
12224
0
    int err = MP_OKAY;
12225
12226
    /* Validate parameters. */
12227
0
    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
12228
0
        err = MP_VAL;
12229
0
    }
12230
    /* Ensure result SP int is big enough for intermediates. */
12231
0
    if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
12232
0
        err = MP_VAL;
12233
0
    }
12234
12235
#if 0
12236
    if (err == 0) {
12237
        sp_print(a, "a");
12238
        sp_print(b, "b");
12239
        sp_print(m, "m");
12240
    }
12241
#endif
12242
12243
0
    if (err == MP_OKAY) {
12244
0
        err = _sp_mulmod(a, b, m, r);
12245
0
    }
12246
12247
#if 0
12248
    if (err == 0) {
12249
        sp_print(r, "rmm");
12250
    }
12251
#endif
12252
12253
0
    return err;
12254
0
}
12255
#endif
12256
12257
#ifdef WOLFSSL_SP_INVMOD
12258
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12259
 * Right-shift Algorithm. NOT constant time.
12260
 *
12261
 * Algorithm:
12262
 *   1. u = m, v = a, b = 0, c = 1
12263
 *   2. While v != 1 and u != 0
12264
 *     2.1. If u even
12265
 *       2.1.1. u /= 2
12266
 *       2.1.2. b = (b / 2) mod m
12267
 *     2.2. Else if v even
12268
 *       2.2.1. v /= 2
12269
 *       2.2.2. c = (c / 2) mod m
12270
 *     2.3. Else if u >= v
12271
 *       2.3.1. u -= v
12272
 *       2.3.2. b = (b - c) mod m
12273
 *     2.4. Else (v > u)
12274
 *       2.4.1. v -= u
12275
 *       2.4.2. c = (c - b) mod m
12276
 *  3. NO_INVERSE if u == 0
12277
 *
12278
 * @param  [in]   a  SP integer to find inverse of.
12279
 * @param  [in]   m  SP integer that is the modulus.
12280
 * @param  [in]   u  SP integer to use in calculation.
12281
 * @param  [in]   v  SP integer to use in calculation.
12282
 * @param  [in]   b  SP integer to use in calculation
12283
 * @param  [out]  c  SP integer that is the inverse.
12284
 *
12285
 * @return  MP_OKAY on success.
12286
 * @return  MP_VAL when no inverse.
12287
 */
12288
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
12289
    sp_int* v, sp_int* b, sp_int* c)
12290
0
{
12291
0
    int err = MP_OKAY;
12292
12293
    /* 1. u = m, v = a, b = 0, c = 1 */
12294
0
    _sp_copy(m, u);
12295
0
    if (a != v) {
12296
0
        _sp_copy(a, v);
12297
0
    }
12298
0
    _sp_zero(b);
12299
0
    _sp_set(c, 1);
12300
12301
    /* 2. While v != 1 and u != 0 */
12302
0
    while (!sp_isone(v) && !sp_iszero(u)) {
12303
        /* 2.1. If u even */
12304
0
        if ((u->dp[0] & 1) == 0) {
12305
            /* 2.1.1. u /= 2 */
12306
0
            _sp_div_2(u, u);
12307
            /* 2.1.2. b = (b / 2) mod m */
12308
0
            if (sp_isodd(b)) {
12309
0
                _sp_add_off(b, m, b, 0);
12310
0
            }
12311
0
            _sp_div_2(b, b);
12312
0
        }
12313
        /* 2.2. Else if v even */
12314
0
        else if ((v->dp[0] & 1) == 0) {
12315
            /* 2.2.1. v /= 2 */
12316
0
            _sp_div_2(v, v);
12317
            /* 2.2.2. c = (c / 2) mod m */
12318
0
            if (sp_isodd(c)) {
12319
0
                _sp_add_off(c, m, c, 0);
12320
0
            }
12321
0
            _sp_div_2(c, c);
12322
0
        }
12323
        /* 2.3. Else if u >= v */
12324
0
        else if (_sp_cmp_abs(u, v) != MP_LT) {
12325
            /* 2.3.1. u -= v */
12326
0
            _sp_sub_off(u, v, u, 0);
12327
            /* 2.3.2. b = (b - c) mod m */
12328
0
            if (_sp_cmp_abs(b, c) == MP_LT) {
12329
0
                _sp_add_off(b, m, b, 0);
12330
0
            }
12331
0
            _sp_sub_off(b, c, b, 0);
12332
0
        }
12333
        /* 2.4. Else (v > u) */
12334
0
        else {
12335
            /* 2.4.1. v -= u */
12336
0
            _sp_sub_off(v, u, v, 0);
12337
            /* 2.4.2. c = (c - b) mod m */
12338
0
            if (_sp_cmp_abs(c, b) == MP_LT) {
12339
0
                _sp_add_off(c, m, c, 0);
12340
0
            }
12341
0
            _sp_sub_off(c, b, c, 0);
12342
0
        }
12343
0
    }
12344
    /* 3. NO_INVERSE if u == 0 */
12345
0
    if (sp_iszero(u)) {
12346
0
        err = MP_VAL;
12347
0
    }
12348
12349
0
    return err;
12350
0
}
12351
12352
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12353
    (!defined(NO_RSA) || !defined(NO_DH))
12354
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
12355
 * Extended Euclidean Algorithm. NOT constant time.
12356
 *
12357
 * Creates one new SP int (d) to hold the quotient.
12358
 *
12359
 * Algorithm:
12360
 *  1. x = m, y = a, b = 1, c = 0
12361
 *  2. while x > 1
12362
 *   2.1. d = x / y, r = x mod y
12363
 *   2.2. c -= d * b
12364
 *   2.3. x = y, y = r
12365
 *   2.4. s = b, b = c, c = s
12366
 *  3. If y != 0 then NO_INVERSE
12367
 *  4. If c < 0 then c += m
12368
 *  5. inv = c
12369
 *
12370
 * @param  [in]   a    SP integer to find inverse of.
12371
 * @param  [in]   m    SP integer that is the modulus.
12372
 * @param  [in]   x    SP integer to use in calculation.
12373
 * @param  [in]   y    SP integer to use in calculation.
12374
 * @param  [in]   b    SP integer to use in calculation
12375
 * @param  [in]   c    SP integer to use in calculation
12376
 * @param  [out]  inv  SP integer that is the inverse.
12377
 *
12378
 * @return  MP_OKAY on success.
12379
 * @return  MP_VAL when no inverse.
12380
 * @return  MP_MEM when dynamic memory allocation fails.
12381
 */
12382
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
12383
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
12384
0
{
12385
0
    int err = MP_OKAY;
12386
0
    sp_int* s;
12387
0
#ifndef WOLFSSL_SP_INT_NEGATIVE
12388
0
    int bneg = 0;
12389
0
    int cneg = 0;
12390
0
    int neg;
12391
0
#endif
12392
0
    DECL_SP_INT(d, m->used + 1);
12393
12394
0
    ALLOC_SP_INT(d, m->used + 1, err, NULL);
12395
0
    if (err == MP_OKAY) {
12396
0
        err = sp_init_size(d, (sp_size_t)(m->used + 1U));
12397
0
    }
12398
12399
0
    if (err == MP_OKAY) {
12400
        /* 1. x = m, y = a, b = 1, c = 0 */
12401
0
        if (a != y) {
12402
0
            _sp_copy(a, y);
12403
0
        }
12404
0
        _sp_copy(m, x);
12405
0
        _sp_set(b, 1);
12406
0
        _sp_zero(c);
12407
0
    }
12408
#ifdef WOLFSSL_SP_INT_NEGATIVE
12409
    /* 2. while x > 1 */
12410
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12411
        /* 2.1. d = x / y, r = x mod y */
12412
        err = sp_div(x, y, d, x);
12413
        if (err == MP_OKAY) {
12414
            /* 2.2. c -= d * b */
12415
            if (sp_isone(d)) {
12416
                /* c -= 1 * b */
12417
                err = sp_sub(c, b, c);
12418
            }
12419
            else {
12420
                /* d *= b */
12421
                err = sp_mul(d, b, d);
12422
                /* c -= d */
12423
                if (err == MP_OKAY) {
12424
                    err = sp_sub(c, d, c);
12425
                }
12426
            }
12427
            /* 2.3. x = y, y = r */
12428
            s = y; y = x; x = s;
12429
            /* 2.4. s = b, b = c, c = s */
12430
            s = b; b = c; c = s;
12431
        }
12432
    }
12433
    /* 3. If y != 0 then NO_INVERSE */
12434
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12435
        err = MP_VAL;
12436
    }
12437
    /* 4. If c < 0 then c += m */
12438
    if ((err == MP_OKAY) && sp_isneg(c)) {
12439
        err = sp_add(c, m, c);
12440
    }
12441
    if (err == MP_OKAY) {
12442
        /* 5. inv = c */
12443
        err = sp_copy(c, inv);
12444
    }
12445
#else
12446
    /* 2. while x > 1 */
12447
0
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
12448
        /* 2.1. d = x / y, r = x mod y */
12449
0
        err = sp_div(x, y, d, x);
12450
0
        if (err == MP_OKAY) {
12451
0
            if (sp_isone(d)) {
12452
                /* c -= 1 * b */
12453
0
                if ((bneg ^ cneg) == 1) {
12454
                    /* c -= -b or -c -= b, therefore add. */
12455
0
                    _sp_add_off(c, b, c, 0);
12456
0
                }
12457
0
                else if (_sp_cmp_abs(c, b) == MP_LT) {
12458
                    /* |c| < |b| and same sign, reverse subtract and negate. */
12459
0
                    _sp_sub_off(b, c, c, 0);
12460
0
                    cneg = !cneg;
12461
0
                }
12462
0
                else {
12463
                    /* |c| >= |b| */
12464
0
                    _sp_sub_off(c, b, c, 0);
12465
0
                }
12466
0
            }
12467
0
            else {
12468
                /* d *= b */
12469
0
                err = sp_mul(d, b, d);
12470
                /* c -= d */
12471
0
                if (err == MP_OKAY) {
12472
0
                    if ((bneg ^ cneg) == 1) {
12473
                        /* c -= -d or -c -= d, therefore add. */
12474
0
                        _sp_add_off(c, d, c, 0);
12475
0
                    }
12476
0
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
12477
                        /* |c| < |d| and same sign, reverse subtract and negate.
12478
                         */
12479
0
                        _sp_sub_off(d, c, c, 0);
12480
0
                        cneg = !cneg;
12481
0
                    }
12482
0
                    else {
12483
0
                        _sp_sub_off(c, d, c, 0);
12484
0
                    }
12485
0
                }
12486
0
            }
12487
            /* 2.3. x = y, y = r */
12488
0
            s = y; y = x; x = s;
12489
            /* 2.4. s = b, b = c, c = s */
12490
0
            s = b; b = c; c = s;
12491
0
            neg = bneg; bneg = cneg; cneg = neg;
12492
0
        }
12493
0
    }
12494
    /* 3. If y != 0 then NO_INVERSE */
12495
0
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
12496
0
        err = MP_VAL;
12497
0
    }
12498
    /* 4. If c < 0 then c += m */
12499
0
    if ((err == MP_OKAY) && cneg) {
12500
        /* c = m - |c| */
12501
0
        _sp_sub_off(m, c, c, 0);
12502
0
    }
12503
0
    if (err == MP_OKAY) {
12504
        /* 5. inv = c */
12505
0
        err = sp_copy(c, inv);
12506
0
    }
12507
0
#endif
12508
12509
0
    FREE_SP_INT(d, NULL);
12510
0
    return err;
12511
0
}
12512
#endif
12513
12514
/* Calculates the multiplicative inverse in the field.
12515
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12516
 *
12517
 * r*a = x*m + 1
12518
 *
12519
 * @param  [in]   a  SP integer to find inverse of.
12520
 * @param  [in]   m  SP integer that is the modulus.
12521
 * @param  [out]  r  SP integer to hold result. r cannot be m.
12522
 *
12523
 * @return  MP_OKAY on success.
12524
 * @return  MP_VAL when m is even and a divides m evenly.
12525
 * @return  MP_MEM when dynamic memory allocation fails.
12526
 */
12527
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12528
0
{
12529
0
    int err = MP_OKAY;
12530
0
    sp_int* u = NULL;
12531
0
    sp_int* v = NULL;
12532
0
    sp_int* b = NULL;
12533
0
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
12534
0
    DECL_SP_INT(c, 2 * m->used + 1);
12535
12536
    /* Allocate SP ints:
12537
     *  - x3 one word larger than modulus
12538
     *  - x1 one word longer than twice modulus used
12539
     */
12540
0
    ALLOC_SP_INT_ARRAY(t, m->used + 1U, 3, err, NULL);
12541
0
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
12542
0
    if (err == MP_OKAY) {
12543
0
        u = t[0];
12544
0
        v = t[1];
12545
0
        b = t[2];
12546
        /* c allocated separately and larger for even mod case. */
12547
0
    }
12548
12549
    /* Initialize intermediate values with minimal sizes. */
12550
0
    if (err == MP_OKAY) {
12551
0
        err = sp_init_size(u, (sp_size_t)(m->used + 1U));
12552
0
    }
12553
0
    if (err == MP_OKAY) {
12554
0
        err = sp_init_size(v, (sp_size_t)(m->used + 1U));
12555
0
    }
12556
0
    if (err == MP_OKAY) {
12557
0
        err = sp_init_size(b, (sp_size_t)(m->used + 1U));
12558
0
    }
12559
0
    if (err == MP_OKAY) {
12560
0
        err = sp_init_size(c, (sp_size_t)(2U * m->used + 1U));
12561
0
    }
12562
12563
0
    if (err == MP_OKAY) {
12564
0
        const sp_int* mm = m;
12565
0
        const sp_int* ma = a;
12566
0
        int evenMod = 0;
12567
12568
0
        if (sp_iseven(m)) {
12569
            /* a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
12570
0
            mm = a;
12571
0
            ma = v;
12572
0
            _sp_copy(a, u);
12573
0
            err = sp_mod(m, a, v);
12574
            /* v == 0 when a divides m evenly - no inverse.  */
12575
0
            if ((err == MP_OKAY) && sp_iszero(v)) {
12576
0
                err = MP_VAL;
12577
0
            }
12578
0
            evenMod = 1;
12579
0
        }
12580
12581
0
        if (err == MP_OKAY) {
12582
            /* Calculate inverse. */
12583
0
        #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
12584
0
            (!defined(NO_RSA) || !defined(NO_DH))
12585
0
            if (sp_count_bits(mm) >= 1024) {
12586
0
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
12587
0
            }
12588
0
            else
12589
0
        #endif
12590
0
            {
12591
0
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
12592
0
            }
12593
0
        }
12594
12595
        /* Fixup for even modulus. */
12596
0
        if ((err == MP_OKAY) && evenMod) {
12597
            /* Finish operation.
12598
             *    a^-1 mod m = m + ((1 - m*c) / a)
12599
             * => a^-1 mod m = m - ((m*c - 1) / a)
12600
             */
12601
0
            err = sp_mul(c, m, c);
12602
0
            if (err == MP_OKAY) {
12603
0
                _sp_sub_d(c, 1, c);
12604
0
                err = sp_div(c, a, c, NULL);
12605
0
            }
12606
0
            if (err == MP_OKAY) {
12607
0
                err = sp_sub(m, c, r);
12608
0
            }
12609
0
        }
12610
0
        else if (err == MP_OKAY) {
12611
0
            _sp_copy(c, r);
12612
0
        }
12613
0
    }
12614
12615
0
    FREE_SP_INT(c, NULL);
12616
0
    FREE_SP_INT_ARRAY(t, NULL);
12617
0
    return err;
12618
0
}
12619
12620
/* Calculates the multiplicative inverse in the field.
12621
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
12622
 *
12623
 * r*a = x*m + 1
12624
 *
12625
 * @param  [in]   a  SP integer to find inverse of.
12626
 * @param  [in]   m  SP integer that is the modulus.
12627
 * @param  [out]  r  SP integer to hold result. r cannot be m.
12628
 *
12629
 * @return  MP_OKAY on success.
12630
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
12631
 *          m is negative.
12632
 * @return  MP_MEM when dynamic memory allocation fails.
12633
 */
12634
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
12635
0
{
12636
0
    int err = MP_OKAY;
12637
12638
    /* Validate parameters. */
12639
0
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
12640
0
        err = MP_VAL;
12641
0
    }
12642
0
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
12643
0
        err = MP_VAL;
12644
0
    }
12645
12646
#ifdef WOLFSSL_SP_INT_NEGATIVE
12647
    /* Don't support negative modulus. */
12648
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
12649
        err = MP_VAL;
12650
    }
12651
#endif
12652
12653
0
    if (err == MP_OKAY) {
12654
        /* Ensure number is less than modulus. */
12655
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
12656
0
            err = sp_mod(a, m, r);
12657
0
            a = r;
12658
0
        }
12659
0
    }
12660
12661
#ifdef WOLFSSL_SP_INT_NEGATIVE
12662
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
12663
        /* Make 'a' positive */
12664
        err = sp_add(m, a, r);
12665
        a = r;
12666
    }
12667
#endif
12668
12669
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1)  */
12670
0
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
12671
0
        err = MP_VAL;
12672
0
    }
12673
    /* r*2*x != n*2*y + 1 for integer x,y */
12674
0
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
12675
0
        err = MP_VAL;
12676
0
    }
12677
    /* 1*1 = 0*m + 1  */
12678
0
    if ((err == MP_OKAY) && sp_isone(a)) {
12679
0
        _sp_set(r, 1);
12680
0
    }
12681
0
    else if (err == MP_OKAY) {
12682
0
        err = _sp_invmod(a, m, r);
12683
0
    }
12684
12685
0
    return err;
12686
0
}
12687
#endif /* WOLFSSL_SP_INVMOD */
12688
12689
#ifdef WOLFSSL_SP_INVMOD_MONT_CT
12690
12691
/* Number of entries to pre-compute.
12692
 * Many pre-defined primes have multiple of 8 consecutive 1s.
12693
 * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
12694
 */
12695
0
#define CT_INV_MOD_PRE_CNT      8
12696
12697
/* Calculates the multiplicative inverse in the field - constant time.
12698
 *
12699
 * Modulus (m) must be a prime and greater than 2.
12700
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12701
 *
12702
 * Algorithm:
12703
 *  pre = pre-computed values, m = modulus, a = value to find inverse of,
12704
 *  e = exponent
12705
 *  Pre-calc:
12706
 *   1. pre[0] = 2^0 * a mod m
12707
 *   2. For i in 2..CT_INV_MOD_PRE_CNT
12708
 *    2.1. pre[i-1] = ((pre[i-2] ^ 2) * a) mod m
12709
 *  Calc inverse:
12710
 *   1. e = m - 2
12711
 *   2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12712
 *   3. t = pre[j-1]
12713
 *   4. s = 0
12714
 *   5. j = 0
12715
 *   6. For i index of next top bit..0
12716
 *    6.1. bit = e[i]
12717
 *    6.2. j += bit
12718
 *    6.3. s += 1
12719
 *    6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12720
 *     6.4.1. s -= 1 - bit
12721
 *     6.4.2. For s downto 1
12722
 *      6.4.2.1. t = (t ^ 2) mod m
12723
 *     6.4.3. s = 1 - bit
12724
 *     6.4.4. t = (t * pre[j-1]) mod m
12725
 *     6.4.5. j = 0
12726
 *   7. For s downto 1
12727
 *    7.1. t = (t ^ 2) mod m
12728
 *   8. If j > 0 then r = (t * pre[j-1]) mod m
12729
 *   9. Else r = t
12730
 *
12731
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
12732
 * @param  [in]   m   SP integer that is the modulus.
12733
 * @param  [out]  r   SP integer to hold result.
12734
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
12735
 *
12736
 * @return  MP_OKAY on success.
12737
 * @return  MP_MEM when dynamic memory allocation fails.
12738
 */
12739
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12740
    sp_int_digit mp)
12741
0
{
12742
0
    int err = MP_OKAY;
12743
0
    int i;
12744
0
    int j = 0;
12745
0
    int s = 0;
12746
0
    sp_int* t = NULL;
12747
0
    sp_int* e = NULL;
12748
0
#ifndef WOLFSSL_SP_NO_MALLOC
12749
0
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12750
#else
12751
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
12752
#endif
12753
12754
0
#ifndef WOLFSSL_SP_NO_MALLOC
12755
0
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err,
12756
0
        NULL);
12757
#else
12758
    ALLOC_SP_INT_ARRAY(pre, m->used * 2U + 1U, CT_INV_MOD_PRE_CNT + 2, err, NULL);
12759
#endif
12760
0
    if (err == MP_OKAY) {
12761
0
        t = pre[CT_INV_MOD_PRE_CNT + 0];
12762
0
        e = pre[CT_INV_MOD_PRE_CNT + 1];
12763
        /* Space for sqr and mul result. */
12764
0
        _sp_init_size(t, (sp_size_t)(m->used * 2 + 1));
12765
        /* e = mod - 2 */
12766
0
        _sp_init_size(e, (sp_size_t)(m->used + 1));
12767
12768
        /* Create pre-computation results: ((2^(1..8))-1).a. */
12769
0
        _sp_init_size(pre[0], (sp_size_t)(m->used * 2 + 1));
12770
        /* 1. pre[0] = 2^0 * a mod m
12771
         *    Start with 1.a = a.
12772
         */
12773
0
        _sp_copy(a, pre[0]);
12774
        /* 2. For i in 2..CT_INV_MOD_PRE_CNT
12775
         *    For rest of entries in table.
12776
         */
12777
0
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
12778
            /* 2.1. pre[i] = ((pre[i-1] ^ 2) * a) mod m (zero-based i) */
12779
            /* Previous value ..1 -> ..10 */
12780
0
            _sp_init_size(pre[i], (sp_size_t)(m->used * 2 + 1));
12781
0
            err = sp_sqr(pre[i-1], pre[i]);
12782
0
            if (err == MP_OKAY) {
12783
0
                err = _sp_mont_red(pre[i], m, mp, 0);
12784
0
            }
12785
            /* ..10 -> ..11 */
12786
0
            if (err == MP_OKAY) {
12787
0
                err = sp_mul(pre[i], a, pre[i]);
12788
0
            }
12789
0
            if (err == MP_OKAY) {
12790
0
                err = _sp_mont_red(pre[i], m, mp, 0);
12791
0
            }
12792
0
        }
12793
0
    }
12794
12795
0
    if (err == MP_OKAY) {
12796
        /* 1. e = m - 2 */
12797
0
        _sp_sub_d(m, 2, e);
12798
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
12799
         *    One or more of the top bits is 1 so count.
12800
         */
12801
0
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
12802
0
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
12803
0
                    (j == CT_INV_MOD_PRE_CNT)) {
12804
0
                break;
12805
0
            }
12806
0
        }
12807
        /* 3. t = pre[j-1] - pre-computed value for the leading 1 bits. */
12808
0
        _sp_copy(pre[j-1], t);
12809
12810
        /* 4. s = 0 */
12811
0
        s = 0;
12812
        /* 5. j = 0 */
12813
0
        j = 0;
12814
        /* 6. For i index of next top bit..0
12815
         *    Do remaining bits in exponent.
12816
         */
12817
0
        for (; (err == MP_OKAY) && (i >= 0); i--) {
12818
            /* 6.1. bit = e[i] */
12819
0
            int bit = sp_is_bit_set(e, (unsigned int)i);
12820
12821
            /* 6.2. j += bit
12822
             *      Update count of consecutive 1 bits.
12823
             */
12824
0
            j += bit;
12825
            /* 6.3. s += 1
12826
             *      Update count of squares required.
12827
             */
12828
0
            s++;
12829
12830
            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
12831
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
12832
             */
12833
0
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
12834
                /* 6.4.1. s -= 1 - bit */
12835
0
                bit = 1 - bit;
12836
0
                s -= bit;
12837
                /* 6.4.2. For s downto 1
12838
                 *        Do s squares.
12839
                 */
12840
0
                for (; (err == MP_OKAY) && (s > 0); s--) {
12841
                    /* 6.4.2.1. t = (t ^ 2) mod m */
12842
0
                    err = sp_sqr(t, t);
12843
0
                    if (err == MP_OKAY) {
12844
0
                        err = _sp_mont_red(t, m, mp, 0);
12845
0
                    }
12846
0
                }
12847
                /* 6.4.3. s = 1 - bit */
12848
0
                s = bit;
12849
12850
                /* 6.4.4. t = (t * pre[j-1]) mod m */
12851
0
                if (err == MP_OKAY) {
12852
0
                    err = sp_mul(t, pre[j-1], t);
12853
0
                }
12854
0
                if (err == MP_OKAY) {
12855
0
                    err = _sp_mont_red(t, m, mp, 0);
12856
0
                }
12857
                /* 6.4.5. j = 0
12858
                 *        Reset number of 1 bits seen.
12859
                 */
12860
0
                j = 0;
12861
0
            }
12862
0
        }
12863
0
    }
12864
0
    if (err == MP_OKAY) {
12865
        /* 7. For s downto 1
12866
         *    Do s squares - total remaining. */
12867
0
        for (; (err == MP_OKAY) && (s > 0); s--) {
12868
            /* 7.1. t = (t ^ 2) mod m */
12869
0
            err = sp_sqr(t, t);
12870
0
            if (err == MP_OKAY) {
12871
0
                err = _sp_mont_red(t, m, mp, 0);
12872
0
            }
12873
0
        }
12874
0
    }
12875
0
    if (err == MP_OKAY) {
12876
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
12877
0
        if (j > 0) {
12878
0
            err = sp_mul(t, pre[j-1], r);
12879
0
            if (err == MP_OKAY) {
12880
0
                err = _sp_mont_red(r, m, mp, 0);
12881
0
            }
12882
0
        }
12883
        /* 9. Else r = t */
12884
0
        else {
12885
0
            _sp_copy(t, r);
12886
0
        }
12887
0
    }
12888
12889
0
#ifndef WOLFSSL_SP_NO_MALLOC
12890
0
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
12891
#else
12892
    FREE_SP_INT_ARRAY(pre, NULL);
12893
#endif
12894
0
    return err;
12895
0
}
12896
12897
/* Calculates the multiplicative inverse in the field - constant time.
12898
 *
12899
 * Modulus (m) must be a prime and greater than 2.
12900
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
12901
 *
12902
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
12903
 * @param  [in]   m   SP integer that is the modulus.
12904
 * @param  [out]  r   SP integer to hold result.
12905
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
12906
 *
12907
 * @return  MP_OKAY on success.
12908
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
12909
 * @return  MP_MEM when dynamic memory allocation fails.
12910
 */
12911
int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
12912
    sp_int_digit mp)
12913
0
{
12914
0
    int err = MP_OKAY;
12915
12916
    /* Validate parameters. */
12917
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
12918
0
        err = MP_VAL;
12919
0
    }
12920
    /* Ensure m is not too big. */
12921
0
    else if (m->used * 2 >= SP_INT_DIGITS) {
12922
0
        err = MP_VAL;
12923
0
    }
12924
    /* check that r can hold the range of the modulus result */
12925
0
    else if (m->used > r->size) {
12926
0
        err = MP_VAL;
12927
0
    }
12928
12929
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
12930
0
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
12931
0
            ((m->used == 1) && (m->dp[0] < 3)))) {
12932
0
        err = MP_VAL;
12933
0
    }
12934
12935
0
    if (err == MP_OKAY) {
12936
        /* Do operation. */
12937
0
        err = _sp_invmod_mont_ct(a, m, r, mp);
12938
0
    }
12939
12940
0
    return err;
12941
0
}
12942
12943
#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
12944
12945
12946
/**************************
12947
 * Exponentiation functions
12948
 **************************/
12949
12950
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
12951
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
12952
    defined(OPENSSL_ALL)
12953
12954
#ifndef WC_PROTECT_ENCRYPTED_MEM
12955
12956
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
12957
 * Process the exponent one bit at a time.
12958
 * Is constant time and can be cache attack resistant.
12959
 *
12960
 * Algorithm:
12961
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
12962
 *  1. s = 0
12963
 *  2. t[0] = b mod m.
12964
 *  3. t[1] = t[0]
12965
 *  4. For i in (bits-1)...0
12966
 *   4.1. t[s] = t[s] ^ 2
12967
 *   4.2. y = e[i]
12968
 *   4.3  j = y & s
12969
 *   4.4  s = s | y
12970
 *   4.5. t[j] = t[j] * b
12971
 *  5. r = t[1]
12972
 *
12973
 * @param  [in]   b     SP integer that is the base.
12974
 * @param  [in]   e     SP integer that is the exponent.
12975
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
12976
 *                      count of bits in e.
12977
 * @param  [in]   m     SP integer that is the modulus.
12978
 * @param  [out]  r     SP integer to hold result.
12979
 *
12980
 * @return  MP_OKAY on success.
12981
 * @return  MP_MEM when dynamic memory allocation fails.
12982
 */
12983
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
12984
    const sp_int* m, sp_int* r)
12985
0
{
12986
0
    int i;
12987
0
    int err = MP_OKAY;
12988
0
    int done = 0;
12989
    /* 1. s = 0 */
12990
0
    int s = 0;
12991
#ifdef WC_NO_CACHE_RESISTANT
12992
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
12993
#else
12994
0
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
12995
0
#endif
12996
12997
    /* Allocate temporaries. */
12998
#ifdef WC_NO_CACHE_RESISTANT
12999
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
13000
#else
13001
    /* Working SP int needed when cache resistant. */
13002
0
    ALLOC_SP_INT_ARRAY(t, 2U * m->used + 1U, 3, err, NULL);
13003
0
#endif
13004
0
    if (err == MP_OKAY) {
13005
        /* Initialize temporaries. */
13006
0
        _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13007
0
        _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13008
0
    #ifndef WC_NO_CACHE_RESISTANT
13009
0
        _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13010
0
    #endif
13011
13012
        /* 2. t[0] = b mod m
13013
         * Ensure base is less than modulus - set fake working value to base.
13014
         */
13015
0
        if (_sp_cmp_abs(b, m) != MP_LT) {
13016
0
            err = sp_mod(b, m, t[0]);
13017
            /* Handle base == modulus. */
13018
0
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
13019
0
                _sp_set(r, 0);
13020
0
                done = 1;
13021
0
            }
13022
0
        }
13023
0
        else {
13024
            /* Copy base into working variable. */
13025
0
            _sp_copy(b, t[0]);
13026
0
        }
13027
0
    }
13028
13029
0
    if ((!done) && (err == MP_OKAY)) {
13030
        /* 3. t[1] = t[0]
13031
         *    Set real working value to base.
13032
         */
13033
0
        _sp_copy(t[0], t[1]);
13034
13035
        /* 4. For i in (bits-1)...0 */
13036
0
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13037
#ifdef WC_NO_CACHE_RESISTANT
13038
            /* 4.1. t[s] = t[s] ^ 2 */
13039
            err = sp_sqrmod(t[s], m, t[s]);
13040
            if (err == MP_OKAY) {
13041
                /* 4.2. y = e[i] */
13042
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13043
                /* 4.3. j = y & s */
13044
                int j = y & s;
13045
                /* 4.4  s = s | y */
13046
                s |= y;
13047
                /* 4.5. t[j] = t[j] * b */
13048
                err = _sp_mulmod(t[j], b, m, t[j]);
13049
            }
13050
#else
13051
            /* 4.1. t[s] = t[s] ^ 2 */
13052
0
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13053
0
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
13054
0
                     t[2]);
13055
0
            err = sp_sqrmod(t[2], m, t[2]);
13056
0
            _sp_copy(t[2],
13057
0
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13058
0
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
13059
13060
0
            if (err == MP_OKAY) {
13061
                /* 4.2. y = e[i] */
13062
0
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & (int)SP_WORD_MASK)) & 1);
13063
                /* 4.3. j = y & s */
13064
0
                int j = y & s;
13065
                /* 4.4  s = s | y */
13066
0
                s |= y;
13067
                /* 4.5. t[j] = t[j] * b */
13068
0
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13069
0
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
13070
0
                         t[2]);
13071
0
                err = _sp_mulmod(t[2], b, m, t[2]);
13072
0
                _sp_copy(t[2],
13073
0
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13074
0
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
13075
0
            }
13076
0
#endif
13077
0
        }
13078
0
    }
13079
0
    if ((!done) && (err == MP_OKAY)) {
13080
        /* 5. r = t[1] */
13081
0
        _sp_copy(t[1], r);
13082
0
    }
13083
13084
0
    FREE_SP_INT_ARRAY(t, NULL);
13085
0
    return err;
13086
0
}
13087
13088
#else
13089
13090
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13091
 * Process the exponent one bit at a time with base in Montgomery form.
13092
 * Is constant time and cache attack resistant.
13093
 *
13094
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13095
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13096
 *
13097
 * Algorithm:
13098
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13099
 *  1. t[1] = b mod m.
13100
 *  2. t[0] = 1
13101
 *  3. For i in (bits-1)...0
13102
 *   3.1. y = e[i]
13103
 *   3.2. t[2] = t[0] * t[1]
13104
 *   3.3. t[3] = t[y] ^ 2
13105
 *   3.4. t[y] = t[3], t[y^1] = t[2]
13106
 *  4. r = t[0]
13107
 *
13108
 * @param  [in]   b     SP integer that is the base.
13109
 * @param  [in]   e     SP integer that is the exponent.
13110
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13111
 *                      count of bits in e.
13112
 * @param  [in]   m     SP integer that is the modulus.
13113
 * @param  [out]  r     SP integer to hold result.
13114
 *
13115
 * @return  MP_OKAY on success.
13116
 * @return  MP_MEM when dynamic memory allocation fails.
13117
 */
13118
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
13119
    const sp_int* m, sp_int* r)
13120
{
13121
    int err = MP_OKAY;
13122
    int done = 0;
13123
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13124
13125
    /* Allocate temporaries. */
13126
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13127
    if (err == MP_OKAY) {
13128
        /* Initialize temporaries. */
13129
        _sp_init_size(t[0], m->used * 2 + 1);
13130
        _sp_init_size(t[1], m->used * 2 + 1);
13131
        _sp_init_size(t[2], m->used * 2 + 1);
13132
        _sp_init_size(t[3], m->used * 2 + 1);
13133
13134
        /* 1. Ensure base is less than modulus. */
13135
        if (_sp_cmp_abs(b, m) != MP_LT) {
13136
            err = sp_mod(b, m, t[1]);
13137
            /* Handle base == modulus. */
13138
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13139
                _sp_set(r, 0);
13140
                done = 1;
13141
            }
13142
        }
13143
        else {
13144
            /* Copy base into working variable. */
13145
            err = sp_copy(b, t[1]);
13146
        }
13147
    }
13148
13149
    if ((!done) && (err == MP_OKAY)) {
13150
        int i;
13151
13152
        /* 2. t[0] = 1 */
13153
        _sp_set(t[0], 1);
13154
13155
        /* 3. For i in (bits-1)...0 */
13156
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13157
            /* 3.1. y = e[i] */
13158
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13159
13160
            /* 3.2. t[2] = t[0] * t[1] */
13161
            err = sp_mulmod(t[0], t[1], m, t[2]);
13162
            /* 3.3. t[3] = t[y] ^ 2 */
13163
            if (err == MP_OKAY) {
13164
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13165
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
13166
                         t[3]);
13167
                err = sp_sqrmod(t[3], m, t[3]);
13168
            }
13169
            /* 3.4. t[y] = t[3], t[y^1] = t[2] */
13170
            if (err == MP_OKAY) {
13171
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13172
            }
13173
        }
13174
    }
13175
    if ((!done) && (err == MP_OKAY)) {
13176
        /* 4. r = t[0] */
13177
        err = sp_copy(t[0], r);
13178
    }
13179
13180
    FREE_SP_INT_ARRAY(t, NULL);
13181
    return err;
13182
}
13183
13184
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13185
13186
#endif
13187
13188
#if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
13189
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
13190
    defined(OPENSSL_ALL)
13191
#ifndef WC_NO_HARDEN
13192
#if !defined(WC_NO_CACHE_RESISTANT)
13193
13194
#ifndef WC_PROTECT_ENCRYPTED_MEM
13195
13196
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13197
 * Process the exponent one bit at a time with base in Montgomery form.
13198
 * Is constant time and cache attack resistant.
13199
 *
13200
 * Algorithm:
13201
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13202
 *  1. t[0] = b mod m.
13203
 *  2. s = 0
13204
 *  3. t[0] = ToMont(t[0])
13205
 *  4. t[1] = t[0]
13206
 *  5. bm = t[0]
13207
 *  6. For i in (bits-1)...0
13208
 *   6.1. t[s] = t[s] ^ 2
13209
 *   6.2. y = e[i]
13210
 *   6.3  j = y & s
13211
 *   6.4  s = s | y
13212
 *   6.5. t[j] = t[j] * bm
13213
 *  7. t[1] = FromMont(t[1])
13214
 *  8. r = t[1]
13215
 *
13216
 * @param  [in]   b     SP integer that is the base.
13217
 * @param  [in]   e     SP integer that is the exponent.
13218
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13219
 *                      count of bits in e.
13220
 * @param  [in]   m     SP integer that is the modulus.
13221
 * @param  [out]  r     SP integer to hold result.
13222
 *
13223
 * @return  MP_OKAY on success.
13224
 * @return  MP_MEM when dynamic memory allocation fails.
13225
 */
13226
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13227
    const sp_int* m, sp_int* r)
13228
0
{
13229
0
    int err = MP_OKAY;
13230
0
    int done = 0;
13231
0
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13232
13233
    /* Allocate temporaries. */
13234
0
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, 4, err, NULL);
13235
0
    if (err == MP_OKAY) {
13236
        /* Initialize temporaries. */
13237
0
        _sp_init_size(t[0], (sp_size_t)(m->used * 2 + 1));
13238
0
        _sp_init_size(t[1], (sp_size_t)(m->used * 2 + 1));
13239
0
        _sp_init_size(t[2], (sp_size_t)(m->used * 2 + 1));
13240
0
        _sp_init_size(t[3], (sp_size_t)(m->used * 2 + 1));
13241
13242
        /* 1. Ensure base is less than modulus. */
13243
0
        if (_sp_cmp_abs(b, m) != MP_LT) {
13244
0
            err = sp_mod(b, m, t[0]);
13245
            /* Handle base == modulus. */
13246
0
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
13247
0
                _sp_set(r, 0);
13248
0
                done = 1;
13249
0
            }
13250
0
        }
13251
0
        else {
13252
            /* Copy base into working variable. */
13253
0
            _sp_copy(b, t[0]);
13254
0
        }
13255
0
    }
13256
13257
0
    if ((!done) && (err == MP_OKAY)) {
13258
0
        int i;
13259
        /* 2. s = 0 */
13260
0
        int s = 0;
13261
0
        sp_int_digit mp;
13262
13263
        /* Calculate Montgomery multiplier for reduction. */
13264
0
        _sp_mont_setup(m, &mp);
13265
        /* 3. t[0] = ToMont(t[0])
13266
         *    Convert base to Montgomery form - as fake working value.
13267
         */
13268
0
        err = sp_mont_norm(t[1], m);
13269
0
        if (err == MP_OKAY) {
13270
0
            err = sp_mul(t[0], t[1], t[0]);
13271
0
        }
13272
0
        if (err == MP_OKAY) {
13273
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
13274
0
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1U);
13275
0
        }
13276
0
        if (err == MP_OKAY) {
13277
            /* 4. t[1] = t[0]
13278
             *    Set real working value to base.
13279
             */
13280
0
            _sp_copy(t[0], t[1]);
13281
            /* 5. bm = t[0]. */
13282
0
            _sp_copy(t[0], t[2]);
13283
0
        }
13284
13285
        /* 6. For i in (bits-1)...0 */
13286
0
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13287
            /* 6.1. t[s] = t[s] ^ 2 */
13288
0
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13289
0
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
13290
0
                     t[3]);
13291
0
            err = sp_sqr(t[3], t[3]);
13292
0
            if (err == MP_OKAY) {
13293
0
                err = _sp_mont_red(t[3], m, mp, 0);
13294
0
            }
13295
0
            _sp_copy(t[3],
13296
0
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
13297
0
                               ((size_t)t[1] & sp_off_on_addr[s  ])));
13298
13299
0
            if (err == MP_OKAY) {
13300
                /* 6.2. y = e[i] */
13301
0
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & (int)SP_WORD_MASK)) & 1);
13302
                /* 6.3  j = y & s */
13303
0
                int j = y & s;
13304
                /* 6.4  s = s | y */
13305
0
                s |= y;
13306
13307
                /* 6.5. t[j] = t[j] * bm */
13308
0
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13309
0
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
13310
0
                         t[3]);
13311
0
                err = sp_mul(t[3], t[2], t[3]);
13312
0
                if (err == MP_OKAY) {
13313
0
                    err = _sp_mont_red(t[3], m, mp, 0);
13314
0
                }
13315
0
                _sp_copy(t[3],
13316
0
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
13317
0
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
13318
0
            }
13319
0
        }
13320
0
        if (err == MP_OKAY) {
13321
            /* 7. t[1] = FromMont(t[1]) */
13322
0
            err = _sp_mont_red(t[1], m, mp, 0);
13323
            /* Reduction implementation returns number to range: 0..m-1. */
13324
0
        }
13325
0
    }
13326
0
    if ((!done) && (err == MP_OKAY)) {
13327
        /* 8. r = t[1] */
13328
0
        _sp_copy(t[1], r);
13329
0
    }
13330
13331
0
    FREE_SP_INT_ARRAY(t, NULL);
13332
0
    return err;
13333
0
}
13334
13335
#else
13336
13337
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13338
 * Process the exponent one bit at a time with base in Montgomery form.
13339
 * Is constant time and cache attack resistant.
13340
 *
13341
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
13342
 * Cryptographic Hardware and Embedded Systems, CHES 2002
13343
 *
13344
 * Algorithm:
13345
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13346
 *  1. t[1] = b mod m.
13347
 *  2. t[0] = ToMont(1)
13348
 *  3. t[1] = ToMont(t[1])
13349
 *  4. For i in (bits-1)...0
13350
 *   4.1. y = e[i]
13351
 *   4.2. t[2] = t[0] * t[1]
13352
 *   4.3. t[3] = t[y] ^ 2
13353
 *   4.4. t[y] = t[3], t[y^1] = t[2]
13354
 *  5. t[0] = FromMont(t[0])
13355
 *  6. r = t[0]
13356
 *
13357
 * @param  [in]   b     SP integer that is the base.
13358
 * @param  [in]   e     SP integer that is the exponent.
13359
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13360
 *                      count of bits in e.
13361
 * @param  [in]   m     SP integer that is the modulus.
13362
 * @param  [out]  r     SP integer to hold result.
13363
 *
13364
 * @return  MP_OKAY on success.
13365
 * @return  MP_MEM when dynamic memory allocation fails.
13366
 */
13367
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13368
    const sp_int* m, sp_int* r)
13369
{
13370
    int err = MP_OKAY;
13371
    int done = 0;
13372
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
13373
13374
    /* Allocate temporaries. */
13375
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
13376
    if (err == MP_OKAY) {
13377
        /* Initialize temporaries. */
13378
        _sp_init_size(t[0], m->used * 2 + 1);
13379
        _sp_init_size(t[1], m->used * 2 + 1);
13380
        _sp_init_size(t[2], m->used * 2 + 1);
13381
        _sp_init_size(t[3], m->used * 2 + 1);
13382
13383
        /* 1. Ensure base is less than modulus. */
13384
        if (_sp_cmp_abs(b, m) != MP_LT) {
13385
            err = sp_mod(b, m, t[1]);
13386
            /* Handle base == modulus. */
13387
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13388
                _sp_set(r, 0);
13389
                done = 1;
13390
            }
13391
        }
13392
        else {
13393
            /* Copy base into working variable. */
13394
            err = sp_copy(b, t[1]);
13395
        }
13396
    }
13397
13398
    if ((!done) && (err == MP_OKAY)) {
13399
        int i;
13400
        sp_int_digit mp;
13401
13402
        /* Calculate Montgomery multiplier for reduction. */
13403
        _sp_mont_setup(m, &mp);
13404
        /* 2. t[0] = ToMont(1)
13405
          *    Calculate 1 in Montgomery form.
13406
          */
13407
        err = sp_mont_norm(t[0], m);
13408
        if (err == MP_OKAY) {
13409
            /* 3. t[1] = ToMont(t[1])
13410
             *    Convert base to Montgomery form.
13411
             */
13412
            err = sp_mulmod(t[1], t[0], m, t[1]);
13413
        }
13414
13415
        /* 4. For i in (bits-1)...0 */
13416
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
13417
            /* 4.1. y = e[i] */
13418
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
13419
13420
            /* 4.2. t[2] = t[0] * t[1] */
13421
            err = sp_mul(t[0], t[1], t[2]);
13422
            if (err == MP_OKAY) {
13423
                err = _sp_mont_red(t[2], m, mp, 0);
13424
            }
13425
            /* 4.3. t[3] = t[y] ^ 2 */
13426
            if (err == MP_OKAY) {
13427
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
13428
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
13429
                         t[3]);
13430
                err = sp_sqr(t[3], t[3]);
13431
            }
13432
            if (err == MP_OKAY) {
13433
                err = _sp_mont_red(t[3], m, mp, 0);
13434
            }
13435
            /* 4.4. t[y] = t[3], t[y^1] = t[2] */
13436
            if (err == MP_OKAY) {
13437
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
13438
            }
13439
        }
13440
13441
        if (err == MP_OKAY) {
13442
            /* 5. t[0] = FromMont(t[0]) */
13443
            err = _sp_mont_red(t[0], m, mp, 0);
13444
            /* Reduction implementation returns number to range: 0..m-1. */
13445
        }
13446
    }
13447
    if ((!done) && (err == MP_OKAY)) {
13448
        /* 6. r = t[0] */
13449
        err = sp_copy(t[0], r);
13450
    }
13451
13452
    FREE_SP_INT_ARRAY(t, NULL);
13453
    return err;
13454
}
13455
13456
#endif /* WC_PROTECT_ENCRYPTED_MEM */
13457
13458
#else
13459
13460
#ifdef SP_ALLOC
13461
#define SP_ALLOC_PREDEFINED
13462
#endif
13463
/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
13464
#define SP_ALLOC
13465
13466
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13467
 * Creates a window of precalculated exponents with base in Montgomery form.
13468
 * Is constant time but NOT cache attack resistant.
13469
 *
13470
 * Algorithm:
13471
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13472
 *  w: window size based on bits.
13473
 *  1. t[1] = b mod m.
13474
 *  2. t[0] = MontNorm(m) = ToMont(1)
13475
 *  3. t[1] = ToMont(t[1])
13476
 *  4. For i in 2..(2 ^ w) - 1
13477
 *   4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
13478
 *   4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
13479
 *  5. cb = w * (bits / w)
13480
 *  5. tr = t[e / (2 ^ cb)]
13481
 *  6. For i in cb..w
13482
 *   6.1. y = e[(i-1)..(i-w)]
13483
 *   6.2. tr = tr ^ (2 * w)
13484
 *   6.3. tr = tr * t[y]
13485
 *  7. tr = FromMont(tr)
13486
 *  8. r = tr
13487
 *
13488
 * @param  [in]   b     SP integer that is the base.
13489
 * @param  [in]   e     SP integer that is the exponent.
13490
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
13491
 *                      count of bits in e.
13492
 * @param  [in]   m     SP integer that is the modulus.
13493
 * @param  [out]  r     SP integer to hold result.
13494
 *
13495
 * @return  MP_OKAY on success.
13496
 * @return  MP_MEM when dynamic memory allocation fails.
13497
 */
13498
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
13499
    const sp_int* m, sp_int* r)
13500
{
13501
    int i;
13502
    int c;
13503
    int y;
13504
    int winBits;
13505
    int preCnt;
13506
    int err = MP_OKAY;
13507
    int done = 0;
13508
    sp_int_digit mask;
13509
    sp_int* tr = NULL;
13510
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
13511
13512
    /* Window bits based on number of pre-calculations versus number of loop
13513
     * calculations.
13514
     * Exponents for RSA and DH will result in 6-bit windows.
13515
     */
13516
    if (bits > 450) {
13517
        winBits = 6;
13518
    }
13519
    else if (bits <= 21) {
13520
        winBits = 1;
13521
    }
13522
    else if (bits <= 36) {
13523
        winBits = 3;
13524
    }
13525
    else if (bits <= 140) {
13526
        winBits = 4;
13527
    }
13528
    else {
13529
        winBits = 5;
13530
    }
13531
    /* An entry for each possible 0..2^winBits-1 value. */
13532
    preCnt = 1 << winBits;
13533
    /* Mask for calculating index into pre-computed table. */
13534
    mask = preCnt - 1;
13535
13536
    /* Allocate sp_ints for:
13537
     *  - pre-computation table
13538
     *  - temporary result
13539
     */
13540
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
13541
    if (err == MP_OKAY) {
13542
        /* Set variable to use allocate memory. */
13543
        tr = t[preCnt];
13544
13545
        /* Initialize all allocated. */
13546
        for (i = 0; i < preCnt; i++) {
13547
            _sp_init_size(t[i], m->used * 2 + 1);
13548
        }
13549
        _sp_init_size(tr, m->used * 2 + 1);
13550
13551
        /* 1. t[1] = b mod m. */
13552
        if (_sp_cmp_abs(b, m) != MP_LT) {
13553
            err = sp_mod(b, m, t[1]);
13554
            /* Handle base == modulus. */
13555
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
13556
                _sp_set(r, 0);
13557
                done = 1;
13558
            }
13559
        }
13560
        else {
13561
            /* Copy base into entry of table to contain b^1. */
13562
            _sp_copy(b, t[1]);
13563
        }
13564
    }
13565
13566
    if ((!done) && (err == MP_OKAY)) {
13567
        sp_int_digit mp;
13568
        sp_int_digit n;
13569
13570
        /* Calculate Montgomery multiplier for reduction. */
13571
        _sp_mont_setup(m, &mp);
13572
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
13573
        err = sp_mont_norm(t[0], m);
13574
        if (err == MP_OKAY) {
13575
            /* 3. t[1] = ToMont(t[1]) */
13576
            err = sp_mul(t[1], t[0], t[1]);
13577
        }
13578
        if (err == MP_OKAY) {
13579
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
13580
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
13581
        }
13582
13583
        /* 4. For i in 2..(2 ^ w) - 1 */
13584
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
13585
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
13586
            if ((i & 1) == 0) {
13587
                err = sp_sqr(t[i/2], t[i]);
13588
            }
13589
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
13590
            else {
13591
                err = sp_mul(t[i-1], t[1], t[i]);
13592
            }
13593
            /* Montgomery reduce square or multiplication result. */
13594
            if (err == MP_OKAY) {
13595
                err = _sp_mont_red(t[i], m, mp, 0);
13596
            }
13597
        }
13598
13599
        if (err == MP_OKAY) {
13600
            /* 5. cb = w * (bits / w) */
13601
            i = (bits - 1) >> SP_WORD_SHIFT;
13602
            n = e->dp[i--];
13603
            /* Find top bit index in last word. */
13604
            c = bits & (SP_WORD_SIZE - 1);
13605
            if (c == 0) {
13606
                c = SP_WORD_SIZE;
13607
            }
13608
            /* Use as many bits from top to make remaining a multiple of window
13609
             * size.
13610
             */
13611
            if ((bits % winBits) != 0) {
13612
                c -= bits % winBits;
13613
            }
13614
            else {
13615
                c -= winBits;
13616
            }
13617
13618
            /* 5. tr = t[e / (2 ^ cb)] */
13619
            y = (int)(n >> c);
13620
            n <<= SP_WORD_SIZE - c;
13621
            /* 5. Copy table value for first window. */
13622
            _sp_copy(t[y], tr);
13623
13624
            /* 6. For i in cb..w */
13625
            for (; (i >= 0) || (c >= winBits); ) {
13626
                int j;
13627
13628
                /* 6.1. y = e[(i-1)..(i-w)] */
13629
                if (c == 0) {
13630
                    /* Bits up to end of digit */
13631
                    n = e->dp[i--];
13632
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13633
                    n <<= winBits;
13634
                    c = SP_WORD_SIZE - winBits;
13635
                }
13636
                else if (c < winBits) {
13637
                    /* Bits to end of digit and part of next */
13638
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
13639
                    n = e->dp[i--];
13640
                    c = winBits - c;
13641
                    y |= (int)(n >> (SP_WORD_SIZE - c));
13642
                    n <<= c;
13643
                    c = SP_WORD_SIZE - c;
13644
                }
13645
                else {
13646
                    /* Bits from middle of digit */
13647
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
13648
                    n <<= winBits;
13649
                    c -= winBits;
13650
                }
13651
13652
                /* 6.2. tr = tr ^ (2 * w) */
13653
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
13654
                    err = sp_sqr(tr, tr);
13655
                    if (err == MP_OKAY) {
13656
                        err = _sp_mont_red(tr, m, mp, 0);
13657
                    }
13658
                }
13659
13660
                /* 6.3. tr = tr * t[y] */
13661
                if (err == MP_OKAY) {
13662
                    err = sp_mul(tr, t[y], tr);
13663
                }
13664
                if (err == MP_OKAY) {
13665
                    err = _sp_mont_red(tr, m, mp, 0);
13666
                }
13667
            }
13668
        }
13669
13670
        if (err == MP_OKAY) {
13671
            /* 7. tr = FromMont(tr) */
13672
            err = _sp_mont_red(tr, m, mp, 0);
13673
            /* Reduction implementation returns number to range: 0..m-1. */
13674
        }
13675
    }
13676
    if ((!done) && (err == MP_OKAY)) {
13677
        /* 8. r = tr */
13678
        _sp_copy(tr, r);
13679
    }
13680
13681
    FREE_SP_INT_ARRAY(t, NULL);
13682
    return err;
13683
}
13684
13685
#ifndef SP_ALLOC_PREDEFINED
13686
#undef SP_ALLOC
13687
#undef SP_ALLOC_PREDEFINED
13688
#endif
13689
13690
#endif /* !WC_NO_CACHE_RESISTANT */
13691
#endif /* !WC_NO_HARDEN */
13692
13693
/* Window size, in bits, for base 2 exponentiation:
 *   w = Log2(SP_WORD_SIZE) - 1
 */
#if SP_WORD_SIZE == 64
    #define EXP2_WINSIZE    5
#elif SP_WORD_SIZE == 32
    #define EXP2_WINSIZE    4
#elif SP_WORD_SIZE == 16
    #define EXP2_WINSIZE    3
#elif SP_WORD_SIZE == 8
    #define EXP2_WINSIZE    2
#else
    #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
#endif
/* Mask with the low EXP2_WINSIZE bits set. */
#define EXP2_MASK           ((1 << EXP2_WINSIZE) - 1)
13707
13708
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
13709
 * Is constant time and cache attack resistant.
13710
 *
13711
 * Calculates value to make mod operations constant time except when
13712
 * WC_NO_HARDEN is defined or modulus fits in one word.
13713
 *
13714
 * Algorithm:
13715
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
13716
 *  w: window size based on #bits in word.
13717
 *  1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13718
 *     else                 tr = 1
13719
 *  2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13720
 *     else                            a = 0
13721
 *  3. cb = w * (bits / w)
13722
 *  4. y = e / (2 ^ cb)
13723
 *  5. tr = (tr * (2 ^ y) + a) mod m
13724
 *  6. For i in cb..w
13725
 *   6.1. y = e[(i-1)..(i-w)]
13726
 *   6.2. tr = tr ^ (2 ^ w)
13727
 *   6.3. tr = ((tr * (2 ^ y) + a) mod m
13728
 *  7. if Words(m) > 1 then tr = FromMont(tr)
13729
 *  8. r = tr
13730
 *
13731
 * @param  [in]   e       SP integer that is the exponent.
13732
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
13733
 *                        than count of digits used by e.
13734
 * @param  [in]   m       SP integer that is the modulus.
13735
 * @param  [out]  r       SP integer to hold result.
13736
 *
13737
 * @return  MP_OKAY on success.
13738
 * @return  MP_MEM when dynamic memory allocation fails.
13739
 */
13740
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
13741
    sp_int* r)
13742
0
{
13743
0
    int i = 0;
13744
0
    int c = 0;
13745
0
    int y;
13746
0
    int err = MP_OKAY;
13747
0
    sp_int_digit mp = 0;
13748
0
    sp_int_digit n = 0;
13749
0
#ifndef WC_NO_HARDEN
13750
0
    sp_int* a = NULL;
13751
0
    sp_int* tr = NULL;
13752
0
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
13753
#else
13754
    DECL_SP_INT(tr, m->used * 2 + 1);
13755
#endif
13756
0
    int useMont = (m->used > 1);
13757
13758
#if 0
13759
    sp_print_int(2, "a");
13760
    sp_print(e, "b");
13761
    sp_print(m, "m");
13762
#endif
13763
13764
0
#ifndef WC_NO_HARDEN
13765
    /* Allocate sp_ints for:
13766
     *  - constant time add value for mod operation
13767
     *  - temporary result
13768
     */
13769
0
    ALLOC_SP_INT_ARRAY(d, m->used * 2U + 1U, 2, err, NULL);
13770
#else
13771
    /* Allocate sp_int for temporary result. */
13772
    ALLOC_SP_INT(tr, m->used * 2U + 1U, err, NULL);
13773
#endif
13774
0
    if (err == MP_OKAY) {
13775
0
    #ifndef WC_NO_HARDEN
13776
0
        a  = d[0];
13777
0
        tr = d[1];
13778
13779
0
        _sp_init_size(a, (sp_size_t)(m->used * 2 + 1));
13780
0
    #endif
13781
0
        _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
13782
13783
0
    }
13784
13785
0
    if ((err == MP_OKAY) && useMont) {
13786
        /* Calculate Montgomery multiplier for reduction. */
13787
0
        _sp_mont_setup(m, &mp);
13788
0
    }
13789
0
    if (err == MP_OKAY) {
13790
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
13791
         *    else                 tr = 1
13792
         */
13793
0
        if (useMont) {
13794
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
13795
             * form.
13796
             */
13797
0
            err = sp_mont_norm(tr, m);
13798
0
        }
13799
0
        else {
13800
             /* For single word modulus don't use Montgomery form. */
13801
0
            err = sp_set(tr, 1);
13802
0
        }
13803
0
    }
13804
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
13805
     *    else                            a = 0
13806
     */
13807
0
#ifndef WC_NO_HARDEN
13808
0
    if ((err == MP_OKAY) && useMont) {
13809
0
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
13810
0
    }
13811
0
#endif
13812
13813
0
    if (err == MP_OKAY) {
13814
        /* 3. cb = w * (bits / w) */
13815
0
        i = digits - 1;
13816
0
        n = e->dp[i--];
13817
0
        c = SP_WORD_SIZE;
13818
0
    #if EXP2_WINSIZE != 1
13819
0
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
13820
0
        if (c != SP_WORD_SIZE) {
13821
            /* 4. y = e / (2 ^ cb) */
13822
0
            y = (int)(n >> c);
13823
0
            n <<= SP_WORD_SIZE - c;
13824
0
        }
13825
0
        else
13826
0
    #endif
13827
0
        {
13828
            /* 4. y = e / (2 ^ cb) */
13829
0
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
13830
0
            n <<= EXP2_WINSIZE;
13831
0
            c -= EXP2_WINSIZE;
13832
0
        }
13833
13834
        /* 5. tr = (tr * (2 ^ y) + a) mod m */
13835
0
        err = sp_mul_2d(tr, y, tr);
13836
0
    }
13837
0
#ifndef WC_NO_HARDEN
13838
0
    if ((err == MP_OKAY) && useMont) {
13839
        /* Add value to make mod operation constant time. */
13840
0
        err = sp_add(tr, a, tr);
13841
0
    }
13842
0
#endif
13843
0
    if (err == MP_OKAY) {
13844
0
        err = sp_mod(tr, m, tr);
13845
0
    }
13846
    /* 6. For i in cb..w */
13847
0
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
13848
0
        int j;
13849
13850
        /* 6.1. y = e[(i-1)..(i-w)] */
13851
0
        if (c == 0) {
13852
            /* Bits from next digit. */
13853
0
            n = e->dp[i--];
13854
0
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
13855
0
            n <<= EXP2_WINSIZE;
13856
0
            c = SP_WORD_SIZE - EXP2_WINSIZE;
13857
0
        }
13858
0
    #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
13859
0
        else if (c < EXP2_WINSIZE) {
13860
            /* Bits to end of digit and part of next */
13861
0
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
13862
0
            n = e->dp[i--];
13863
0
            c = EXP2_WINSIZE - c;
13864
0
            y |= (int)(n >> (SP_WORD_SIZE - c));
13865
0
            n <<= c;
13866
0
            c = SP_WORD_SIZE - c;
13867
0
        }
13868
0
    #endif
13869
0
        else {
13870
            /* Bits from middle of digit */
13871
0
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
13872
0
            n <<= EXP2_WINSIZE;
13873
0
            c -= EXP2_WINSIZE;
13874
0
        }
13875
13876
        /* 6.2. tr = tr ^ (2 * w) */
13877
0
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
13878
0
            err = sp_sqr(tr, tr);
13879
0
            if (err == MP_OKAY) {
13880
0
                if (useMont) {
13881
0
                    err = _sp_mont_red(tr, m, mp, 0);
13882
0
                }
13883
0
                else {
13884
0
                    err = sp_mod(tr, m, tr);
13885
0
                }
13886
0
            }
13887
0
        }
13888
13889
        /* 6.3. tr = ((tr * (2 ^ y) + a) mod m */
13890
0
        if (err == MP_OKAY) {
13891
0
            err = sp_mul_2d(tr, y, tr);
13892
0
        }
13893
0
    #ifndef WC_NO_HARDEN
13894
0
        if ((err == MP_OKAY) && useMont) {
13895
            /* Add value to make mod operation constant time. */
13896
0
            err = sp_add(tr, a, tr);
13897
0
        }
13898
0
    #endif
13899
0
        if (err == MP_OKAY) {
13900
            /* Reduce current result by modulus. */
13901
0
            err = sp_mod(tr, m, tr);
13902
0
        }
13903
0
    }
13904
13905
    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
13906
0
    if ((err == MP_OKAY) && useMont) {
13907
0
        err = _sp_mont_red(tr, m, mp, 0);
13908
        /* Reduction implementation returns number to range: 0..m-1. */
13909
0
    }
13910
0
    if (err == MP_OKAY) {
13911
        /* 8. r = tr */
13912
0
        _sp_copy(tr, r);
13913
0
    }
13914
13915
#if 0
13916
    sp_print(r, "rme");
13917
#endif
13918
13919
0
#ifndef WC_NO_HARDEN
13920
0
    FREE_SP_INT_ARRAY(d, NULL);
13921
#else
13922
    FREE_SP_INT(tr, NULL);
13923
#endif
13924
0
    return err;
13925
0
}
13926
#endif
13927
13928
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
13929
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
13930
    defined(OPENSSL_ALL)
13931
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
13932
 *
13933
 * Error returned when parameters r == e or r == m and base >= modulus.
13934
 *
13935
 * @param  [in]   b       SP integer that is the base.
13936
 * @param  [in]   e       SP integer that is the exponent.
13937
 * @param  [in]   digits  Number of digits in exponent to use. May be greater
13938
 *                        than count of digits in e.
13939
 * @param  [in]   m       SP integer that is the modulus.
13940
 * @param  [out]  r       SP integer to hold result.
13941
 *
13942
 * @return  MP_OKAY on success.
13943
 * @return  MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
13944
 *          e is negative.
13945
 * @return  MP_MEM when dynamic memory allocation fails.
13946
 */
13947
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
13948
    sp_int* r)
13949
0
{
13950
0
    int err = MP_OKAY;
13951
0
    int done = 0;
13952
0
    int mBits = sp_count_bits(m);
13953
0
    int bBits = sp_count_bits(b);
13954
0
    int eBits = sp_count_bits(e);
13955
13956
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
13957
0
             (digits < 0)) {
13958
0
        err = MP_VAL;
13959
0
    }
13960
    /* Ensure m is not too big. */
13961
0
    else if (m->used * 2 >= SP_INT_DIGITS) {
13962
0
        err = MP_VAL;
13963
0
    }
13964
13965
#if 0
13966
    if (err == MP_OKAY) {
13967
        sp_print(b, "a");
13968
        sp_print(e, "b");
13969
        sp_print(m, "m");
13970
    }
13971
#endif
13972
13973
    /* Check for invalid modulus. */
13974
0
    if ((err == MP_OKAY) && sp_iszero(m)) {
13975
0
        err = MP_VAL;
13976
0
    }
13977
#ifdef WOLFSSL_SP_INT_NEGATIVE
13978
    /* Check for unsupported negative values of exponent and modulus. */
13979
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
13980
        err = MP_VAL;
13981
    }
13982
#endif
13983
13984
    /* Check for degenerate cases. */
13985
0
    if ((err == MP_OKAY) && sp_isone(m)) {
13986
0
        _sp_set(r, 0);
13987
0
        done = 1;
13988
0
    }
13989
0
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
13990
0
        _sp_set(r, 1);
13991
0
        done = 1;
13992
0
    }
13993
13994
    /* Ensure base is less than modulus. */
13995
0
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
13996
0
        if ((r == e) || (r == m)) {
13997
0
            err = MP_VAL;
13998
0
        }
13999
0
        if (err == MP_OKAY) {
14000
0
            err = sp_mod(b, m, r);
14001
0
        }
14002
0
        if (err == MP_OKAY) {
14003
0
            b = r;
14004
0
        }
14005
0
    }
14006
    /* Check for degenerate case of base. */
14007
0
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
14008
0
        _sp_set(r, 0);
14009
0
        done = 1;
14010
0
    }
14011
14012
    /* Ensure SP integers have space for intermediate values. */
14013
0
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
14014
0
        err = MP_VAL;
14015
0
    }
14016
14017
0
    if ((!done) && (err == MP_OKAY)) {
14018
        /* Use code optimized for specific sizes if possible */
14019
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
14020
    ((defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
14021
        defined(WOLFSSL_HAVE_SP_DH))
14022
    #ifndef WOLFSSL_SP_NO_2048
14023
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
14024
                (eBits <= 1024)) {
14025
            err = sp_ModExp_1024((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14026
            done = 1;
14027
        }
14028
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
14029
                 (eBits <= 2048)) {
14030
            err = sp_ModExp_2048((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14031
            done = 1;
14032
        }
14033
        else
14034
    #endif
14035
    #ifndef WOLFSSL_SP_NO_3072
14036
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
14037
                (eBits <= 1536)) {
14038
            err = sp_ModExp_1536((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14039
            done = 1;
14040
        }
14041
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
14042
                 (eBits <= 3072)) {
14043
            err = sp_ModExp_3072((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14044
            done = 1;
14045
        }
14046
        else
14047
    #endif
14048
    #ifdef WOLFSSL_SP_4096
14049
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
14050
                (eBits <= 4096)) {
14051
            err = sp_ModExp_4096((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
14052
            done = 1;
14053
        }
14054
        else
14055
    #endif
14056
#endif
14057
0
        {
14058
            /* SP does not support size. */
14059
0
        }
14060
0
    }
14061
0
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
14062
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
14063
    defined(NO_DH)
14064
    if ((!done) && (err == MP_OKAY)) {
14065
        /* Use non-constant time version - fastest. */
14066
        err = sp_exptmod_nct(b, e, m, r);
14067
    }
14068
#else
14069
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
14070
0
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
14071
0
         mp_isodd(m)) {
14072
        /* Use the generic base 2 implementation. */
14073
0
        err = _sp_exptmod_base_2(e, digits, m, r);
14074
0
    }
14075
0
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
14076
0
    #ifndef WC_NO_HARDEN
14077
        /* Use constant time version hardened against timing attacks and
14078
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
14079
0
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
14080
    #else
14081
        /* Use non-constant time version - fastest. */
14082
        err = sp_exptmod_nct(b, e, m, r);
14083
    #endif
14084
0
    }
14085
0
    else
14086
0
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
14087
0
    if ((!done) && (err == MP_OKAY)) {
14088
        /* Otherwise use the generic implementation hardened against
14089
         * timing and cache attacks. */
14090
0
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
14091
0
    }
14092
0
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
14093
#else
14094
    if ((!done) && (err == MP_OKAY)) {
14095
        err = MP_VAL;
14096
    }
14097
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14098
14099
0
    (void)mBits;
14100
0
    (void)bBits;
14101
0
    (void)eBits;
14102
0
    (void)digits;
14103
14104
#if 0
14105
    if (err == MP_OKAY) {
14106
        sp_print(r, "rme");
14107
    }
14108
#endif
14109
0
    return err;
14110
0
}
14111
#endif
14112
14113
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14114
    !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
14115
    defined(OPENSSL_ALL)
14116
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14117
 *
14118
 * @param  [in]   b  SP integer that is the base.
14119
 * @param  [in]   e  SP integer that is the exponent.
14120
 * @param  [in]   m  SP integer that is the modulus.
14121
 * @param  [out]  r  SP integer to hold result.
14122
 *
14123
 * @return  MP_OKAY on success.
14124
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14125
 * @return  MP_MEM when dynamic memory allocation fails.
14126
 */
14127
int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
14128
0
{
14129
0
    int err = MP_OKAY;
14130
14131
    /* Validate parameters. */
14132
0
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
14133
0
        err = MP_VAL;
14134
0
    }
14135
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
14136
0
    if (err == MP_OKAY) {
14137
0
        err = sp_exptmod_ex(b, e, (int)e->used, m, r);
14138
0
    }
14139
0
    RESTORE_VECTOR_REGISTERS();
14140
0
    return err;
14141
0
}
14142
#endif
14143
14144
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
14145
#if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
14146
14147
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14148
 * Creates a window of precalculated exponents with base in Montgomery form.
14149
 * Sliding window and is NOT constant time.
14150
 *
14151
 * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
14152
 * e.g. when n=6, b^32..b^63
14153
 * Algorithm:
14154
 *   1. Ensure base is less than modulus.
14155
 *   2. Convert base to Montgomery form
14156
 *   3. Set result to table entry for top window bits, or
14157
 *      if less than windows bits in exponent, 1 in Montgomery form.
14158
 *   4. While at least window bits left:
14159
 *     4.1. Count number of and skip leading 0 bits unless less then window bits
14160
 *          left.
14161
 *     4.2. Montgomery square result for each leading 0 and window bits if bits
14162
 *          left.
14163
 *     4.3. Break if less than window bits left.
14164
 *     4.4. Get top window bits from expononent and drop.
14165
 *     4.5. Montgomery multiply result by table entry.
14166
 *   5. While bits left:
14167
 *     5.1. Montogmery square result
14168
 *     5.2. If exponent bit set
14169
 *       5.2.1. Montgomery multiply result by Montgomery form of base.
14170
 *   6. Convert result back from Montgomery form.
14171
 *
14172
 * @param  [in]   b     SP integer that is the base.
14173
 * @param  [in]   e     SP integer that is the exponent.
14174
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
14175
 *                      count of bits in e.
14176
 * @param  [in]   m     SP integer that is the modulus.
14177
 * @param  [out]  r     SP integer to hold result.
14178
 *
14179
 * @return  MP_OKAY on success.
14180
 * @return  MP_MEM when dynamic memory allocation fails.
14181
 */
14182
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14183
    sp_int* r)
14184
0
{
14185
0
    int i = 0;
14186
0
    int bits;
14187
0
    int winBits;
14188
0
    int preCnt;
14189
0
    int err = MP_OKAY;
14190
0
    int done = 0;
14191
0
    sp_int* tr = NULL;
14192
0
    sp_int* bm = NULL;
14193
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
14194
0
#ifndef WOLFSSL_SP_NO_MALLOC
14195
0
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14196
#else
14197
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
14198
#endif
14199
14200
0
    bits = sp_count_bits(e);
14201
14202
    /* Window bits based on number of pre-calculations versus number of loop
14203
     * calculations.
14204
     * Exponents for RSA and DH will result in 6-bit windows.
14205
     * Note: for 4096-bit values, 7-bit window is slightly better.
14206
     */
14207
0
    if (bits > 450) {
14208
0
        winBits = 6;
14209
0
    }
14210
0
    else if (bits <= 21) {
14211
0
        winBits = 1;
14212
0
    }
14213
0
    else if (bits <= 36) {
14214
0
        winBits = 3;
14215
0
    }
14216
0
    else if (bits <= 140) {
14217
0
        winBits = 4;
14218
0
    }
14219
0
    else {
14220
0
        winBits = 5;
14221
0
    }
14222
    /* Top bit of exponent fixed as 1 for pre-calculated window. */
14223
0
    preCnt = 1 << (winBits - 1);
14224
14225
    /* Allocate sp_ints for:
14226
     *  - pre-computation table
14227
     *  - temporary result
14228
     *  - Montgomery form of base
14229
     */
14230
0
#ifndef WOLFSSL_SP_NO_MALLOC
14231
0
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14232
#else
14233
    ALLOC_SP_INT_ARRAY(t, m->used * 2U + 1U, (size_t)preCnt + 2, err, NULL);
14234
#endif
14235
0
    if (err == MP_OKAY) {
14236
        /* Set variables to use allocate memory. */
14237
0
        tr = t[preCnt + 0];
14238
0
        bm = t[preCnt + 1];
14239
14240
        /* Initialize all allocated  */
14241
0
        for (i = 0; i < preCnt; i++) {
14242
0
            _sp_init_size(t[i], (sp_size_t)(m->used * 2 + 1));
14243
0
        }
14244
0
        _sp_init_size(tr, (sp_size_t)(m->used * 2 + 1));
14245
0
        _sp_init_size(bm, (sp_size_t)(m->used * 2 + 1));
14246
14247
        /* 1. Ensure base is less than modulus. */
14248
0
        if (_sp_cmp_abs(b, m) != MP_LT) {
14249
0
            err = sp_mod(b, m, bm);
14250
            /* Handle base == modulus. */
14251
0
            if ((err == MP_OKAY) && sp_iszero(bm)) {
14252
0
                _sp_set(r, 0);
14253
0
                done = 1;
14254
0
            }
14255
0
        }
14256
0
        else {
14257
            /* Copy base into Montogmery base variable. */
14258
0
            _sp_copy(b, bm);
14259
0
        }
14260
0
    }
14261
14262
0
    if ((!done) && (err == MP_OKAY)) {
14263
0
        int y = 0;
14264
0
        int c = 0;
14265
0
        sp_int_digit mp;
14266
14267
        /* Calculate Montgomery multiplier for reduction. */
14268
0
        _sp_mont_setup(m, &mp);
14269
        /* Calculate Montgomery normalizer for modulus. */
14270
0
        err = sp_mont_norm(t[0], m);
14271
0
        if (err == MP_OKAY) {
14272
            /* 2. Convert base to Montgomery form. */
14273
0
            err = sp_mul(bm, t[0], bm);
14274
0
        }
14275
0
        if (err == MP_OKAY) {
14276
            /* bm = bm mod m, temporary size has to be bigger than bm->used. */
14277
0
            err = _sp_div(bm, m, NULL, bm, bm->used + 1U);
14278
0
        }
14279
0
        if (err == MP_OKAY) {
14280
            /* Copy Montgomery form of base into first element of table. */
14281
0
            _sp_copy(bm, t[0]);
14282
0
        }
14283
        /* Calculate b^(2^(winBits-1)) */
14284
0
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
14285
0
            err = sp_sqr(t[0], t[0]);
14286
0
            if (err == MP_OKAY) {
14287
0
                err = _sp_mont_red(t[0], m, mp, 0);
14288
0
            }
14289
0
        }
14290
        /* For each table entry after first. */
14291
0
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
14292
            /* Multiply previous entry by the base in Mont form into table. */
14293
0
            err = sp_mul(t[i-1], bm, t[i]);
14294
0
            if (err == MP_OKAY) {
14295
0
                err = _sp_mont_red(t[i], m, mp, 0);
14296
0
            }
14297
0
        }
14298
14299
        /* 3. Set result to table entry for top window bits, or
14300
         *    if less than windows bits in exponent, 1 in Montgomery form.
14301
         */
14302
0
        if (err == MP_OKAY) {
14303
0
            sp_int_digit n;
14304
            /* Mask for calculating index into pre-computed table. */
14305
0
            sp_int_digit mask = (sp_int_digit)preCnt - 1;
14306
14307
            /* Find the top bit. */
14308
0
            i = (bits - 1) >> SP_WORD_SHIFT;
14309
0
            n = e->dp[i--];
14310
0
            c = bits % SP_WORD_SIZE;
14311
0
            if (c == 0) {
14312
0
                c = SP_WORD_SIZE;
14313
0
            }
14314
            /* Put top bit at highest offset in digit. */
14315
0
            n <<= SP_WORD_SIZE - c;
14316
14317
0
            if (bits >= winBits) {
14318
                /* Top bit set. Copy from window. */
14319
0
                if (c < winBits) {
14320
                    /* Bits to end of digit and part of next */
14321
0
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14322
0
                    n = e->dp[i--];
14323
0
                    c = winBits - c;
14324
0
                    y |= (int)(n >> (SP_WORD_SIZE - c));
14325
0
                    n <<= c;
14326
0
                    c = SP_WORD_SIZE - c;
14327
0
                }
14328
0
                else {
14329
                    /* Bits from middle of digit */
14330
0
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
14331
0
                    n <<= winBits;
14332
0
                    c -= winBits;
14333
0
                }
14334
0
                _sp_copy(t[y], tr);
14335
0
            }
14336
0
            else {
14337
                /* 1 in Montgomery form. */
14338
0
                err = sp_mont_norm(tr, m);
14339
0
            }
14340
14341
            /* 4. While at least window bits left. */
14342
0
            while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14343
                /* Number of squares to before due to top bits being 0. */
14344
0
                int sqrs = 0;
14345
14346
                /* 4.1. Count number of and skip leading 0 bits unless less
14347
                 *      than window bits.
14348
                 */
14349
0
                do {
14350
                    /* Make sure n has bits from the right digit. */
14351
0
                    if (c == 0) {
14352
0
                        n = e->dp[i--];
14353
0
                        c = SP_WORD_SIZE;
14354
0
                    }
14355
                    /* Mask off the next bit. */
14356
0
                    if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
14357
0
                        break;
14358
0
                    }
14359
14360
                    /* Another square needed. */
14361
0
                    sqrs++;
14362
                    /* Skip bit. */
14363
0
                    n <<= 1;
14364
0
                    c--;
14365
0
                }
14366
0
                while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));
14367
14368
0
                if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
14369
                    /* Add squares needed before using table entry. */
14370
0
                    sqrs += winBits;
14371
0
                }
14372
14373
                /* 4.2. Montgomery square result for each leading 0 and window
14374
                 *      bits if bits left.
14375
                 */
14376
0
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
14377
0
                    err = sp_sqr(tr, tr);
14378
0
                    if (err == MP_OKAY) {
14379
0
                        err = _sp_mont_red(tr, m, mp, 0);
14380
0
                    }
14381
0
                }
14382
14383
                /* 4.3. Break if less than window bits left. */
14384
0
                if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
14385
0
                    break;
14386
0
                }
14387
14388
                /* 4.4. Get top window bits from exponent and drop. */
14389
0
                if (err == MP_OKAY) {
14390
0
                    if (c == 0) {
14391
                        /* Bits from next digit. */
14392
0
                        n = e->dp[i--];
14393
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14394
0
                        n <<= winBits;
14395
0
                        c = SP_WORD_SIZE - winBits;
14396
0
                    }
14397
0
                    else if (c < winBits) {
14398
                        /* Bits to end of digit and part of next. */
14399
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14400
0
                        n = e->dp[i--];
14401
0
                        c = winBits - c;
14402
0
                        y |= (int)(n >> (SP_WORD_SIZE - c));
14403
0
                        n <<= c;
14404
0
                        c = SP_WORD_SIZE - c;
14405
0
                    }
14406
0
                    else {
14407
                        /* Bits from middle of digit. */
14408
0
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
14409
0
                        n <<= winBits;
14410
0
                        c -= winBits;
14411
0
                    }
14412
0
                    y &= (int)mask;
14413
0
                }
14414
14415
                /* 4.5. Montgomery multiply result by table entry. */
14416
0
                if (err == MP_OKAY) {
14417
0
                    err = sp_mul(tr, t[y], tr);
14418
0
                }
14419
0
                if (err == MP_OKAY) {
14420
0
                    err = _sp_mont_red(tr, m, mp, 0);
14421
0
                }
14422
0
            }
14423
14424
            /* Finished multiplying in table entries. */
14425
0
            if ((err == MP_OKAY) && (c > 0)) {
14426
                /* Handle remaining bits.
14427
                 * Window values have top bit set and can't be used. */
14428
0
                n = e->dp[0];
14429
                /*  5. While bits left: */
14430
0
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
14431
                    /* 5.1. Montogmery square result */
14432
0
                    err = sp_sqr(tr, tr);
14433
0
                    if (err == MP_OKAY) {
14434
0
                        err = _sp_mont_red(tr, m, mp, 0);
14435
0
                    }
14436
                    /* 5.2. If exponent bit set */
14437
0
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
14438
                        /* 5.2.1. Montgomery multiply result by Montgomery form
14439
                         * of base.
14440
                         */
14441
0
                        err = sp_mul(tr, bm, tr);
14442
0
                        if (err == MP_OKAY) {
14443
0
                            err = _sp_mont_red(tr, m, mp, 0);
14444
0
                        }
14445
0
                    }
14446
0
                }
14447
0
            }
14448
0
        }
14449
14450
0
        if (err == MP_OKAY) {
14451
            /* 6. Convert result back from Montgomery form. */
14452
0
            err = _sp_mont_red(tr, m, mp, 0);
14453
            /* Reduction implementation returns number to range: 0..m-1. */
14454
0
        }
14455
0
    }
14456
0
    if ((!done) && (err == MP_OKAY)) {
14457
        /* Copy temporary result into parameter. */
14458
0
        _sp_copy(tr, r);
14459
0
    }
14460
14461
0
#ifndef WOLFSSL_SP_NO_MALLOC
14462
0
    FREE_DYN_SP_INT_ARRAY(t, NULL);
14463
#else
14464
    FREE_SP_INT_ARRAY(t, NULL);
14465
#endif
14466
0
    return err;
14467
0
}
14468
14469
#else
14470
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14471
 * Non-constant time implementation.
14472
 *
14473
 * Algorithm:
14474
 *   1. Convert base to Montgomery form
14475
 *   2. Set result to base (assumes exponent is not zero)
14476
 *   3. For each bit in exponent starting at second highest
14477
 *     3.1. Montogmery square result
14478
 *     3.2. If exponent bit set
14479
 *       3.2.1. Montgomery multiply result by Montgomery form of base.
14480
 *   4. Convert result back from Montgomery form.
14481
 *
14482
 * @param  [in]   b  SP integer that is the base.
14483
 * @param  [in]   e  SP integer that is the exponent.
14484
 * @param  [in]   m  SP integer that is the modulus.
14485
 * @param  [out]  r  SP integer to hold result.
14486
 *
14487
 * @return  MP_OKAY on success.
14488
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14489
 * @return  MP_MEM when dynamic memory allocation fails.
14490
 */
14491
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
14492
    sp_int* r)
14493
{
14494
    int i;
14495
    int err = MP_OKAY;
14496
    int done = 0;
14497
    int y = 0;
14498
    int bits = sp_count_bits(e);
14499
    sp_int_digit mp;
14500
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
14501
14502
    /* Allocate memory for:
14503
     *  - Montgomery form of base
14504
     *  - Temporary result (in case r is same var as another parameter). */
14505
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
14506
    if (err == MP_OKAY) {
14507
        _sp_init_size(t[0], m->used * 2 + 1);
14508
        _sp_init_size(t[1], m->used * 2 + 1);
14509
14510
        /* Ensure base is less than modulus and copy into temp. */
14511
        if (_sp_cmp_abs(b, m) != MP_LT) {
14512
            err = sp_mod(b, m, t[0]);
14513
            /* Handle base == modulus. */
14514
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
14515
                _sp_set(r, 0);
14516
                done = 1;
14517
            }
14518
        }
14519
        else {
14520
            /* Copy base into temp. */
14521
            _sp_copy(b, t[0]);
14522
        }
14523
    }
14524
14525
    if ((!done) && (err == MP_OKAY)) {
14526
        /* Calculate Montgomery multiplier for reduction. */
14527
        _sp_mont_setup(m, &mp);
14528
        /* Calculate Montgomery normalizer for modulus. */
14529
        err = sp_mont_norm(t[1], m);
14530
        if (err == MP_OKAY) {
14531
            /* 1. Convert base to Montgomery form. */
14532
            err = sp_mul(t[0], t[1], t[0]);
14533
        }
14534
        if (err == MP_OKAY) {
14535
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
14536
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
14537
        }
14538
        if (err == MP_OKAY) {
14539
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
14540
            _sp_copy(t[0], t[1]);
14541
        }
14542
14543
        /* 3. For each bit in exponent starting at second highest. */
14544
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
14545
            /* 3.1. Montgomery square result. */
14546
            err = sp_sqr(t[0], t[0]);
14547
            if (err == MP_OKAY) {
14548
                err = _sp_mont_red(t[0], m, mp, 0);
14549
            }
14550
            if (err == MP_OKAY) {
14551
                /* Get bit and index i. */
14552
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
14553
                /* 3.2. If exponent bit set */
14554
                if (y != 0) {
14555
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
14556
                    err = sp_mul(t[0], t[1], t[0]);
14557
                    if (err == MP_OKAY) {
14558
                        err = _sp_mont_red(t[0], m, mp, 0);
14559
                    }
14560
                }
14561
            }
14562
        }
14563
        if (err == MP_OKAY) {
14564
            /* 4. Convert from Montgomery form. */
14565
            err = _sp_mont_red(t[0], m, mp, 0);
14566
            /* Reduction implementation returns number of range 0..m-1. */
14567
        }
14568
    }
14569
    if ((!done) && (err == MP_OKAY)) {
14570
        /* Copy temporary result into parameter. */
14571
        _sp_copy(t[0], r);
14572
    }
14573
14574
    FREE_SP_INT_ARRAY(t, NULL);
14575
    return err;
14576
}
14577
#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
14578
14579
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
14580
 * Non-constant time implementation.
14581
 *
14582
 * @param  [in]   b  SP integer that is the base.
14583
 * @param  [in]   e  SP integer that is the exponent.
14584
 * @param  [in]   m  SP integer that is the modulus.
14585
 * @param  [out]  r  SP integer to hold result.
14586
 *
14587
 * @return  MP_OKAY on success.
14588
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
14589
 * @return  MP_MEM when dynamic memory allocation fails.
14590
 */
14591
int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Every SP integer must be supplied. */
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    else {
    #if 0
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    #endif

        /* Special cases are tried in order; the first that matches wins. */

        /* A modulus of zero is invalid. */
        if (sp_iszero(m)) {
            err = MP_VAL;
        }
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Negative exponent or modulus is rejected. */
        else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
            err = MP_VAL;
        }
    #endif
        /* Anything mod 1 is 0. */
        else if (sp_isone(m)) {
            _sp_set(r, 0);
        }
        /* b^0 mod m = 1 (m is known to be greater than 1 here). */
        else if (sp_iszero(e)) {
            _sp_set(r, 1);
        }
        /* 0^e mod m = 0 (e is known to be non-zero here). */
        else if (sp_iszero(b)) {
            _sp_set(r, 0);
        }
        /* Result must have room for intermediate values: more than twice the
         * number of digits in the modulus. */
        else if (m->used * 2 >= r->size) {
            err = MP_VAL;
        }
    #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
        /* Even modulus is handed to the general implementation with the full
         * digit width of the exponent as the bit count. */
        else if (mp_iseven(m)) {
            err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
        }
    #endif
        /* Everything else goes to the non-constant time implementation. */
        else {
            err = _sp_exptmod_nct(b, e, m, r);
        }
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif

    return err;
}
14652
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
14653
14654
/***************
14655
 * 2^e functions
14656
 ***************/
14657
14658
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
14659
/* Divide by 2^e: r = a >> e and rem = bits shifted out
14660
 *
14661
 * @param  [in]   a    SP integer to divide.
14662
 * @param  [in]   e    Exponent bits (dividing by 2^e).
14663
 * @param  [in]   m    SP integer that is the modulus.
14664
 * @param  [out]  r    SP integer to hold result.
14665
 * @param  [out]  rem  SP integer to hold remainder.
14666
 *
14667
 * @return  MP_OKAY on success.
14668
 * @return  MP_VAL when a is NULL or e is negative.
14669
 */
14670
int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
14671
0
{
14672
0
    int err = MP_OKAY;
14673
14674
0
    if ((a == NULL) || (e < 0)) {
14675
0
        err = MP_VAL;
14676
0
    }
14677
14678
0
    if (err == MP_OKAY) {
14679
        /* Number of bits remaining after shift. */
14680
0
        int remBits = sp_count_bits(a) - e;
14681
14682
0
        if (remBits <= 0) {
14683
            /* Shifting down by more bits than in number. */
14684
0
            _sp_zero(r);
14685
0
            if (rem != NULL) {
14686
0
                err = sp_copy(a, rem);
14687
0
            }
14688
0
        }
14689
0
        else {
14690
0
            if (rem != NULL) {
14691
                /* Copy a in to remainder. */
14692
0
                err = sp_copy(a, rem);
14693
0
            }
14694
0
            if (err == MP_OKAY) {
14695
                /* Shift a down by into result. */
14696
0
                err = sp_rshb(a, e, r);
14697
0
            }
14698
0
            if ((err == MP_OKAY) && (rem != NULL)) {
14699
                /* Set used and mask off top digit of remainder. */
14700
0
                rem->used = (sp_size_t)((e + SP_WORD_SIZE - 1) >>
14701
0
                                        SP_WORD_SHIFT);
14702
0
                e &= SP_WORD_MASK;
14703
0
                if (e > 0) {
14704
0
                    rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
14705
0
                }
14706
14707
                /* Remove leading zeros from remainder. */
14708
0
                sp_clamp(rem);
14709
            #ifdef WOLFSSL_SP_INT_NEGATIVE
14710
                rem->sign = MP_ZPOS;
14711
            #endif
14712
0
            }
14713
0
        }
14714
0
    }
14715
14716
0
    return err;
14717
0
}
14718
#endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
14719
14720
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
14721
    defined(HAVE_ECC)
14722
/* The bottom e bits: r = a & ((1 << e) - 1)
14723
 *
14724
 * @param  [in]   a  SP integer to reduce.
14725
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
14726
 * @param  [out]  r  SP integer to hold result.
14727
 *
14728
 * @return  MP_OKAY on success.
14729
 * @return  MP_VAL when a or r is NULL, e is negative or e is too large for
14730
 *          result.
14731
 */
14732
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
14733
0
{
14734
0
    int err = MP_OKAY;
14735
0
    sp_size_t digits = (sp_size_t)((e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT);
14736
14737
0
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14738
0
        err = MP_VAL;
14739
0
    }
14740
0
    if ((err == MP_OKAY) && (digits > r->size)) {
14741
0
        err = MP_VAL;
14742
0
    }
14743
14744
0
    if (err == MP_OKAY) {
14745
        /* Copy a into r if not same pointer. */
14746
0
        if (a != r) {
14747
0
            XMEMCPY(r->dp, a->dp, digits * (word32)SP_WORD_SIZEOF);
14748
0
            r->used = a->used;
14749
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14750
            r->sign = a->sign;
14751
        #endif
14752
0
        }
14753
14754
        /* Modify result if a is bigger or same digit size. */
14755
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
14756
0
        if (digits <= a->used)
14757
    #else
14758
        /* Need to make negative positive and mask. */
14759
        if ((a->sign == MP_NEG) || (digits <= a->used))
14760
    #endif
14761
0
        {
14762
        #ifdef WOLFSSL_SP_INT_NEGATIVE
14763
            if (a->sign == MP_NEG) {
14764
                unsigned int i;
14765
                sp_int_digit carry = 0;
14766
14767
                /* Negate value. */
14768
                for (i = 0; i < r->used; i++) {
14769
                    sp_int_digit next = r->dp[i] > 0;
14770
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
14771
                    carry |= next;
14772
                }
14773
                for (; i < digits; i++) {
14774
                    r->dp[i] = (sp_int_digit)0 - carry;
14775
                }
14776
                r->sign = MP_ZPOS;
14777
            }
14778
        #endif
14779
            /* Set used and mask off top digit of result. */
14780
0
            r->used = digits;
14781
0
            e &= SP_WORD_MASK;
14782
0
            if (e > 0) {
14783
0
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
14784
0
            }
14785
0
            sp_clamp(r);
14786
0
        }
14787
0
    }
14788
14789
0
    return err;
14790
0
}
14791
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
14792
14793
#if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
14794
    !defined(NO_DH))) || defined(OPENSSL_ALL)
14795
/* Multiply by 2^e: r = a << e
14796
 *
14797
 * @param  [in]   a  SP integer to multiply.
14798
 * @param  [in]   e  Multiplier bits (multiplier equals 2^e).
14799
 * @param  [out]  r  SP integer to hold result.
14800
 *
14801
 * @return  MP_OKAY on success.
14802
 * @return  MP_VAL when a or r is NULL, e is negative, or result is too big for
14803
 *          result size.
14804
 */
14805
int sp_mul_2d(const sp_int* a, int e, sp_int* r)
14806
0
{
14807
0
    int err = MP_OKAY;
14808
14809
    /* Validate parameters. */
14810
0
    if ((a == NULL) || (r == NULL) || (e < 0)) {
14811
0
        err = MP_VAL;
14812
0
    }
14813
14814
    /* Ensure result has enough allocated digits for result. */
14815
0
    if ((err == MP_OKAY) &&
14816
0
            ((unsigned int)(sp_count_bits(a) + e) >
14817
0
             (unsigned int)r->size * SP_WORD_SIZE)) {
14818
0
        err = MP_VAL;
14819
0
    }
14820
14821
0
    if (err == MP_OKAY) {
14822
        /* Copy a into r as left shift function works on the number. */
14823
0
        if (a != r) {
14824
0
            err = sp_copy(a, r);
14825
0
        }
14826
0
    }
14827
14828
0
    if (err == MP_OKAY) {
14829
#if 0
14830
        sp_print(a, "a");
14831
        sp_print_int(e, "n");
14832
#endif
14833
0
        err = sp_lshb(r, e);
14834
#if 0
14835
        sp_print(r, "rsl");
14836
#endif
14837
0
    }
14838
14839
0
    return err;
14840
0
}
14841
#endif /* (WOLFSSL_SP_MATH_ALL && (!WOLFSSL_RSA_VERIFY_ONLY || !NO_DH)) ||
        * OPENSSL_ALL */
14842
14843
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
14844
    defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
14845
14846
/* START SP_SQR implementations */
14847
/* This code is generated.
14848
 * To generate:
14849
 *   cd scripts/sp/sp_int
14850
 *   ./gen.sh
14851
 * File sp_sqr.c contains code.
14852
 */
14853
14854
#if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
14855
#ifdef SQR_MUL_ASM
14856
/* Square a and store in r. r = a * a
14857
 *
14858
 * @param  [in]   a  SP integer to square.
14859
 * @param  [out]  r  SP integer result.
14860
 *
14861
 * @return  MP_OKAY on success.
14862
 * @return  MP_MEM when dynamic memory allocation fails.
14863
 */
14864
static int _sp_sqr(const sp_int* a, sp_int* r)
14865
0
{
14866
0
    int err = MP_OKAY;
14867
0
    sp_size_t i;
14868
0
    int j;
14869
0
    sp_size_t k;
14870
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14871
    sp_int_digit* t = NULL;
14872
#elif defined(WOLFSSL_SP_DYN_STACK)
14873
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
14874
#else
14875
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
14876
#endif
14877
14878
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14879
    t = (sp_int_digit*)XMALLOC(
14880
        sizeof(sp_int_digit) * (size_t)(((a->used + 1) / 2) * 2 + 1), NULL,
14881
        DYNAMIC_TYPE_BIGINT);
14882
    if (t == NULL) {
14883
        err = MP_MEM;
14884
    }
14885
#endif
14886
0
    if ((err == MP_OKAY) && (a->used <= 1)) {
14887
0
        sp_int_digit l;
14888
0
        sp_int_digit h;
14889
14890
0
        h = 0;
14891
0
        l = 0;
14892
0
        SP_ASM_SQR(h, l, a->dp[0]);
14893
0
        r->dp[0] = h;
14894
0
        r->dp[1] = l;
14895
0
    }
14896
0
    else if (err == MP_OKAY) {
14897
0
        sp_int_digit l;
14898
0
        sp_int_digit h;
14899
0
        sp_int_digit o;
14900
0
        sp_int_digit* p = t;
14901
14902
0
        h = 0;
14903
0
        l = 0;
14904
0
        SP_ASM_SQR(h, l, a->dp[0]);
14905
0
        t[0] = h;
14906
0
        h = 0;
14907
0
        o = 0;
14908
0
        for (k = 1; k < (sp_size_t)((a->used + 1) / 2); k++) {
14909
0
            i = k;
14910
0
            j = (int)(k - 1);
14911
0
            for (; (j >= 0); i++, j--) {
14912
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14913
0
            }
14914
0
            t[k * 2 - 1] = l;
14915
0
            l = h;
14916
0
            h = o;
14917
0
            o = 0;
14918
14919
0
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
14920
0
            i = (sp_size_t)(k + 1);
14921
0
            j = (int)(k - 1);
14922
0
            for (; (j >= 0); i++, j--) {
14923
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14924
0
            }
14925
0
            t[k * 2] = l;
14926
0
            l = h;
14927
0
            h = o;
14928
0
            o = 0;
14929
0
        }
14930
0
        for (; k < a->used; k++) {
14931
0
            i = k;
14932
0
            j = (int)(k - 1);
14933
0
            for (; (i < a->used); i++, j--) {
14934
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14935
0
            }
14936
0
            p[k * 2 - 1] = l;
14937
0
            l = h;
14938
0
            h = o;
14939
0
            o = 0;
14940
14941
0
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
14942
0
            i = (sp_size_t)(k + 1);
14943
0
            j = (int)(k - 1);
14944
0
            for (; (i < a->used); i++, j--) {
14945
0
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
14946
0
            }
14947
0
            p[k * 2] = l;
14948
0
            l = h;
14949
0
            h = o;
14950
0
            o = 0;
14951
14952
0
            p = r->dp;
14953
0
        }
14954
0
        r->dp[k * 2 - 1] = l;
14955
0
        XMEMCPY(r->dp, t, (size_t)(((a->used + 1) / 2) * 2 + 1) *
14956
0
            sizeof(sp_int_digit));
14957
0
    }
14958
14959
0
    if (err == MP_OKAY) {
14960
0
        r->used = (sp_size_t)(a->used * 2U);
14961
0
        sp_clamp(r);
14962
0
    }
14963
14964
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14965
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
14966
#endif
14967
0
    return err;
14968
0
}
14969
#else /* !SQR_MUL_ASM */
14970
/* Square a and store in r. r = a * a
14971
 *
14972
 * @param  [in]   a  SP integer to square.
14973
 * @param  [out]  r  SP integer result.
14974
 *
14975
 * @return  MP_OKAY on success.
14976
 * @return  MP_MEM when dynamic memory allocation fails.
14977
 */
14978
static int _sp_sqr(const sp_int* a, sp_int* r)
14979
{
14980
    int err = MP_OKAY;
14981
    sp_size_t i;
14982
    int j;
14983
    sp_size_t k;
14984
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14985
    sp_int_digit* t = NULL;
14986
#elif defined(WOLFSSL_SP_DYN_STACK)
14987
    sp_int_digit t[a->used * 2];
14988
#else
14989
    sp_int_digit t[SP_INT_DIGITS];
14990
#endif
14991
14992
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14993
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) *
14994
                               (size_t)(a->used * 2), NULL,
14995
                               DYNAMIC_TYPE_BIGINT);
14996
    if (t == NULL) {
14997
        err = MP_MEM;
14998
    }
14999
#endif
15000
    if (err == MP_OKAY) {
15001
    #ifndef WOLFSSL_SP_INT_SQR_VOLATILE
15002
        sp_int_word w;
15003
        sp_int_word l;
15004
        sp_int_word h;
15005
    #else
15006
        volatile sp_int_word w;
15007
        volatile sp_int_word l;
15008
        volatile sp_int_word h;
15009
    #endif
15010
    #ifdef SP_WORD_OVERFLOW
15011
        sp_int_word o;
15012
    #endif
15013
15014
        w = (sp_int_word)a->dp[0] * a->dp[0];
15015
        t[0] = (sp_int_digit)w;
15016
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
15017
        h = 0;
15018
    #ifdef SP_WORD_OVERFLOW
15019
        o = 0;
15020
    #endif
15021
        for (k = 1; k <= (sp_size_t)((a->used - 1) * 2); k++) {
15022
            i = k / 2;
15023
            j = (int)(k - i);
15024
            if (i == (unsigned int)j) {
15025
                w = (sp_int_word)a->dp[i] * a->dp[j];
15026
                l += (sp_int_digit)w;
15027
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15028
            #ifdef SP_WORD_OVERFLOW
15029
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15030
                l &= SP_MASK;
15031
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15032
                h &= SP_MASK;
15033
            #endif
15034
            }
15035
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
15036
                w = (sp_int_word)a->dp[i] * a->dp[j];
15037
                l += (sp_int_digit)w;
15038
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15039
            #ifdef SP_WORD_OVERFLOW
15040
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15041
                l &= SP_MASK;
15042
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15043
                h &= SP_MASK;
15044
            #endif
15045
                l += (sp_int_digit)w;
15046
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
15047
            #ifdef SP_WORD_OVERFLOW
15048
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
15049
                l &= SP_MASK;
15050
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
15051
                h &= SP_MASK;
15052
            #endif
15053
            }
15054
            t[k] = (sp_int_digit)l;
15055
            l >>= SP_WORD_SIZE;
15056
            l += (sp_int_digit)h;
15057
            h >>= SP_WORD_SIZE;
15058
        #ifdef SP_WORD_OVERFLOW
15059
            h += o & SP_MASK;
15060
            o >>= SP_WORD_SIZE;
15061
        #endif
15062
        }
15063
        t[k] = (sp_int_digit)l;
15064
        r->used = (sp_size_t)(k + 1);
15065
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
15066
        sp_clamp(r);
15067
    }
15068
15069
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15070
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
15071
#endif
15072
    return err;
15073
}
15074
#endif /* SQR_MUL_ASM */
15075
#endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
15076
15077
#ifndef WOLFSSL_SP_SMALL
15078
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
15079
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
15080
#ifndef SQR_MUL_ASM
15081
/* Square a and store in r. r = a * a
15082
 *
15083
 * Long-hand implementation.
15084
 *
15085
 * @param  [in]   a  SP integer to square.
15086
 * @param  [out]  r  SP integer result.
15087
 *
15088
 * @return  MP_OKAY on success.
15089
 * @return  MP_MEM when dynamic memory allocation fails.
15090
 */
15091
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15092
{
15093
    int err = MP_OKAY;
15094
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15095
    sp_int_word* w = NULL;
15096
#else
15097
    sp_int_word w[10];
15098
#endif
15099
    const sp_int_digit* da = a->dp;
15100
15101
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15102
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
15103
        DYNAMIC_TYPE_BIGINT);
15104
    if (w == NULL) {
15105
        err = MP_MEM;
15106
    }
15107
#endif
15108
15109
15110
    if (err == MP_OKAY) {
15111
        w[0] = (sp_int_word)da[0] * da[0];
15112
        w[1] = (sp_int_word)da[0] * da[1];
15113
        w[2] = (sp_int_word)da[0] * da[2];
15114
        w[3] = (sp_int_word)da[1] * da[1];
15115
        w[4] = (sp_int_word)da[0] * da[3];
15116
        w[5] = (sp_int_word)da[1] * da[2];
15117
        w[6] = (sp_int_word)da[1] * da[3];
15118
        w[7] = (sp_int_word)da[2] * da[2];
15119
        w[8] = (sp_int_word)da[2] * da[3];
15120
        w[9] = (sp_int_word)da[3] * da[3];
15121
15122
        r->dp[0] = (sp_int_digit)w[0];
15123
        w[0] >>= SP_WORD_SIZE;
15124
        w[0] += (sp_int_digit)w[1];
15125
        w[0] += (sp_int_digit)w[1];
15126
        r->dp[1] = (sp_int_digit)w[0];
15127
        w[0] >>= SP_WORD_SIZE;
15128
        w[1] >>= SP_WORD_SIZE;
15129
        w[0] += (sp_int_digit)w[1];
15130
        w[0] += (sp_int_digit)w[1];
15131
        w[0] += (sp_int_digit)w[2];
15132
        w[0] += (sp_int_digit)w[2];
15133
        w[0] += (sp_int_digit)w[3];
15134
        r->dp[2] = (sp_int_digit)w[0];
15135
        w[0] >>= SP_WORD_SIZE;
15136
        w[2] >>= SP_WORD_SIZE;
15137
        w[0] += (sp_int_digit)w[2];
15138
        w[0] += (sp_int_digit)w[2];
15139
        w[3] >>= SP_WORD_SIZE;
15140
        w[0] += (sp_int_digit)w[3];
15141
        w[0] += (sp_int_digit)w[4];
15142
        w[0] += (sp_int_digit)w[4];
15143
        w[0] += (sp_int_digit)w[5];
15144
        w[0] += (sp_int_digit)w[5];
15145
        r->dp[3] = (sp_int_digit)w[0];
15146
        w[0] >>= SP_WORD_SIZE;
15147
        w[4] >>= SP_WORD_SIZE;
15148
        w[0] += (sp_int_digit)w[4];
15149
        w[0] += (sp_int_digit)w[4];
15150
        w[5] >>= SP_WORD_SIZE;
15151
        w[0] += (sp_int_digit)w[5];
15152
        w[0] += (sp_int_digit)w[5];
15153
        w[0] += (sp_int_digit)w[6];
15154
        w[0] += (sp_int_digit)w[6];
15155
        w[0] += (sp_int_digit)w[7];
15156
        r->dp[4] = (sp_int_digit)w[0];
15157
        w[0] >>= SP_WORD_SIZE;
15158
        w[6] >>= SP_WORD_SIZE;
15159
        w[0] += (sp_int_digit)w[6];
15160
        w[0] += (sp_int_digit)w[6];
15161
        w[7] >>= SP_WORD_SIZE;
15162
        w[0] += (sp_int_digit)w[7];
15163
        w[0] += (sp_int_digit)w[8];
15164
        w[0] += (sp_int_digit)w[8];
15165
        r->dp[5] = (sp_int_digit)w[0];
15166
        w[0] >>= SP_WORD_SIZE;
15167
        w[8] >>= SP_WORD_SIZE;
15168
        w[0] += (sp_int_digit)w[8];
15169
        w[0] += (sp_int_digit)w[8];
15170
        w[0] += (sp_int_digit)w[9];
15171
        r->dp[6] = (sp_int_digit)w[0];
15172
        w[0] >>= SP_WORD_SIZE;
15173
        w[9] >>= SP_WORD_SIZE;
15174
        w[0] += (sp_int_digit)w[9];
15175
        r->dp[7] = (sp_int_digit)w[0];
15176
15177
        r->used = 8;
15178
        sp_clamp(r);
15179
    }
15180
15181
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15182
    XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
15183
#endif
15184
    return err;
15185
}
15186
#else /* SQR_MUL_ASM */
15187
/* Square a and store in r. r = a * a
15188
 *
15189
 * Comba implementation.
15190
 *
15191
 * @param  [in]   a  SP integer to square.
15192
 * @param  [out]  r  SP integer result.
15193
 *
15194
 * @return  MP_OKAY on success.
15195
 * @return  MP_MEM when dynamic memory allocation fails.
15196
 */
15197
static int _sp_sqr_4(const sp_int* a, sp_int* r)
15198
0
{
15199
0
    sp_int_digit l = 0;
15200
0
    sp_int_digit h = 0;
15201
0
    sp_int_digit o = 0;
15202
0
    sp_int_digit t[4];
15203
15204
0
    SP_ASM_SQR(h, l, a->dp[0]);
15205
0
    t[0] = h;
15206
0
    h = 0;
15207
0
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15208
0
    t[1] = l;
15209
0
    l = h;
15210
0
    h = o;
15211
0
    o = 0;
15212
0
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15213
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15214
0
    t[2] = l;
15215
0
    l = h;
15216
0
    h = o;
15217
0
    o = 0;
15218
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15219
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15220
0
    t[3] = l;
15221
0
    l = h;
15222
0
    h = o;
15223
0
    o = 0;
15224
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15225
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15226
0
    r->dp[4] = l;
15227
0
    l = h;
15228
0
    h = o;
15229
0
    o = 0;
15230
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
15231
0
    r->dp[5] = l;
15232
0
    l = h;
15233
0
    h = o;
15234
0
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
15235
0
    r->dp[6] = l;
15236
0
    r->dp[7] = h;
15237
0
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
15238
0
    r->used = 8;
15239
0
    sp_clamp(r);
15240
15241
0
    return MP_OKAY;
15242
0
}
15243
#endif /* SQR_MUL_ASM */
15244
#endif /* SP_WORD_SIZE == 64 */
15245
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
15246
#ifdef SQR_MUL_ASM
15247
/* Square a and store in r. r = a * a
15248
 *
15249
 * Comba implementation.
15250
 *
15251
 * @param  [in]   a  SP integer to square.
15252
 * @param  [out]  r  SP integer result.
15253
 *
15254
 * @return  MP_OKAY on success.
15255
 * @return  MP_MEM when dynamic memory allocation fails.
15256
 */
15257
static int _sp_sqr_6(const sp_int* a, sp_int* r)
15258
0
{
15259
0
    sp_int_digit l = 0;
15260
0
    sp_int_digit h = 0;
15261
0
    sp_int_digit o = 0;
15262
0
    sp_int_digit tl = 0;
15263
0
    sp_int_digit th = 0;
15264
0
    sp_int_digit to;
15265
0
    sp_int_digit t[6];
15266
15267
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15268
    to = 0;
15269
#endif
15270
15271
0
    SP_ASM_SQR(h, l, a->dp[0]);
15272
0
    t[0] = h;
15273
0
    h = 0;
15274
0
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15275
0
    t[1] = l;
15276
0
    l = h;
15277
0
    h = o;
15278
0
    o = 0;
15279
0
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15280
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15281
0
    t[2] = l;
15282
0
    l = h;
15283
0
    h = o;
15284
0
    o = 0;
15285
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15286
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15287
0
    t[3] = l;
15288
0
    l = h;
15289
0
    h = o;
15290
0
    o = 0;
15291
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15292
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15293
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15294
0
    t[4] = l;
15295
0
    l = h;
15296
0
    h = o;
15297
0
    o = 0;
15298
0
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15299
0
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15300
0
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15301
0
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15302
0
    t[5] = l;
15303
0
    l = h;
15304
0
    h = o;
15305
0
    o = 0;
15306
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
15307
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
15308
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15309
0
    r->dp[6] = l;
15310
0
    l = h;
15311
0
    h = o;
15312
0
    o = 0;
15313
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
15314
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
15315
0
    r->dp[7] = l;
15316
0
    l = h;
15317
0
    h = o;
15318
0
    o = 0;
15319
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
15320
0
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15321
0
    r->dp[8] = l;
15322
0
    l = h;
15323
0
    h = o;
15324
0
    o = 0;
15325
0
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
15326
0
    r->dp[9] = l;
15327
0
    l = h;
15328
0
    h = o;
15329
0
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
15330
0
    r->dp[10] = l;
15331
0
    r->dp[11] = h;
15332
0
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
15333
0
    r->used = 12;
15334
0
    sp_clamp(r);
15335
15336
0
    return MP_OKAY;
15337
0
}
15338
#endif /* SQR_MUL_ASM */
15339
#endif /* SP_WORD_SIZE == 64 */
15340
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
15341
#ifdef SQR_MUL_ASM
15342
/* Square a and store in r. r = a * a
15343
 *
15344
 * Comba implementation.
15345
 *
15346
 * @param  [in]   a  SP integer to square.
15347
 * @param  [out]  r  SP integer result.
15348
 *
15349
 * @return  MP_OKAY on success.
15350
 * @return  MP_MEM when dynamic memory allocation fails.
15351
 */
15352
static int _sp_sqr_8(const sp_int* a, sp_int* r)
15353
{
15354
    sp_int_digit l = 0;
15355
    sp_int_digit h = 0;
15356
    sp_int_digit o = 0;
15357
    sp_int_digit tl = 0;
15358
    sp_int_digit th = 0;
15359
    sp_int_digit to;
15360
    sp_int_digit t[8];
15361
15362
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15363
    to = 0;
15364
#endif
15365
15366
    SP_ASM_SQR(h, l, a->dp[0]);
15367
    t[0] = h;
15368
    h = 0;
15369
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15370
    t[1] = l;
15371
    l = h;
15372
    h = o;
15373
    o = 0;
15374
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15375
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15376
    t[2] = l;
15377
    l = h;
15378
    h = o;
15379
    o = 0;
15380
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15381
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15382
    t[3] = l;
15383
    l = h;
15384
    h = o;
15385
    o = 0;
15386
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15387
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15388
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15389
    t[4] = l;
15390
    l = h;
15391
    h = o;
15392
    o = 0;
15393
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15394
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15395
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15396
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15397
    t[5] = l;
15398
    l = h;
15399
    h = o;
15400
    o = 0;
15401
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15402
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15403
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15404
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15405
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15406
    t[6] = l;
15407
    l = h;
15408
    h = o;
15409
    o = 0;
15410
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15411
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15412
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15413
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15414
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15415
    t[7] = l;
15416
    l = h;
15417
    h = o;
15418
    o = 0;
15419
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
15420
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15421
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15422
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15423
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15424
    r->dp[8] = l;
15425
    l = h;
15426
    h = o;
15427
    o = 0;
15428
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
15429
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15430
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15431
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15432
    r->dp[9] = l;
15433
    l = h;
15434
    h = o;
15435
    o = 0;
15436
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
15437
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
15438
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15439
    r->dp[10] = l;
15440
    l = h;
15441
    h = o;
15442
    o = 0;
15443
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
15444
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
15445
    r->dp[11] = l;
15446
    l = h;
15447
    h = o;
15448
    o = 0;
15449
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
15450
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15451
    r->dp[12] = l;
15452
    l = h;
15453
    h = o;
15454
    o = 0;
15455
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
15456
    r->dp[13] = l;
15457
    l = h;
15458
    h = o;
15459
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
15460
    r->dp[14] = l;
15461
    r->dp[15] = h;
15462
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
15463
    r->used = 16;
15464
    sp_clamp(r);
15465
15466
    return MP_OKAY;
15467
}
15468
#endif /* SQR_MUL_ASM */
15469
#endif /* SP_WORD_SIZE == 32 */
15470
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
15471
#ifdef SQR_MUL_ASM
15472
/* Square a and store in r. r = a * a
15473
 *
15474
 * Comba implementation, fully unrolled for a 12 digit input.
 * Reads a->dp[0] .. a->dp[11] and writes r->dp[0] .. r->dp[23] unconditionally;
 * neither a->used nor the capacity of r is checked here.
15475
 *
15476
 * @param  [in]   a  SP integer to square.
15477
 * @param  [out]  r  SP integer result.
15478
 *
15479
 * @return  MP_OKAY always.
15480
 * (MP_MEM is never returned: this variant performs no dynamic allocation.)
15481
 */
15482
static int _sp_sqr_12(const sp_int* a, sp_int* r)
15483
{
15484
    /* (l, h, o): running accumulator words for the current result column. */
    sp_int_digit l = 0;
15485
    sp_int_digit h = 0;
15486
    sp_int_digit o = 0;
15487
    /* (tl, th, to): second accumulator used from column 5 onward. */
    sp_int_digit tl = 0;
15488
    sp_int_digit th = 0;
15489
    sp_int_digit to;
15490
    /* Staging area for result digits 0..11; copied into r at the end. */
    sp_int_digit t[12];
15491
15492
    /* Only this configuration pre-initialises 'to'; otherwise it is first
     * passed to SP_ASM_MUL_SET (presumably that macro assigns it - confirm). */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15493
    to = 0;
15494
#endif
15495
15496
    /* Column 0. Note the (h, l) argument order: h is stored as digit 0 and
     * cleared, while l is kept as the running low word for column 1. */
    SP_ASM_SQR(h, l, a->dp[0]);
15497
    t[0] = h;
15498
    h = 0;
15499
    /* Columns 1..4: each column k uses the pairs a->dp[i], a->dp[j] with
     * i + j == k, plus a->dp[k/2] squared when k is even. After each column
     * the digit is stored and the accumulator shifts down one word. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15500
    t[1] = l;
15501
    l = h;
15502
    h = o;
15503
    o = 0;
15504
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15505
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15506
    t[2] = l;
15507
    l = h;
15508
    h = o;
15509
    o = 0;
15510
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15511
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15512
    t[3] = l;
15513
    l = h;
15514
    h = o;
15515
    o = 0;
15516
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15517
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15518
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15519
    t[4] = l;
15520
    l = h;
15521
    h = o;
15522
    o = 0;
15523
    /* Columns 5..17: the cross products are gathered in (tl, th, to) and then
     * folded into (l, h, o) with SP_ASM_ADD_DBL_3.
     * NOTE(review): the macro name suggests the gathered sum is added twice;
     * the macros are defined elsewhere - confirm. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15524
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15525
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15526
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15527
    t[5] = l;
15528
    l = h;
15529
    h = o;
15530
    o = 0;
15531
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15532
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15533
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15534
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15535
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15536
    t[6] = l;
15537
    l = h;
15538
    h = o;
15539
    o = 0;
15540
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15541
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15542
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15543
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15544
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15545
    t[7] = l;
15546
    l = h;
15547
    h = o;
15548
    o = 0;
15549
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15550
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15551
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15552
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15553
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15554
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15555
    t[8] = l;
15556
    l = h;
15557
    h = o;
15558
    o = 0;
15559
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15560
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15561
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15562
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15563
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15564
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15565
    t[9] = l;
15566
    l = h;
15567
    h = o;
15568
    o = 0;
15569
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15570
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15571
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15572
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15573
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15574
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15575
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15576
    t[10] = l;
15577
    l = h;
15578
    h = o;
15579
    o = 0;
15580
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15581
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15582
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15583
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15584
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15585
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15586
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15587
    t[11] = l;
15588
    l = h;
15589
    h = o;
15590
    o = 0;
15591
    /* From column 12 on, digits are written straight to r->dp[12..23]. The
     * low digits stay in t because a->dp[0..11] is still being read
     * (presumably so that r may be the same object as a - confirm). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
15592
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15593
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15594
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15595
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15596
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15597
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15598
    r->dp[12] = l;
15599
    l = h;
15600
    h = o;
15601
    o = 0;
15602
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
15603
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15604
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15605
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15606
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15607
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15608
    r->dp[13] = l;
15609
    l = h;
15610
    h = o;
15611
    o = 0;
15612
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
15613
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15614
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15615
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15616
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15617
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15618
    r->dp[14] = l;
15619
    l = h;
15620
    h = o;
15621
    o = 0;
15622
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
15623
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15624
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15625
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15626
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15627
    r->dp[15] = l;
15628
    l = h;
15629
    h = o;
15630
    o = 0;
15631
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
15632
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15633
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15634
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15635
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15636
    r->dp[16] = l;
15637
    l = h;
15638
    h = o;
15639
    o = 0;
15640
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
15641
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15642
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15643
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15644
    r->dp[17] = l;
15645
    l = h;
15646
    h = o;
15647
    o = 0;
15648
    /* Columns 18..21 have at most two cross products each and accumulate
     * directly into (l, h, o) again, as columns 1..4 did. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
15649
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
15650
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15651
    r->dp[18] = l;
15652
    l = h;
15653
    h = o;
15654
    o = 0;
15655
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
15656
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
15657
    r->dp[19] = l;
15658
    l = h;
15659
    h = o;
15660
    o = 0;
15661
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
15662
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15663
    r->dp[20] = l;
15664
    l = h;
15665
    h = o;
15666
    o = 0;
15667
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
15668
    r->dp[21] = l;
15669
    l = h;
15670
    h = o;
15671
    /* Last column: only a->dp[11] squared; (l, h) become digits 22 and 23. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
15672
    r->dp[22] = l;
15673
    r->dp[23] = h;
15674
    /* All reads of a are done; publish the staged low 12 digits. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
15675
    r->used = 24;
15676
    /* sp_clamp is defined elsewhere; presumably it trims leading zero digits
     * from r->used - confirm. */
    sp_clamp(r);
15677
15678
    return MP_OKAY;
15679
}
15680
#endif /* SQR_MUL_ASM */
15681
#endif /* SP_WORD_SIZE == 32 */
15682
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
15683
15684
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
15685
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
15686
    (SP_WORD_SIZE == 64)))
15687
    #if SP_INT_DIGITS >= 32
15688
/* Square a and store in r. r = a * a
15689
 *
15690
 * Comba implementation, fully unrolled for a 16 digit input.
 * Reads a->dp[0] .. a->dp[15] and writes r->dp[0] .. r->dp[31]; neither
 * a->used nor the capacity of r is checked here.
15691
 *
15692
 * @param  [in]   a  SP integer to square.
15693
 * @param  [out]  r  SP integer result.
15694
 *
15695
 * @return  MP_OKAY on success.
15696
 * @return  MP_MEM when dynamic memory allocation fails (only possible when
 *          built with WOLFSSL_SMALL_STACK and without WOLFSSL_SP_NO_MALLOC;
 *          r is not written in that case).
15697
 */
15698
static int _sp_sqr_16(const sp_int* a, sp_int* r)
15699
{
15700
    int err = MP_OKAY;
15701
    /* (l, h, o): running accumulator words for the current result column. */
    sp_int_digit l = 0;
15702
    sp_int_digit h = 0;
15703
    sp_int_digit o = 0;
15704
    /* (tl, th, to): second accumulator used from column 5 onward. */
    sp_int_digit tl = 0;
15705
    sp_int_digit th = 0;
15706
    sp_int_digit to;
15707
    /* Staging area for result digits 0..15: heap in small-stack builds that
     * allow malloc, otherwise a local array. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15708
    sp_int_digit* t = NULL;
15709
#else
15710
    sp_int_digit t[16];
15711
#endif
15712
15713
    /* Only this configuration pre-initialises 'to'; otherwise it is first
     * passed to SP_ASM_MUL_SET (presumably that macro assigns it - confirm). */
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
15714
    to = 0;
15715
#endif
15716
15717
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15718
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
15719
         DYNAMIC_TYPE_BIGINT);
15720
     if (t == NULL) {
15721
         err = MP_MEM;
15722
     }
15723
#endif
15724
    if (err == MP_OKAY) {
15725
        /* Column 0. Note the (h, l) argument order: h is stored as digit 0
         * and cleared, while l is kept as the running low word for column 1. */
        SP_ASM_SQR(h, l, a->dp[0]);
15726
        t[0] = h;
15727
        h = 0;
15728
        /* Columns 1..4: each column k uses the pairs a->dp[i], a->dp[j] with
         * i + j == k, plus a->dp[k/2] squared when k is even. After each
         * column the digit is stored and the accumulator shifts down a word. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
15729
        t[1] = l;
15730
        l = h;
15731
        h = o;
15732
        o = 0;
15733
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
15734
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
15735
        t[2] = l;
15736
        l = h;
15737
        h = o;
15738
        o = 0;
15739
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
15740
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
15741
        t[3] = l;
15742
        l = h;
15743
        h = o;
15744
        o = 0;
15745
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
15746
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
15747
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
15748
        t[4] = l;
15749
        l = h;
15750
        h = o;
15751
        o = 0;
15752
        /* Columns 5..25: the cross products are gathered in (tl, th, to) and
         * then folded into (l, h, o) with SP_ASM_ADD_DBL_3.
         * NOTE(review): the macro name suggests the gathered sum is added
         * twice; the macros are defined elsewhere - confirm. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
15753
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
15754
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
15755
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15756
        t[5] = l;
15757
        l = h;
15758
        h = o;
15759
        o = 0;
15760
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
15761
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
15762
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
15763
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
15764
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15765
        t[6] = l;
15766
        l = h;
15767
        h = o;
15768
        o = 0;
15769
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
15770
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
15771
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
15772
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
15773
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15774
        t[7] = l;
15775
        l = h;
15776
        h = o;
15777
        o = 0;
15778
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
15779
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
15780
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
15781
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
15782
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
15783
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15784
        t[8] = l;
15785
        l = h;
15786
        h = o;
15787
        o = 0;
15788
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
15789
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
15790
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
15791
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
15792
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
15793
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15794
        t[9] = l;
15795
        l = h;
15796
        h = o;
15797
        o = 0;
15798
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
15799
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
15800
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
15801
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
15802
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
15803
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
15804
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15805
        t[10] = l;
15806
        l = h;
15807
        h = o;
15808
        o = 0;
15809
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
15810
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
15811
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
15812
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
15813
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
15814
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
15815
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15816
        t[11] = l;
15817
        l = h;
15818
        h = o;
15819
        o = 0;
15820
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
15821
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
15822
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
15823
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
15824
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
15825
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
15826
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
15827
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15828
        t[12] = l;
15829
        l = h;
15830
        h = o;
15831
        o = 0;
15832
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
15833
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
15834
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
15835
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
15836
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
15837
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
15838
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
15839
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15840
        t[13] = l;
15841
        l = h;
15842
        h = o;
15843
        o = 0;
15844
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
15845
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
15846
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
15847
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
15848
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
15849
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
15850
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
15851
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
15852
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15853
        t[14] = l;
15854
        l = h;
15855
        h = o;
15856
        o = 0;
15857
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
15858
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
15859
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
15860
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
15861
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
15862
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
15863
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
15864
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
15865
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15866
        t[15] = l;
15867
        l = h;
15868
        h = o;
15869
        o = 0;
15870
        /* From column 16 on, digits are written straight to r->dp[16..31].
         * The low digits stay in t because a->dp[0..15] is still being read
         * (presumably so that r may be the same object as a - confirm). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
15871
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
15872
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
15873
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
15874
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
15875
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
15876
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
15877
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
15878
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15879
        r->dp[16] = l;
15880
        l = h;
15881
        h = o;
15882
        o = 0;
15883
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
15884
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
15885
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
15886
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
15887
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
15888
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
15889
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
15890
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15891
        r->dp[17] = l;
15892
        l = h;
15893
        h = o;
15894
        o = 0;
15895
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
15896
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
15897
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
15898
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
15899
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
15900
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
15901
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
15902
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15903
        r->dp[18] = l;
15904
        l = h;
15905
        h = o;
15906
        o = 0;
15907
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
15908
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
15909
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
15910
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
15911
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
15912
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
15913
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15914
        r->dp[19] = l;
15915
        l = h;
15916
        h = o;
15917
        o = 0;
15918
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
15919
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
15920
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
15921
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
15922
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
15923
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
15924
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15925
        r->dp[20] = l;
15926
        l = h;
15927
        h = o;
15928
        o = 0;
15929
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
15930
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
15931
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
15932
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
15933
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
15934
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15935
        r->dp[21] = l;
15936
        l = h;
15937
        h = o;
15938
        o = 0;
15939
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
15940
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
15941
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
15942
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
15943
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
15944
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15945
        r->dp[22] = l;
15946
        l = h;
15947
        h = o;
15948
        o = 0;
15949
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
15950
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
15951
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
15952
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
15953
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15954
        r->dp[23] = l;
15955
        l = h;
15956
        h = o;
15957
        o = 0;
15958
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
15959
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
15960
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
15961
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
15962
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15963
        r->dp[24] = l;
15964
        l = h;
15965
        h = o;
15966
        o = 0;
15967
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
15968
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
15969
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
15970
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
15971
        r->dp[25] = l;
15972
        l = h;
15973
        h = o;
15974
        o = 0;
15975
        /* Columns 26..29 have at most two cross products each and accumulate
         * directly into (l, h, o) again, as columns 1..4 did. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
15976
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
15977
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
15978
        r->dp[26] = l;
15979
        l = h;
15980
        h = o;
15981
        o = 0;
15982
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
15983
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
15984
        r->dp[27] = l;
15985
        l = h;
15986
        h = o;
15987
        o = 0;
15988
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
15989
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
15990
        r->dp[28] = l;
15991
        l = h;
15992
        h = o;
15993
        o = 0;
15994
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
15995
        r->dp[29] = l;
15996
        l = h;
15997
        h = o;
15998
        /* Last column: only a->dp[15] squared; (l, h) become digits 30, 31. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
15999
        r->dp[30] = l;
16000
        r->dp[31] = h;
16001
        /* All reads of a are done; publish the staged low 16 digits. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
16002
        r->used = 32;
16003
        /* sp_clamp is defined elsewhere; presumably it trims leading zero
         * digits from r->used - confirm. */
        sp_clamp(r);
16004
    }
16005
16006
    /* Heap scratch is released on both the success and the MP_MEM path
     * (t is NULL on the latter). */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16007
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16008
#endif
16009
    return err;
16010
}
16011
    #endif /* SP_INT_DIGITS >= 32 */
16012
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
16013
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
16014
16015
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
16016
    #if SP_INT_DIGITS >= 48
16017
/* Square a and store in r. r = a * a
16018
 *
16019
 * Comba implementation.
16020
 *
16021
 * @param  [in]   a  SP integer to square.
16022
 * @param  [out]  r  SP integer result.
16023
 *
16024
 * @return  MP_OKAY on success.
16025
 * @return  MP_MEM when dynamic memory allocation fails.
16026
 */
16027
static int _sp_sqr_24(const sp_int* a, sp_int* r)
16028
{
16029
    int err = MP_OKAY;
16030
    sp_int_digit l = 0;
16031
    sp_int_digit h = 0;
16032
    sp_int_digit o = 0;
16033
    sp_int_digit tl = 0;
16034
    sp_int_digit th = 0;
16035
    sp_int_digit to;
16036
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16037
    sp_int_digit* t = NULL;
16038
#else
16039
    sp_int_digit t[24];
16040
#endif
16041
16042
#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
16043
    to = 0;
16044
#endif
16045
16046
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16047
     t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
16048
         DYNAMIC_TYPE_BIGINT);
16049
     if (t == NULL) {
16050
         err = MP_MEM;
16051
     }
16052
#endif
16053
    if (err == MP_OKAY) {
16054
        SP_ASM_SQR(h, l, a->dp[0]);
16055
        t[0] = h;
16056
        h = 0;
16057
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
16058
        t[1] = l;
16059
        l = h;
16060
        h = o;
16061
        o = 0;
16062
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
16063
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
16064
        t[2] = l;
16065
        l = h;
16066
        h = o;
16067
        o = 0;
16068
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
16069
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
16070
        t[3] = l;
16071
        l = h;
16072
        h = o;
16073
        o = 0;
16074
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
16075
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
16076
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
16077
        t[4] = l;
16078
        l = h;
16079
        h = o;
16080
        o = 0;
16081
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
16082
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
16083
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
16084
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16085
        t[5] = l;
16086
        l = h;
16087
        h = o;
16088
        o = 0;
16089
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
16090
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
16091
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
16092
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
16093
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16094
        t[6] = l;
16095
        l = h;
16096
        h = o;
16097
        o = 0;
16098
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
16099
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
16100
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
16101
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
16102
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16103
        t[7] = l;
16104
        l = h;
16105
        h = o;
16106
        o = 0;
16107
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
16108
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
16109
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
16110
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
16111
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
16112
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16113
        t[8] = l;
16114
        l = h;
16115
        h = o;
16116
        o = 0;
16117
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
16118
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
16119
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
16120
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
16121
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
16122
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16123
        t[9] = l;
16124
        l = h;
16125
        h = o;
16126
        o = 0;
16127
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
16128
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
16129
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
16130
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
16131
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
16132
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
16133
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16134
        t[10] = l;
16135
        l = h;
16136
        h = o;
16137
        o = 0;
16138
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
16139
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
16140
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
16141
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
16142
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
16143
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
16144
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16145
        t[11] = l;
16146
        l = h;
16147
        h = o;
16148
        o = 0;
16149
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
16150
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
16151
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
16152
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
16153
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
16154
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
16155
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
16156
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16157
        t[12] = l;
16158
        l = h;
16159
        h = o;
16160
        o = 0;
16161
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
16162
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
16163
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
16164
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
16165
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
16166
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
16167
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
16168
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16169
        t[13] = l;
16170
        l = h;
16171
        h = o;
16172
        o = 0;
16173
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
16174
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
16175
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
16176
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
16177
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
16178
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
16179
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
16180
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
16181
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16182
        t[14] = l;
16183
        l = h;
16184
        h = o;
16185
        o = 0;
16186
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
16187
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
16188
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
16189
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
16190
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
16191
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
16192
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
16193
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
16194
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16195
        t[15] = l;
16196
        l = h;
16197
        h = o;
16198
        o = 0;
16199
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
16200
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
16201
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
16202
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
16203
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
16204
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
16205
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
16206
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
16207
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
16208
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16209
        t[16] = l;
16210
        l = h;
16211
        h = o;
16212
        o = 0;
16213
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
16214
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
16215
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
16216
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
16217
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
16218
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
16219
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
16220
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
16221
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
16222
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16223
        t[17] = l;
16224
        l = h;
16225
        h = o;
16226
        o = 0;
16227
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
16228
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
16229
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
16230
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
16231
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
16232
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
16233
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
16234
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
16235
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
16236
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
16237
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16238
        t[18] = l;
16239
        l = h;
16240
        h = o;
16241
        o = 0;
16242
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
16243
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
16244
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
16245
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
16246
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
16247
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
16248
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
16249
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
16250
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
16251
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
16252
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16253
        t[19] = l;
16254
        l = h;
16255
        h = o;
16256
        o = 0;
16257
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
16258
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
16259
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
16260
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
16261
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
16262
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
16263
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
16264
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
16265
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
16266
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
16267
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
16268
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16269
        t[20] = l;
16270
        l = h;
16271
        h = o;
16272
        o = 0;
16273
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
16274
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
16275
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
16276
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
16277
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
16278
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
16279
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
16280
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
16281
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
16282
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
16283
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
16284
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16285
        t[21] = l;
16286
        l = h;
16287
        h = o;
16288
        o = 0;
16289
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
16290
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
16291
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
16292
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
16293
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
16294
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
16295
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
16296
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
16297
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
16298
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
16299
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
16300
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
16301
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16302
        t[22] = l;
16303
        l = h;
16304
        h = o;
16305
        o = 0;
16306
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
16307
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
16308
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
16309
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
16310
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
16311
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
16312
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
16313
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
16314
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
16315
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
16316
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
16317
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
16318
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16319
        t[23] = l;
16320
        l = h;
16321
        h = o;
16322
        o = 0;
16323
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
16324
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
16325
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
16326
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
16327
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
16328
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
16329
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
16330
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
16331
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
16332
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
16333
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
16334
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
16335
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16336
        r->dp[24] = l;
16337
        l = h;
16338
        h = o;
16339
        o = 0;
16340
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
16341
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
16342
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
16343
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
16344
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
16345
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
16346
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
16347
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
16348
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
16349
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
16350
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
16351
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16352
        r->dp[25] = l;
16353
        l = h;
16354
        h = o;
16355
        o = 0;
16356
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
16357
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
16358
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
16359
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
16360
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
16361
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
16362
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
16363
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
16364
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
16365
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
16366
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
16367
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16368
        r->dp[26] = l;
16369
        l = h;
16370
        h = o;
16371
        o = 0;
16372
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
16373
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
16374
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
16375
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
16376
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
16377
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
16378
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
16379
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
16380
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
16381
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
16382
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16383
        r->dp[27] = l;
16384
        l = h;
16385
        h = o;
16386
        o = 0;
16387
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
16388
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
16389
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
16390
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
16391
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
16392
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
16393
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
16394
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
16395
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
16396
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
16397
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16398
        r->dp[28] = l;
16399
        l = h;
16400
        h = o;
16401
        o = 0;
16402
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
16403
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
16404
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
16405
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
16406
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
16407
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
16408
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
16409
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
16410
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
16411
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16412
        r->dp[29] = l;
16413
        l = h;
16414
        h = o;
16415
        o = 0;
16416
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
16417
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
16418
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
16419
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
16420
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
16421
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
16422
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
16423
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
16424
        SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
16425
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16426
        r->dp[30] = l;
16427
        l = h;
16428
        h = o;
16429
        o = 0;
16430
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
16431
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
16432
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
16433
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
16434
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
16435
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
16436
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
16437
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
16438
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16439
        r->dp[31] = l;
16440
        l = h;
16441
        h = o;
16442
        o = 0;
16443
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
16444
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
16445
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
16446
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
16447
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
16448
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
16449
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
16450
        SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
16451
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16452
        r->dp[32] = l;
16453
        l = h;
16454
        h = o;
16455
        o = 0;
16456
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
16457
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
16458
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
16459
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
16460
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
16461
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
16462
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
16463
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16464
        r->dp[33] = l;
16465
        l = h;
16466
        h = o;
16467
        o = 0;
16468
        SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
16469
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
16470
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
16471
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
16472
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
16473
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
16474
        SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
16475
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16476
        r->dp[34] = l;
16477
        l = h;
16478
        h = o;
16479
        o = 0;
16480
        SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
16481
        SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
16482
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
16483
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
16484
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
16485
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
16486
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16487
        r->dp[35] = l;
16488
        l = h;
16489
        h = o;
16490
        o = 0;
16491
        SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
16492
        SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
16493
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
16494
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
16495
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
16496
        SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
16497
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16498
        r->dp[36] = l;
16499
        l = h;
16500
        h = o;
16501
        o = 0;
16502
        SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
16503
        SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
16504
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
16505
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
16506
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
16507
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16508
        r->dp[37] = l;
16509
        l = h;
16510
        h = o;
16511
        o = 0;
16512
        SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
16513
        SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
16514
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
16515
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
16516
        SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
16517
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16518
        r->dp[38] = l;
16519
        l = h;
16520
        h = o;
16521
        o = 0;
16522
        SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
16523
        SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
16524
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
16525
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
16526
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16527
        r->dp[39] = l;
16528
        l = h;
16529
        h = o;
16530
        o = 0;
16531
        SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
16532
        SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
16533
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
16534
        SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
16535
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16536
        r->dp[40] = l;
16537
        l = h;
16538
        h = o;
16539
        o = 0;
16540
        SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
16541
        SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
16542
        SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
16543
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
16544
        r->dp[41] = l;
16545
        l = h;
16546
        h = o;
16547
        o = 0;
16548
        SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
16549
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
16550
        SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
16551
        r->dp[42] = l;
16552
        l = h;
16553
        h = o;
16554
        o = 0;
16555
        SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
16556
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
16557
        r->dp[43] = l;
16558
        l = h;
16559
        h = o;
16560
        o = 0;
16561
        SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
16562
        SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
16563
        r->dp[44] = l;
16564
        l = h;
16565
        h = o;
16566
        o = 0;
16567
        SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
16568
        r->dp[45] = l;
16569
        l = h;
16570
        h = o;
16571
        SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
16572
        r->dp[46] = l;
16573
        r->dp[47] = h;
16574
        XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
16575
        r->used = 48;
16576
        sp_clamp(r);
16577
    }
16578
16579
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
16580
    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
16581
#endif
16582
    return err;
16583
}
16584
    #endif /* SP_INT_DIGITS >= 48 */
16585
16586
    #if SP_INT_DIGITS >= 64
16587
/* Square a and store in r. r = a * a
16588
 *
16589
 * Karatsuba implementation.
 * a is handled as two 16-digit halves, a0 (low) and a1 (high). Three 16-digit
 * squarings - a1^2, a0^2 and (a0 + a1)^2 - are combined into a 64-digit
 * result. Shift counts in the comments below are in digits, not bits.
 * a->used is not checked here; presumably the caller only passes 32-digit
 * values (TODO confirm against the caller).
16590
 *
16591
 * @param  [in]   a  SP integer to square.
16592
 * @param  [out]  r  SP integer result. Needs room for 64 digits.
16593
 *
16594
 * @return  MP_OKAY on success.
16595
 * @return  MP_MEM when dynamic memory allocation fails.
16596
 */
16597
static int _sp_sqr_32(const sp_int* a, sp_int* r)
16598
{
16599
    int err = MP_OKAY;
16600
    unsigned int i;
16601
    sp_int_digit l;
16602
    sp_int_digit h;
16603
    sp_int* z0;
16604
    sp_int* z1;
16605
    sp_int* z2;
16606
    sp_int_digit ca;
16607
    DECL_SP_INT(a1, 16);
16608
    DECL_SP_INT_ARRAY(z, 33, 2);
16609
16610
    ALLOC_SP_INT(a1, 16, err, NULL);
16611
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
16612
    if (err == MP_OKAY) {
16613
        z1 = z[0];
16614
        z2 = z[1];
16615
        /* z0 is built directly in the result. */
        z0 = r;
16616
16617
        /* a1 = high 16 digits of a. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
16618
        a1->used = 16;
16619
16620
        /* z2 = a1 ^ 2 */
16621
        err = _sp_sqr_16(a1, z2);
16622
    }
16623
    if (err == MP_OKAY) {
16624
        /* a1 = a0 + a1 (low 16 digits); the carry out is kept in ca. */
        l = 0;
16625
        h = 0;
16626
        for (i = 0; i < 16; i++) {
16627
            SP_ASM_ADDC(l, h, a1->dp[i]);
16628
            SP_ASM_ADDC(l, h, a->dp[i]);
16629
            a1->dp[i] = l;
16630
            l = h;
16631
            h = 0;
16632
        }
16633
        ca = l;
16634
16635
        /* z0 = a0 ^ 2 */
16636
        err = _sp_sqr_16(a, z0);
16637
    }
16638
    if (err == MP_OKAY) {
16639
        /* z1 = (a0 + a1) ^ 2 - the carry in ca is accounted for below */
16640
        err = _sp_sqr_16(a1, z1);
16641
    }
16642
    if (err == MP_OKAY) {
16643
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
16644
        /* r = z0 */
16645
        /* r += (z1 - z0 - z2) << 16 */
16646
        /* Carry correction: with s the 16-digit sum held in a1,
         * (s + (ca << 16))^2 = s^2 + ((2 * ca * s) << 16) + (ca << 32).
         * Digit 32 of z1 takes ca and, when ca is set, s is added twice
         * starting at digit 16. */
        z1->dp[32] = ca;
16647
        l = 0;
16648
        if (ca) {
16649
            l = z1->dp[0 + 16];
16650
            h = 0;
16651
            SP_ASM_ADDC(l, h, a1->dp[0]);
16652
            SP_ASM_ADDC(l, h, a1->dp[0]);
16653
            z1->dp[0 + 16] = l;
16654
            l = h;
16655
            h = 0;
16656
            for (i = 1; i < 16; i++) {
16657
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
16658
                SP_ASM_ADDC(l, h, a1->dp[i]);
16659
                SP_ASM_ADDC(l, h, a1->dp[i]);
16660
                z1->dp[i + 16] = l;
16661
                l = h;
16662
                h = 0;
16663
            }
16664
        }
16665
        /* Fold the final carry of the correction into the top digit. */
        z1->dp[32] += l;
16666
        /* z1 = z1 - z0 - z2 */
16667
        l = z1->dp[0];
16668
        h = 0;
16669
        SP_ASM_SUBB(l, h, z0->dp[0]);
16670
        SP_ASM_SUBB(l, h, z2->dp[0]);
16671
        z1->dp[0] = l;
16672
        l = h;
16673
        h = 0;
16674
        for (i = 1; i < 32; i++) {
16675
            /* NOTE(review): l holds the borrow from the previous digit and
             * is combined with a plain addition, so nothing is recorded in h
             * at this step. Confirm that a pending borrow cannot be lost
             * when z1->dp[i] is smaller than that borrow. */
            l += z1->dp[i];
16676
            SP_ASM_SUBB(l, h, z0->dp[i]);
16677
            SP_ASM_SUBB(l, h, z2->dp[i]);
16678
            z1->dp[i] = l;
16679
            l = h;
16680
            h = 0;
16681
        }
16682
        /* i == 32 here: apply the last borrow to the top digit of z1. */
        z1->dp[i] += l;
16683
        /* r += z1 << 16 */
16684
        l = 0;
16685
        h = 0;
16686
        for (i = 0; i < 16; i++) {
16687
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
16688
            SP_ASM_ADDC(l, h, z1->dp[i]);
16689
            r->dp[i + 16] = l;
16690
            l = h;
16691
            h = 0;
16692
        }
16693
        /* Remaining z1 digits: r->dp[32..48] are assigned (z1 digit plus
         * carry), their previous contents are not read. */
        for (; i < 33; i++) {
16694
            SP_ASM_ADDC(l, h, z1->dp[i]);
16695
            r->dp[i + 16] = l;
16696
            l = h;
16697
            h = 0;
16698
        }
16699
        /* r += z2 << 32 */
16700
        l = 0;
16701
        h = 0;
16702
        /* r->dp[32..48] were set by the previous step: accumulate into them. */
        for (i = 0; i < 17; i++) {
16703
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
16704
            SP_ASM_ADDC(l, h, z2->dp[i]);
16705
            r->dp[i + 32] = l;
16706
            l = h;
16707
            h = 0;
16708
        }
16709
        /* r->dp[49..63] are assigned from the remaining z2 digits plus carry. */
        for (; i < 32; i++) {
16710
            SP_ASM_ADDC(l, h, z2->dp[i]);
16711
            r->dp[i + 32] = l;
16712
            l = h;
16713
            h = 0;
16714
        }
16715
        r->used = 64;
16716
        sp_clamp(r);
16717
    }
16718
16719
    FREE_SP_INT_ARRAY(z, NULL);
16720
    FREE_SP_INT(a1, NULL);
16721
    return err;
16722
}
16723
    #endif /* SP_INT_DIGITS >= 64 */
16724
16725
    #if SP_INT_DIGITS >= 96
16726
/* Square a and store in r. r = a * a
16727
 *
16728
 * Karatsuba implementation.
 * a is handled as two 24-digit halves, a0 (low) and a1 (high). Three 24-digit
 * squarings - a1^2, a0^2 and (a0 + a1)^2 - are combined into a 96-digit
 * result. Shift counts in the comments below are in digits, not bits.
 * a->used is not checked here; presumably the caller only passes 48-digit
 * values (TODO confirm against the caller).
16729
 *
16730
 * @param  [in]   a  SP integer to square.
16731
 * @param  [out]  r  SP integer result. Needs room for 96 digits.
16732
 *
16733
 * @return  MP_OKAY on success.
16734
 * @return  MP_MEM when dynamic memory allocation fails.
16735
 */
16736
static int _sp_sqr_48(const sp_int* a, sp_int* r)
16737
{
16738
    int err = MP_OKAY;
16739
    unsigned int i;
16740
    sp_int_digit l;
16741
    sp_int_digit h;
16742
    sp_int* z0;
16743
    sp_int* z1;
16744
    sp_int* z2;
16745
    sp_int_digit ca;
16746
    DECL_SP_INT(a1, 24);
16747
    DECL_SP_INT_ARRAY(z, 49, 2);
16748
16749
    ALLOC_SP_INT(a1, 24, err, NULL);
16750
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
16751
    if (err == MP_OKAY) {
16752
        z1 = z[0];
16753
        z2 = z[1];
16754
        /* z0 is built directly in the result. */
        z0 = r;
16755
16756
        /* a1 = high 24 digits of a. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
16757
        a1->used = 24;
16758
16759
        /* z2 = a1 ^ 2 */
16760
        err = _sp_sqr_24(a1, z2);
16761
    }
16762
    if (err == MP_OKAY) {
16763
        /* a1 = a0 + a1 (low 24 digits); the carry out is kept in ca. */
        l = 0;
16764
        h = 0;
16765
        for (i = 0; i < 24; i++) {
16766
            SP_ASM_ADDC(l, h, a1->dp[i]);
16767
            SP_ASM_ADDC(l, h, a->dp[i]);
16768
            a1->dp[i] = l;
16769
            l = h;
16770
            h = 0;
16771
        }
16772
        ca = l;
16773
16774
        /* z0 = a0 ^ 2 */
16775
        err = _sp_sqr_24(a, z0);
16776
    }
16777
    if (err == MP_OKAY) {
16778
        /* z1 = (a0 + a1) ^ 2 - the carry in ca is accounted for below */
16779
        err = _sp_sqr_24(a1, z1);
16780
    }
16781
    if (err == MP_OKAY) {
16782
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
16783
        /* r = z0 */
16784
        /* r += (z1 - z0 - z2) << 24 */
16785
        /* Carry correction: with s the 24-digit sum held in a1,
         * (s + (ca << 24))^2 = s^2 + ((2 * ca * s) << 24) + (ca << 48).
         * Digit 48 of z1 takes ca and, when ca is set, s is added twice
         * starting at digit 24. */
        z1->dp[48] = ca;
16786
        l = 0;
16787
        if (ca) {
16788
            l = z1->dp[0 + 24];
16789
            h = 0;
16790
            SP_ASM_ADDC(l, h, a1->dp[0]);
16791
            SP_ASM_ADDC(l, h, a1->dp[0]);
16792
            z1->dp[0 + 24] = l;
16793
            l = h;
16794
            h = 0;
16795
            for (i = 1; i < 24; i++) {
16796
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
16797
                SP_ASM_ADDC(l, h, a1->dp[i]);
16798
                SP_ASM_ADDC(l, h, a1->dp[i]);
16799
                z1->dp[i + 24] = l;
16800
                l = h;
16801
                h = 0;
16802
            }
16803
        }
16804
        /* Fold the final carry of the correction into the top digit. */
        z1->dp[48] += l;
16805
        /* z1 = z1 - z0 - z2 */
16806
        l = z1->dp[0];
16807
        h = 0;
16808
        SP_ASM_SUBB(l, h, z0->dp[0]);
16809
        SP_ASM_SUBB(l, h, z2->dp[0]);
16810
        z1->dp[0] = l;
16811
        l = h;
16812
        h = 0;
16813
        for (i = 1; i < 48; i++) {
16814
            /* NOTE(review): l holds the borrow from the previous digit and
             * is combined with a plain addition, so nothing is recorded in h
             * at this step. Confirm that a pending borrow cannot be lost
             * when z1->dp[i] is smaller than that borrow. */
            l += z1->dp[i];
16815
            SP_ASM_SUBB(l, h, z0->dp[i]);
16816
            SP_ASM_SUBB(l, h, z2->dp[i]);
16817
            z1->dp[i] = l;
16818
            l = h;
16819
            h = 0;
16820
        }
16821
        /* i == 48 here: apply the last borrow to the top digit of z1. */
        z1->dp[i] += l;
16822
        /* r += z1 << 24 */
16823
        l = 0;
16824
        h = 0;
16825
        for (i = 0; i < 24; i++) {
16826
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
16827
            SP_ASM_ADDC(l, h, z1->dp[i]);
16828
            r->dp[i + 24] = l;
16829
            l = h;
16830
            h = 0;
16831
        }
16832
        /* Remaining z1 digits: r->dp[48..72] are assigned (z1 digit plus
         * carry), their previous contents are not read. */
        for (; i < 49; i++) {
16833
            SP_ASM_ADDC(l, h, z1->dp[i]);
16834
            r->dp[i + 24] = l;
16835
            l = h;
16836
            h = 0;
16837
        }
16838
        /* r += z2 << 48 */
16839
        l = 0;
16840
        h = 0;
16841
        /* r->dp[48..72] were set by the previous step: accumulate into them. */
        for (i = 0; i < 25; i++) {
16842
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
16843
            SP_ASM_ADDC(l, h, z2->dp[i]);
16844
            r->dp[i + 48] = l;
16845
            l = h;
16846
            h = 0;
16847
        }
16848
        /* r->dp[73..95] are assigned from the remaining z2 digits plus carry. */
        for (; i < 48; i++) {
16849
            SP_ASM_ADDC(l, h, z2->dp[i]);
16850
            r->dp[i + 48] = l;
16851
            l = h;
16852
            h = 0;
16853
        }
16854
        r->used = 96;
16855
        sp_clamp(r);
16856
    }
16857
16858
    FREE_SP_INT_ARRAY(z, NULL);
16859
    FREE_SP_INT(a1, NULL);
16860
    return err;
16861
}
16862
    #endif /* SP_INT_DIGITS >= 96 */
16863
16864
    #if SP_INT_DIGITS >= 128
16865
/* Square a and store in r. r = a * a
16866
 *
16867
 * Karatsuba implementation.
 * a is handled as two 32-digit halves, a0 (low) and a1 (high). Three 32-digit
 * squarings - a1^2, a0^2 and (a0 + a1)^2 - are combined into a 128-digit
 * result. Shift counts in the comments below are in digits, not bits.
 * a->used is not checked here; presumably the caller only passes 64-digit
 * values (TODO confirm against the caller).
16868
 *
16869
 * @param  [in]   a  SP integer to square.
16870
 * @param  [out]  r  SP integer result. Needs room for 128 digits.
16871
 *
16872
 * @return  MP_OKAY on success.
16873
 * @return  MP_MEM when dynamic memory allocation fails.
16874
 */
16875
static int _sp_sqr_64(const sp_int* a, sp_int* r)
16876
{
16877
    int err = MP_OKAY;
16878
    unsigned int i;
16879
    sp_int_digit l;
16880
    sp_int_digit h;
16881
    sp_int* z0;
16882
    sp_int* z1;
16883
    sp_int* z2;
16884
    sp_int_digit ca;
16885
    DECL_SP_INT(a1, 32);
16886
    DECL_SP_INT_ARRAY(z, 65, 2);
16887
16888
    ALLOC_SP_INT(a1, 32, err, NULL);
16889
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
16890
    if (err == MP_OKAY) {
16891
        z1 = z[0];
16892
        z2 = z[1];
16893
        /* z0 is built directly in the result. */
        z0 = r;
16894
16895
        /* a1 = high 32 digits of a. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
16896
        a1->used = 32;
16897
16898
        /* z2 = a1 ^ 2 */
16899
        err = _sp_sqr_32(a1, z2);
16900
    }
16901
    if (err == MP_OKAY) {
16902
        /* a1 = a0 + a1 (low 32 digits); the carry out is kept in ca. */
        l = 0;
16903
        h = 0;
16904
        for (i = 0; i < 32; i++) {
16905
            SP_ASM_ADDC(l, h, a1->dp[i]);
16906
            SP_ASM_ADDC(l, h, a->dp[i]);
16907
            a1->dp[i] = l;
16908
            l = h;
16909
            h = 0;
16910
        }
16911
        ca = l;
16912
16913
        /* z0 = a0 ^ 2 */
16914
        err = _sp_sqr_32(a, z0);
16915
    }
16916
    if (err == MP_OKAY) {
16917
        /* z1 = (a0 + a1) ^ 2 - the carry in ca is accounted for below */
16918
        err = _sp_sqr_32(a1, z1);
16919
    }
16920
    if (err == MP_OKAY) {
16921
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
16922
        /* r = z0 */
16923
        /* r += (z1 - z0 - z2) << 32 */
16924
        /* Carry correction: with s the 32-digit sum held in a1,
         * (s + (ca << 32))^2 = s^2 + ((2 * ca * s) << 32) + (ca << 64).
         * Digit 64 of z1 takes ca and, when ca is set, s is added twice
         * starting at digit 32. */
        z1->dp[64] = ca;
16925
        l = 0;
16926
        if (ca) {
16927
            l = z1->dp[0 + 32];
16928
            h = 0;
16929
            SP_ASM_ADDC(l, h, a1->dp[0]);
16930
            SP_ASM_ADDC(l, h, a1->dp[0]);
16931
            z1->dp[0 + 32] = l;
16932
            l = h;
16933
            h = 0;
16934
            for (i = 1; i < 32; i++) {
16935
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
16936
                SP_ASM_ADDC(l, h, a1->dp[i]);
16937
                SP_ASM_ADDC(l, h, a1->dp[i]);
16938
                z1->dp[i + 32] = l;
16939
                l = h;
16940
                h = 0;
16941
            }
16942
        }
16943
        /* Fold the final carry of the correction into the top digit. */
        z1->dp[64] += l;
16944
        /* z1 = z1 - z0 - z2 */
16945
        l = z1->dp[0];
16946
        h = 0;
16947
        SP_ASM_SUBB(l, h, z0->dp[0]);
16948
        SP_ASM_SUBB(l, h, z2->dp[0]);
16949
        z1->dp[0] = l;
16950
        l = h;
16951
        h = 0;
16952
        for (i = 1; i < 64; i++) {
16953
            /* NOTE(review): l holds the borrow from the previous digit and
             * is combined with a plain addition, so nothing is recorded in h
             * at this step. Confirm that a pending borrow cannot be lost
             * when z1->dp[i] is smaller than that borrow. */
            l += z1->dp[i];
16954
            SP_ASM_SUBB(l, h, z0->dp[i]);
16955
            SP_ASM_SUBB(l, h, z2->dp[i]);
16956
            z1->dp[i] = l;
16957
            l = h;
16958
            h = 0;
16959
        }
16960
        /* i == 64 here: apply the last borrow to the top digit of z1. */
        z1->dp[i] += l;
16961
        /* r += z1 << 32 */
16962
        l = 0;
16963
        h = 0;
16964
        for (i = 0; i < 32; i++) {
16965
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
16966
            SP_ASM_ADDC(l, h, z1->dp[i]);
16967
            r->dp[i + 32] = l;
16968
            l = h;
16969
            h = 0;
16970
        }
16971
        /* Remaining z1 digits: r->dp[64..96] are assigned (z1 digit plus
         * carry), their previous contents are not read. */
        for (; i < 65; i++) {
16972
            SP_ASM_ADDC(l, h, z1->dp[i]);
16973
            r->dp[i + 32] = l;
16974
            l = h;
16975
            h = 0;
16976
        }
16977
        /* r += z2 << 64 */
16978
        l = 0;
16979
        h = 0;
16980
        /* r->dp[64..96] were set by the previous step: accumulate into them. */
        for (i = 0; i < 33; i++) {
16981
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
16982
            SP_ASM_ADDC(l, h, z2->dp[i]);
16983
            r->dp[i + 64] = l;
16984
            l = h;
16985
            h = 0;
16986
        }
16987
        /* r->dp[97..127] are assigned from the remaining z2 digits plus
         * carry. */
        for (; i < 64; i++) {
16988
            SP_ASM_ADDC(l, h, z2->dp[i]);
16989
            r->dp[i + 64] = l;
16990
            l = h;
16991
            h = 0;
16992
        }
16993
        r->used = 128;
16994
        sp_clamp(r);
16995
    }
16996
16997
    FREE_SP_INT_ARRAY(z, NULL);
16998
    FREE_SP_INT(a1, NULL);
16999
    return err;
17000
}
17001
    #endif /* SP_INT_DIGITS >= 128 */
17002
17003
    #if SP_INT_DIGITS >= 192
17004
/* Square a and store in r. r = a * a
17005
 *
17006
 * Karatsuba implementation.
 * a is handled as two 48-digit halves, a0 (low) and a1 (high). Three 48-digit
 * squarings - a1^2, a0^2 and (a0 + a1)^2 - are combined into a 192-digit
 * result. Shift counts in the comments below are in digits, not bits.
 * a->used is not checked here; presumably the caller only passes 96-digit
 * values (TODO confirm against the caller).
17007
 *
17008
 * @param  [in]   a  SP integer to square.
17009
 * @param  [out]  r  SP integer result. Needs room for 192 digits.
17010
 *
17011
 * @return  MP_OKAY on success.
17012
 * @return  MP_MEM when dynamic memory allocation fails.
17013
 */
17014
static int _sp_sqr_96(const sp_int* a, sp_int* r)
17015
{
17016
    int err = MP_OKAY;
17017
    unsigned int i;
17018
    sp_int_digit l;
17019
    sp_int_digit h;
17020
    sp_int* z0;
17021
    sp_int* z1;
17022
    sp_int* z2;
17023
    sp_int_digit ca;
17024
    DECL_SP_INT(a1, 48);
17025
    DECL_SP_INT_ARRAY(z, 97, 2);
17026
17027
    ALLOC_SP_INT(a1, 48, err, NULL);
17028
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
17029
    if (err == MP_OKAY) {
17030
        z1 = z[0];
17031
        z2 = z[1];
17032
        /* z0 is built directly in the result. */
        z0 = r;
17033
17034
        /* a1 = high 48 digits of a. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
17035
        a1->used = 48;
17036
17037
        /* z2 = a1 ^ 2 */
17038
        err = _sp_sqr_48(a1, z2);
17039
    }
17040
    if (err == MP_OKAY) {
17041
        /* a1 = a0 + a1 (low 48 digits); the carry out is kept in ca. */
        l = 0;
17042
        h = 0;
17043
        for (i = 0; i < 48; i++) {
17044
            SP_ASM_ADDC(l, h, a1->dp[i]);
17045
            SP_ASM_ADDC(l, h, a->dp[i]);
17046
            a1->dp[i] = l;
17047
            l = h;
17048
            h = 0;
17049
        }
17050
        ca = l;
17051
17052
        /* z0 = a0 ^ 2 */
17053
        err = _sp_sqr_48(a, z0);
17054
    }
17055
    if (err == MP_OKAY) {
17056
        /* z1 = (a0 + a1) ^ 2 - the carry in ca is accounted for below */
17057
        err = _sp_sqr_48(a1, z1);
17058
    }
17059
    if (err == MP_OKAY) {
17060
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
17061
        /* r = z0 */
17062
        /* r += (z1 - z0 - z2) << 48 */
17063
        /* Carry correction: with s the 48-digit sum held in a1,
         * (s + (ca << 48))^2 = s^2 + ((2 * ca * s) << 48) + (ca << 96).
         * Digit 96 of z1 takes ca and, when ca is set, s is added twice
         * starting at digit 48. */
        z1->dp[96] = ca;
17064
        l = 0;
17065
        if (ca) {
17066
            l = z1->dp[0 + 48];
17067
            h = 0;
17068
            SP_ASM_ADDC(l, h, a1->dp[0]);
17069
            SP_ASM_ADDC(l, h, a1->dp[0]);
17070
            z1->dp[0 + 48] = l;
17071
            l = h;
17072
            h = 0;
17073
            for (i = 1; i < 48; i++) {
17074
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
17075
                SP_ASM_ADDC(l, h, a1->dp[i]);
17076
                SP_ASM_ADDC(l, h, a1->dp[i]);
17077
                z1->dp[i + 48] = l;
17078
                l = h;
17079
                h = 0;
17080
            }
17081
        }
17082
        /* Fold the final carry of the correction into the top digit. */
        z1->dp[96] += l;
17083
        /* z1 = z1 - z0 - z2 */
17084
        l = z1->dp[0];
17085
        h = 0;
17086
        SP_ASM_SUBB(l, h, z0->dp[0]);
17087
        SP_ASM_SUBB(l, h, z2->dp[0]);
17088
        z1->dp[0] = l;
17089
        l = h;
17090
        h = 0;
17091
        for (i = 1; i < 96; i++) {
17092
            /* NOTE(review): l holds the borrow from the previous digit and
             * is combined with a plain addition, so nothing is recorded in h
             * at this step. Confirm that a pending borrow cannot be lost
             * when z1->dp[i] is smaller than that borrow. */
            l += z1->dp[i];
17093
            SP_ASM_SUBB(l, h, z0->dp[i]);
17094
            SP_ASM_SUBB(l, h, z2->dp[i]);
17095
            z1->dp[i] = l;
17096
            l = h;
17097
            h = 0;
17098
        }
17099
        /* i == 96 here: apply the last borrow to the top digit of z1. */
        z1->dp[i] += l;
17100
        /* r += z1 << 48 */
17101
        l = 0;
17102
        h = 0;
17103
        for (i = 0; i < 48; i++) {
17104
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
17105
            SP_ASM_ADDC(l, h, z1->dp[i]);
17106
            r->dp[i + 48] = l;
17107
            l = h;
17108
            h = 0;
17109
        }
17110
        /* Remaining z1 digits: r->dp[96..144] are assigned (z1 digit plus
         * carry), their previous contents are not read. */
        for (; i < 97; i++) {
17111
            SP_ASM_ADDC(l, h, z1->dp[i]);
17112
            r->dp[i + 48] = l;
17113
            l = h;
17114
            h = 0;
17115
        }
17116
        /* r += z2 << 96 */
17117
        l = 0;
17118
        h = 0;
17119
        /* r->dp[96..144] were set by the previous step: accumulate into
         * them. */
        for (i = 0; i < 49; i++) {
17120
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
17121
            SP_ASM_ADDC(l, h, z2->dp[i]);
17122
            r->dp[i + 96] = l;
17123
            l = h;
17124
            h = 0;
17125
        }
17126
        /* r->dp[145..191] are assigned from the remaining z2 digits plus
         * carry. */
        for (; i < 96; i++) {
17127
            SP_ASM_ADDC(l, h, z2->dp[i]);
17128
            r->dp[i + 96] = l;
17129
            l = h;
17130
            h = 0;
17131
        }
17132
        r->used = 192;
17133
        sp_clamp(r);
17134
    }
17135
17136
    FREE_SP_INT_ARRAY(z, NULL);
17137
    FREE_SP_INT(a1, NULL);
17138
    return err;
17139
}
17140
    #endif /* SP_INT_DIGITS >= 192 */
17141
17142
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17143
#endif /* !WOLFSSL_SP_SMALL */
17144
17145
/* Square a and store in r. r = a * a
17146
 *
17147
 * @param  [in]   a  SP integer to square.
17148
 * @param  [out]  r  SP integer result.
17149
 *
17150
 * @return  MP_OKAY on success.
17151
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
17152
 *          data length.
17153
 * @return  MP_MEM when dynamic memory allocation fails.
17154
 */
17155
int sp_sqr(const sp_int* a, sp_int* r)
17156
0
{
17157
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
17158
    return sp_mul(a, a, r);
17159
#else
17160
0
    int err = MP_OKAY;
17161
17162
0
    if ((a == NULL) || (r == NULL)) {
17163
0
        err = MP_VAL;
17164
0
    }
17165
    /* Need extra digit during calculation. */
17166
0
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
17167
0
        err = MP_VAL;
17168
0
    }
17169
17170
#if 0
17171
    if (err == MP_OKAY) {
17172
        sp_print(a, "a");
17173
    }
17174
#endif
17175
17176
0
    if (err == MP_OKAY) {
17177
0
        if (a->used == 0) {
17178
0
            _sp_zero(r);
17179
0
        }
17180
0
    else
17181
0
#ifndef WOLFSSL_SP_SMALL
17182
0
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
17183
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
17184
0
        if (a->used == 4) {
17185
0
            err = _sp_sqr_4(a, r);
17186
0
        }
17187
0
        else
17188
0
#endif /* SP_WORD_SIZE == 64 */
17189
0
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
17190
0
#ifdef SQR_MUL_ASM
17191
0
        if (a->used == 6) {
17192
0
            err = _sp_sqr_6(a, r);
17193
0
        }
17194
0
        else
17195
0
#endif /* SQR_MUL_ASM */
17196
0
#endif /* SP_WORD_SIZE == 64 */
17197
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
17198
#ifdef SQR_MUL_ASM
17199
        if (a->used == 8) {
17200
            err = _sp_sqr_8(a, r);
17201
        }
17202
        else
17203
#endif /* SQR_MUL_ASM */
17204
#endif /* SP_WORD_SIZE == 32 */
17205
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
17206
#ifdef SQR_MUL_ASM
17207
        if (a->used == 12) {
17208
            err = _sp_sqr_12(a, r);
17209
        }
17210
        else
17211
#endif /* SQR_MUL_ASM */
17212
#endif /* SP_WORD_SIZE == 32 */
17213
0
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
17214
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
17215
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
17216
    (SP_WORD_SIZE == 64)))
17217
    #if SP_INT_DIGITS >= 32
17218
        if (a->used == 16) {
17219
            err = _sp_sqr_16(a, r);
17220
        }
17221
        else
17222
    #endif /* SP_INT_DIGITS >= 32 */
17223
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
17224
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
17225
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
17226
    #if SP_INT_DIGITS >= 48
17227
        if (a->used == 24) {
17228
            err = _sp_sqr_24(a, r);
17229
        }
17230
        else
17231
    #endif /* SP_INT_DIGITS >= 48 */
17232
    #if SP_INT_DIGITS >= 64
17233
        if (a->used == 32) {
17234
            err = _sp_sqr_32(a, r);
17235
        }
17236
        else
17237
    #endif /* SP_INT_DIGITS >= 64 */
17238
    #if SP_INT_DIGITS >= 96
17239
        if (a->used == 48) {
17240
            err = _sp_sqr_48(a, r);
17241
        }
17242
        else
17243
    #endif /* SP_INT_DIGITS >= 96 */
17244
    #if SP_INT_DIGITS >= 128
17245
        if (a->used == 64) {
17246
            err = _sp_sqr_64(a, r);
17247
        }
17248
        else
17249
    #endif /* SP_INT_DIGITS >= 128 */
17250
    #if SP_INT_DIGITS >= 192
17251
        if (a->used == 96) {
17252
            err = _sp_sqr_96(a, r);
17253
        }
17254
        else
17255
    #endif /* SP_INT_DIGITS >= 192 */
17256
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
17257
0
#endif /* !WOLFSSL_SP_SMALL */
17258
0
        {
17259
0
            err = _sp_sqr(a, r);
17260
0
        }
17261
0
    }
17262
17263
#ifdef WOLFSSL_SP_INT_NEGATIVE
17264
    if (err == MP_OKAY) {
17265
        r->sign = MP_ZPOS;
17266
    }
17267
#endif
17268
17269
#if 0
17270
    if (err == MP_OKAY) {
17271
        sp_print(r, "rsqr");
17272
    }
17273
#endif
17274
17275
0
    return err;
17276
0
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
17277
0
}
17278
/* END SP_SQR implementations */
17279
17280
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
17281
        * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
17282
17283
#if defined(WOLFSSL_SP_MATH_ALL) || \
17284
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
17285
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
17286
/* Square a mod m and store in r: r = (a * a) mod m
17287
 *
17288
 * @param  [in]   a  SP integer to square.
17289
 * @param  [in]   m  SP integer that is the modulus.
17290
 * @param  [out]  r  SP integer result.
17291
 *
17292
 * @return  MP_OKAY on success.
17293
 * @return  MP_MEM when dynamic memory allocation fails.
17294
 */
17295
static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17296
0
{
17297
0
    int err = MP_OKAY;
17298
    /* Create temporary for multiplication result. */
17299
0
    DECL_SP_INT(t, a->used * 2);
17300
17301
0
    ALLOC_SP_INT(t, a->used * 2, err, NULL);
17302
0
    if (err == MP_OKAY) {
17303
0
        err = sp_init_size(t, a->used * 2U);
17304
0
    }
17305
17306
    /* Square and reduce. */
17307
0
    if (err == MP_OKAY) {
17308
0
        err = sp_sqr(a, t);
17309
0
    }
17310
0
    if (err == MP_OKAY) {
17311
0
        err = sp_mod(t, m, r);
17312
0
    }
17313
17314
    /* Dispose of an allocated SP int. */
17315
0
    FREE_SP_INT(t, NULL);
17316
0
    return err;
17317
0
}
17318
17319
/* Square a mod m and store in r: r = (a * a) mod m
17320
 *
17321
 * @param  [in]   a  SP integer to square.
17322
 * @param  [in]   m  SP integer that is the modulus.
17323
 * @param  [out]  r  SP integer result.
17324
 *
17325
 * @return  MP_OKAY on success.
17326
 * @return  MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
17327
 *          for fixed data length.
17328
 * @return  MP_MEM when dynamic memory allocation fails.
17329
 */
17330
int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
17331
0
{
17332
0
    int err = MP_OKAY;
17333
17334
    /* Validate parameters. */
17335
0
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
17336
0
        err = MP_VAL;
17337
0
    }
17338
    /* Ensure r has space for intermediate result. */
17339
0
    if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
17340
0
        err = MP_VAL;
17341
0
    }
17342
    /* Ensure a is not too big. */
17343
0
    if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
17344
0
        err = MP_VAL;
17345
0
    }
17346
17347
    /* Use r as intermediate result if not same as pointer m which is needed
17348
     * after first intermediate result.
17349
     */
17350
0
    if ((err == MP_OKAY) && (r != m)) {
17351
        /* Square and reduce. */
17352
0
        err = sp_sqr(a, r);
17353
0
        if (err == MP_OKAY) {
17354
0
            err = sp_mod(r, m, r);
17355
0
        }
17356
0
    }
17357
0
    else if (err == MP_OKAY) {
17358
        /* Do operation with temporary. */
17359
0
        err = _sp_sqrmod(a, m, r);
17360
0
    }
17361
17362
0
    return err;
17363
0
}
17364
#endif /* !WOLFSSL_RSA_VERIFY_ONLY */
17365
17366
/**********************
17367
 * Montgomery functions
17368
 **********************/
17369
17370
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
17371
    defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
17372
    defined(OPENSSL_ALL)
17373
/* Reduce a number in Montgomery form.
17374
 *
17375
 * Assumes a and m are not NULL and m is not 0.
17376
 *
17377
 * DigitMask(a,i) := mask out the 'i'th digit in place.
17378
 *
17379
 * Algorithm:
17380
 *  1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17381
 *  2. For i = 0..NumDigits(m)-1
17382
 *   2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
17383
 *   2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
17384
 *   2.3. a += mu * DigitMask(m, 0)
17385
 *   2.4. For j = 1 up to NumDigits(m)-2
17386
 *    2.4.1 a += mu * DigitMask(m, j)
17387
 *   2.5 a += mu * DigitMask(m, NumDigits(m)-1))
17388
 * 3. a >>= NumBits(m)
17389
 * 4. a = a % m
17390
 *
17391
 * @param  [in,out]  a   SP integer to Montgomery reduce.
17392
 * @param  [in]      m   SP integer that is the modulus.
17393
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
17394
 * @param  [in]      ct  Indicates operation must be constant time.
17395
 *
17396
 * @return  MP_OKAY on success.
17397
 */
17398
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17399
0
{
17400
#if !defined(SQR_MUL_ASM)
17401
    unsigned int i;
17402
    int bits;
17403
    sp_int_word w;
17404
    sp_int_digit mu;
17405
17406
#if 0
17407
    sp_print(a, "a");
17408
    sp_print(m, "m");
17409
#endif
17410
17411
    /* Count bits in modulus. */
17412
    bits = sp_count_bits(m);
17413
17414
    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
17415
#ifndef WOLFSSL_NO_CT_OPS
17416
    if (ct) {
17417
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17418
            a->dp[i] &=
17419
                (sp_int_digit)
17420
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
17421
        }
17422
    }
17423
    else
17424
#endif /* !WOLFSSL_NO_CT_OPS */
17425
    {
17426
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17427
            a->dp[i] = 0;
17428
        }
17429
    }
17430
17431
    /* Special case when modulus is 1 digit or less. */
17432
    if (m->used <= 1) {
17433
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17434
        mu = mp * a->dp[0];
17435
        /* a += mu * m */
17436
        w = a->dp[0];
17437
        w += (sp_int_word)mu * m->dp[0];
17438
        a->dp[0] = (sp_int_digit)w;
17439
        w >>= SP_WORD_SIZE;
17440
        w += a->dp[1];
17441
        a->dp[1] = (sp_int_digit)w;
17442
        w >>= SP_WORD_SIZE;
17443
        a->dp[2] = (sp_int_digit)w;
17444
        a->used = 3;
17445
        /* mp is SP_WORD_SIZE */
17446
        bits = SP_WORD_SIZE;
17447
    }
17448
    else {
17449
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
17450
         *    Mask when last digit of modulus doesn't have highest bit set.
17451
         */
17452
        sp_int_digit mask = (sp_int_digit)
17453
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
17454
        /* Overflow. */
17455
        sp_int_word o = 0;
17456
17457
        /* 2. For i = 0..NumDigits(m)-1 */
17458
        for (i = 0; i < m->used; i++) {
17459
            unsigned int j;
17460
17461
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17462
            mu = mp * a->dp[i];
17463
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17464
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17465
                mu &= mask;
17466
            }
17467
17468
            /* 2.3. a += mu * DigitMask(m, 0) */
17469
            w = a->dp[i];
17470
            w += (sp_int_word)mu * m->dp[0];
17471
            a->dp[i] = (sp_int_digit)w;
17472
            w >>= SP_WORD_SIZE;
17473
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17474
            for (j = 1; j < (unsigned int)m->used - 1; j++) {
17475
                /* 2.4.1 a += mu * DigitMask(m, j) */
17476
                w += a->dp[i + j];
17477
                w += (sp_int_word)mu * m->dp[j];
17478
                a->dp[i + j] = (sp_int_digit)w;
17479
                w >>= SP_WORD_SIZE;
17480
            }
17481
            /* Handle overflow. */
17482
            w += o;
17483
            w += a->dp[i + j];
17484
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
17485
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1)) */
17486
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
17487
            a->dp[i + j] = (sp_int_digit)w;
17488
            w >>= SP_WORD_SIZE;
17489
            o += w;
17490
        }
17491
        /* Handle overflow. */
17492
        o += a->dp[m->used * 2 - 1];
17493
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
17494
        o >>= SP_WORD_SIZE;
17495
        a->dp[m->used * 2] = (sp_int_digit)o;
17496
        a->used = (sp_size_t)(m->used * 2 + 1);
17497
    }
17498
17499
    if (!ct) {
17500
        /* Remove leading zeros. */
17501
        sp_clamp(a);
17502
        /* 3. a >>= NumBits(m) */
17503
        (void)sp_rshb(a, bits, a);
17504
        /* 4. a = a mod m */
17505
        if (_sp_cmp_abs(a, m) != MP_LT) {
17506
            _sp_sub_off(a, m, a, 0);
17507
        }
17508
    }
17509
    else {
17510
        /* 3. a >>= NumBits(m) */
17511
        (void)sp_rshb(a, bits, a);
17512
        /* Constant time clamping. */
17513
        sp_clamp_ct(a);
17514
17515
        /* 4. a = a mod m
17516
         * Always subtract but at a too high offset if a is less than m.
17517
         */
17518
        _sp_submod_ct(a, m, m, m->used + 1U, a);
17519
    }
17520
17521
17522
#if 0
17523
    sp_print(a, "rr");
17524
#endif
17525
17526
    return MP_OKAY;
17527
#else /* !SQR_MUL_ASM */
17528
0
    unsigned int i;
17529
0
    unsigned int j;
17530
0
    int bits;
17531
0
    sp_int_digit mu;
17532
0
    sp_int_digit o;
17533
0
    sp_int_digit mask;
17534
17535
#if 0
17536
    sp_print(a, "a");
17537
    sp_print(m, "m");
17538
#endif
17539
17540
0
    bits = sp_count_bits(m);
17541
0
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;
17542
17543
0
#ifndef WOLFSSL_NO_CT_OPS
17544
0
    if (ct) {
17545
0
        for (i = 0; i < (unsigned int)m->used * 2; i++) {
17546
0
            a->dp[i] &=
17547
0
                (sp_int_digit)
17548
0
                (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i);
17549
0
        }
17550
0
    }
17551
0
    else
17552
0
#endif
17553
0
    {
17554
0
        for (i = a->used; i < (unsigned int)m->used * 2; i++) {
17555
0
            a->dp[i] = 0;
17556
0
        }
17557
0
    }
17558
17559
0
    if (m->used <= 1) {
17560
0
        sp_int_digit l;
17561
0
        sp_int_digit h;
17562
17563
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17564
0
        mu = mp * a->dp[0];
17565
        /* a += mu * m */
17566
0
        l = a->dp[0];
17567
0
        h = 0;
17568
0
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17569
0
        a->dp[0] = l;
17570
0
        l = h;
17571
0
        h = 0;
17572
0
        SP_ASM_ADDC(l, h, a->dp[1]);
17573
0
        a->dp[1] = l;
17574
0
        a->dp[2] = h;
17575
0
        a->used = (sp_size_t)(m->used * 2 + 1);
17576
        /* mp is SP_WORD_SIZE */
17577
0
        bits = SP_WORD_SIZE;
17578
0
    }
17579
0
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
17580
0
#if SP_WORD_SIZE == 64
17581
0
#if SP_INT_DIGITS >= 8
17582
0
    else if ((m->used == 4) && (mask == 0)) {
17583
0
        sp_int_digit l;
17584
0
        sp_int_digit h;
17585
0
        sp_int_digit o2;
17586
17587
0
        l = 0;
17588
0
        h = 0;
17589
0
        o = 0;
17590
0
        o2 = 0;
17591
        /* For i = 0..NumDigits(m)-1 */
17592
0
        for (i = 0; i < 4; i++) {
17593
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17594
0
            mu = mp * a->dp[0];
17595
0
            l = a->dp[0];
17596
            /* a = (a + mu * m) >> WORD_SIZE */
17597
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17598
0
            l = h;
17599
0
            h = 0;
17600
0
            SP_ASM_ADDC(l, h, a->dp[1]);
17601
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17602
0
            a->dp[0] = l;
17603
0
            l = h;
17604
0
            h = 0;
17605
0
            SP_ASM_ADDC(l, h, a->dp[2]);
17606
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17607
0
            a->dp[1] = l;
17608
0
            l = h;
17609
0
            h = o2;
17610
0
            o2 = 0;
17611
0
            SP_ASM_ADDC_REG(l, h, o);
17612
0
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
17613
0
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
17614
0
            a->dp[2] = l;
17615
0
            o = h;
17616
0
            l = h;
17617
0
            h = 0;
17618
0
        }
17619
        /* Handle overflow. */
17620
0
        SP_ASM_ADDC(l, o2, a->dp[7]);
17621
0
        a->dp[3] = l;
17622
0
        a->dp[4] = o2;
17623
0
        a->used = 5;
17624
17625
        /* Remove leading zeros. */
17626
0
        sp_clamp(a);
17627
17628
        /* a = a mod m */
17629
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
17630
0
            _sp_sub_off(a, m, a, 0);
17631
0
        }
17632
17633
0
        return MP_OKAY;
17634
0
    }
17635
0
#endif /* SP_INT_DIGITS >= 8 */
17636
0
#if SP_INT_DIGITS >= 12
17637
0
    else if ((m->used == 6) && (mask == 0)) {
17638
0
        sp_int_digit l;
17639
0
        sp_int_digit h;
17640
0
        sp_int_digit o2;
17641
17642
0
        l = 0;
17643
0
        h = 0;
17644
0
        o = 0;
17645
0
        o2 = 0;
17646
        /* For i = 0..NumDigits(m)-1 */
17647
0
        for (i = 0; i < 6; i++) {
17648
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
17649
0
            mu = mp * a->dp[0];
17650
0
            l = a->dp[0];
17651
            /* a = (a + mu * m) >> WORD_SIZE */
17652
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
17653
0
            l = h;
17654
0
            h = 0;
17655
0
            SP_ASM_ADDC(l, h, a->dp[1]);
17656
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
17657
0
            a->dp[0] = l;
17658
0
            l = h;
17659
0
            h = 0;
17660
0
            SP_ASM_ADDC(l, h, a->dp[2]);
17661
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
17662
0
            a->dp[1] = l;
17663
0
            l = h;
17664
0
            h = 0;
17665
0
            SP_ASM_ADDC(l, h, a->dp[3]);
17666
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
17667
0
            a->dp[2] = l;
17668
0
            l = h;
17669
0
            h = 0;
17670
0
            SP_ASM_ADDC(l, h, a->dp[4]);
17671
0
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
17672
0
            a->dp[3] = l;
17673
0
            l = h;
17674
0
            h = o2;
17675
0
            o2 = 0;
17676
0
            SP_ASM_ADDC_REG(l, h, o);
17677
0
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
17678
0
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
17679
0
            a->dp[4] = l;
17680
0
            o = h;
17681
0
            l = h;
17682
0
            h = 0;
17683
0
        }
17684
        /* Handle overflow. */
17685
0
        SP_ASM_ADDC(l, o2, a->dp[11]);
17686
0
        a->dp[5] = l;
17687
0
        a->dp[6] = o2;
17688
0
        a->used = 7;
17689
17690
        /* Remove leading zeros. */
17691
0
        sp_clamp(a);
17692
17693
        /* a = a mod m */
17694
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
17695
0
            _sp_sub_off(a, m, a, 0);
17696
0
        }
17697
17698
0
        return MP_OKAY;
17699
0
    }
17700
0
#endif /* SP_INT_DIGITS >= 12 */
17701
#elif SP_WORD_SIZE == 32
17702
    else if ((m->used <= 12) && (mask == 0)) {
17703
        sp_int_digit l;
17704
        sp_int_digit h;
17705
        sp_int_digit o2;
17706
        sp_int_digit* ad;
17707
        const sp_int_digit* md;
17708
17709
        o = 0;
17710
        o2 = 0;
17711
        ad = a->dp;
17712
        /* For i = 0..NumDigits(m)-1 */
17713
        for (i = 0; i < m->used; i++) {
17714
            md = m->dp;
17715
            /*  mu = (mp * DigitMask(a, i)) & WORD_MASK */
17716
            mu = mp * ad[0];
17717
17718
            /* a = (a + mu * m, 0) >> WORD_SIZE */
17719
            l = ad[0];
17720
            h = 0;
17721
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17722
            l = h;
17723
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17724
                h = 0;
17725
                SP_ASM_ADDC(l, h, ad[j]);
17726
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17727
                ad[j - 1] = l;
17728
                l = 0;
17729
                SP_ASM_ADDC(h, l, ad[j + 1]);
17730
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17731
                ad[j] = h;
17732
            }
17733
            for (; j < (unsigned int)m->used - 1; j++) {
17734
                h = 0;
17735
                SP_ASM_ADDC(l, h, ad[j]);
17736
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17737
                ad[j - 1] = l;
17738
                l = h;
17739
            }
17740
            h = o2;
17741
            o2 = 0;
17742
            SP_ASM_ADDC_REG(l, h, o);
17743
            SP_ASM_ADDC(l, h, ad[i + j]);
17744
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17745
            ad[j - 1] = l;
17746
            o = h;
17747
        }
17748
        /* Handle overflow. */
17749
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17750
        a->dp[m->used  - 1] = o;
17751
        a->dp[m->used] = o2;
17752
        a->used = m->used + 1;
17753
17754
        /* Remove leading zeros. */
17755
        sp_clamp(a);
17756
17757
        /* a = a mod m */
17758
        if (_sp_cmp_abs(a, m) != MP_LT) {
17759
            _sp_sub_off(a, m, a, 0);
17760
        }
17761
17762
        return MP_OKAY;
17763
    }
17764
#endif /* SP_WORD_SIZE == 64 | 32 */
17765
0
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
17766
0
    else {
17767
0
        sp_int_digit l;
17768
0
        sp_int_digit h;
17769
0
        sp_int_digit o2;
17770
0
        sp_int_digit* ad;
17771
0
        const sp_int_digit* md;
17772
17773
0
        o = 0;
17774
0
        o2 = 0;
17775
0
        ad = a->dp;
17776
        /* 2. For i = 0..NumDigits(m)-1 */
17777
0
        for (i = 0; i < m->used; i++, ad++) {
17778
0
            md = m->dp;
17779
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
17780
0
            mu = mp * ad[0];
17781
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
17782
0
            if ((i == (unsigned int)m->used - 1) && (mask != 0)) {
17783
0
                mu &= mask;
17784
0
            }
17785
17786
            /* 2.3 a += mu * DigitMask(m, 0) */
17787
0
            l = ad[0];
17788
0
            h = 0;
17789
0
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17790
0
            ad[0] = l;
17791
0
            l = h;
17792
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
17793
0
            for (j = 1; j < (unsigned int)m->used - 2; j += 2) {
17794
0
                h = 0;
17795
                /* 2.4.1. a += mu * DigitMask(m, j) */
17796
0
                SP_ASM_ADDC(l, h, ad[j + 0]);
17797
0
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17798
0
                ad[j + 0] = l;
17799
0
                l = 0;
17800
                /* 2.4.1. a += mu * DigitMask(m, j) */
17801
0
                SP_ASM_ADDC(h, l, ad[j + 1]);
17802
0
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
17803
0
                ad[j + 1] = h;
17804
0
            }
17805
0
            for (; j < (unsigned int)m->used - 1; j++) {
17806
0
                h = 0;
17807
                /* 2.4.1. a += mu * DigitMask(m, j) */
17808
0
                SP_ASM_ADDC(l, h, ad[j]);
17809
0
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
17810
0
                ad[j] = l;
17811
0
                l = h;
17812
0
            }
17813
0
            h = o2;
17814
0
            o2 = 0;
17815
0
            SP_ASM_ADDC_REG(l, h, o);
17816
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
17817
0
            SP_ASM_ADDC(l, h, ad[j]);
17818
0
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
17819
0
            ad[j] = l;
17820
0
            o = h;
17821
0
        }
17822
        /* Handle overflow. */
17823
0
        SP_ASM_ADDC(o, o2, a->dp[m->used * 2 - 1]);
17824
0
        a->dp[m->used * 2 - 1] = o;
17825
0
        a->dp[m->used * 2] = o2;
17826
0
        a->used = (sp_size_t)(m->used * 2 + 1);
17827
0
    }
17828
17829
0
    if (!ct) {
17830
        /* Remove leading zeros. */
17831
0
        sp_clamp(a);
17832
0
        (void)sp_rshb(a, bits, a);
17833
        /* a = a mod m */
17834
0
        if (_sp_cmp_abs(a, m) != MP_LT) {
17835
0
            _sp_sub_off(a, m, a, 0);
17836
0
        }
17837
0
    }
17838
0
    else {
17839
0
        (void)sp_rshb(a, bits, a);
17840
        /* Constant time clamping. */
17841
0
        sp_clamp_ct(a);
17842
17843
0
        _sp_submod_ct(a, m, m, m->used + 1U, a);
17844
0
    }
17845
17846
#if 0
17847
    sp_print(a, "rr");
17848
#endif
17849
17850
0
    return MP_OKAY;
17851
0
#endif /* !SQR_MUL_ASM */
17852
0
}
17853
17854
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
17855
    (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
17856
/* Reduce a number in Montgomery form.
17857
 *
17858
 * @param  [in,out]  a   SP integer to Montgomery reduce.
17859
 * @param  [in]      m   SP integer that is the modulus.
17860
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
17861
 * @param  [in]      ct  Indicates operation must be constant time.
17862
 *
17863
 * @return  MP_OKAY on success.
17864
 * @return  MP_VAL when a or m is NULL or m is zero.
17865
 */
17866
int sp_mont_red_ex(sp_int* a, const sp_int* m, sp_int_digit mp, int ct)
17867
0
{
17868
0
    int err;
17869
17870
    /* Validate parameters. */
17871
0
    if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
17872
0
        err = MP_VAL;
17873
0
    }
17874
#ifdef WOLFSSL_SP_INT_NEGATIVE
17875
    else if ((a->sign == MP_NEG) || (m->sign == MP_NEG)) {
17876
        err = MP_VAL;
17877
    }
17878
#endif
17879
    /* Ensure a has enough space for calculation. */
17880
0
    else if (a->size < m->used * 2 + 1) {
17881
0
        err = MP_VAL;
17882
0
    }
17883
0
    else {
17884
        /* Perform Montogomery Reduction. */
17885
0
        err = _sp_mont_red(a, m, mp, ct);
17886
0
    }
17887
17888
0
    return err;
17889
0
}
17890
#endif
17891
17892
/* Calculate the bottom digit of the inverse of negative m.
17893
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
17894
 *
17895
 * Used when performing Montgomery Reduction.
17896
 * m must be odd.
17897
 * Jeffrey Hurchalla's method.
17898
 *   https://arxiv.org/pdf/2204.04342.pdf
17899
 *
17900
 * @param  [in]   m   SP integer that is the modulus.
17901
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
17902
 */
17903
static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
17904
0
{
17905
0
    sp_int_digit d = m->dp[0];
17906
0
    sp_int_digit x = (3 * d) ^ 2;
17907
0
    sp_int_digit y = 1 - d * x;
17908
17909
0
#if SP_WORD_SIZE >= 16
17910
0
    x *= 1 + y; y *= y;
17911
0
#endif
17912
0
#if SP_WORD_SIZE >= 32
17913
0
    x *= 1 + y; y *= y;
17914
0
#endif
17915
0
#if SP_WORD_SIZE >= 64
17916
0
    x *= 1 + y; y *= y;
17917
0
#endif
17918
0
    x *= 1 + y;
17919
17920
    /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
17921
0
    *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
17922
0
}
17923
17924
/* Calculate the bottom digit of the inverse of negative m.
17925
 * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
17926
 *
17927
 * Used when performing Montgomery Reduction.
17928
 *
17929
 * @param  [in]   m   SP integer that is the modulus.
17930
 * @param  [out]  mp  SP integer digit that is the bottom digit of inv(-m).
17931
 *
17932
 * @return  MP_OKAY on success.
17933
 * @return  MP_VAL when m or rho is NULL.
17934
 */
17935
int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
17936
0
{
17937
0
    int err = MP_OKAY;
17938
17939
    /* Validate parameters. */
17940
0
    if ((m == NULL) || (rho == NULL)) {
17941
0
        err = MP_VAL;
17942
0
    }
17943
    /* Calculation only works with odd modulus. */
17944
0
    if ((err == MP_OKAY) && !sp_isodd(m)) {
17945
0
        err = MP_VAL;
17946
0
    }
17947
17948
0
    if (err == MP_OKAY) {
17949
        /* Calculate negative of inverse mod 2^n. */
17950
0
        _sp_mont_setup(m, rho);
17951
0
    }
17952
17953
0
    return err;
17954
0
}
17955
17956
/* Calculate the normalization value of m.
17957
 *   norm = 2^k - m, where k is the number of bits in m
17958
 *
17959
 * @param  [out]  norm   SP integer that normalises numbers into Montgomery
17960
 *                       form.
17961
 * @param  [in]   m      SP integer that is the modulus.
17962
 *
17963
 * @return  MP_OKAY on success.
17964
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximual.
17965
 */
17966
int sp_mont_norm(sp_int* norm, const sp_int* m)
17967
0
{
17968
0
    int err = MP_OKAY;
17969
0
    unsigned int bits = 0;
17970
17971
    /* Validate parameters. */
17972
0
    if ((norm == NULL) || (m == NULL)) {
17973
0
        err = MP_VAL;
17974
0
    }
17975
0
    if (err == MP_OKAY) {
17976
        /* Find top bit and ensure norm has enough space. */
17977
0
        bits = (unsigned int)sp_count_bits(m);
17978
        /* NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) */
17979
        /* clang-tidy falsely believes that norm->size was corrupted by the
17980
         * _sp_copy() to "Set real working value to base." in _sp_exptmod_ex().
17981
         */
17982
0
        if (bits >= (unsigned int)norm->size * SP_WORD_SIZE) {
17983
0
            err = MP_VAL;
17984
0
        }
17985
        /* NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) */
17986
0
    }
17987
0
    if (err == MP_OKAY) {
17988
        /* Round up for case when m is less than a word - no advantage in using
17989
         * a smaller mask and would take more operations.
17990
         */
17991
0
        if (bits < SP_WORD_SIZE) {
17992
0
            bits = SP_WORD_SIZE;
17993
0
        }
17994
        /* Smallest number greater than m of form 2^n. */
17995
0
        _sp_zero(norm);
17996
0
        err = sp_set_bit(norm, (int)bits);
17997
0
    }
17998
0
    if (err == MP_OKAY) {
17999
        /* norm = 2^n % m */
18000
0
        err = sp_sub(norm, m, norm);
18001
0
    }
18002
0
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
18003
        /* Sub made norm one word and now finish calculation. */
18004
0
        norm->dp[0] %= m->dp[0];
18005
0
    }
18006
0
    if (err == MP_OKAY) {
18007
        /* Remove leading zeros. */
18008
0
        sp_clamp(norm);
18009
0
    }
18010
18011
0
    return err;
18012
0
}
18013
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
18014
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
18015
18016
/*********************************
18017
 * To and from binary and strings.
18018
 *********************************/
18019
18020
/* Calculate the number of 8-bit values required to represent the
18021
 * multi-precision number.
18022
 *
18023
 * When a is NULL, return s 0.
18024
 *
18025
 * @param  [in]  a  SP integer.
18026
 *
18027
 * @return  The count of 8-bit values.
18028
 * @return  0 when a is NULL.
18029
 */
18030
int sp_unsigned_bin_size(const sp_int* a)
18031
0
{
18032
0
    int cnt = 0;
18033
18034
0
    if (a != NULL) {
18035
0
        cnt = (sp_count_bits(a) + 7) / 8;
18036
0
    }
18037
18038
0
    return cnt;
18039
0
}
18040
18041
/* Convert a number as an array of bytes in big-endian format to a
18042
 * multi-precision number.
18043
 *
18044
 * @param  [out]  a     SP integer.
18045
 * @param  [in]   in    Array of bytes.
18046
 * @param  [in]   inSz  Number of data bytes in array.
18047
 *
18048
 * @return  MP_OKAY on success.
18049
 * @return  MP_VAL when the number is too big to fit in an SP.
18050
 */
18051
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
18052
0
{
18053
0
    int err = MP_OKAY;
18054
18055
    /* Validate parameters. */
18056
0
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
18057
0
        err = MP_VAL;
18058
0
    }
18059
18060
    /* Check a has enough space for number. */
18061
0
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
18062
0
        err = MP_VAL;
18063
0
    }
18064
18065
0
    if (err == MP_OKAY) {
18066
        /* Load full digits at a time from in. */
18067
0
        int i;
18068
0
        int j = 0;
18069
18070
0
        a->used = (sp_size_t)((inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF);
18071
18072
    #if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
18073
        /* Data endian matches representation of number.
18074
         * Directly copy if we don't have alignment issues.
18075
         */
18076
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
18077
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
18078
        }
18079
    #else
18080
        /* Construct digit from required number of bytes. */
18081
0
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
18082
0
            a->dp[j]  = ((sp_int_digit)in[i - 0] <<  0)
18083
0
        #if SP_WORD_SIZE >= 16
18084
0
                      | ((sp_int_digit)in[i - 1] <<  8)
18085
0
        #endif
18086
0
        #if SP_WORD_SIZE >= 32
18087
0
                      | ((sp_int_digit)in[i - 2] << 16) |
18088
0
                        ((sp_int_digit)in[i - 3] << 24)
18089
0
        #endif
18090
0
        #if SP_WORD_SIZE >= 64
18091
0
                      | ((sp_int_digit)in[i - 4] << 32) |
18092
0
                        ((sp_int_digit)in[i - 5] << 40) |
18093
0
                        ((sp_int_digit)in[i - 6] << 48) |
18094
0
                        ((sp_int_digit)in[i - 7] << 56)
18095
0
        #endif
18096
0
                                                       ;
18097
0
            j++;
18098
0
        }
18099
0
    #endif
18100
18101
0
#if SP_WORD_SIZE >= 16
18102
        /* Handle leftovers. */
18103
0
        if (i >= 0) {
18104
    #ifdef BIG_ENDIAN_ORDER
18105
            int s;
18106
18107
            /* Place remaining bytes into last digit. */
18108
            a->dp[a->used - 1] = 0;
18109
            for (s = 0; i >= 0; i--,s += 8) {
18110
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
18111
            }
18112
    #else
18113
            /* Cast digits to an array of bytes so we can insert directly. */
18114
0
            byte *d = (byte*)a->dp;
18115
18116
            /* Zero out all bytes in last digit. */
18117
0
            a->dp[a->used - 1] = 0;
18118
            /* Place remaining bytes directly into digit. */
18119
0
            switch (i) {
18120
0
            #if SP_WORD_SIZE >= 64
18121
0
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
18122
0
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
18123
0
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
18124
0
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
18125
0
            #endif
18126
0
            #if SP_WORD_SIZE >= 32
18127
0
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
18128
0
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
18129
0
            #endif
18130
0
                case 0: d[inSz - 1 - 0] = in[0];
18131
0
            }
18132
0
    #endif /* LITTLE_ENDIAN_ORDER */
18133
0
        }
18134
0
#endif
18135
0
        sp_clamp_ct(a);
18136
0
    }
18137
18138
0
    return err;
18139
0
}
18140
18141
/* Convert the multi-precision number to an array of bytes in big-endian format.
18142
 *
18143
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18144
 * to calculate the number of bytes required.
18145
 *
18146
 * @param  [in]   a    SP integer.
18147
 * @param  [out]  out  Array to put encoding into.
18148
 *
18149
 * @return  MP_OKAY on success.
18150
 * @return  MP_VAL when a or out is NULL.
18151
 */
18152
int sp_to_unsigned_bin(const sp_int* a, byte* out)
18153
0
{
18154
    /* Write assuming output buffer is big enough. */
18155
0
    return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
18156
0
}
18157
18158
/* Convert the multi-precision number to an array of bytes in big-endian format.
18159
 *
18160
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18161
 * to calculate the number of bytes required.
18162
 * Front-pads the output array with zeros to make number the size of the array.
18163
 *
18164
 * @param  [in]   a      SP integer.
18165
 * @param  [out]  out    Array to put encoding into.
18166
 * @param  [in]   outSz  Size of the array in bytes.
18167
 *
18168
 * @return  MP_OKAY on success.
18169
 * @return  MP_VAL when a or out is NULL.
18170
 */
18171
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
18172
0
{
18173
0
    int err = MP_OKAY;
18174
18175
    /* Validate parameters. */
18176
0
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18177
0
        err = MP_VAL;
18178
0
    }
18179
18180
0
#if SP_WORD_SIZE > 8
18181
0
    if (err == MP_OKAY) {
18182
        /* Start at the end of the buffer - least significant byte. */
18183
0
        int j = outSz - 1;
18184
18185
0
        if (!sp_iszero(a)) {
18186
0
            unsigned int i;
18187
18188
            /* Put each digit in. */
18189
0
            for (i = 0; (j >= 0) && (i < a->used); i++) {
18190
0
                int b;
18191
0
                sp_int_digit d = a->dp[i];
18192
                /* Place each byte of a digit into the buffer. */
18193
0
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
18194
0
                    out[j--] = (byte)d;
18195
0
                    d >>= 8;
18196
                    /* Stop if the output buffer is filled. */
18197
0
                    if (j < 0) {
18198
0
                        if ((i < (unsigned int)a->used - 1) || (d > 0)) {
18199
0
                            err = MP_VAL;
18200
0
                        }
18201
0
                        break;
18202
0
                    }
18203
0
                }
18204
0
            }
18205
0
        }
18206
        /* Front pad buffer with 0s. */
18207
0
        for (; j >= 0; j--) {
18208
0
            out[j] = 0;
18209
0
        }
18210
0
    }
18211
#else
18212
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18213
        err = MP_VAL;
18214
    }
18215
    if (err == MP_OKAY) {
18216
        unsigned int i;
18217
        int j;
18218
18219
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
18220
18221
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
18222
            out[j] = a->dp[i];
18223
        }
18224
    }
18225
#endif
18226
18227
0
    return err;
18228
0
}
18229
18230
/* Convert the multi-precision number to an array of bytes in big-endian format.
18231
 *
18232
 * Constant-time implementation.
18233
 *
18234
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18235
 * to calculate the number of bytes required.
18236
 * Front-pads the output array with zeros to make number the size of the array.
18237
 *
18238
 * @param  [in]   a      SP integer.
18239
 * @param  [out]  out    Array to put encoding into.
18240
 * @param  [in]   outSz  Size of the array in bytes.
18241
 *
18242
 * @return  MP_OKAY on success.
18243
 * @return  MP_VAL when a or out is NULL.
18244
 */
18245
int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz)
18246
0
{
18247
0
    int err = MP_OKAY;
18248
18249
    /* Validate parameters. */
18250
0
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
18251
0
        err = MP_VAL;
18252
0
    }
18253
18254
0
#if SP_WORD_SIZE > 8
18255
0
    if (err == MP_OKAY) {
18256
        /* Start at the end of the buffer - least significant byte. */
18257
0
        int j;
18258
0
        unsigned int i;
18259
0
        sp_int_digit mask = (sp_int_digit)-1;
18260
0
        sp_int_digit d;
18261
18262
        /* Put each digit in. */
18263
0
        i = 0;
18264
0
        for (j = outSz - 1; j >= 0; ) {
18265
0
            unsigned int b;
18266
0
            d = a->dp[i];
18267
            /* Place each byte of a digit into the buffer. */
18268
0
            for (b = 0; (j >= 0) && (b < SP_WORD_SIZEOF); b++) {
18269
0
                out[j--] = (byte)(d & mask);
18270
0
                d >>= 8;
18271
0
            }
18272
0
            mask &= (sp_int_digit)0 - (i < (unsigned int)a->used - 1);
18273
0
            i += (unsigned int)(1 & mask);
18274
0
        }
18275
0
    }
18276
#else
18277
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
18278
        err = MP_VAL;
18279
    }
18280
    if (err == MP_OKAY) {
18281
        unsigned int i;
18282
        int j;
18283
        sp_int_digit mask = (sp_int_digit)-1;
18284
18285
        i = 0;
18286
        for (j = outSz - 1; j >= 0; j--) {
18287
            out[j] = a->dp[i] & mask;
18288
            mask &= (sp_int_digit)0 - (i < (unsigned int)a->used - 1);
18289
            i += (unsigned int)(1 & mask);
18290
        }
18291
    }
18292
#endif
18293
18294
0
    return err;
18295
0
}
18296
18297
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
18298
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
18299
/* Store the number in big-endian format in array at an offset.
18300
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
18301
 * to calculate the number of bytes required.
18302
 *
18303
 * @param  [in]   o    Offset into array o start encoding.
18304
 * @param  [in]   a    SP integer.
18305
 * @param  [out]  out  Array to put encoding into.
18306
 *
18307
 * @return  Index of next byte after data.
18308
 * @return  MP_VAL when a or out is NULL.
18309
 */
18310
int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
18311
0
{
18312
    /* Get length of data that will be written. */
18313
0
    int len = sp_unsigned_bin_size(a);
18314
    /* Write number to buffer at offset. */
18315
0
    int ret = sp_to_unsigned_bin_len(a, out + o, len);
18316
18317
0
    if (ret == MP_OKAY) {
18318
        /* Return offset of next byte after number. */
18319
0
        ret = o + len;
18320
0
    }
18321
18322
0
    return ret;
18323
0
}
18324
#endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
18325
18326
#ifdef WOLFSSL_SP_READ_RADIX_16
18327
/* Convert hexadecimal number as string in big-endian format to a
18328
 * multi-precision number.
18329
 *
18330
 * Assumes negative sign and leading zeros have been stripped.
18331
 *
18332
 * @param  [out]  a   SP integer.
18333
 * @param  [in]   in  NUL terminated string.
18334
 *
18335
 * @return  MP_OKAY on success.
18336
 * @return  MP_VAL when radix not supported, value is negative, or a character
18337
 *          is not valid.
18338
 */
18339
static int _sp_read_radix_16(sp_int* a, const char* in)
18340
0
{
18341
0
    int err = MP_OKAY;
18342
0
    int i;
18343
0
    unsigned int s = 0;
18344
0
    sp_size_t j = 0;
18345
0
    sp_int_digit d;
18346
    /* Skip whitespace at end of line */
18347
0
    int eol_done = 0;
18348
18349
    /* Make all nibbles in digit 0. */
18350
0
    d = 0;
18351
    /* Step through string a character at a time starting at end - least
18352
     * significant byte. */
18353
0
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
18354
        /* Convert character from hex. */
18355
0
        int ch = (int)HexCharToByte(in[i]);
18356
        /* Check for invalid character. */
18357
0
        if (ch < 0) {
18358
0
            if (!eol_done && CharIsWhiteSpace(in[i]))
18359
0
                continue;
18360
0
            err = MP_VAL;
18361
0
            break;
18362
0
        }
18363
0
        eol_done = 1;
18364
18365
        /* Check whether we have filled the digit. */
18366
0
        if (s == SP_WORD_SIZE) {
18367
            /* Store digit and move index to next in a. */
18368
0
            a->dp[j++] = d;
18369
            /* Fail if we are out of space in a. */
18370
0
            if (j >= a->size) {
18371
0
                err = MP_VAL;
18372
0
                break;
18373
0
            }
18374
            /* Set shift back to 0 - lowest nibble. */
18375
0
            s = 0;
18376
            /* Make all nibbles in digit 0. */
18377
0
            d = 0;
18378
0
        }
18379
18380
        /* Put next nibble into digit. */
18381
0
        d |= ((sp_int_digit)ch) << s;
18382
        /* Update shift for next nibble. */
18383
0
        s += 4;
18384
0
    }
18385
18386
0
    if (err == MP_OKAY) {
18387
        /* If space, store last digit. */
18388
0
        if (j < a->size) {
18389
0
            a->dp[j] = d;
18390
0
        }
18391
        /* Update used count. */
18392
0
        a->used = (sp_size_t)(j + 1U);
18393
        /* Remove leading zeros. */
18394
0
        sp_clamp(a);
18395
0
    }
18396
18397
0
    return err;
18398
0
}
18399
#endif /* WOLFSSL_SP_READ_RADIX_16 */
18400
18401
#ifdef WOLFSSL_SP_READ_RADIX_10
18402
/* Convert decimal number as string in big-endian format to a multi-precision
18403
 * number.
18404
 *
18405
 * Assumes negative sign and leading zeros have been stripped.
18406
 *
18407
 * @param  [out]  a   SP integer.
18408
 * @param  [in]   in  NUL terminated string.
18409
 *
18410
 * @return  MP_OKAY on success.
18411
 * @return  MP_VAL when radix not supported, value is negative, or a character
18412
 *          is not valid.
18413
 */
18414
static int _sp_read_radix_10(sp_int* a, const char* in)
18415
0
{
18416
0
    int  err = MP_OKAY;
18417
0
    int  i;
18418
0
    char ch;
18419
18420
    /* Start with a being zero. */
18421
0
    _sp_zero(a);
18422
18423
    /* Process all characters. */
18424
0
    for (i = 0; in[i] != '\0'; i++) {
18425
        /* Get character. */
18426
0
        ch = in[i];
18427
        /* Check character is valid. */
18428
0
        if ((ch >= '0') && (ch <= '9')) {
18429
            /* Assume '0'..'9' are continuous values as characters. */
18430
0
            ch = (char)(ch - '0');
18431
0
        }
18432
0
        else {
18433
0
            if (CharIsWhiteSpace(ch))
18434
0
                continue;
18435
            /* Return error on invalid character. */
18436
0
            err = MP_VAL;
18437
0
            break;
18438
0
        }
18439
18440
        /* Multiply a by 10. */
18441
0
        err = _sp_mul_d(a, 10, a, 0);
18442
0
        if (err != MP_OKAY) {
18443
0
            break;
18444
0
        }
18445
        /* Add character value. */
18446
0
        err = _sp_add_d(a, (sp_int_digit)ch, a);
18447
0
        if (err != MP_OKAY) {
18448
0
            break;
18449
0
        }
18450
0
    }
18451
18452
0
    return err;
18453
0
}
18454
#endif /* WOLFSSL_SP_READ_RADIX_10 */
18455
18456
#if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
18457
/* Convert a number as string in big-endian format to a big number.
18458
 * Only supports base-16 (hexadecimal) and base-10 (decimal).
18459
 *
18460
 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
18461
 *
18462
 * @param  [out]  a      SP integer.
18463
 * @param  [in]   in     NUL terminated string.
18464
 * @param  [in]   radix  Number of values in a digit.
18465
 *
18466
 * @return  MP_OKAY on success.
18467
 * @return  MP_VAL when a or in is NULL, radix not supported, value is negative,
18468
 *          or a character is not valid.
18469
 */
18470
int sp_read_radix(sp_int* a, const char* in, int radix)
18471
0
{
18472
0
    int err = MP_OKAY;
18473
#ifdef WOLFSSL_SP_INT_NEGATIVE
18474
    sp_uint8 sign = MP_ZPOS;
18475
#endif
18476
18477
0
    if ((a == NULL) || (in == NULL)) {
18478
0
        err = MP_VAL;
18479
0
    }
18480
18481
0
    if (err == MP_OKAY) {
18482
0
    #ifndef WOLFSSL_SP_INT_NEGATIVE
18483
0
        if (*in == '-') {
18484
0
            err = MP_VAL;
18485
0
        }
18486
0
        else
18487
0
    #endif
18488
0
        {
18489
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18490
            if (*in == '-') {
18491
                /* Make number negative if signed string. */
18492
                sign = MP_NEG;
18493
                in++;
18494
            }
18495
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18496
            /* Skip leading zeros. */
18497
0
            while (*in == '0') {
18498
0
                in++;
18499
0
            }
18500
18501
0
            if (radix == 16) {
18502
0
                err = _sp_read_radix_16(a, in);
18503
0
            }
18504
0
        #ifdef WOLFSSL_SP_READ_RADIX_10
18505
0
            else if (radix == 10) {
18506
0
                err = _sp_read_radix_10(a, in);
18507
0
            }
18508
0
        #endif
18509
0
            else {
18510
0
                err = MP_VAL;
18511
0
            }
18512
18513
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18514
            /* Ensure not negative when zero. */
18515
            if (err == MP_OKAY) {
18516
                if (sp_iszero(a)) {
18517
                    a->sign = MP_ZPOS;
18518
                }
18519
                else {
18520
                    a->sign = sign;
18521
                }
18522
            }
18523
        #endif
18524
0
        }
18525
0
    }
18526
18527
0
    return err;
18528
0
}
18529
#endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
18530
18531
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18532
    defined(WC_MP_TO_RADIX)
18533
/* Put the big-endian, hex string encoding of a into str.
18534
 *
18535
 * Assumes str is large enough for result.
18536
 * Use sp_radix_size() to calculate required length.
18537
 *
18538
 * @param  [in]   a    SP integer to convert.
18539
 * @param  [out]  str  String to hold hex string result.
18540
 *
18541
 * @return  MP_OKAY on success.
18542
 * @return  MP_VAL when a or str is NULL.
18543
 */
18544
int sp_tohex(const sp_int* a, char* str)
18545
0
{
18546
0
    int err = MP_OKAY;
18547
18548
    /* Validate parameters. */
18549
0
    if ((a == NULL) || (str == NULL)) {
18550
0
        err = MP_VAL;
18551
0
    }
18552
18553
0
    if (err == MP_OKAY) {
18554
        /* Quick out if number is zero. */
18555
0
        if (sp_iszero(a) == MP_YES) {
18556
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18557
            /* Make string represent complete bytes. */
18558
0
            *str++ = '0';
18559
0
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18560
0
            *str++ = '0';
18561
0
        }
18562
0
        else {
18563
0
            int i;
18564
0
            int j;
18565
0
            sp_int_digit d;
18566
18567
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18568
            if (a->sign == MP_NEG) {
18569
                /* Add negative sign character. */
18570
                *str = '-';
18571
                str++;
18572
            }
18573
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18574
18575
            /* Start at last digit - most significant digit. */
18576
0
            i = (int)(a->used - 1);
18577
0
            d = a->dp[i];
18578
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18579
            /* Find highest non-zero byte in most-significant word. */
18580
0
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
18581
                /* When a byte at this index is not 0 break out to start
18582
                 * writing.
18583
                 */
18584
0
                if (((d >> j) & 0xff) != 0) {
18585
0
                    break;
18586
0
                }
18587
                /* Skip this digit if it was 0. */
18588
0
                if (j == 0) {
18589
0
                    j = SP_WORD_SIZE - 8;
18590
0
                    d = a->dp[--i];
18591
0
                }
18592
0
            }
18593
            /* Start with high nibble of byte. */
18594
0
            j += 4;
18595
        #else
18596
            /* Find highest non-zero nibble in most-significant word. */
18597
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18598
                /* When a nibble at this index is not 0 break out to start
18599
                 * writing.
18600
                 */
18601
                if (((d >> j) & 0xf) != 0) {
18602
                    break;
18603
                }
18604
                /* Skip this digit if it was 0. */
18605
                if (j == 0) {
18606
                    j = SP_WORD_SIZE - 4;
18607
                    d = a->dp[--i];
18608
                }
18609
            }
18610
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18611
            /* Write out as much as required from most-significant digit. */
18612
0
            for (; j >= 0; j -= 4) {
18613
0
                *(str++) = ByteToHex((byte)(d >> j));
18614
0
            }
18615
            /* Write rest of digits. */
18616
0
            for (--i; i >= 0; i--) {
18617
                /* Get digit from memory. */
18618
0
                d = a->dp[i];
18619
                /* Write out all nibbles of digit. */
18620
0
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
18621
0
                    *(str++) = (char)ByteToHex((byte)(d >> j));
18622
0
                }
18623
0
            }
18624
0
        }
18625
        /* Terminate string. */
18626
0
        *str = '\0';
18627
0
    }
18628
18629
0
    return err;
18630
0
}
18631
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18632
18633
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18634
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
18635
    defined(WC_MP_TO_RADIX)
18636
/* Put the big-endian, decimal string encoding of a into str.
18637
 *
18638
 * Assumes str is large enough for result.
18639
 * Use sp_radix_size() to calculate required length.
18640
 *
18641
 * @param  [in]   a    SP integer to convert.
18642
 * @param  [out]  str  String to hold hex string result.
18643
 *
18644
 * @return  MP_OKAY on success.
18645
 * @return  MP_VAL when a or str is NULL.
18646
 * @return  MP_MEM when dynamic memory allocation fails.
18647
 */
18648
int sp_todecimal(const sp_int* a, char* str)
18649
0
{
18650
0
    int err = MP_OKAY;
18651
0
    int i;
18652
0
    int j;
18653
0
    sp_int_digit d = 0;
18654
18655
    /* Validate parameters. */
18656
0
    if ((a == NULL) || (str == NULL)) {
18657
0
        err = MP_VAL;
18658
0
    }
18659
    /* Quick out if number is zero. */
18660
0
    else if (sp_iszero(a) == MP_YES) {
18661
0
        *str++ = '0';
18662
0
        *str = '\0';
18663
0
    }
18664
0
    else if (a->used >= SP_INT_DIGITS) {
18665
0
        err = MP_VAL;
18666
0
    }
18667
0
    else {
18668
        /* Temporary that is divided by 10. */
18669
0
        DECL_SP_INT(t, a->used + 1);
18670
18671
0
        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
18672
0
        if (err == MP_OKAY) {
18673
0
            _sp_copy(a, t);
18674
0
        }
18675
0
        if (err == MP_OKAY) {
18676
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18677
            if (a->sign == MP_NEG) {
18678
                /* Add negative sign character. */
18679
                *str = '-';
18680
                str++;
18681
            }
18682
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18683
18684
            /* Write out little endian. */
18685
0
            i = 0;
18686
0
            do {
18687
                /* Divide by 10 and get remainder of division. */
18688
0
                (void)sp_div_d(t, 10, t, &d);
18689
                /* Write out remainder as a character. */
18690
0
                str[i++] = (char)('0' + d);
18691
0
            }
18692
            /* Keep going while we there is a value to write. */
18693
0
            while (!sp_iszero(t));
18694
            /* Terminate string. */
18695
0
            str[i] = '\0';
18696
18697
0
            if (err == MP_OKAY) {
18698
                /* Reverse string to big endian. */
18699
0
                for (j = 0; j <= (i - 1) / 2; j++) {
18700
0
                    int c = (unsigned char)str[j];
18701
0
                    str[j] = str[i - 1 - j];
18702
0
                    str[i - 1 - j] = (char)c;
18703
0
                }
18704
0
            }
18705
0
        }
18706
18707
0
        FREE_SP_INT(t, NULL);
18708
0
    }
18709
18710
0
    return err;
18711
0
}
18712
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * WOLFSSL_KEY_GEN || HAVE_COMP_KEY || WC_MP_TO_RADIX */
18713
18714
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18715
    defined(WC_MP_TO_RADIX)
18716
/* Put the string version, big-endian, of a in str using the given radix.
18717
 *
18718
 * @param  [in]   a      SP integer to convert.
18719
 * @param  [out]  str    String to hold hex string result.
18720
 * @param  [in]   radix  Base of character.
18721
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18722
 *
18723
 * @return  MP_OKAY on success.
18724
 * @return  MP_VAL when a or str is NULL, or radix not supported.
18725
 */
18726
int sp_toradix(const sp_int* a, char* str, int radix)
18727
0
{
18728
0
    int err = MP_OKAY;
18729
18730
    /* Validate parameters. */
18731
0
    if ((a == NULL) || (str == NULL)) {
18732
0
        err = MP_VAL;
18733
0
    }
18734
    /* Handle base 16 if requested. */
18735
0
    else if (radix == MP_RADIX_HEX) {
18736
0
        err = sp_tohex(a, str);
18737
0
    }
18738
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18739
0
    defined(HAVE_COMP_KEY)
18740
    /* Handle base 10 if requested. */
18741
0
    else if (radix == MP_RADIX_DEC) {
18742
0
        err = sp_todecimal(a, str);
18743
0
    }
18744
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18745
0
    else {
18746
        /* Base not supported. */
18747
0
        err = MP_VAL;
18748
0
    }
18749
18750
0
    return err;
18751
0
}
18752
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18753
18754
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
18755
    defined(WC_MP_TO_RADIX)
18756
/* Calculate the length of the string version, big-endian, of a using the given
18757
 * radix.
18758
 *
18759
 * @param  [in]   a      SP integer to convert.
18760
 * @param  [in]   radix  Base of character.
18761
 *                       Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
18762
 * @param  [out]  size   The number of characters in encoding.
18763
 *
18764
 * @return  MP_OKAY on success.
18765
 * @return  MP_VAL when a or size is NULL, or radix not supported.
18766
 */
18767
int sp_radix_size(const sp_int* a, int radix, int* size)
18768
0
{
18769
0
    int err = MP_OKAY;
18770
18771
    /* Validate parameters. */
18772
0
    if ((a == NULL) || (size == NULL)) {
18773
0
        err = MP_VAL;
18774
0
    }
18775
    /* Handle base 16 if requested. */
18776
0
    else if (radix == MP_RADIX_HEX) {
18777
0
        if (a->used == 0) {
18778
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18779
            /* 00 and '\0' */
18780
0
            *size = 2 + 1;
18781
        #else
18782
            /* Zero and '\0' */
18783
            *size = 1 + 1;
18784
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18785
0
        }
18786
0
        else {
18787
            /* Count of nibbles. */
18788
0
            int cnt = (sp_count_bits(a) + 3) / 4;
18789
0
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
18790
            /* Must have even number of nibbles to have complete bytes. */
18791
0
            if (cnt & 1) {
18792
0
                cnt++;
18793
0
            }
18794
0
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
18795
        #ifdef WOLFSSL_SP_INT_NEGATIVE
18796
            /* Add to count of characters for negative sign. */
18797
            if (a->sign == MP_NEG) {
18798
                cnt++;
18799
            }
18800
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
18801
            /* One more for \0 */
18802
0
            *size = cnt + 1;
18803
0
        }
18804
0
    }
18805
0
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
18806
0
    defined(HAVE_COMP_KEY)
18807
    /* Handle base 10 if requested. */
18808
0
    else if (radix == MP_RADIX_DEC) {
18809
0
        int i;
18810
0
        sp_int_digit d;
18811
18812
        /* quick out if its zero */
18813
0
        if (sp_iszero(a) == MP_YES) {
18814
            /* Zero and '\0' */
18815
0
            *size = 1 + 1;
18816
0
        }
18817
0
        else {
18818
0
            DECL_SP_INT(t, a->used);
18819
18820
            /* Temporary to be divided by 10. */
18821
0
            ALLOC_SP_INT(t, a->used, err, NULL);
18822
0
            if (err == MP_OKAY) {
18823
0
                t->size = a->used;
18824
0
                _sp_copy(a, t);
18825
0
            }
18826
18827
0
            if (err == MP_OKAY) {
18828
                /* Count number of times number can be divided by 10. */
18829
0
                for (i = 0; !sp_iszero(t); i++) {
18830
0
                    (void)sp_div_d(t, 10, t, &d);
18831
0
                }
18832
            #ifdef WOLFSSL_SP_INT_NEGATIVE
18833
                /* Add to count of characters for negative sign. */
18834
                if (a->sign == MP_NEG) {
18835
                    i++;
18836
                }
18837
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
18838
                /* One more for \0 */
18839
0
                *size = i + 1;
18840
0
            }
18841
18842
0
            FREE_SP_INT(t, NULL);
18843
0
        }
18844
0
    }
18845
0
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
18846
0
    else {
18847
        /* Base not supported. */
18848
0
        err = MP_VAL;
18849
0
    }
18850
18851
0
    return err;
18852
0
}
18853
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
18854
18855
/***************************************
18856
 * Prime number generation and checking.
18857
 ***************************************/
18858
18859
#if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
18860
    !defined(NO_DSA)) && !defined(WC_NO_RNG)
18861
#ifndef WOLFSSL_SP_MILLER_RABIN_CNT
18862
/* By default, do 8 iterations of Miller-Rabin on check of primality when
18863
 * generating.
18864
 */
18865
#define WOLFSSL_SP_MILLER_RABIN_CNT     8
18866
#endif
18867
18868
/* Generate a random prime for RSA only.
18869
 *
18870
 * @param  [out]  r     SP integer to hold result.
18871
 * @param  [in]   len   Number of bytes in prime. Use -ve to indicate the two
18872
 *                      lowest bits must be set.
18873
 * @param  [in]   rng   Random number generator.
18874
 * @param  [in]   heap  Heap hint. Unused.
18875
 *
18876
 * @return  MP_OKAY on success
18877
 * @return  MP_VAL when r or rng is NULL, length is not supported or random
18878
 *          number generator fails.
18879
 */
18880
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
18881
{
18882
    static const byte USE_BBS = 3;
18883
    int  err = MP_OKAY;
18884
    byte low_bits = 1;
18885
    int  isPrime = MP_NO;
18886
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
18887
    int  bits = 0;
18888
#endif /* WOLFSSL_SP_MATH_ALL */
18889
    unsigned int digits = 0;
18890
18891
    (void)heap;
18892
18893
    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
18894
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
18895
        err = MP_VAL;
18896
    }
18897
18898
    if (err == MP_OKAY) {
18899
        /* Get type. */
18900
        if (len < 0) {
18901
            low_bits = USE_BBS;
18902
            len = -len;
18903
        }
18904
18905
        /* Get number of digits required to handle required number of bytes. */
18906
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
18907
        /* Ensure result has space. */
18908
        if (r->size < digits) {
18909
            err = MP_VAL;
18910
        }
18911
    }
18912
18913
    if (err == MP_OKAY) {
18914
    #ifndef WOLFSSL_SP_MATH_ALL
18915
        /* For minimal maths, support only what's in SP and needed for DH. */
18916
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
18917
        if (len == 32) {
18918
        }
18919
        else
18920
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
18921
        /* Generate RSA primes that are half the modulus length. */
18922
    #ifdef WOLFSSL_SP_4096
18923
        if (len == 256) {
18924
            /* Support 2048-bit operations compiled in. */
18925
        }
18926
        else
18927
    #endif
18928
    #ifndef WOLFSSL_SP_NO_3072
18929
        if (len == 192) {
18930
            /* Support 1536-bit operations compiled in. */
18931
        }
18932
        else
18933
    #endif
18934
    #ifndef WOLFSSL_SP_NO_2048
18935
        if (len == 128) {
18936
            /* Support 1024-bit operations compiled in. */
18937
        }
18938
        else
18939
    #endif
18940
        {
18941
            /* Bit length not supported in SP. */
18942
            err = MP_VAL;
18943
        }
18944
    #endif /* !WOLFSSL_SP_MATH_ALL */
18945
18946
    #ifdef WOLFSSL_SP_INT_NEGATIVE
18947
        /* Generated number is always positive. */
18948
        r->sign = MP_ZPOS;
18949
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
18950
        /* Set number of digits that will be used. */
18951
        r->used = (sp_size_t)digits;
18952
    #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
18953
        /* Calculate number of bits in last digit. */
18954
        bits = (len * 8) & SP_WORD_MASK;
18955
    #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
18956
    }
18957
18958
    /* Assume the candidate is probably prime and then test until it is proven
18959
     * composite.
18960
     */
18961
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
18962
#ifdef SHOW_GEN
18963
        printf(".");
18964
        fflush(stdout);
18965
#endif /* SHOW_GEN */
18966
        /* Generate bytes into digit array. */
18967
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
18968
        if (err != 0) {
18969
            err = MP_VAL;
18970
            break;
18971
        }
18972
18973
        /* Set top bits to ensure bit length required is generated.
18974
         * Also set second top to help ensure product of two primes is
18975
         * going to be twice the number of bits of each.
18976
         */
18977
#ifdef LITTLE_ENDIAN_ORDER
18978
        ((byte*)r->dp)[len-1]             |= 0x80 | 0x40;
18979
#else
18980
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
18981
#endif /* LITTLE_ENDIAN_ORDER */
18982
18983
#ifdef BIG_ENDIAN_ORDER
18984
        /* Bytes were put into wrong place when less than full digit. */
18985
        if (bits != 0) {
18986
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
18987
        }
18988
#endif /* BIG_ENDIAN_ORDER */
18989
#ifdef WOLFSSL_SP_MATH_ALL
18990
        /* Mask top digit when less than a digit requested. */
18991
        if (bits > 0) {
18992
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
18993
        }
18994
#endif /* WOLFSSL_SP_MATH_ALL */
18995
        /* Set mandatory low bits
18996
         *  - bottom bit to make odd.
18997
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
18998
         */
18999
        r->dp[0] |= low_bits;
19000
19001
        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
19002
         * of a 1024-bit candidate being a false positive, when it is our
19003
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
19004
         */
19005
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
19006
            rng);
19007
    }
19008
19009
    return err;
19010
}
19011
#endif /* WOLFSSL_KEY_GEN && (!NO_RSA || !NO_DH || !NO_DSA) && !WC_NO_RNG */
19012
19013
#ifdef WOLFSSL_SP_PRIME_GEN
19014
/* Miller-Rabin test of "a" to the base of "b" as described in
19015
 * HAC pp. 139 Algorithm 4.24
19016
 *
19017
 * Sets result to 0 if definitely composite or 1 if probably prime.
19018
 * Randomly the chance of error is no more than 1/4 and often
19019
 * very much lower.
19020
 *
19021
 * a is assumed to be odd.
19022
 *
19023
 * @param  [in]   a       SP integer to check.
19024
 * @param  [in]   b       SP integer that is a small prime.
19025
 * @param  [out]  result  MP_YES when number is likely prime.
19026
 *                        MP_NO otherwise.
19027
 * @param  [in]   n1      SP integer temporary.
19028
 * @param  [in]   r       SP integer temporary.
19029
 *
19030
 * @return  MP_OKAY on success.
19031
 * @return  MP_MEM when dynamic memory allocation fails.
19032
 */
19033
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
19034
    sp_int* n1, sp_int* r)
19035
0
{
19036
0
    int err = MP_OKAY;
19037
0
    int s = 0;
19038
0
    sp_int* y = b;
19039
19040
    /* Assume not prime. */
19041
0
    *result = MP_NO;
19042
19043
    /* Ensure small prime is 2 or more. */
19044
0
    if (sp_cmp_d(b, 1) != MP_GT) {
19045
0
        err = MP_VAL;
19046
0
    }
19047
0
    if (err == MP_OKAY) {
19048
        /* n1 = a - 1 (a is assumed odd.) */
19049
0
        (void)sp_copy(a, n1);
19050
0
        n1->dp[0]--;
19051
19052
        /* Set 2**s * r = n1 */
19053
        /* Count the number of least significant bits which are zero. */
19054
0
        s = sp_cnt_lsb(n1);
19055
        /* Divide n - 1 by 2**s into r. */
19056
0
        (void)sp_rshb(n1, s, r);
19057
19058
        /* Compute y = b**r mod a */
19059
0
        err = sp_exptmod(b, r, a, y);
19060
0
    }
19061
0
    if (err == MP_OKAY) {
19062
        /* Assume probably prime until shown otherwise. */
19063
0
        *result = MP_YES;
19064
19065
        /* If y != 1 and y != n1 do */
19066
0
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
19067
0
            int j = 1;
19068
            /* While j <= s-1 and y != n1 */
19069
0
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
19070
                /* Square for bit shifted down. */
19071
0
                err = sp_sqrmod(y, a, y);
19072
0
                if (err != MP_OKAY) {
19073
0
                    break;
19074
0
                }
19075
19076
                /* If y == 1 then composite. */
19077
0
                if (sp_cmp_d(y, 1) == MP_EQ) {
19078
0
                    *result = MP_NO;
19079
0
                    break;
19080
0
                }
19081
0
                ++j;
19082
0
            }
19083
19084
            /* If y != n1 then composite. */
19085
0
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
19086
0
                *result = MP_NO;
19087
0
            }
19088
0
        }
19089
0
    }
19090
19091
0
    return err;
19092
0
}
19093
19094
#if SP_WORD_SIZE == 8
19095
/* Number of pre-computed primes. With 8-bit digits the table is limited to the
 * primes that fit in one digit: the 54 primes from 0x02 up to 0xFB. */
19096
#define SP_PRIME_SIZE      54
19097
19098
/* Small primes in ascending order. This file uses the table for small-prime
 * comparison, for trial division and as the bases of the fixed Miller-Rabin
 * trials. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
19099
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
19100
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
19101
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
19102
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
19103
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
19104
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
19105
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
19106
};
19107
#else
19108
/* Number of pre-computed primes. First n primes. */
19109
0
#define SP_PRIME_SIZE      256
19110
19111
/* The first 256 primes, in ascending order from 0x0002 up to 0x0653.
 * This file uses the table for small-prime comparison, for trial division and
 * as the bases of the fixed Miller-Rabin trials. */
19112
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
19113
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
19114
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
19115
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
19116
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
19117
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
19118
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
19119
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
19120
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
19121
19122
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
19123
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
19124
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
19125
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
19126
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
19127
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
19128
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
19129
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
19130
19131
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
19132
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
19133
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
19134
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
19135
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
19136
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
19137
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
19138
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
19139
19140
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
19141
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
19142
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
19143
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
19144
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
19145
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
19146
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
19147
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
19148
};
19149
#endif
19150
19151
/* Compare the first n primes with a.
19152
 *
19153
 * @param [in]  a       Number to check.
19154
 * @param [out] result  Whether number was found to be prime.
19155
 * @return  0 when no small prime matches.
19156
 * @return  1 when small prime matches.
19157
 */
19158
static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
19159
0
{
19160
0
    int i;
19161
0
    int haveRes = 0;
19162
19163
0
    *result = MP_NO;
19164
    /* Check one digit a against primes table. */
19165
0
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19166
0
        if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
19167
0
            *result = MP_YES;
19168
0
            haveRes = 1;
19169
0
            break;
19170
0
        }
19171
0
    }
19172
19173
0
    return haveRes;
19174
0
}
19175
19176
/* Using composites is only faster when using 64-bit values. */
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
/* Number of composites. */
#define SP_COMP_CNT     38

/* Products of small primes that fit into 64-bits.
 *
 * Entry j is the product of the consecutive entries of sp_primes that end just
 * before index sp_comp_idx[j]. Reducing a number modulo one entry gives a
 * single word that can then be tested against each of those primes with native
 * arithmetic.
 *
 * The table is only ever read, so it is declared const.
 */
static const sp_int_digit sp_comp[SP_COMP_CNT] = {
    0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
    0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
    0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
    0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
    0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
    0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
    0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
    0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
    0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
    0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
    0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
    0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
    0xcec010484e4ad877, 0xef972c3cfafbcd25
};

/* Index of next prime after those used to create composite.
 *
 * The last entry equals the number of primes in the 256 entry table, so the
 * composites together cover every pre-computed prime. Read-only, hence const.
 */
static const int sp_comp_idx[SP_COMP_CNT] = {
     15,  25,  34,  42,  50,  58,  65,  72,  79,  86,  93, 100, 106, 112, 118,
    124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
    214, 220, 226, 232, 238, 244, 250, 256
};
#endif
19205
19206
/* Determines whether any of the first n small primes divide a evenly.
19207
 *
19208
 * @param [in]      a        Number to check.
19209
 * @param [in, out] haveRes  Boolean indicating a no prime result found.
19210
 * @param [in, out] result   Whether a is known to be prime.
19211
 * @return  MP_OKAY on success.
19212
 * @return  Negative on failure.
19213
 */
19214
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
19215
0
{
19216
0
    int i;
19217
0
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
19218
0
    int j;
19219
0
#endif
19220
0
    sp_int_digit d;
19221
0
    int err = MP_OKAY;
19222
19223
#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
19224
    /* Do trial division of a with all known small primes. */
19225
    for (i = 0; i < SP_PRIME_SIZE; i++) {
19226
        /* Small prime divides a when remainder is 0. */
19227
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
19228
        if ((err != MP_OKAY) || (d == 0)) {
19229
            *result = MP_NO;
19230
            *haveRes = 1;
19231
            break;
19232
        }
19233
    }
19234
#else
19235
    /* Start with first prime in composite. */
19236
0
    i = 0;
19237
0
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
19238
        /* Reduce a down to a single word.  */
19239
0
        err = sp_mod_d(a, sp_comp[j], &d);
19240
0
        if ((err != MP_OKAY) || (d == 0)) {
19241
0
            *result = MP_NO;
19242
0
            *haveRes = 1;
19243
0
            break;
19244
0
        }
19245
        /* Do trial division of d with small primes that make up composite. */
19246
0
        for (; i < sp_comp_idx[j]; i++) {
19247
            /* Small prime divides a when remainder is 0. */
19248
0
            if (d % sp_primes[i] == 0) {
19249
0
                *result = MP_NO;
19250
0
                *haveRes = 1;
19251
0
                break;
19252
0
            }
19253
0
        }
19254
0
    }
19255
0
#endif
19256
19257
0
    return err;
19258
0
}
19259
19260
/* Check whether a is prime by checking t iterations of Miller-Rabin.
19261
 *
19262
 * @param  [in]   a       SP integer to check.
19263
 * @param  [in]   trials  Number of trials of Miller-Rabin test to perform.
19264
 * @param  [out]  result  MP_YES when number is prime.
19265
 *                        MP_NO otherwise.
19266
 *
19267
 * @return  MP_OKAY on success.
19268
 * @return  MP_MEM when dynamic memory allocation fails.
19269
 */
19270
static int _sp_prime_trials(const sp_int* a, int trials, int* result)
19271
0
{
19272
0
    int err = MP_OKAY;
19273
0
    int i;
19274
0
    DECL_SP_INT(n1, a->used + 1);
19275
0
    DECL_SP_INT(r, a->used + 1);
19276
0
    DECL_SP_INT(b, a->used * 2 + 1);
19277
19278
0
    ALLOC_SP_INT(n1, a->used + 1, err, NULL);
19279
0
    ALLOC_SP_INT(r, a->used + 1, err, NULL);
19280
    /* Allocate number that will hold modular exponentiation result. */
19281
0
    ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
19282
0
    if (err == MP_OKAY) {
19283
0
        _sp_init_size(n1, a->used + 1U);
19284
0
        _sp_init_size(r, a->used + 1U);
19285
0
        _sp_init_size(b, (sp_size_t)(a->used * 2U + 1U));
19286
19287
        /* Do requested number of trials of Miller-Rabin test. */
19288
0
        for (i = 0; i < trials; i++) {
19289
            /* Miller-Rabin test with known small prime. */
19290
0
            _sp_set(b, sp_primes[i]);
19291
0
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19292
0
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19293
0
                break;
19294
0
            }
19295
0
        }
19296
19297
        /* Clear temporary values. */
19298
0
        sp_clear(n1);
19299
0
        sp_clear(r);
19300
0
        sp_clear(b);
19301
0
    }
19302
19303
    /* Free allocated temporary. */
19304
0
    FREE_SP_INT(b, NULL);
19305
0
    FREE_SP_INT(r, NULL);
19306
0
    FREE_SP_INT(n1, NULL);
19307
0
    return err;
19308
0
}
19309
19310
/* Check whether a is prime.
19311
 * Checks against a number of small primes and does t iterations of
19312
 * Miller-Rabin.
19313
 *
19314
 * @param  [in]   a       SP integer to check.
19315
 * @param  [in]   trials  Number of trials of Miller-Rabin test to perform.
19316
 * @param  [out]  result  MP_YES when number is prime.
19317
 *                        MP_NO otherwise.
19318
 *
19319
 * @return  MP_OKAY on success.
19320
 * @return  MP_VAL when a or result is NULL, or trials is out of range.
19321
 * @return  MP_MEM when dynamic memory allocation fails.
19322
 */
19323
int sp_prime_is_prime(const sp_int* a, int trials, int* result)
19324
0
{
19325
0
    int         err = MP_OKAY;
19326
0
    int         haveRes = 0;
19327
19328
    /* Validate parameters. */
19329
0
    if ((a == NULL) || (result == NULL)) {
19330
0
        if (result != NULL) {
19331
0
            *result = MP_NO;
19332
0
        }
19333
0
        err = MP_VAL;
19334
0
    }
19335
0
    else if (a->used * 2 >= SP_INT_DIGITS) {
19336
0
        err = MP_VAL;
19337
0
    }
19338
    /* Check validity of Miller-Rabin iterations count.
19339
     * Must do at least one and need a unique pre-computed prime for each
19340
     * iteration.
19341
     */
19342
0
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19343
0
        *result = MP_NO;
19344
0
        err = MP_VAL;
19345
0
    }
19346
19347
    /* Short-cut, 1 is not prime. */
19348
0
    if ((err == MP_OKAY) && sp_isone(a)) {
19349
0
        *result = MP_NO;
19350
0
        haveRes = 1;
19351
0
    }
19352
19353
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19354
19355
    /* Check against known small primes when a has 1 digit. */
19356
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19357
0
            (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
19358
0
        haveRes = sp_cmp_primes(a, result);
19359
0
    }
19360
19361
    /* Check all small primes for even divisibility. */
19362
0
    if ((err == MP_OKAY) && (!haveRes)) {
19363
0
        err = sp_div_primes(a, &haveRes, result);
19364
0
    }
19365
19366
    /* Check a number of iterations of Miller-Rabin with small primes. */
19367
0
    if ((err == MP_OKAY) && (!haveRes)) {
19368
0
        err = _sp_prime_trials(a, trials, result);
19369
0
    }
19370
19371
0
    RESTORE_VECTOR_REGISTERS();
19372
19373
0
    return err;
19374
0
}
19375
19376
#ifndef WC_NO_RNG
19377
/* Check whether a is prime by doing t iterations of Miller-Rabin.
19378
 *
19379
 * t random numbers should give a (1/4)^t chance of a false prime.
19380
 *
19381
 * @param  [in]   a       SP integer to check.
19382
 * @param  [in]   trials  Number of iterations of Miller-Rabin test to perform.
19383
 * @param  [out]  result  MP_YES when number is prime.
19384
 *                        MP_NO otherwise.
19385
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
19386
 *
19387
 * @return  MP_OKAY on success.
19388
 * @return  MP_VAL when a, result or rng is NULL.
19389
 * @return  MP_MEM when dynamic memory allocation fails.
19390
 */
19391
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
19392
    WC_RNG* rng)
19393
0
{
19394
0
    int err = MP_OKAY;
19395
0
    int bits = sp_count_bits(a);
19396
0
    word32 baseSz = ((word32)bits + 7) / 8;
19397
0
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
19398
0
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);
19399
19400
0
    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
19401
0
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
19402
0
    if (err == MP_OKAY) {
19403
0
        sp_int* c  = ds[0];
19404
0
        sp_int* n1 = ds[1];
19405
0
        sp_int* b  = d[0];
19406
0
        sp_int* r  = d[1];
19407
19408
0
        _sp_init_size(c , a->used + 1U);
19409
0
        _sp_init_size(n1, a->used + 1U);
19410
0
        _sp_init_size(b , (sp_size_t)(a->used * 2U + 1U));
19411
0
        _sp_init_size(r , (sp_size_t)(a->used * 2U + 1U));
19412
19413
0
        _sp_sub_d(a, 2, c);
19414
19415
0
        bits &= SP_WORD_MASK;
19416
19417
        /* Keep trying random numbers until all trials complete. */
19418
0
        while (trials > 0) {
19419
            /* Generate random trial number. */
19420
0
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
19421
0
            if (err != MP_OKAY) {
19422
0
                break;
19423
0
            }
19424
0
            b->used = a->used;
19425
        #ifdef BIG_ENDIAN_ORDER
19426
            /* Fix top digit if fewer bytes than a full digit generated. */
19427
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
19428
                b->dp[b->used-1] >>=
19429
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
19430
            }
19431
        #endif /* BIG_ENDIAN_ORDER */
19432
19433
            /* Ensure the top word has no more bits than necessary. */
19434
0
            if (bits > 0) {
19435
0
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
19436
0
                sp_clamp(b);
19437
0
            }
19438
19439
            /* Can't use random value it is: 0, 1, a-2, a-1, >= a  */
19440
0
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
19441
0
                continue;
19442
0
            }
19443
19444
            /* Perform Miller-Rabin test with random value. */
19445
0
            err = sp_prime_miller_rabin(a, b, result, n1, r);
19446
0
            if ((err != MP_OKAY) || (*result == MP_NO)) {
19447
0
                break;
19448
0
            }
19449
19450
            /* Trial complete. */
19451
0
            trials--;
19452
0
        }
19453
19454
        /* Zeroize temporary values used when generating private prime. */
19455
0
        sp_forcezero(n1);
19456
0
        sp_forcezero(r);
19457
0
        sp_forcezero(b);
19458
0
        sp_forcezero(c);
19459
0
    }
19460
19461
0
    FREE_SP_INT_ARRAY(d, NULL);
19462
0
    FREE_SP_INT_ARRAY(ds, NULL);
19463
0
    return err;
19464
0
}
19465
#endif /*!WC_NO_RNG */
19466
19467
/* Check whether a is prime.
19468
 * Checks against a number of small primes and does t iterations of
19469
 * Miller-Rabin.
19470
 *
19471
 * @param  [in]   a       SP integer to check.
19472
 * @param  [in]   trials  Number of iterations of Miller-Rabin test to perform.
19473
 * @param  [out]  result  MP_YES when number is prime.
19474
 *                        MP_NO otherwise.
19475
 * @param  [in]   rng     Random number generator for Miller-Rabin testing.
19476
 *
19477
 * @return  MP_OKAY on success.
19478
 * @return  MP_VAL when a, result or rng is NULL.
19479
 * @return  MP_MEM when dynamic memory allocation fails.
19480
 */
19481
int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
19482
0
{
19483
0
    int err = MP_OKAY;
19484
0
    int ret = MP_YES;
19485
0
    int haveRes = 0;
19486
19487
0
    if ((a == NULL) || (result == NULL) || (rng == NULL)) {
19488
0
        err = MP_VAL;
19489
0
    }
19490
0
#ifndef WC_NO_RNG
19491
0
    if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
19492
0
        err = MP_VAL;
19493
0
    }
19494
0
#endif
19495
#ifdef WOLFSSL_SP_INT_NEGATIVE
19496
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
19497
        err = MP_VAL;
19498
    }
19499
#endif
19500
19501
    /* Ensure trials is valid. Maximum based on number of small primes
19502
     * available. */
19503
0
    if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
19504
0
        err = MP_VAL;
19505
0
    }
19506
19507
0
    if ((err == MP_OKAY) && sp_isone(a)) {
19508
0
        ret = MP_NO;
19509
0
        haveRes = 1;
19510
0
    }
19511
19512
0
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19513
19514
    /* Check against known small primes when a has 1 digit. */
19515
0
    if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
19516
0
            (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
19517
0
        haveRes = sp_cmp_primes(a, &ret);
19518
0
    }
19519
19520
    /* Check all small primes for even divisibility. */
19521
0
    if ((err == MP_OKAY) && (!haveRes)) {
19522
0
        err = sp_div_primes(a, &haveRes, &ret);
19523
0
    }
19524
19525
0
#ifndef WC_NO_RNG
19526
    /* Check a number of iterations of Miller-Rabin with random large values. */
19527
0
    if ((err == MP_OKAY) && (!haveRes)) {
19528
0
        err = _sp_prime_random_trials(a, trials, &ret, rng);
19529
0
    }
19530
#else
19531
    (void)trials;
19532
#endif /* !WC_NO_RNG */
19533
19534
0
    if (result != NULL) {
19535
0
        *result = ret;
19536
0
    }
19537
19538
0
    RESTORE_VECTOR_REGISTERS();
19539
19540
0
    return err;
19541
0
}
19542
#endif /* WOLFSSL_SP_PRIME_GEN */
19543
19544
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
19545
19546
/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
19547
 *
19548
 * Find the largest number that divides both a and b without remainder.
19549
 * r <= a, r <= b, a % r == 0, b % r == 0
19550
 *
19551
 * a and b are positive integers.
19552
 *
19553
 * Euclidean Algorithm:
19554
 *  1. If a > b then a = b, b = a
19555
 *  2. u = a
19556
 *  3. v = b % a
19557
 *  4. While v != 0
19558
 *   4.1. t = u % v
19559
 *   4.2. u <= v, v <= t, t <= u
19560
 *  5. r = u
19561
 *
19562
 * @param  [in]   a  SP integer of first operand.
19563
 * @param  [in]   b  SP integer of second operand.
19564
 * @param  [out]  r  SP integer to hold result.
19565
 *
19566
 * @return  MP_OKAY on success.
19567
 * @return  MP_MEM when dynamic memory allocation fails.
19568
 */
19569
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19570
{
19571
    int err = MP_OKAY;
19572
    sp_int* u = NULL;
19573
    sp_int* v = NULL;
19574
    sp_int* t = NULL;
19575
    /* Used for swapping sp_ints. */
19576
    sp_int* s;
19577
    /* Determine maximum digit length numbers will reach. */
19578
    unsigned int used = (a->used >= b->used) ? a->used + 1U : b->used + 1U;
19579
    DECL_SP_INT_ARRAY(d, used, 3);
19580
19581
    SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19582
19583
    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
19584
    if (err == MP_OKAY) {
19585
        u = d[0];
19586
        v = d[1];
19587
        t = d[2];
19588
19589
        _sp_init_size(u, used);
19590
        _sp_init_size(v, used);
19591
        _sp_init_size(t, used);
19592
19593
        /* 1. If a > b then a = b, b = a.
19594
         *    Make a <= b.
19595
         */
19596
        if (_sp_cmp(a, b) == MP_GT) {
19597
            const sp_int* tmp;
19598
            tmp = a;
19599
            a = b;
19600
            b = tmp;
19601
        }
19602
        /* 2. u = a, v = b mod a */
19603
        _sp_copy(a, u);
19604
        /* 3. v = b mod a */
19605
        if (a->used == 1) {
19606
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
19607
            v->used = (v->dp[0] != 0);
19608
        }
19609
        else {
19610
            err = sp_mod(b, a, v);
19611
        }
19612
    }
19613
19614
    /* 4. While v != 0 */
19615
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
19616
     * digit.
19617
     */
19618
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
19619
        /* u' = v, v' = u mod v */
19620
        /* 4.1 t = u mod v */
19621
        if (v->used == 1) {
19622
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
19623
            t->used = (t->dp[0] != 0);
19624
        }
19625
        else {
19626
            err = sp_mod(u, v, t);
19627
        }
19628
        /* 4.2. u <= v, v <= t, t <= u */
19629
        s = u; u = v; v = t; t = s;
19630
    }
19631
    /* Only one digit remaining in u and v. */
19632
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
19633
        /* u' = v, v' = u mod v */
19634
        /* 4.1 t = u mod v */
19635
        t->dp[0] = u->dp[0] % v->dp[0];
19636
        t->used = (t->dp[0] != 0);
19637
        /* 4.2. u <= v, v <= t, t <= u */
19638
        s = u; u = v; v = t; t = s;
19639
    }
19640
    if (err == MP_OKAY) {
19641
        /* 5. r = u */
19642
        _sp_copy(u, r);
19643
    }
19644
19645
    FREE_SP_INT_ARRAY(d, NULL);
19646
19647
    RESTORE_VECTOR_REGISTERS();
19648
19649
    return err;
19650
}
19651
19652
/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
19653
 *
19654
 * Find the largest number that divides both a and b without remainder.
19655
 * r <= a, r <= b, a % r == 0, b % r == 0
19656
 *
19657
 * a and b are positive integers.
19658
 *
19659
 * @param  [in]   a  SP integer of first operand.
19660
 * @param  [in]   b  SP integer of second operand.
19661
 * @param  [out]  r  SP integer to hold result.
19662
 *
19663
 * @return  MP_OKAY on success.
19664
 * @return  MP_VAL when a, b or r is NULL or too large.
19665
 * @return  MP_MEM when dynamic memory allocation fails.
19666
 */
19667
int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
19668
{
19669
    int err = MP_OKAY;
19670
19671
    /* Validate parameters. */
19672
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19673
        err = MP_VAL;
19674
    }
19675
    /* Check that we have space in numbers to do work. */
19676
    else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
19677
        err = MP_VAL;
19678
    }
19679
    /* Check that r is large enough to hold maximum sized result. */
19680
    else if (((a->used <= b->used) && (r->size < a->used)) ||
19681
             ((b->used < a->used) && (r->size < b->used))) {
19682
        err = MP_VAL;
19683
    }
19684
#ifdef WOLFSSL_SP_INT_NEGATIVE
19685
    /* Algorithm doesn't work with negative numbers. */
19686
    else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
19687
        err = MP_VAL;
19688
    }
19689
#endif
19690
    else if (sp_iszero(a)) {
19691
        /* GCD of 0 and 0 is undefined - all integers divide 0. */
19692
        if (sp_iszero(b)) {
19693
            err = MP_VAL;
19694
        }
19695
        else {
19696
            /* GCD of 0 and b is b - b divides 0. */
19697
            err = sp_copy(b, r);
19698
        }
19699
    }
19700
    else if (sp_iszero(b)) {
19701
        /* GCD of 0 and a is a - a divides 0. */
19702
        err = sp_copy(a, r);
19703
    }
19704
    else {
19705
        /* Calculate GCD. */
19706
        err = _sp_gcd(a, b, r);
19707
    }
19708
19709
    return err;
19710
}
19711
19712
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
19713
19714
#if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
19715
    (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
19716
19717
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19718
 * Smallest number divisible by both numbers.
19719
 *
19720
 * a and b are positive integers.
19721
 *
19722
 * lcm(a, b) = (a / gcd(a, b)) * b
19723
 * Divide the common divisor from a and multiply by b.
19724
 *
19725
 * Algorithm:
19726
 *  1. t0 = gcd(a, b)
19727
 *  2. If a > b then
19728
 *   2.1. t1 = a / t0
19729
 *   2.2. r = b * t1
19730
 *  3. Else
19731
 *   3.1. t1 = b / t0
19732
 *   3.2. r = a * t1
19733
 *
19734
 * @param  [in]   a  SP integer of first operand.
19735
 * @param  [in]   b  SP integer of second operand.
19736
 * @param  [out]  r  SP integer to hold result.
19737
 *
19738
 * @return  MP_OKAY on success.
19739
 * @return  MP_MEM when dynamic memory allocation fails.
19740
 */
19741
static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19742
{
19743
    int err = MP_OKAY;
19744
    /* Determine maximum digit length numbers will reach. */
19745
    unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
19746
    DECL_SP_INT_ARRAY(t, used, 2);
19747
19748
    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
19749
    if (err == MP_OKAY) {
19750
        _sp_init_size(t[0], used);
19751
        _sp_init_size(t[1], used);
19752
19753
        SAVE_VECTOR_REGISTERS(err = _svr_ret;);
19754
19755
        if (err == MP_OKAY) {
19756
            /* 1. t0 = gcd(a, b) */
19757
            err = sp_gcd(a, b, t[0]);
19758
        }
19759
19760
        if (err == MP_OKAY) {
19761
            /* Divide the greater by the common divisor and multiply by other
19762
             * to operate on the smallest length numbers.
19763
             */
19764
            /* 2. If a > b then */
19765
            if (_sp_cmp_abs(a, b) == MP_GT) {
19766
                /* 2.1. t1 = a / t0 */
19767
                err = sp_div(a, t[0], t[1], NULL);
19768
                if (err == MP_OKAY) {
19769
                    /* 2.2. r = b * t1 */
19770
                    err = sp_mul(b, t[1], r);
19771
                }
19772
            }
19773
            /* 3. Else */
19774
            else {
19775
                /* 3.1. t1 = b / t0 */
19776
                err = sp_div(b, t[0], t[1], NULL);
19777
                if (err == MP_OKAY) {
19778
                    /* 3.2. r = a * t1 */
19779
                    err = sp_mul(a, t[1], r);
19780
                }
19781
            }
19782
        }
19783
19784
        RESTORE_VECTOR_REGISTERS();
19785
    }
19786
19787
    FREE_SP_INT_ARRAY(t, NULL);
19788
    return err;
19789
}
19790
19791
/* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
19792
 * Smallest number divisible by both numbers.
19793
 *
19794
 * a and b are positive integers.
19795
 *
19796
 * @param  [in]   a  SP integer of first operand.
19797
 * @param  [in]   b  SP integer of second operand.
19798
 * @param  [out]  r  SP integer to hold result.
19799
 *
19800
 * @return  MP_OKAY on success.
19801
 * @return  MP_VAL when a, b or r is NULL; or a or b is zero.
19802
 * @return  MP_MEM when dynamic memory allocation fails.
19803
 */
19804
int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
19805
{
19806
    int err = MP_OKAY;
19807
19808
    /* Validate parameters. */
19809
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
19810
        err = MP_VAL;
19811
    }
19812
#ifdef WOLFSSL_SP_INT_NEGATIVE
19813
    /* Ensure a and b are positive. */
19814
    else if ((a->sign == MP_NEG) || (b->sign >= MP_NEG)) {
19815
        err = MP_VAL;
19816
    }
19817
#endif
19818
    /* Ensure r has space for maximumal result. */
19819
    else if (r->size < a->used + b->used) {
19820
        err = MP_VAL;
19821
    }
19822
19823
    /* LCM of 0 and any number is undefined as 0 is not in the set of values
19824
     * being used.
19825
     */
19826
    if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
19827
        err = MP_VAL;
19828
    }
19829
19830
    if (err == MP_OKAY) {
19831
        /* Do operation. */
19832
        err = _sp_lcm(a, b, r);
19833
    }
19834
19835
    return err;
19836
}
19837
19838
#endif /* !NO_RSA && WOLFSSL_KEY_GEN && (!WC_RSA_BLINDING || HAVE_FIPS ||
19839
        * HAVE_SELFTEST) */
19840
19841
/* Returns the run time settings.
19842
 *
19843
 * @return  Settings value.
19844
 */
19845
word32 CheckRunTimeSettings(void)
19846
0
{
19847
0
    return CTC_SETTINGS;
19848
0
}
19849
19850
/* Returns the fast math settings.
19851
 *
19852
 * @return  Setting - number of bits in a digit.
19853
 */
19854
word32 CheckRunTimeFastMath(void)
19855
0
{
19856
0
    return SP_WORD_SIZE;
19857
0
}
19858
19859
#ifdef WOLFSSL_CHECK_MEM_ZERO
19860
/* Add an MP to check.
19861
 *
19862
 * @param [in] name  Name of address to check.
19863
 * @param [in] sp    sp_int that needs to be checked.
19864
 */
19865
void sp_memzero_add(const char* name, sp_int* sp)
19866
{
19867
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_int_digit));
19868
}
19869
19870
/* Check the memory in the data pointer for memory that must be zero.
19871
 *
19872
 * @param [in] sp    sp_int that needs to be checked.
19873
 */
19874
void sp_memzero_check(sp_int* sp)
19875
{
19876
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_int_digit));
19877
}
19878
#endif /* WOLFSSL_CHECK_MEM_ZERO */
19879
19880
#ifdef WOLFSSL_SP_DYN_STACK
19881
    PRAGMA_GCC_DIAG_POP
19882
#endif
19883
19884
#endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */